From 86bea9e0016b2890db8ba83049dd4e89653a0a5e Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Fri, 16 Mar 2012 17:29:39 -0400
Subject: [PATCH] Ensure that the welcome message is interpreted as UTF-8 (issue 74)

---
 src/com/gitblit/LuceneExecutor.java |  271 +++++++++++++++++++++++++++++++++--------------------
 1 files changed, 167 insertions(+), 104 deletions(-)

diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index 527609e..e4ee0b6 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -21,6 +21,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.lang.reflect.Method;
 import java.text.MessageFormat;
 import java.text.ParseException;
 import java.util.ArrayList;
@@ -88,6 +89,7 @@
 import com.gitblit.models.IssueModel.Attachment;
 import com.gitblit.models.PathModel.PathChangeModel;
 import com.gitblit.models.RefModel;
+import com.gitblit.models.RepositoryModel;
 import com.gitblit.models.SearchResult;
 import com.gitblit.utils.ArrayUtils;
 import com.gitblit.utils.IssueUtils;
@@ -103,14 +105,13 @@
 public class LuceneExecutor implements Runnable {
 	
 		
-	private static final int INDEX_VERSION = 1;
+	private static final int INDEX_VERSION = 2;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_ISSUE = "issue";
 	private static final String FIELD_PATH = "path";
 	private static final String FIELD_COMMIT = "commit";
 	private static final String FIELD_BRANCH = "branch";
-	private static final String FIELD_REPOSITORY = "repository";
 	private static final String FIELD_SUMMARY = "summary";
 	private static final String FIELD_CONTENT = "content";
 	private static final String FIELD_AUTHOR = "author";
@@ -141,9 +142,6 @@
 			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
 			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
 
-	private final Set<String> excludedBranches = new TreeSet<String>(
-			Arrays.asList("/refs/heads/gb-issues"));
-	
 	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
 		this.storedSettings = settings;
 		this.repositoriesFolder = repositoriesFolder;
@@ -170,18 +168,14 @@
 			return;
 		}
 
-		for (String repositoryName : GitBlit.self().getRepositoryList()) {
-			Repository repository = GitBlit.self().getRepository(repositoryName);
-			if (repository == null) {
-				logger.warn(MessageFormat.format(
-						"Lucene executor could not find repository {0}. Skipping.",
-						repositoryName));
-				continue;
+		for (String repositoryName: GitBlit.self().getRepositoryList()) {
+			RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
+			if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
+				Repository repository = GitBlit.self().getRepository(model.name);
+				index(model, repository);				
+				repository.close();
+				System.gc();
 			}
-			// TODO allow repository to bypass Lucene indexing				
-			index(repositoryName, repository);
-			repository.close();
-			System.gc();
 		}
 	}
 
@@ -194,43 +188,63 @@
 	 * @param repository
 	 *            the repository object
 	 */
-	protected void index(String name, Repository repository) {
+	protected void index(RepositoryModel model, Repository repository) {
 		try {
-			if (JGitUtils.hasCommits(repository)) {
-				if (shouldReindex(repository)) {
-					// (re)build the entire index					
-					IndexResult result = reindex(name, repository);
-					
-					if (result.success) {
-						if (result.commitCount > 0) {
-							String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
-							logger.info(MessageFormat.format(msg, name,
-									result.commitCount, result.blobCount, result.branchCount, result.duration()));
-						}
-					} else {
-						String msg = "Could not build {0} Lucene index!";
-						logger.error(MessageFormat.format(msg, name));
+			if (shouldReindex(repository)) {
+				// (re)build the entire index
+				IndexResult result = reindex(model, repository);
+
+				if (result.success) {
+					if (result.commitCount > 0) {
+						String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
+						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
+								result.blobCount, result.branchCount, result.duration()));
 					}
 				} else {
-					// update the index with latest commits					
-					IndexResult result = updateIndex(name, repository);
-					if (result.success) {
-						if (result.commitCount > 0) {
-							String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
-							logger.info(MessageFormat.format(msg, name,
-									result.commitCount, result.blobCount, result.branchCount, result.duration()));
-						}
-					} else {
-						String msg = "Could not update {0} Lucene index!";
-						logger.error(MessageFormat.format(msg, name));
-					}
+					String msg = "Could not build {0} Lucene index!";
+					logger.error(MessageFormat.format(msg, model.name));
 				}
 			} else {
-				logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",
-						name));
+				// update the index with latest commits
+				IndexResult result = updateIndex(model, repository);
+				if (result.success) {
+					if (result.commitCount > 0) {
+						String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
+						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
+								result.blobCount, result.branchCount, result.duration()));
+					}
+				} else {
+					String msg = "Could not update {0} Lucene index!";
+					logger.error(MessageFormat.format(msg, model.name));
+				}
 			}
 		} catch (Throwable t) {
-			logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t);
+			logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
+		}
+	}
+	
+	/**
+	 * Close the writer/searcher objects for a repository.
+	 * Each object is removed from writers/searchers first; a close failure is logged, not rethrown.
+	 * @param repositoryName name of the repository whose index writer and searcher are closed
+	 */
+	public void close(String repositoryName) {
+		try {
+			IndexWriter writer = writers.remove(repositoryName);
+			if (writer != null) {
+				writer.close();
+			}
+		} catch (Exception e) {
+			logger.error("Failed to close index writer for " + repositoryName, e);
+		}
+
+		try {
+			IndexSearcher searcher = searchers.remove(repositoryName);
+			if (searcher != null) {
+				searcher.close();
+			}
+		} catch (Exception e) {
+			logger.error("Failed to close index searcher for " + repositoryName, e);
 		}
 	}
 
@@ -335,6 +349,24 @@
 		}
 		return name;
 	}
+	
+	/**
+	 * Get the tree associated with the given commit, parsing the commit headers first if the tree is not yet set.
+	 *
+	 * @param walk the walk used to parse the commit headers when the tree is missing
+	 * @param commit the commit whose tree is requested
+	 * @return the tree reported by the commit
+	 * @throws IOException if parsing the commit headers fails
+	 */
+	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
+			throws IOException {
+		final RevTree tree = commit.getTree();
+		if (tree != null) {
+			return tree;
+		}
+		walk.parseHeaders(commit);
+		return commit.getTree();
+	}
 
 	/**
 	 * Construct a keyname from the branch.
@@ -387,15 +419,15 @@
 	 * @param repository
 	 * @return IndexResult
 	 */
-	public IndexResult reindex(String repositoryName, Repository repository) {
+	public IndexResult reindex(RepositoryModel model, Repository repository) {
 		IndexResult result = new IndexResult();
-		if (!deleteIndex(repositoryName)) {
+		if (!deleteIndex(model.name)) {
 			return result;
 		}
 		try {			
 			FileBasedConfig config = getConfig(repository);
 			Set<String> indexedCommits = new TreeSet<String>();
-			IndexWriter writer = getIndexWriter(repositoryName);
+			IndexWriter writer = getIndexWriter(model.name);
 			// build a quick lookup of tags
 			Map<String, List<String>> tags = new HashMap<String, List<String>>();
 			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
@@ -436,7 +468,9 @@
 			
 			// walk through each branch
 			for (RefModel branch : branches) {
-				if (excludedBranches.contains(branch.getName())) {
+
+				// if this branch is not specifically indexed then skip
+				if (!model.indexedBranches.contains(branch.getName())) {
 					continue;
 				}
 
@@ -501,7 +535,6 @@
 						
 						Document doc = new Document();
 						doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
-						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
@@ -542,7 +575,6 @@
 				// index the tip commit object
 				if (indexedCommits.add(tipId)) {
 					Document doc = createDocument(tip, tags.get(tipId));
-					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
 					writer.addDocument(doc);
 					result.commitCount += 1;
@@ -557,7 +589,6 @@
 					String hash = rev.getId().getName();
 					if (indexedCommits.add(hash)) {
 						Document doc = createDocument(rev, tags.get(hash));
-						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
 						writer.addDocument(doc);
 						result.commitCount += 1;
@@ -577,7 +608,6 @@
 				for (IssueModel issue : issues) {
 					result.issueCount++;
 					Document doc = createDocument(issue);
-					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					writer.addDocument(doc);
 				}
 			}
@@ -585,33 +615,15 @@
 			// commit all changes and reset the searcher
 			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
 			config.save();
-			resetIndexSearcher(repositoryName);
+			resetIndexSearcher(model.name);
 			writer.commit();
 			result.success();
 		} catch (Exception e) {
-			logger.error("Exception while reindexing " + repositoryName, e);
+			logger.error("Exception while reindexing " + model.name, e);
 		}
 		return result;
 	}
 	
-	/**
-	 * Get the tree associated with the given commit.
-	 *
-	 * @param walk
-	 * @param commit
-	 * @return tree
-	 * @throws IOException
-	 */
-	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
-			throws IOException {
-		final RevTree tree = commit.getTree();
-		if (tree != null) {
-			return tree;
-		}
-		walk.parseHeaders(commit);
-		return commit.getTree();
-	}
-
 	/**
 	 * Incrementally update the index with the specified commit for the
 	 * repository.
@@ -627,24 +639,6 @@
 			String branch, RevCommit commit) {
 		IndexResult result = new IndexResult();
 		try {
-			if (excludedBranches.contains(branch)) {
-				if (IssueUtils.GB_ISSUES.equals(branch)) {
-					// index an issue
-					String issueId = commit.getShortMessage().substring(2).trim();
-					IssueModel issue = IssueUtils.getIssue(repository, issueId);
-					if (issue == null) {
-						// issue was deleted, remove from index
-						deleteIssue(repositoryName, issueId);
-						result.success = true;
-						return result;
-					}
-					result.success = index(repositoryName, issue);
-					result.issueCount++;
-					return result;
-					
-				}
-				return result;
-			}
 			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
 			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
 					Resolution.MINUTE);
@@ -659,7 +653,6 @@
 					Document doc = new Document();
 					doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
 							Index.NOT_ANALYZED));
-					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
@@ -759,11 +752,11 @@
 	/**
 	 * Updates a repository index incrementally from the last indexed commits.
 	 * 
-	 * @param repositoryName
+	 * @param model
 	 * @param repository
 	 * @return IndexResult
 	 */
-	protected IndexResult updateIndex(String repositoryName, Repository repository) {
+	protected IndexResult updateIndex(RepositoryModel model, Repository repository) {
 		IndexResult result = new IndexResult();
 		try {
 			FileBasedConfig config = getConfig(repository);
@@ -796,6 +789,12 @@
 			for (RefModel branch : branches) {
 				String branchName = branch.getName();
 
+				// skip unless this is the issues branch or an indexed branch (compare names: a String never equals a RefModel)
+				if (!IssueUtils.GB_ISSUES.equals(branchName)
+						&& !model.indexedBranches.contains(branchName)) {
+					continue;
+				}
+				
 				// remove this branch from the deletedBranches set
 				deletedBranches.remove(branchName);
 
@@ -816,10 +815,33 @@
 					result.branchCount += 1;
 				}
 				
+				// track the issue ids that we have already indexed
+				Set<String> indexedIssues = new TreeSet<String>();
+				
 				// reverse the list of commits so we start with the first commit				
 				Collections.reverse(revs);
-				for (RevCommit commit : revs) {
-					result.add(index(repositoryName, repository, branchName, commit));					
+				for (RevCommit commit : revs) {					
+					if (IssueUtils.GB_ISSUES.equals(branchName)) {
+						// only index an issue once during updateIndex
+						String issueId = commit.getShortMessage().substring(2).trim();
+						if (indexedIssues.contains(issueId)) {
+							continue;
+						}
+						indexedIssues.add(issueId);
+						
+						IssueModel issue = IssueUtils.getIssue(repository, issueId);
+						if (issue == null) {
+							// issue was deleted, remove from index
+							deleteIssue(model.name, issueId);
+						} else {
+							// issue was updated
+							index(model.name, issue);
+							result.issueCount++;
+						}
+					} else {
+						// index a commit
+						result.add(index(model.name, repository, branchName, commit));
+					}
 				}
 
 				// update the config
@@ -833,18 +855,18 @@
 			// unless a branch really was deleted and no longer exists
 			if (deletedBranches.size() > 0) {
 				for (String branch : deletedBranches) {
-					IndexWriter writer = getIndexWriter(repositoryName);
+					IndexWriter writer = getIndexWriter(model.name);
 					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
 					writer.commit();
 				}
 			}
 			result.success = true;
 		} catch (Throwable t) {
-			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t);
+			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
 		}
 		return result;
 	}
-
+	
 	/**
 	 * Creates a Lucene document from an issue.
 	 * 
@@ -906,7 +928,6 @@
 	 */
 	private boolean index(String repositoryName, Document doc) {
 		try {			
-			doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
 			IndexWriter writer = getIndexWriter(repositoryName);
 			writer.addDocument(doc);
 			resetIndexSearcher(repositoryName);
@@ -926,7 +947,6 @@
 		result.author = doc.get(FIELD_AUTHOR);
 		result.committer = doc.get(FIELD_COMMITTER);
 		result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
-		result.repository = doc.get(FIELD_REPOSITORY);
 		result.branch = doc.get(FIELD_BRANCH);
 		result.commitId = doc.get(FIELD_COMMIT);
 		result.issueId = doc.get(FIELD_ISSUE);
@@ -1057,7 +1077,7 @@
 					readers.add(repositoryIndex.getIndexReader());
 				}
 				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
-				MultiReader reader = new MultiReader(rdrs);
+				MultiSourceReader reader = new MultiSourceReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
 			Query rewrittenQuery = searcher.rewrite(query);
@@ -1067,8 +1087,17 @@
 			for (int i = 0; i < hits.length; i++) {
 				int docId = hits[i].doc;
 				Document doc = searcher.doc(docId);
-				// TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY
+				// the source repository is resolved below because FIELD_REPOSITORY is no longer stored in the index
 				SearchResult result = createSearchResult(doc, hits[i].score);
+				if (repositories.length == 1) {
+					// single repository search
+					result.repository = repositories[0];
+				} else {
+					// multi-repository search
+					MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
+					int index = reader.getSourceIndex(docId);
+					result.repository = repositories[index];
+				}
 				String content = doc.get(FIELD_CONTENT);				
 				result.fragment = getHighlightedFragment(analyzer, query, content, result);
 				results.add(result);
@@ -1188,4 +1217,38 @@
 			return (endTime - startTime)/1000f;
 		}
 	}
+	
+	/**
+	 * Custom subclass of MultiReader to identify the source index for a given
+	 * doc id.  This would not be necessary if there were a public method to
+	 * obtain this information.
+	 * Reflectively calls MultiReader.readerIndex(int) after making it accessible; getSourceIndex returns -1 on failure.
+	 */
+	private class MultiSourceReader extends MultiReader {
+		
+		final Method method;
+		
+		MultiSourceReader(IndexReader[] subReaders) {
+			super(subReaders);
+			Method m = null;
+			try {
+				m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
+				m.setAccessible(true);
+			} catch (Exception e) {
+				logger.error("Error getting readerIndex method", e);
+			}
+			method = m;
+		}
+		
+		int getSourceIndex(int docId) {
+			int index = -1;
+			try {
+				Object o = method.invoke(this, docId);
+				index = (Integer) o;
+			} catch (Exception e) {
+				logger.error("Error getting source index", e);
+			}
+			return index;
+		}
+	}
 }

--
Gitblit v1.9.1