From 86bea9e0016b2890db8ba83049dd4e89653a0a5e Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Fri, 16 Mar 2012 17:29:39 -0400 Subject: [PATCH] Ensure that the welcome message is interpreted as UTF-8 (issue 74) --- src/com/gitblit/LuceneExecutor.java | 271 +++++++++++++++++++++++++++++++++-------------------- 1 files changed, 167 insertions(+), 104 deletions(-) diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java index 527609e..e4ee0b6 100644 --- a/src/com/gitblit/LuceneExecutor.java +++ b/src/com/gitblit/LuceneExecutor.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.Method; import java.text.MessageFormat; import java.text.ParseException; import java.util.ArrayList; @@ -88,6 +89,7 @@ import com.gitblit.models.IssueModel.Attachment; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; +import com.gitblit.models.RepositoryModel; import com.gitblit.models.SearchResult; import com.gitblit.utils.ArrayUtils; import com.gitblit.utils.IssueUtils; @@ -103,14 +105,13 @@ public class LuceneExecutor implements Runnable { - private static final int INDEX_VERSION = 1; + private static final int INDEX_VERSION = 2; private static final String FIELD_OBJECT_TYPE = "type"; private static final String FIELD_ISSUE = "issue"; private static final String FIELD_PATH = "path"; private static final String FIELD_COMMIT = "commit"; private static final String FIELD_BRANCH = "branch"; - private static final String FIELD_REPOSITORY = "repository"; private static final String FIELD_SUMMARY = "summary"; private static final String FIELD_CONTENT = "content"; private static final String FIELD_AUTHOR = "author"; @@ -141,9 +142,6 @@ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); - private final Set<String> 
excludedBranches = new TreeSet<String>( - Arrays.asList("/refs/heads/gb-issues")); - public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) { this.storedSettings = settings; this.repositoriesFolder = repositoriesFolder; @@ -170,18 +168,14 @@ return; } - for (String repositoryName : GitBlit.self().getRepositoryList()) { - Repository repository = GitBlit.self().getRepository(repositoryName); - if (repository == null) { - logger.warn(MessageFormat.format( - "Lucene executor could not find repository {0}. Skipping.", - repositoryName)); - continue; + for (String repositoryName: GitBlit.self().getRepositoryList()) { + RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName); + if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) { + Repository repository = GitBlit.self().getRepository(model.name); + index(model, repository); + repository.close(); + System.gc(); } - // TODO allow repository to bypass Lucene indexing - index(repositoryName, repository); - repository.close(); - System.gc(); } } @@ -194,43 +188,63 @@ * @param repository * the repository object */ - protected void index(String name, Repository repository) { + protected void index(RepositoryModel model, Repository repository) { try { - if (JGitUtils.hasCommits(repository)) { - if (shouldReindex(repository)) { - // (re)build the entire index - IndexResult result = reindex(name, repository); - - if (result.success) { - if (result.commitCount > 0) { - String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs"; - logger.info(MessageFormat.format(msg, name, - result.commitCount, result.blobCount, result.branchCount, result.duration())); - } - } else { - String msg = "Could not build {0} Lucene index!"; - logger.error(MessageFormat.format(msg, name)); + if (shouldReindex(repository)) { + // (re)build the entire index + IndexResult result = reindex(model, repository); + + if (result.success) { + if (result.commitCount > 0) { 
+ String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs"; + logger.info(MessageFormat.format(msg, model.name, result.commitCount, + result.blobCount, result.branchCount, result.duration())); } } else { - // update the index with latest commits - IndexResult result = updateIndex(name, repository); - if (result.success) { - if (result.commitCount > 0) { - String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; - logger.info(MessageFormat.format(msg, name, - result.commitCount, result.blobCount, result.branchCount, result.duration())); - } - } else { - String msg = "Could not update {0} Lucene index!"; - logger.error(MessageFormat.format(msg, name)); - } + String msg = "Could not build {0} Lucene index!"; + logger.error(MessageFormat.format(msg, model.name)); } } else { - logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}", - name)); + // update the index with latest commits + IndexResult result = updateIndex(model, repository); + if (result.success) { + if (result.commitCount > 0) { + String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; + logger.info(MessageFormat.format(msg, model.name, result.commitCount, + result.blobCount, result.branchCount, result.duration())); + } + } else { + String msg = "Could not update {0} Lucene index!"; + logger.error(MessageFormat.format(msg, model.name)); + } } } catch (Throwable t) { - logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t); + logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t); + } + } + + /** + * Close the writer/searcher objects for a repository. 
+ * + * @param repositoryName + */ + public void close(String repositoryName) { + try { + IndexWriter writer = writers.remove(repositoryName); + if (writer != null) { + writer.close(); + } + } catch (Exception e) { + logger.error("Failed to close index writer for " + repositoryName, e); + } + + try { + IndexSearcher searcher = searchers.remove(repositoryName); + if (searcher != null) { + searcher.close(); + } + } catch (Exception e) { + logger.error("Failed to close index searcher for " + repositoryName, e); } } @@ -335,6 +349,24 @@ } return name; } + + /** + * Get the tree associated with the given commit. + * + * @param walk + * @param commit + * @return tree + * @throws IOException + */ + protected RevTree getTree(final RevWalk walk, final RevCommit commit) + throws IOException { + final RevTree tree = commit.getTree(); + if (tree != null) { + return tree; + } + walk.parseHeaders(commit); + return commit.getTree(); + } /** * Construct a keyname from the branch. @@ -387,15 +419,15 @@ * @param repository * @return IndexResult */ - public IndexResult reindex(String repositoryName, Repository repository) { + public IndexResult reindex(RepositoryModel model, Repository repository) { IndexResult result = new IndexResult(); - if (!deleteIndex(repositoryName)) { + if (!deleteIndex(model.name)) { return result; } try { FileBasedConfig config = getConfig(repository); Set<String> indexedCommits = new TreeSet<String>(); - IndexWriter writer = getIndexWriter(repositoryName); + IndexWriter writer = getIndexWriter(model.name); // build a quick lookup of tags Map<String, List<String>> tags = new HashMap<String, List<String>>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { @@ -436,7 +468,9 @@ // walk through each branch for (RefModel branch : branches) { - if (excludedBranches.contains(branch.getName())) { + + // if this branch is not specifically indexed then skip + if (!model.indexedBranches.contains(branch.getName())) { continue; } @@ -501,7 +535,6 @@ 
Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED)); @@ -542,7 +575,6 @@ // index the tip commit object if (indexedCommits.add(tipId)) { Document doc = createDocument(tip, tags.get(tipId)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; @@ -557,7 +589,6 @@ String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; @@ -577,7 +608,6 @@ for (IssueModel issue : issues) { result.issueCount++; Document doc = createDocument(issue); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } } @@ -585,33 +615,15 @@ // commit all changes and reset the searcher config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); config.save(); - resetIndexSearcher(repositoryName); + resetIndexSearcher(model.name); writer.commit(); result.success(); } catch (Exception e) { - logger.error("Exception while reindexing " + repositoryName, e); + logger.error("Exception while reindexing " + model.name, e); } return result; } - /** - * Get the tree associated with the given commit. 
- * - * @param walk - * @param commit - * @return tree - * @throws IOException - */ - protected RevTree getTree(final RevWalk walk, final RevCommit commit) - throws IOException { - final RevTree tree = commit.getTree(); - if (tree != null) { - return tree; - } - walk.parseHeaders(commit); - return commit.getTree(); - } - /** * Incrementally update the index with the specified commit for the * repository. @@ -627,24 +639,6 @@ String branch, RevCommit commit) { IndexResult result = new IndexResult(); try { - if (excludedBranches.contains(branch)) { - if (IssueUtils.GB_ISSUES.equals(branch)) { - // index an issue - String issueId = commit.getShortMessage().substring(2).trim(); - IssueModel issue = IssueUtils.getIssue(repository, issueId); - if (issue == null) { - // issue was deleted, remove from index - deleteIssue(repositoryName, issueId); - result.success = true; - return result; - } - result.success = index(repositoryName, issue); - result.issueCount++; - return result; - - } - return result; - } List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); @@ -659,7 +653,6 @@ Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED)); @@ -759,11 +752,11 @@ /** * Updates a repository index incrementally from the last indexed commits. 
* - * @param repositoryName + * @param model * @param repository * @return IndexResult */ - protected IndexResult updateIndex(String repositoryName, Repository repository) { + protected IndexResult updateIndex(RepositoryModel model, Repository repository) { IndexResult result = new IndexResult(); try { FileBasedConfig config = getConfig(repository); @@ -796,6 +789,12 @@ for (RefModel branch : branches) { String branchName = branch.getName(); + // determine if we should skip this branch + if (!IssueUtils.GB_ISSUES.equals(branch) + && !model.indexedBranches.contains(branch.getName())) { + continue; + } + // remove this branch from the deletedBranches set deletedBranches.remove(branchName); @@ -816,10 +815,33 @@ result.branchCount += 1; } + // track the issue ids that we have already indexed + Set<String> indexedIssues = new TreeSet<String>(); + // reverse the list of commits so we start with the first commit Collections.reverse(revs); - for (RevCommit commit : revs) { - result.add(index(repositoryName, repository, branchName, commit)); + for (RevCommit commit : revs) { + if (IssueUtils.GB_ISSUES.equals(branch)) { + // only index an issue once during updateIndex + String issueId = commit.getShortMessage().substring(2).trim(); + if (indexedIssues.contains(issueId)) { + continue; + } + indexedIssues.add(issueId); + + IssueModel issue = IssueUtils.getIssue(repository, issueId); + if (issue == null) { + // issue was deleted, remove from index + deleteIssue(model.name, issueId); + } else { + // issue was updated + index(model.name, issue); + result.issueCount++; + } + } else { + // index a commit + result.add(index(model.name, repository, branchName, commit)); + } } // update the config @@ -833,18 +855,18 @@ // unless a branch really was deleted and no longer exists if (deletedBranches.size() > 0) { for (String branch : deletedBranches) { - IndexWriter writer = getIndexWriter(repositoryName); + IndexWriter writer = getIndexWriter(model.name); writer.deleteDocuments(new 
Term(FIELD_BRANCH, branch)); writer.commit(); } } result.success = true; } catch (Throwable t) { - logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t); + logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t); } return result; } - + /** * Creates a Lucene document from an issue. * @@ -906,7 +928,6 @@ */ private boolean index(String repositoryName, Document doc) { try { - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED)); IndexWriter writer = getIndexWriter(repositoryName); writer.addDocument(doc); resetIndexSearcher(repositoryName); @@ -926,7 +947,6 @@ result.author = doc.get(FIELD_AUTHOR); result.committer = doc.get(FIELD_COMMITTER); result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); - result.repository = doc.get(FIELD_REPOSITORY); result.branch = doc.get(FIELD_BRANCH); result.commitId = doc.get(FIELD_COMMIT); result.issueId = doc.get(FIELD_ISSUE); @@ -1057,7 +1077,7 @@ readers.add(repositoryIndex.getIndexReader()); } IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]); - MultiReader reader = new MultiReader(rdrs); + MultiSourceReader reader = new MultiSourceReader(rdrs); searcher = new IndexSearcher(reader); } Query rewrittenQuery = searcher.rewrite(query); @@ -1067,8 +1087,17 @@ for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document doc = searcher.doc(docId); - // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY + // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY SearchResult result = createSearchResult(doc, hits[i].score); + if (repositories.length == 1) { + // single repository search + result.repository = repositories[0]; + } else { + // multi-repository search + MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader(); + int index = reader.getSourceIndex(docId); + result.repository = repositories[index]; + } String 
content = doc.get(FIELD_CONTENT); result.fragment = getHighlightedFragment(analyzer, query, content, result); results.add(result); @@ -1188,4 +1217,38 @@ return (endTime - startTime)/1000f; } } + + /** + * Custom subclass of MultiReader to identify the source index for a given + * doc id. This would not be necessary if there were a public method to + * obtain this information. + * + */ + private class MultiSourceReader extends MultiReader { + + final Method method; + + MultiSourceReader(IndexReader[] subReaders) { + super(subReaders); + Method m = null; + try { + m = MultiReader.class.getDeclaredMethod("readerIndex", int.class); + m.setAccessible(true); + } catch (Exception e) { + logger.error("Error getting readerIndex method", e); + } + method = m; + } + + int getSourceIndex(int docId) { + int index = -1; + try { + Object o = method.invoke(this, docId); + index = (Integer) o; + } catch (Exception e) { + logger.error("Error getting source index", e); + } + return index; + } + } } -- Gitblit v1.9.1