From 13417cf9c6eec555b51da49742e47939d2f5715b Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Fri, 19 Oct 2012 22:47:33 -0400 Subject: [PATCH] Exclude submodules from zip downloads (issue 151) --- src/com/gitblit/LuceneExecutor.java | 621 ++++++++++++++++++++++++++++++++++++-------------------- 1 files changed, 397 insertions(+), 224 deletions(-) diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java index 527609e..42155f4 100644 --- a/src/com/gitblit/LuceneExecutor.java +++ b/src/com/gitblit/LuceneExecutor.java @@ -21,10 +21,10 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.Method; import java.text.MessageFormat; import java.text.ParseException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -69,10 +69,12 @@ import org.apache.lucene.util.Version; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.lib.RepositoryCache.FileKey; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevTree; import org.eclipse.jgit.revwalk.RevWalk; @@ -88,6 +90,7 @@ import com.gitblit.models.IssueModel.Attachment; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; +import com.gitblit.models.RepositoryModel; import com.gitblit.models.SearchResult; import com.gitblit.utils.ArrayUtils; import com.gitblit.utils.IssueUtils; @@ -103,14 +106,13 @@ public class LuceneExecutor implements Runnable { - private static final int INDEX_VERSION = 1; + private static final int INDEX_VERSION = 5; private static final String FIELD_OBJECT_TYPE = "type"; private static final 
String FIELD_ISSUE = "issue"; private static final String FIELD_PATH = "path"; private static final String FIELD_COMMIT = "commit"; private static final String FIELD_BRANCH = "branch"; - private static final String FIELD_REPOSITORY = "repository"; private static final String FIELD_SUMMARY = "summary"; private static final String FIELD_CONTENT = "content"; private static final String FIELD_AUTHOR = "author"; @@ -137,51 +139,42 @@ private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>(); private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>(); - private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc", - "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", - "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); - - private final Set<String> excludedBranches = new TreeSet<String>( - Arrays.asList("/refs/heads/gb-issues")); + private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip"; + private Set<String> excludedExtensions; public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) { this.storedSettings = settings; this.repositoriesFolder = repositoriesFolder; + String exts = luceneIgnoreExtensions; + if (settings != null) { + exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts); + } + excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts)); } /** - * Indicates if the Lucene executor can index repositories. - * - * @return true if the Lucene executor is ready to index repositories - */ - public boolean isReady() { - return storedSettings.getBoolean(Keys.lucene.enable, false); - } - - /** - * Run is executed by the gitblit executor service at whatever frequency - * is specified in the settings. 
Because this is called by an executor - * service, calls will queue - i.e. there can never be concurrent execution - * of repository index updates. + * Run is executed by the Gitblit executor service. Because this is called + * by an executor service, calls will queue - i.e. there can never be + * concurrent execution of repository index updates. */ @Override public void run() { - if (!isReady()) { + if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) { + // Lucene indexing is disabled return; } + // reload the excluded extensions + String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions); + excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts)); - for (String repositoryName : GitBlit.self().getRepositoryList()) { - Repository repository = GitBlit.self().getRepository(repositoryName); - if (repository == null) { - logger.warn(MessageFormat.format( - "Lucene executor could not find repository {0}. Skipping.", - repositoryName)); - continue; + for (String repositoryName: GitBlit.self().getRepositoryList()) { + RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName); + if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) { + Repository repository = GitBlit.self().getRepository(model.name); + index(model, repository); + repository.close(); + System.gc(); } - // TODO allow repository to bypass Lucene indexing - index(repositoryName, repository); - repository.close(); - System.gc(); } } @@ -194,51 +187,71 @@ * @param repository * the repository object */ - protected void index(String name, Repository repository) { + private void index(RepositoryModel model, Repository repository) { try { - if (JGitUtils.hasCommits(repository)) { - if (shouldReindex(repository)) { - // (re)build the entire index - IndexResult result = reindex(name, repository); - - if (result.success) { - if (result.commitCount > 0) { - String msg = "Built {0} Lucene index from {1} commits and 
{2} files across {3} branches in {4} secs"; - logger.info(MessageFormat.format(msg, name, - result.commitCount, result.blobCount, result.branchCount, result.duration())); - } - } else { - String msg = "Could not build {0} Lucene index!"; - logger.error(MessageFormat.format(msg, name)); + if (shouldReindex(repository)) { + // (re)build the entire index + IndexResult result = reindex(model, repository); + + if (result.success) { + if (result.commitCount > 0) { + String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs"; + logger.info(MessageFormat.format(msg, model.name, result.commitCount, + result.blobCount, result.branchCount, result.duration())); } } else { - // update the index with latest commits - IndexResult result = updateIndex(name, repository); - if (result.success) { - if (result.commitCount > 0) { - String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; - logger.info(MessageFormat.format(msg, name, - result.commitCount, result.blobCount, result.branchCount, result.duration())); - } - } else { - String msg = "Could not update {0} Lucene index!"; - logger.error(MessageFormat.format(msg, name)); - } + String msg = "Could not build {0} Lucene index!"; + logger.error(MessageFormat.format(msg, model.name)); } } else { - logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}", - name)); + // update the index with latest commits + IndexResult result = updateIndex(model, repository); + if (result.success) { + if (result.commitCount > 0) { + String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; + logger.info(MessageFormat.format(msg, model.name, result.commitCount, + result.blobCount, result.branchCount, result.duration())); + } + } else { + String msg = "Could not update {0} Lucene index!"; + logger.error(MessageFormat.format(msg, model.name)); + } } } catch (Throwable t) { - 
logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t); + logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t); } + } + + /** + * Close the writer/searcher objects for a repository. + * + * @param repositoryName + */ + public synchronized void close(String repositoryName) { + try { + IndexSearcher searcher = searchers.remove(repositoryName); + if (searcher != null) { + searcher.getIndexReader().close(); + } + } catch (Exception e) { + logger.error("Failed to close index searcher for " + repositoryName, e); + } + + try { + IndexWriter writer = writers.remove(repositoryName); + if (writer != null) { + writer.close(); + } + } catch (Exception e) { + logger.error("Failed to close index writer for " + repositoryName, e); + } } /** * Close all Lucene indexers. * */ - public void close() { + public synchronized void close() { // close all writers for (String writer : writers.keySet()) { try { @@ -252,7 +265,7 @@ // close all searchers for (String searcher : searchers.keySet()) { try { - searchers.get(searcher).close(); + searchers.get(searcher).getIndexReader().close(); } catch (Throwable t) { logger.error("Failed to close Lucene searcher for " + searcher, t); } @@ -269,20 +282,11 @@ */ public boolean deleteIndex(String repositoryName) { try { - // remove the repository index writer from the cache and close it - IndexWriter writer = writers.remove(repositoryName); - if (writer != null) { - writer.close(); - writer = null; - } - // remove the repository index searcher from the cache and close it - IndexSearcher searcher = searchers.remove(repositoryName); - if (searcher != null) { - searcher.close(); - searcher = null; - } + // close any open writer/searcher + close(repositoryName); + // delete the index folder - File repositoryFolder = new File(repositoriesFolder, repositoryName); + File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED); File luceneIndex = new 
File(repositoryFolder, LUCENE_DIR); if (luceneIndex.exists()) { org.eclipse.jgit.util.FileUtils.delete(luceneIndex, @@ -298,7 +302,6 @@ throw new RuntimeException(e); } } - /** * Returns the author for the commit, if this information is available. @@ -335,6 +338,24 @@ } return name; } + + /** + * Get the tree associated with the given commit. + * + * @param walk + * @param commit + * @return tree + * @throws IOException + */ + private RevTree getTree(final RevWalk walk, final RevCommit commit) + throws IOException { + final RevTree tree = commit.getTree(); + if (tree != null) { + return tree; + } + walk.parseHeaders(commit); + return commit.getTree(); + } /** * Construct a keyname from the branch. @@ -366,7 +387,7 @@ * @param repository * @return true of the on-disk index format is different than INDEX_VERSION */ - protected boolean shouldReindex(Repository repository) { + private boolean shouldReindex(Repository repository) { try { FileBasedConfig config = getConfig(repository); config.load(); @@ -387,15 +408,16 @@ * @param repository * @return IndexResult */ - public IndexResult reindex(String repositoryName, Repository repository) { - IndexResult result = new IndexResult(); - if (!deleteIndex(repositoryName)) { + public IndexResult reindex(RepositoryModel model, Repository repository) { + IndexResult result = new IndexResult(); + if (!deleteIndex(model.name)) { return result; } - try { + try { + String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); FileBasedConfig config = getConfig(repository); Set<String> indexedCommits = new TreeSet<String>(); - IndexWriter writer = getIndexWriter(repositoryName); + IndexWriter writer = getIndexWriter(model.name); // build a quick lookup of tags Map<String, List<String>> tags = new HashMap<String, List<String>>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { @@ -427,7 +449,7 @@ ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); for (RefModel branch : 
branches) { if (branch.getObjectId().equals(defaultBranchId)) { - defaultBranch = branch; + defaultBranch = branch; break; } } @@ -436,7 +458,23 @@ // walk through each branch for (RefModel branch : branches) { - if (excludedBranches.contains(branch.getName())) { + + boolean indexBranch = false; + if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) + && branch.equals(defaultBranch)) { + // indexing "default" branch + indexBranch = true; + } else if (IssueUtils.GB_ISSUES.equals(branch)) { + // skip the GB_ISSUES branch because it is indexed later + // note: this is different than updateIndex + indexBranch = false; + } else { + // normal explicit branch check + indexBranch = model.indexedBranches.contains(branch.getName()); + } + + // if this branch is not specifically indexed then skip + if (!indexBranch) { continue; } @@ -456,7 +494,10 @@ Map<String, ObjectId> paths = new TreeMap<String, ObjectId>(); while (treeWalk.next()) { - paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0)); + // ensure path is not in a submodule + if (treeWalk.getFileMode(0) != FileMode.GITLINK) { + paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0)); + } } ByteArrayOutputStream os = new ByteArrayOutputStream(); @@ -501,7 +542,6 @@ Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED)); @@ -520,14 +560,14 @@ // index the blob content if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); - InputStream in = ldr.openStream(); + InputStream in = ldr.openStream(); int n; while ((n = in.read(tmp)) > 0) { 
os.write(tmp, 0, n); } in.close(); byte[] content = os.toByteArray(); - String str = new String(content, Constants.CHARACTER_ENCODING); + String str = StringUtils.decodeString(content, encodings); doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); os.reset(); } @@ -542,7 +582,6 @@ // index the tip commit object if (indexedCommits.add(tipId)) { Document doc = createDocument(tip, tags.get(tipId)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; @@ -557,7 +596,6 @@ String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; @@ -577,7 +615,6 @@ for (IssueModel issue : issues) { result.issueCount++; Document doc = createDocument(issue); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } } @@ -585,33 +622,15 @@ // commit all changes and reset the searcher config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); config.save(); - resetIndexSearcher(repositoryName); writer.commit(); + resetIndexSearcher(model.name); result.success(); } catch (Exception e) { - logger.error("Exception while reindexing " + repositoryName, e); + logger.error("Exception while reindexing " + model.name, e); } return result; } - /** - * Get the tree associated with the given commit. 
- * - * @param walk - * @param commit - * @return tree - * @throws IOException - */ - protected RevTree getTree(final RevWalk walk, final RevCommit commit) - throws IOException { - final RevTree tree = commit.getTree(); - if (tree != null) { - return tree; - } - walk.parseHeaders(commit); - return commit.getTree(); - } - /** * Incrementally update the index with the specified commit for the * repository. @@ -627,31 +646,17 @@ String branch, RevCommit commit) { IndexResult result = new IndexResult(); try { - if (excludedBranches.contains(branch)) { - if (IssueUtils.GB_ISSUES.equals(branch)) { - // index an issue - String issueId = commit.getShortMessage().substring(2).trim(); - IssueModel issue = IssueUtils.getIssue(repository, issueId); - if (issue == null) { - // issue was deleted, remove from index - deleteIssue(repositoryName, issueId); - result.success = true; - return result; - } - result.success = index(repositoryName, issue); - result.issueCount++; - return result; - - } - return result; - } + String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); IndexWriter writer = getIndexWriter(repositoryName); for (PathChangeModel path : changedPaths) { + if (path.isSubmodule()) { + continue; + } // delete the indexed blob - deleteBlob(repositoryName, branch, path.path); + deleteBlob(repositoryName, branch, path.name); // re-index the blob if (!ChangeType.DELETE.equals(path.changeType)) { @@ -659,7 +664,6 @@ Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, 
Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED)); @@ -678,15 +682,27 @@ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { // read the blob content String str = JGitUtils.getStringContent(repository, commit.getTree(), - path.path); - doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); - writer.addDocument(doc); + path.path, encodings); + if (str != null) { + doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); + writer.addDocument(doc); + } } } } writer.commit(); - - Document doc = createDocument(commit, null); + + // get any annotated commit tags + List<String> commitTags = new ArrayList<String>(); + for (RefModel ref : JGitUtils.getTags(repository, false, -1)) { + if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) { + commitTags.add(ref.displayName); + } + } + + // create and write the Lucene document + Document doc = createDocument(commit, commitTags); + doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED)); result.commitCount++; result.success = index(repositoryName, doc); } catch (Exception e) { @@ -721,8 +737,9 @@ * @param repositoryName * @param issueId * @throws Exception + * @return true, if deleted, false if no record was deleted */ - private void deleteIssue(String repositoryName, String issueId) throws Exception { + private boolean deleteIssue(String repositoryName, String issueId) throws Exception { BooleanQuery query = new BooleanQuery(); Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name()); query.add(new TermQuery(objectTerm), Occur.MUST); @@ -730,8 +747,17 @@ query.add(new TermQuery(issueidTerm), Occur.MUST); IndexWriter writer = getIndexWriter(repositoryName); + int numDocsBefore = writer.numDocs(); writer.deleteDocuments(query); writer.commit(); + int numDocsAfter = writer.numDocs(); + if (numDocsBefore == numDocsAfter) { + logger.debug(MessageFormat.format("no records found to delete {0}", 
query.toString())); + return false; + } else { + logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString())); + return true; + } } /** @@ -741,29 +767,39 @@ * @param branch * @param path * @throws Exception + * @return true, if deleted, false if no record was deleted */ - private void deleteBlob(String repositoryName, String branch, String path) throws Exception { - BooleanQuery query = new BooleanQuery(); - Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name()); - query.add(new TermQuery(objectTerm), Occur.MUST); - Term branchTerm = new Term(FIELD_BRANCH, branch); - query.add(new TermQuery(branchTerm), Occur.MUST); - Term pathTerm = new Term(FIELD_PATH, path); - query.add(new TermQuery(pathTerm), Occur.MUST); + public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception { + String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH); + String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path); + BooleanQuery query = new BooleanQuery(); + StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); + QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer); + query.add(qp.parse(q), Occur.MUST); + IndexWriter writer = getIndexWriter(repositoryName); - writer.deleteDocuments(query); + int numDocsBefore = writer.numDocs(); + writer.deleteDocuments(query); writer.commit(); + int numDocsAfter = writer.numDocs(); + if (numDocsBefore == numDocsAfter) { + logger.debug(MessageFormat.format("no records found to delete {0}", query.toString())); + return false; + } else { + logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString())); + return true; + } } /** * Updates a repository index incrementally from the last indexed commits. 
* - * @param repositoryName + * @param model * @param repository * @return IndexResult */ - protected IndexResult updateIndex(String repositoryName, Repository repository) { + private IndexResult updateIndex(RepositoryModel model, Repository repository) { IndexResult result = new IndexResult(); try { FileBasedConfig config = getConfig(repository); @@ -791,14 +827,55 @@ deletedBranches.add(branch); } - // walk through each branches + // get the local branches List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); + + // sort them by most recently updated + Collections.sort(branches, new Comparator<RefModel>() { + @Override + public int compare(RefModel ref1, RefModel ref2) { + return ref2.getDate().compareTo(ref1.getDate()); + } + }); + + // reorder default branch to first position + RefModel defaultBranch = null; + ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); + for (RefModel branch : branches) { + if (branch.getObjectId().equals(defaultBranchId)) { + defaultBranch = branch; + break; + } + } + branches.remove(defaultBranch); + branches.add(0, defaultBranch); + + // walk through each branches for (RefModel branch : branches) { String branchName = branch.getName(); + boolean indexBranch = false; + if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) + && branch.equals(defaultBranch)) { + // indexing "default" branch + indexBranch = true; + } else if (IssueUtils.GB_ISSUES.equals(branch)) { + // update issues modified on the GB_ISSUES branch + // note: this is different than reindex + indexBranch = true; + } else { + // normal explicit branch check + indexBranch = model.indexedBranches.contains(branch.getName()); + } + + // if this branch is not specifically indexed then skip + if (!indexBranch) { + continue; + } + // remove this branch from the deletedBranches set deletedBranches.remove(branchName); - + // determine last commit String keyName = getBranchKey(branchName); String lastCommit = 
config.getString(CONF_BRANCH, null, keyName); @@ -816,10 +893,35 @@ result.branchCount += 1; } + // track the issue ids that we have already indexed + Set<String> indexedIssues = new TreeSet<String>(); + // reverse the list of commits so we start with the first commit Collections.reverse(revs); - for (RevCommit commit : revs) { - result.add(index(repositoryName, repository, branchName, commit)); + for (RevCommit commit : revs) { + if (IssueUtils.GB_ISSUES.equals(branch)) { + // only index an issue once during updateIndex + String issueId = commit.getShortMessage().substring(2).trim(); + if (indexedIssues.contains(issueId)) { + continue; + } + indexedIssues.add(issueId); + + IssueModel issue = IssueUtils.getIssue(repository, issueId); + if (issue == null) { + // issue was deleted, remove from index + if (!deleteIssue(model.name, issueId)) { + logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId)); + } + } else { + // issue was updated + index(model.name, issue); + result.issueCount++; + } + } else { + // index a commit + result.add(index(model.name, repository, branchName, commit)); + } } // update the config @@ -833,18 +935,18 @@ // unless a branch really was deleted and no longer exists if (deletedBranches.size() > 0) { for (String branch : deletedBranches) { - IndexWriter writer = getIndexWriter(repositoryName); + IndexWriter writer = getIndexWriter(model.name); writer.deleteDocuments(new Term(FIELD_BRANCH, branch)); writer.commit(); } } result.success = true; } catch (Throwable t) { - logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t); + logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t); } return result; } - + /** * Creates a Lucene document from an issue. 
* @@ -906,11 +1008,10 @@ */ private boolean index(String repositoryName, Document doc) { try { - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED)); IndexWriter writer = getIndexWriter(repositoryName); writer.addDocument(doc); - resetIndexSearcher(repositoryName); writer.commit(); + resetIndexSearcher(repositoryName); return true; } catch (Exception e) { logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e); @@ -918,15 +1019,16 @@ return false; } - private SearchResult createSearchResult(Document doc, float score) throws ParseException { + private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException { SearchResult result = new SearchResult(); + result.hitId = hitId; + result.totalHits = totalHits; result.score = score; result.date = DateTools.stringToDate(doc.get(FIELD_DATE)); result.summary = doc.get(FIELD_SUMMARY); result.author = doc.get(FIELD_AUTHOR); result.committer = doc.get(FIELD_COMMITTER); result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); - result.repository = doc.get(FIELD_REPOSITORY); result.branch = doc.get(FIELD_BRANCH); result.commitId = doc.get(FIELD_COMMIT); result.issueId = doc.get(FIELD_ISSUE); @@ -943,7 +1045,7 @@ private synchronized void resetIndexSearcher(String repository) throws IOException { IndexSearcher searcher = searchers.remove(repository); if (searcher != null) { - searcher.close(); + searcher.getIndexReader().close(); } } @@ -973,8 +1075,8 @@ * @throws IOException */ private IndexWriter getIndexWriter(String repository) throws IOException { - IndexWriter indexWriter = writers.get(repository); - File repositoryFolder = new File(repositoriesFolder, repository); + IndexWriter indexWriter = writers.get(repository); + File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED); File indexFolder = new File(repositoryFolder, LUCENE_DIR); 
Directory directory = FSDirectory.open(indexFolder); @@ -996,19 +1098,21 @@ * * @param text * if the text is null or empty, null is returned - * @param maximumHits - * the maximum number of hits to collect + * @param page + * the page number to retrieve. page is 1-indexed. + * @param pageSize + * the number of elements to return for this page * @param repositories * a list of repositories to search. if no repositories are * specified null is returned. * @return a list of SearchResults in order from highest to the lowest score * */ - public List<SearchResult> search(String text, int maximumHits, List<String> repositories) { + public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) { if (ArrayUtils.isEmpty(repositories)) { return null; } - return search(text, maximumHits, repositories.toArray(new String[0])); + return search(text, page, pageSize, repositories.toArray(new String[0])); } /** @@ -1016,15 +1120,17 @@ * * @param text * if the text is null or empty, null is returned - * @param maximumHits - * the maximum number of hits to collect + * @param page + * the page number to retrieve. page is 1-indexed. + * @param pageSize + * the number of elements to return for this page * @param repositories * a list of repositories to search. if no repositories are * specified null is returned. * @return a list of SearchResults in order from highest to the lowest score * - */ - public List<SearchResult> search(String text, int maximumHits, String... repositories) { + */ + public List<SearchResult> search(String text, int page, int pageSize, String... 
repositories) { if (StringUtils.isEmpty(text)) { return null; } @@ -1044,7 +1150,7 @@ qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer); qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); - + IndexSearcher searcher; if (repositories.length == 1) { // single repository search @@ -1057,18 +1163,31 @@ readers.add(repositoryIndex.getIndexReader()); } IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]); - MultiReader reader = new MultiReader(rdrs); + MultiSourceReader reader = new MultiSourceReader(rdrs); searcher = new IndexSearcher(reader); } + Query rewrittenQuery = searcher.rewrite(query); - TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true); + logger.debug(rewrittenQuery.toString()); + + TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true); searcher.search(rewrittenQuery, collector); - ScoreDoc[] hits = collector.topDocs().scoreDocs; + int offset = Math.max(0, (page - 1) * pageSize); + ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs; + int totalHits = collector.getTotalHits(); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document doc = searcher.doc(docId); - // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY - SearchResult result = createSearchResult(doc, hits[i].score); + SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits); + if (repositories.length == 1) { + // single repository search + result.repository = repositories[0]; + } else { + // multi-repository search + MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader(); + int index = reader.getSourceIndex(docId); + result.repository = repositories[index]; + } String content = doc.get(FIELD_CONTENT); result.fragment = getHighlightedFragment(analyzer, query, content, result); results.add(result); @@ -1091,42 +1210,86 @@ */ private String getHighlightedFragment(Analyzer analyzer, Query query, String 
content, SearchResult result) throws IOException, InvalidTokenOffsetsException { - content = content == null ? "":StringUtils.escapeForHtml(content, false); - + if (content == null) { + content = ""; + } + + int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; + QueryScorer scorer = new QueryScorer(query, "content"); - Fragmenter fragmenter; - - // TODO improve the fragmenter - hopefully on line breaks - if (SearchObjectType.commit == result.type) { - fragmenter = new SimpleSpanFragmenter(scorer, 1024); - } else { - fragmenter = new SimpleSpanFragmenter(scorer, 150); - } + Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); // use an artificial delimiter for the token - String termTag = "<!--["; - String termTagEnd = "]-->"; + String termTag = "!!--["; + String termTagEnd = "]--!!"; SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); - - String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 5); + + String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; } - return "<pre class=\"text\">" + content + "</pre>"; + // clip commit message + String fragment = content; + if (fragment.length() > fragmentLength) { + fragment = fragment.substring(0, fragmentLength) + "..."; + } + return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>"; } + + // make sure we have unique fragments + Set<String> uniqueFragments = new LinkedHashSet<String>(); + for (String fragment : fragments) { + uniqueFragments.add(fragment); + } + fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]); + StringBuilder sb = new StringBuilder(); for (int i = 0, len = fragments.length; i < len; i++) { String fragment = fragments[i]; - + String tag = 
"<pre class=\"text\">"; + // resurrect the raw fragment from removing the artificial delimiters - String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); - sb.append(getPreTag(result, raw, content)); + String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); + + // determine position of the raw fragment in the content + int pos = content.indexOf(raw); + + // restore complete first line of fragment + int c = pos; + while (c > 0) { + c--; + if (content.charAt(c) == '\n') { + break; + } + } + if (c > 0) { + // inject leading chunk of first fragment line + fragment = content.substring(c + 1, pos) + fragment; + } + + if (SearchObjectType.blob == result.type) { + // count lines as offset into the content for this fragment + int line = Math.max(1, StringUtils.countLines(content.substring(0, pos))); + + // create fragment tag with line number and language + String lang = ""; + String ext = StringUtils.getFileExtension(result.path).toLowerCase(); + if (!StringUtils.isEmpty(ext)) { + // maintain leading space! + lang = " lang-" + ext; + } + tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang); + + } + sb.append(tag); + // replace the artificial delimiter with html tags - String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); + String html = StringUtils.escapeForHtml(fragment, false); + html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); sb.append(html); sb.append("</pre>"); if (i < len - 1) { @@ -1134,31 +1297,7 @@ } } return sb.toString(); - } - - /** - * Returns the appropriate tag for a fragment. Commit messages are visually - * differentiated from blob fragments. 
- * - * @param result - * @param fragment - * @param content - * @return an html tag appropriate for the fragment - */ - private String getPreTag(SearchResult result, String fragment, String content) { - String pre = "<pre class=\"text\">"; - if (SearchObjectType.blob == result.type) { - int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment))); - int lastDot = result.path.lastIndexOf('.'); - if (lastDot > -1) { - String ext = result.path.substring(lastDot + 1).toLowerCase(); - pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext); - } else { - pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line); - } - } - return pre; - } + } /** * Simple class to track the results of an index update. @@ -1188,4 +1327,38 @@ return (endTime - startTime)/1000f; } } + + /** + * Custom subclass of MultiReader to identify the source index for a given + * doc id. This would not be necessary if there were a public method to + * obtain this information. + * + */ + private class MultiSourceReader extends MultiReader { + + final Method method; + + MultiSourceReader(IndexReader[] subReaders) { + super(subReaders); + Method m = null; + try { + m = MultiReader.class.getDeclaredMethod("readerIndex", int.class); + m.setAccessible(true); + } catch (Exception e) { + logger.error("Error getting readerIndex method", e); + } + method = m; + } + + int getSourceIndex(int docId) { + int index = -1; + try { + Object o = method.invoke(this, docId); + index = (Integer) o; + } catch (Exception e) { + logger.error("Error getting source index", e); + } + return index; + } + } } -- Gitblit v1.9.1