From d1c49a7dd7dc4dfd852fb2bca4533609432ea26a Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Wed, 14 Mar 2012 16:22:30 -0400 Subject: [PATCH] Adjusted spacing on summary page --- src/com/gitblit/utils/LuceneUtils.java | 668 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 544 insertions(+), 124 deletions(-) diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java index 483537d..1c24f28 100644 --- a/src/com/gitblit/utils/LuceneUtils.java +++ b/src/com/gitblit/utils/LuceneUtils.java @@ -1,4 +1,21 @@ +/* + * Copyright 2012 gitblit.com. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.gitblit.utils; + +import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF; import java.io.ByteArrayOutputStream; import java.io.File; @@ -7,14 +24,19 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; @@ -35,21 +57,33 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.highlight.Fragmenter; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; -import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; -import org.eclipse.jgit.revwalk.RevObject; +import org.eclipse.jgit.revwalk.RevTree; import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.storage.file.FileBasedConfig; +import org.eclipse.jgit.treewalk.EmptyTreeIterator; import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.util.FS; import com.gitblit.models.IssueModel; import com.gitblit.models.IssueModel.Attachment; +import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; import com.gitblit.models.SearchResult; @@ -79,23 +113,26 @@ } private static final Version LUCENE_VERSION = Version.LUCENE_35; + private static final int INDEX_VERSION = 1; private static final String FIELD_OBJECT_TYPE = "type"; - private static final String FIELD_OBJECT_ID = "id"; + private static final String FIELD_ISSUE = "issue"; + private static final String FIELD_PATH = "path"; + private static final String FIELD_COMMIT = "commit"; + private static final String FIELD_BRANCH = "branch"; private static final String FIELD_REPOSITORY = "repository"; private static final String FIELD_SUMMARY = "summary"; private static final String FIELD_CONTENT = "content"; private static final String FIELD_AUTHOR = "author"; private static final String FIELD_COMMITTER = "committer"; private static final String FIELD_DATE = "date"; + private static final String FIELD_TAG = "tag"; private static final String FIELD_LABEL = "label"; private static final String FIELD_ATTACHMENT = "attachment"; - private static Set<String> excludedExtensions = new TreeSet<String>( - Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc", - "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", - "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", - "xlsx", "zip")); + private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc", + "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", + "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); private static Set<String> excludedBranches = new TreeSet<String>( Arrays.asList("/refs/heads/gb-issues")); @@ -103,20 +140,91 @@ private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>(); private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>(); + private static final String LUCENE_DIR = "lucene"; + private static final String CONF_FILE = "lucene.conf"; + private static final String CONF_INDEX = "index"; + private static final String CONF_VERSION = "version"; + private static final String CONF_ALIAS = "aliases"; + private static final String CONF_BRANCH = "branches"; + /** - * Returns the name of the repository. + * Returns the author for the commit, if this information is available. * - * @param repository - * @return the repository name + * @param commit + * @return an author or unknown */ - private static String getName(Repository repository) { - if (repository.isBare()) { - return repository.getDirectory().getName(); - } else { - return repository.getDirectory().getParentFile().getName(); + private static String getAuthor(RevCommit commit) { + String name = "unknown"; + try { + name = commit.getAuthorIdent().getName(); + if (StringUtils.isEmpty(name)) { + name = commit.getAuthorIdent().getEmailAddress(); + } + } catch (NullPointerException n) { } + return name; } + /** + * Returns the committer for the commit, if this information is available. + * + * @param commit + * @return an committer or unknown + */ + private static String getCommitter(RevCommit commit) { + String name = "unknown"; + try { + name = commit.getCommitterIdent().getName(); + if (StringUtils.isEmpty(name)) { + name = commit.getCommitterIdent().getEmailAddress(); + } + } catch (NullPointerException n) { + } + return name; + } + + /** + * Construct a keyname from the branch. + * + * @param branchName + * @return a keyname appropriate for the Git config file format + */ + private static String getBranchKey(String branchName) { + return StringUtils.getSHA1(branchName); + } + + /** + * Returns the Lucene configuration for the specified repository. + * + * @param repository + * @return a config object + */ + private static FileBasedConfig getConfig(Repository repository) { + File file = new File(repository.getDirectory(), CONF_FILE); + FileBasedConfig config = new FileBasedConfig(file, FS.detect()); + return config; + } + + /** + * Reads the Lucene config file for the repository to check the index + * version. If the index version is different, then rebuild the repository + * index. + * + * @param repository + * @return true of the on-disk index format is different than INDEX_VERSION + */ + public static boolean shouldReindex(Repository repository) { + try { + FileBasedConfig config = getConfig(repository); + config.load(); + int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0); + // reindex if versions do not match + return indexVersion != INDEX_VERSION; + } catch (Throwable t) { + } + return true; + } + /** * Deletes the Lucene index for the specified repository. * @@ -125,10 +233,14 @@ */ public static boolean deleteIndex(Repository repository) { try { - File luceneIndex = new File(repository.getDirectory(), "lucene"); + File luceneIndex = new File(repository.getDirectory(), LUCENE_DIR); if (luceneIndex.exists()) { org.eclipse.jgit.util.FileUtils.delete(luceneIndex, org.eclipse.jgit.util.FileUtils.RECURSIVE); + } + File luceneConfig = new File(repository.getDirectory(), CONF_FILE); + if (luceneConfig.exists()) { + luceneConfig.delete(); } return true; } catch (IOException e) { @@ -140,149 +252,321 @@ * This completely indexes the repository and will destroy any existing * index. * + * @param repositoryName * @param repository - * @return true if the indexing has succeeded + * @return IndexResult */ - public static boolean index(Repository repository) { - try { - String repositoryName = getName(repository); + public static IndexResult reindex(String repositoryName, Repository repository) { + IndexResult result = new IndexResult(); + if (!LuceneUtils.deleteIndex(repository)) { + return result; + } + try { + FileBasedConfig config = getConfig(repository); Set<String> indexedCommits = new TreeSet<String>(); IndexWriter writer = getIndexWriter(repository, true); // build a quick lookup of tags Map<String, List<String>> tags = new HashMap<String, List<String>>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { + if (!tag.isAnnotatedTag()) { + // skip non-annotated tags + continue; + } if (!tags.containsKey(tag.getObjectId())) { tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>()); } tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); } + + ObjectReader reader = repository.newObjectReader(); - // walk through each branch + // get the local branches List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); + + // sort them by most recently updated + Collections.sort(branches, new Comparator<RefModel>() { + @Override + public int compare(RefModel ref1, RefModel ref2) { + return ref2.getDate().compareTo(ref1.getDate()); + } + }); + + // reorder default branch to first position + RefModel defaultBranch = null; + ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); + for (RefModel branch : branches) { + if (branch.getObjectId().equals(defaultBranchId)) { + defaultBranch = branch; + break; + } + } + branches.remove(defaultBranch); + branches.add(0, defaultBranch); + + // walk through each branch for (RefModel branch : branches) { if (excludedBranches.contains(branch.getName())) { continue; } - RevWalk revWalk = new RevWalk(repository); - RevCommit rev = revWalk.parseCommit(branch.getObjectId()); + + String branchName = branch.getName(); + RevWalk revWalk = new RevWalk(reader); + RevCommit tip = revWalk.parseCommit(branch.getObjectId()); + String tipId = tip.getId().getName(); + + String keyName = getBranchKey(branchName); + config.setString(CONF_ALIAS, null, keyName, branchName); + config.setString(CONF_BRANCH, null, keyName, tipId); // index the blob contents of the tree + TreeWalk treeWalk = new TreeWalk(repository); + treeWalk.addTree(tip.getTree()); + treeWalk.setRecursive(true); + + Map<String, ObjectId> paths = new TreeMap<String, ObjectId>(); + while (treeWalk.next()) { + paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0)); + } + ByteArrayOutputStream os = new ByteArrayOutputStream(); byte[] tmp = new byte[32767]; - TreeWalk treeWalk = new TreeWalk(repository); - treeWalk.addTree(rev.getTree()); - treeWalk.setRecursive(true); - String revDate = DateTools.timeToString(rev.getCommitTime() * 1000L, - Resolution.MINUTE); - while (treeWalk.next()) { - Document doc = new Document(); - doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, - Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, - Index.NOT_ANALYZED)); - doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES, - Index.NOT_ANALYZED)); - doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); - doc.add(new Field(FIELD_AUTHOR, rev.getAuthorIdent().getName(), Store.YES, - Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_COMMITTER, rev.getCommitterIdent().getName(), - Store.YES, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_LABEL, branch.getName(), Store.YES, Index.ANALYZED)); - // determine extension to compare to the extension - // blacklist - String ext = null; - String name = treeWalk.getPathString().toLowerCase(); - if (name.indexOf('.') > -1) { - ext = name.substring(name.lastIndexOf('.') + 1); + RevWalk commitWalk = new RevWalk(reader); + commitWalk.markStart(tip); + + RevCommit commit; + while ((paths.size() > 0) && (commit = commitWalk.next()) != null) { + TreeWalk diffWalk = new TreeWalk(reader); + int parentCount = commit.getParentCount(); + switch (parentCount) { + case 0: + diffWalk.addTree(new EmptyTreeIterator()); + break; + case 1: + diffWalk.addTree(getTree(commitWalk, commit.getParent(0))); + break; + default: + // skip merge commits + continue; } - - if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { - // read the blob content - ObjectId entid = treeWalk.getObjectId(0); - FileMode entmode = treeWalk.getFileMode(0); - RevObject ro = revWalk.lookupAny(entid, entmode.getObjectType()); - revWalk.parseBody(ro); - ObjectLoader ldr = repository.open(ro.getId(), Constants.OBJ_BLOB); - InputStream in = ldr.openStream(); - os.reset(); - int n = 0; - while ((n = in.read(tmp)) > 0) { - os.write(tmp, 0, n); + diffWalk.addTree(getTree(commitWalk, commit)); + diffWalk.setFilter(ANY_DIFF); + diffWalk.setRecursive(true); + while ((paths.size() > 0) && diffWalk.next()) { + String path = diffWalk.getPathString(); + if (!paths.containsKey(path)) { + continue; } - in.close(); - byte[] content = os.toByteArray(); - String str = new String(content, "UTF-8"); - doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED)); + + // remove path from set + ObjectId blobId = paths.remove(path); + result.blobCount++; + + // index the blob metadata + String blobAuthor = getAuthor(commit); + String blobCommitter = getCommitter(commit); + String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, + Resolution.MINUTE); + + Document doc = new Document(); + doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO)); + doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED)); + + // determine extension to compare to the extension + // blacklist + String ext = null; + String name = path.toLowerCase(); + if (name.indexOf('.') > -1) { + ext = name.substring(name.lastIndexOf('.') + 1); + } + + // index the blob content + if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { + ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); + InputStream in = ldr.openStream(); + int n; + while ((n = in.read(tmp)) > 0) { + os.write(tmp, 0, n); + } + in.close(); + byte[] content = os.toByteArray(); + String str = new String(content, Constants.CHARACTER_ENCODING); + doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); + os.reset(); + } + + // add the blob to the index writer.addDocument(doc); } } os.close(); - treeWalk.release(); - // index the head commit object - String head = rev.getId().getName(); - if (indexedCommits.add(head)) { - Document doc = createDocument(rev, tags.get(head)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, - Index.NOT_ANALYZED)); + // index the tip commit object + if (indexedCommits.add(tipId)) { + Document doc = createDocument(tip, tags.get(tipId)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); + result.commitCount += 1; + result.branchCount += 1; } // traverse the log and index the previous commit objects - revWalk.markStart(rev); - while ((rev = revWalk.next()) != null) { + RevWalk historyWalk = new RevWalk(reader); + historyWalk.markStart(historyWalk.parseCommit(tip.getId())); + RevCommit rev; + while ((rev = historyWalk.next()) != null) { String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, - Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); + result.commitCount += 1; } } - - // finished - revWalk.dispose(); } + // finished + reader.release(); + // this repository has a gb-issues branch, index all issues if (IssueUtils.getIssuesBranch(repository) != null) { List<IssueModel> issues = IssueUtils.getIssues(repository, null); + if (issues.size() > 0) { + result.branchCount += 1; + } for (IssueModel issue : issues) { + result.issueCount++; Document doc = createDocument(issue); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, - Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } } // commit all changes and reset the searcher + config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); + config.save(); resetIndexSearcher(repository); writer.commit(); - return true; + result.success = true; } catch (Exception e) { e.printStackTrace(); } - return false; + return result; + } + + /** + * Get the tree associated with the given commit. + * + * @param walk + * @param commit + * @return tree + * @throws IOException + */ + protected static RevTree getTree(final RevWalk walk, final RevCommit commit) + throws IOException { + final RevTree tree = commit.getTree(); + if (tree != null) { + return tree; + } + walk.parseHeaders(commit); + return commit.getTree(); } /** * Incrementally update the index with the specified commit for the * repository. * + * @param repositoryName * @param repository + * @param branch + * the fully qualified branch name (e.g. refs/heads/master) * @param commit * @return true, if successful */ - public static boolean index(Repository repository, RevCommit commit) { + private static IndexResult index(String repositoryName, Repository repository, + String branch, RevCommit commit) { + IndexResult result = new IndexResult(); try { + if (excludedBranches.contains(branch)) { + if (IssueUtils.GB_ISSUES.equals(branch)) { + // index an issue + String issueId = commit.getShortMessage().substring(2).trim(); + IssueModel issue = IssueUtils.getIssue(repository, issueId); + if (issue == null) { + // issue was deleted, remove from index + IndexWriter writer = getIndexWriter(repository, false); + writer.deleteDocuments( + new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term( + FIELD_ISSUE, issueId)); + writer.commit(); + result.success = true; + return result; + } + result.success = index(repositoryName, repository, issue); + result.issueCount++; + return result; + + } + return result; + } + List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); + String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, + Resolution.MINUTE); + IndexWriter writer = getIndexWriter(repository, false); + for (PathChangeModel path : changedPaths) { + // delete the indexed blob + writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()), + new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path)); + + // re-index the blob + if (!ChangeType.DELETE.equals(path.changeType)) { + result.blobCount++; + Document doc = new Document(); + doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); + doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED)); + + // determine extension to compare to the extension + // blacklist + String ext = null; + String name = path.name.toLowerCase(); + if (name.indexOf('.') > -1) { + ext = name.substring(name.lastIndexOf('.') + 1); + } + + if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { + // read the blob content + String str = JGitUtils.getStringContent(repository, commit.getTree(), + path.path); + doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); + writer.addDocument(doc); + } + } + } + writer.commit(); + Document doc = createDocument(commit, null); - return index(repository, doc); + result.commitCount++; + result.success = index(repositoryName, repository, doc); } catch (Exception e) { e.printStackTrace(); } - return false; + return result; } /** @@ -291,26 +575,111 @@ * * @param repository * @param issue - * @param reindex - * if true, the old index entry for this issue will be deleted. - * This is only appropriate for pre-existing/indexed issues. * @return true, if successful */ - public static boolean index(Repository repository, IssueModel issue, boolean reindex) { + public static boolean index(String repositoryName, Repository repository, IssueModel issue) { try { + // delete the old issue from the index, if exists + IndexWriter writer = getIndexWriter(repository, false); + writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term( + FIELD_ISSUE, String.valueOf(issue.id))); + writer.commit(); + Document doc = createDocument(issue); - if (reindex) { - // delete the old issue from the index, if exists - IndexWriter writer = getIndexWriter(repository, false); - writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), - new Term(FIELD_OBJECT_ID, String.valueOf(issue.id))); - writer.commit(); - } - return index(repository, doc); + return index(repositoryName, repository, doc); } catch (Exception e) { e.printStackTrace(); } return false; + } + + /** + * Updates a repository index incrementally from the last indexed commits. + * + * @param repositoryName + * @param repository + * @return IndexResult + */ + public static IndexResult updateIndex(String repositoryName, Repository repository) { + IndexResult result = new IndexResult(); + try { + FileBasedConfig config = getConfig(repository); + config.load(); + + // build a quick lookup of annotated tags + Map<String, List<String>> tags = new HashMap<String, List<String>>(); + for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { + if (!tag.isAnnotatedTag()) { + // skip non-annotated tags + continue; + } + if (!tags.containsKey(tag.getObjectId())) { + tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>()); + } + tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); + } + + // detect branch deletion + // first assume all branches are deleted and then remove each + // existing branch from deletedBranches during indexing + Set<String> deletedBranches = new TreeSet<String>(); + for (String alias : config.getNames(CONF_ALIAS)) { + String branch = config.getString(CONF_ALIAS, null, alias); + deletedBranches.add(branch); + } + + // walk through each branches + List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); + for (RefModel branch : branches) { + String branchName = branch.getName(); + + // remove this branch from the deletedBranches set + deletedBranches.remove(branchName); + + // determine last commit + String keyName = getBranchKey(branchName); + String lastCommit = config.getString(CONF_BRANCH, null, keyName); + + List<RevCommit> revs; + if (StringUtils.isEmpty(lastCommit)) { + // new branch/unindexed branch, get all commits on branch + revs = JGitUtils.getRevLog(repository, branchName, 0, -1); + } else { + // pre-existing branch, get changes since last commit + revs = JGitUtils.getRevLog(repository, lastCommit, branchName); + } + + if (revs.size() > 0) { + result.branchCount += 1; + } + + // reverse the list of commits so we start with the first commit + Collections.reverse(revs); + for (RevCommit commit : revs) { + result.add(index(repositoryName, repository, branchName, commit)); + } + + // update the config + config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); + config.setString(CONF_ALIAS, null, keyName, branchName); + config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName()); + config.save(); + } + + // the deletedBranches set will normally be empty by this point + // unless a branch really was deleted and no longer exists + if (deletedBranches.size() > 0) { + for (String branch : deletedBranches) { + IndexWriter writer = getIndexWriter(repository, false); + writer.deleteDocuments(new Term(FIELD_BRANCH, branch)); + writer.commit(); + } + } + result.success = true; + } catch (Throwable t) { + t.printStackTrace(); + } + return result; } /** @@ -322,11 +691,12 @@ private static Document createDocument(IssueModel issue) { Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES, - Field.Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED)); + Field.Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE), Store.YES, Field.Index.NO)); - doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED)); List<String> attachments = new ArrayList<String>(); for (Attachment attachment : issue.getAttachments()) { attachments.add(attachment.name.toLowerCase()); @@ -334,7 +704,7 @@ doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED)); - doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.NO, Index.ANALYZED)); + doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES, Index.ANALYZED)); return doc; @@ -350,19 +720,16 @@ private static Document createDocument(RevCommit commit, List<String> tags) { Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES, - Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.NOT_ANALYZED)); + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE), Store.YES, Index.NO)); - doc.add(new Field(FIELD_AUTHOR, commit.getCommitterIdent().getName(), Store.YES, - Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED)); - doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.NO, Index.ANALYZED)); + doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED)); if (!ArrayUtils.isEmpty(tags)) { - if (!ArrayUtils.isEmpty(tags)) { - doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(tags), Store.YES, - Index.ANALYZED)); - } + doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED)); } return doc; } @@ -370,15 +737,14 @@ /** * Incrementally index an object for the repository. * + * @param repositoryName * @param repository * @param doc * @return true, if successful */ - private static boolean index(Repository repository, Document doc) { - try { - String repositoryName = getName(repository); - doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, - Index.NOT_ANALYZED)); + private static boolean index(String repositoryName, Repository repository, Document doc) { + try { + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED)); IndexWriter writer = getIndexWriter(repository, false); writer.addDocument(doc); resetIndexSearcher(repository); @@ -394,12 +760,18 @@ SearchResult result = new SearchResult(); result.score = score; result.date = DateTools.stringToDate(doc.get(FIELD_DATE)); - result.summary = doc.get(FIELD_SUMMARY); + result.summary = doc.get(FIELD_SUMMARY); result.author = doc.get(FIELD_AUTHOR); result.committer = doc.get(FIELD_COMMITTER); result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); result.repository = doc.get(FIELD_REPOSITORY); - result.id = doc.get(FIELD_OBJECT_ID); + result.branch = doc.get(FIELD_BRANCH); + result.commitId = doc.get(FIELD_COMMIT); + result.issueId = doc.get(FIELD_ISSUE); + result.path = doc.get(FIELD_PATH); + if (doc.get(FIELD_TAG) != null) { + result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG)); + } if (doc.get(FIELD_LABEL) != null) { result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL)); } @@ -443,7 +815,7 @@ private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate) throws IOException { IndexWriter indexWriter = WRITERS.get(repository.getDirectory()); - File indexFolder = new File(repository.getDirectory(), "lucene"); + File indexFolder = new File(repository.getDirectory(), LUCENE_DIR); Directory directory = FSDirectory.open(indexFolder); if (forceCreate || !indexFolder.exists()) { // if the writer is going to blow away the existing index and create @@ -517,9 +889,9 @@ for (Repository repository : repositories) { IndexSearcher repositoryIndex = getIndexSearcher(repository); readers.add(repositoryIndex.getIndexReader()); - } - IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]); - MultiReader reader = new MultiReader(rdrs); + } + IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]); + MultiReader reader = new MultiReader(rdrs); searcher = new IndexSearcher(reader); } Query rewrittenQuery = searcher.rewrite(query); @@ -530,13 +902,46 @@ int docId = hits[i].doc; Document doc = searcher.doc(docId); SearchResult result = createSearchResult(doc, hits[i].score); + String content = doc.get(FIELD_CONTENT); + result.fragment = getHighlightedFragment(analyzer, query, content); results.add(result); } } catch (Exception e) { e.printStackTrace(); } return new ArrayList<SearchResult>(results); - } + } + + private static String getHighlightedFragment(Analyzer analyzer, Query query, + String content) throws IOException, InvalidTokenOffsetsException { + content = content == null ? "":StringUtils.escapeForHtml(content, false); + TokenStream stream = TokenSources.getTokenStream("content", content, analyzer); + QueryScorer scorer = new QueryScorer(query, "content"); + Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 150); + + SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); + Highlighter highlighter = new Highlighter(formatter, scorer); + + highlighter.setTextFragmenter(fragmenter); + String [] fragments = highlighter.getBestFragments(stream, content, 5); + if (ArrayUtils.isEmpty(fragments)) { + return content; + } + if (fragments.length == 1) { + return "<pre>" + fragments[0] + "</pre>"; + } + StringBuilder sb = new StringBuilder(); + for (int i = 0, len = fragments.length; i < len; i++) { + String fragment = fragments[i].trim(); + sb.append("<pre>"); + sb.append(fragment); + sb.append("</pre>"); + if (i < len - 1) { + sb.append("<span class=\"ellipses\">...</span><br/>"); + } + } + return sb.toString(); + } /** * Close all the index writers and searchers @@ -562,4 +967,19 @@ } SEARCHERS.clear(); } + + public static class IndexResult { + public boolean success; + public int branchCount; + public int commitCount; + public int blobCount; + public int issueCount; + + public void add(IndexResult result) { + this.branchCount += result.branchCount; + this.commitCount += result.commitCount; + this.blobCount += result.blobCount; + this.issueCount += result.issueCount; + } + } } -- Gitblit v1.9.1