James Moger
2012-03-09 3d0494ad6db80843add9bf9d6eac783a691ac5a1
src/com/gitblit/utils/LuceneUtils.java
@@ -1,3 +1,18 @@
/*
 * Copyright 2012 gitblit.com.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gitblit.utils;
import java.io.ByteArrayOutputStream;
@@ -50,9 +65,11 @@
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.PathFilterGroup;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.eclipse.jgit.util.FS;
import com.gitblit.GitBlit;
import com.gitblit.models.IssueModel;
import com.gitblit.models.IssueModel.Attachment;
import com.gitblit.models.PathModel.PathChangeModel;
@@ -96,6 +113,7 @@
   private static final String FIELD_AUTHOR = "author";
   private static final String FIELD_COMMITTER = "committer";
   private static final String FIELD_DATE = "date";
   private static final String FIELD_TAG = "tag";
   private static final String FIELD_LABEL = "label";
   private static final String FIELD_ATTACHMENT = "attachment";
@@ -109,27 +127,47 @@
   private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
   private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
   private static final String LUCENE_DIR = "lucene";
   private static final String CONF_FILE = "lucene.conf";
   private static final String CONF_INDEX = "index";
   private static final String CONF_VERSION = "version";
   private static final String CONF_ALIAS = "aliases";
   private static final String CONF_BRANCH = "branches";
   /**
    * Returns the name of the repository.
    * Returns the author for the commit, if this information is available.
    * 
    * @param repository
    * @return the repository name
    * @param commit
    * @return an author or unknown
    */
   private static String getName(Repository repository) {
      String rootPath = GitBlit.getRepositoriesFolder().getAbsolutePath();
      if (repository.isBare()) {
         return StringUtils.getRelativePath(rootPath, repository.getDirectory()
               .getAbsolutePath());
      } else {
         return StringUtils.getRelativePath(rootPath, repository.getDirectory().getParentFile()
               .getAbsolutePath());
   private static String getAuthor(RevCommit commit) {
      String name = "unknown";
      try {
         name = commit.getAuthorIdent().getName();
         if (StringUtils.isEmpty(name)) {
            name = commit.getAuthorIdent().getEmailAddress();
         }
      } catch (NullPointerException n) {
      }
      return name;
   }
   /**
    * Returns the committer for the commit, if this information is available.
    *
    * @param commit
    * @return an committer or unknown
    */
   private static String getCommitter(RevCommit commit) {
      String name = "unknown";
      try {
         name = commit.getCommitterIdent().getName();
         if (StringUtils.isEmpty(name)) {
            name = commit.getCommitterIdent().getEmailAddress();
         }
      } catch (NullPointerException n) {
      }
      return name;
   }
   /**
@@ -182,7 +220,7 @@
    */
   public static boolean deleteIndex(Repository repository) {
      try {
         File luceneIndex = new File(repository.getDirectory(), "lucene");
         File luceneIndex = new File(repository.getDirectory(), LUCENE_DIR);
         if (luceneIndex.exists()) {
            org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
                  org.eclipse.jgit.util.FileUtils.RECURSIVE);
@@ -201,16 +239,22 @@
    * This completely indexes the repository and will destroy any existing
    * index.
    * 
    * @param repositoryName
    * @param repository
    * @param fullIndex
    *            If false blob metadata is set to the HEAD revision of each
    *            branch.  If true, each the last commit of each blob is
    *            determined to properly index the author, committer, and date.
    *            Full indexing can be time-consuming.
    * @return IndexResult
    */
   public static IndexResult reindex(Repository repository) {
   public static IndexResult reindex(String repositoryName, Repository repository,
         boolean fullIndex) {
      IndexResult result = new IndexResult();
      if (!LuceneUtils.deleteIndex(repository)) {
         return result;
      }
      try {
         String repositoryName = getName(repository);
      try {
         FileBasedConfig config = getConfig(repository);
         Set<String> indexedCommits = new TreeSet<String>();
         IndexWriter writer = getIndexWriter(repository, true);
@@ -235,40 +279,64 @@
            }
            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(repository);
            RevCommit rev = revWalk.parseCommit(branch.getObjectId());
            RevCommit branchHead = revWalk.parseCommit(branch.getObjectId());
            String head = branchHead.getId().getName();
            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, rev.getName());
            config.setString(CONF_BRANCH, null, keyName, head);
            // index the blob contents of the tree
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(rev.getTree());
            treeWalk.addTree(branchHead.getTree());
            treeWalk.setRecursive(true);
            String revDate = DateTools.timeToString(rev.getCommitTime() * 1000L,
                  Resolution.MINUTE);
            while (treeWalk.next()) {
               result.blobCount++;
               String blobPath = treeWalk.getPathString();
               RevCommit blobRev = branchHead;
               RevWalk blobWalk = null;
               if (fullIndex) {
                  // XXX this is _really_ slow, there must be a better way
                  // determine the most recent commit for this blob
                  blobWalk = new RevWalk(repository);
                  blobWalk.markStart(blobWalk.parseCommit(branch.getObjectId()));
                  TreeFilter filter = AndTreeFilter.create(
                        PathFilterGroup.createFromStrings(Collections.singleton(blobPath)),
                        TreeFilter.ANY_DIFF);
                  blobWalk.setTreeFilter(filter);
                  for (RevCommit commit : blobWalk) {
                     blobRev = commit;
                     break;
                  }
               }
               String blobAuthor = getAuthor(blobRev);
               String blobCommitter = getCommitter(blobRev);
               String blobDate = DateTools.timeToString(blobRev.getCommitTime() * 1000L,
                     Resolution.MINUTE);
               if (blobWalk != null) {
                  blobWalk.dispose();
               }
               Document doc = new Document();
               doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
                     Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                     Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
                     Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
               doc.add(new Field(FIELD_AUTHOR, rev.getAuthorIdent().getName(), Store.YES,
                     Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_COMMITTER, rev.getCommitterIdent().getName(),
                     Store.YES, Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_LABEL, branch.getName(), Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_OBJECT_ID, blobPath, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
               doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
               // determine extension to compare to the extension
               // blacklist
               String ext = null;
               String name = treeWalk.getPathString().toLowerCase();
               String name = blobPath.toLowerCase();
               if (name.indexOf('.') > -1) {
                  ext = name.substring(name.lastIndexOf('.') + 1);
               }
@@ -298,25 +366,25 @@
            treeWalk.release();
            // index the head commit object
            String head = rev.getId().getName();
            if (indexedCommits.add(head)) {
               Document doc = createDocument(rev, tags.get(head));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                     Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
               Document doc = createDocument(branchHead, tags.get(head));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
               writer.addDocument(doc);
               result.commitCount += 1;
               result.branchCount += 1;
            }
            // traverse the log and index the previous commit objects
            revWalk.markStart(rev);
            revWalk.reset();
            revWalk.markStart(branchHead);
            RevCommit rev;
            while ((rev = revWalk.next()) != null) {
               String hash = rev.getId().getName();
               if (indexedCommits.add(hash)) {
                  Document doc = createDocument(rev, tags.get(hash));
                  doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                        Index.NOT_ANALYZED));
                  doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
                  doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                  writer.addDocument(doc);
                  result.commitCount += 1;
               }
@@ -329,10 +397,13 @@
         // this repository has a gb-issues branch, index all issues
         if (IssueUtils.getIssuesBranch(repository) != null) {
            List<IssueModel> issues = IssueUtils.getIssues(repository, null);
            if (issues.size() > 0) {
               result.branchCount += 1;
            }
            for (IssueModel issue : issues) {
               result.issueCount++;
               Document doc = createDocument(issue);
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                     Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
               writer.addDocument(doc);
            }
         }
@@ -353,13 +424,16 @@
    * Incrementally update the index with the specified commit for the
    * repository.
    * 
    * @param repositoryName
    * @param repository
    * @param branch
    *            the fully qualified branch name (e.g. refs/heads/master)
    * @param commit
    * @return true, if successful
    */
   public static boolean index(Repository repository, String branch, RevCommit commit) {
   private static IndexResult index(String repositoryName, Repository repository,
         String branch, RevCommit commit) {
      IndexResult result = new IndexResult();
      try {
         if (excludedBranches.contains(branch)) {
            if (IssueUtils.GB_ISSUES.equals(branch)) {
@@ -367,20 +441,23 @@
               String issueId = commit.getShortMessage().substring(2).trim();
               IssueModel issue = IssueUtils.getIssue(repository, issueId);
               if (issue == null) {
                  // delete the old issue from the index, if exists
                  // issue was deleted, remove from index
                  IndexWriter writer = getIndexWriter(repository, false);
                  writer.deleteDocuments(
                        new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
                              FIELD_OBJECT_ID, issueId));
                  writer.commit();
                  return true;
                  result.success = true;
                  return result;
               }
               return index(repository, issue);
               result.success = index(repositoryName, repository, issue);
               result.issueCount++;
               return result;
            }
            return false;
            return result;
         }
         List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
         String repositoryName = getName(repository);
         String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
               Resolution.MINUTE);
         IndexWriter writer = getIndexWriter(repository, false);
@@ -391,19 +468,16 @@
            // re-index the blob
            if (!ChangeType.DELETE.equals(path.changeType)) {
               result.blobCount++;
               Document doc = new Document();
               doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
                     Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                     Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
               doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,
                     Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_COMMITTER, commit.getCommitterIdent().getName(),
                     Store.YES, Index.NOT_ANALYZED_NO_NORMS));
               doc.add(new Field(FIELD_LABEL, branch, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
               // determine extension to compare to the extension
               // blacklist
@@ -425,11 +499,12 @@
         writer.commit();
         Document doc = createDocument(commit, null);
         return index(repository, doc);
         result.commitCount++;
         result.success = index(repositoryName, repository, doc);
      } catch (Exception e) {
         e.printStackTrace();
      }
      return false;
      return result;
   }
   /**
@@ -440,7 +515,7 @@
    * @param issue
    * @return true, if successful
    */
   public static boolean index(Repository repository, IssueModel issue) {
   public static boolean index(String repositoryName, Repository repository, IssueModel issue) {
      try {
         // delete the old issue from the index, if exists
         IndexWriter writer = getIndexWriter(repository, false);
@@ -449,7 +524,7 @@
         writer.commit();
         Document doc = createDocument(issue);
         return index(repository, doc);
         return index(repositoryName, repository, doc);
      } catch (Exception e) {
         e.printStackTrace();
      }
@@ -459,10 +534,11 @@
   /**
    * Updates a repository index incrementally from the last indexed commits.
    * 
    * @param repositoryName
    * @param repository
    * @return IndexResult
    */
   public static IndexResult updateIndex(Repository repository) {
   public static IndexResult updateIndex(String repositoryName, Repository repository) {
      IndexResult result = new IndexResult();
      try {
         FileBasedConfig config = getConfig(repository);
@@ -511,11 +587,14 @@
               revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
            }
            // reverse the list of commits so we start with the first commit
            if (revs.size() > 0) {
               result.branchCount += 1;
            }
            // reverse the list of commits so we start with the first commit
            Collections.reverse(revs);
            for (RevCommit commit : revs) {
               index(repository, branchName, commit);
               result.commitCount += 1;
               result.add(index(repositoryName, repository, branchName, commit));
            }
            // update the config
@@ -550,12 +629,12 @@
   private static Document createDocument(IssueModel issue) {
      Document doc = new Document();
      doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
      doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED));
      doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.NOT_ANALYZED));
            Field.Index.NOT_ANALYZED));
      doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
            Store.YES, Field.Index.NO));
      doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
      doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
      List<String> attachments = new ArrayList<String>();
      for (Attachment attachment : issue.getAttachments()) {
         attachments.add(attachment.name.toLowerCase());
@@ -579,19 +658,16 @@
   private static Document createDocument(RevCommit commit, List<String> tags) {
      Document doc = new Document();
      doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,
            Index.NOT_ANALYZED_NO_NORMS));
      doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.NOT_ANALYZED));
            Index.NOT_ANALYZED));
      doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
            Resolution.MINUTE), Store.YES, Index.NO));
      doc.add(new Field(FIELD_AUTHOR, commit.getCommitterIdent().getName(), Store.YES,
            Index.NOT_ANALYZED_NO_NORMS));
      doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.NO, Index.ANALYZED));
      if (!ArrayUtils.isEmpty(tags)) {
         if (!ArrayUtils.isEmpty(tags)) {
            doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(tags), Store.YES,
                  Index.ANALYZED));
         }
         doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
      }
      return doc;
   }
@@ -599,13 +675,13 @@
   /**
    * Incrementally index an object for the repository.
    * 
    * @param repositoryName
    * @param repository
    * @param doc
    * @return true, if successful
    */
   private static boolean index(Repository repository, Document doc) {
      try {
         String repositoryName = getName(repository);
   private static boolean index(String repositoryName, Repository repository, Document doc) {
      try {
         doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
         IndexWriter writer = getIndexWriter(repository, false);
         writer.addDocument(doc);
@@ -629,6 +705,9 @@
      result.repository = doc.get(FIELD_REPOSITORY);
      result.branch = doc.get(FIELD_BRANCH);
      result.id = doc.get(FIELD_OBJECT_ID);
      if (doc.get(FIELD_TAG) != null) {
         result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
      }
      if (doc.get(FIELD_LABEL) != null) {
         result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
      }
@@ -672,7 +751,7 @@
   private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate)
         throws IOException {
      IndexWriter indexWriter = WRITERS.get(repository.getDirectory());
      File indexFolder = new File(repository.getDirectory(), "lucene");
      File indexFolder = new File(repository.getDirectory(), LUCENE_DIR);
      Directory directory = FSDirectory.open(indexFolder);
      if (forceCreate || !indexFolder.exists()) {
         // if the writer is going to blow away the existing index and create
@@ -794,6 +873,16 @@
   public static class IndexResult {
      public boolean success;
      public int branchCount;
      public int commitCount;
      public int blobCount;
      public int issueCount;
      public void add(IndexResult result) {
         this.branchCount += result.branchCount;
         this.commitCount += result.commitCount;
         this.blobCount += result.blobCount;
         this.issueCount += result.issueCount;
      }
   }
}