James Moger
2012-02-26 e31da050c6ab5ece38fb18196948337395ae59e6
Partially working Lucene executor. Needs refactoring. (issue-16)
1 file added
4 files modified
386 ■■■■■ changed files
src/com/gitblit/GitBlit.java 10 ●●●●● patch | view | raw | blame | history
src/com/gitblit/LuceneExecutor.java 177 ●●●●● patch | view | raw | blame | history
src/com/gitblit/utils/LuceneUtils.java 189 ●●●● patch | view | raw | blame | history
tests/com/gitblit/tests/IssuesTest.java 4 ●●●● patch | view | raw | blame | history
tests/com/gitblit/tests/LuceneUtilsTest.java 6 ●●●● patch | view | raw | blame | history
src/com/gitblit/GitBlit.java
@@ -136,6 +136,8 @@
    private MailExecutor mailExecutor;
    
    private LuceneExecutor luceneExecutor;
    private TimeZone timezone;
    public GitBlit() {
@@ -1806,10 +1808,18 @@
        setUserService(loginService);
        mailExecutor = new MailExecutor(settings);
        if (mailExecutor.isReady()) {
            logger.info("Mail executor is scheduled to process the message queue every 2 minutes.");
            scheduledExecutor.scheduleAtFixedRate(mailExecutor, 1, 2, TimeUnit.MINUTES);
        } else {
            logger.warn("Mail server is not properly configured.  Mail services disabled.");
        }
        luceneExecutor = new LuceneExecutor(settings);
        if (luceneExecutor.isReady()) {
            logger.info("Lucene executor is scheduled to process the repository queue every 10 minutes.");
            scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, 10, TimeUnit.MINUTES);
        } else {
            logger.warn("Lucene executor is disabled.");
        }
        if (startFederation) {
            configureFederation();
        }        
src/com/gitblit/LuceneExecutor.java
New file
@@ -0,0 +1,177 @@
/*
 * Copyright 2012 gitblit.com.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gitblit;
import java.text.MessageFormat;
import java.util.HashSet;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import org.eclipse.jgit.lib.Repository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gitblit.models.RepositoryModel;
import com.gitblit.utils.JGitUtils;
import com.gitblit.utils.LuceneUtils;
/**
 * The Lucene executor handles indexing repositories synchronously and
 * asynchronously from a queue.
 *
 * @author James Moger
 *
 */
public class LuceneExecutor implements Runnable {
    private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
    private final Queue<String> queue = new ConcurrentLinkedQueue<String>();
    private final IStoredSettings settings;
    private final boolean isLuceneEnabled;
    private final boolean isPollingMode;
    private final AtomicBoolean firstRun = new AtomicBoolean(true);
    public LuceneExecutor(IStoredSettings settings) {
        this.settings = settings;
        this.isLuceneEnabled = settings.getBoolean("lucene.enableLucene", false);
        this.isPollingMode = settings.getBoolean("lucene.pollingMode", false);
    }
    /**
     * Indicates if the Lucene executor can index repositories.
     *
     * @return true if the Lucene executor is ready to index repositories
     */
    public boolean isReady() {
        return isLuceneEnabled;
    }
    /**
     * Returns the status of the Lucene queue.
     *
     * @return true, if the queue is empty
     */
    public boolean hasEmptyQueue() {
        return queue.isEmpty();
    }
    /**
     * Queues a repository to be asynchronously indexed.
     *
     * @param repository
     * @return true if the repository was queued
     */
    public boolean queue(RepositoryModel repository) {
        if (!isReady()) {
            return false;
        }
        queue.add(repository.name);
        return true;
    }
    @Override
    public void run() {
        if (!isLuceneEnabled) {
            return;
        }
        if (firstRun.get() || isPollingMode) {
            // update all indexes on first run or if polling mode
            firstRun.set(false);
            queue.addAll(GitBlit.self().getRepositoryList());
        }
        Set<String> processed = new HashSet<String>();
        if (!queue.isEmpty()) {
            // update the repository Lucene index
            String repositoryName = null;
            while ((repositoryName = queue.poll()) != null) {
                if (processed.contains(repositoryName)) {
                    // skipping multi-queued repository
                    continue;
                }
                try {
                    Repository repository = GitBlit.self().getRepository(repositoryName);
                    if (repository == null) {
                        logger.warn(MessageFormat.format(
                                "Lucene executor could not find repository {0}. Skipping.",
                                repositoryName));
                        continue;
                    }
                    index(repositoryName, repository);
                    repository.close();
                    processed.add(repositoryName);
                } catch (Throwable e) {
                    logger.error(MessageFormat.format("Failed to update {0} Lucene index",
                            repositoryName), e);
                }
            }
        }
    }
    /**
     * Synchronously indexes a repository. This may build a complete index of a
     * repository or it may update an existing index.
     *
     * @param repositoryName
     *            the name of the repository
     * @param repository
     *            the repository object
     */
    public void index(String repositoryName, Repository repository) {
        try {
            if (JGitUtils.hasCommits(repository)) {
                if (LuceneUtils.shouldReindex(repository)) {
                    // (re)build the entire index
                    long start = System.currentTimeMillis();
                    boolean success = LuceneUtils.reindex(repository);
                    long duration = System.currentTimeMillis() - start;
                    if (success) {
                        String msg = "Built {0} Lucene index in {1} msecs";
                        logger.info(MessageFormat.format(msg, repositoryName, duration));
                    } else {
                        String msg = "Could not build {0} Lucene index!";
                        logger.error(MessageFormat.format(msg, repositoryName));
                    }
                } else {
                    // update the index with latest commits
                    long start = System.currentTimeMillis();
                    boolean success = LuceneUtils.updateIndex(repository);
                    long duration = System.currentTimeMillis() - start;
                    if (success) {
                        String msg = "Updated {0} Lucene index in {1} msecs";
                        logger.info(MessageFormat.format(msg, repositoryName, duration));
                    } else {
                        String msg = "Could not update {0} Lucene index!";
                        logger.error(MessageFormat.format(msg, repositoryName));
                    }
                }
            } else {
                logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",
                        repositoryName));
            }
        } catch (Throwable t) {
            logger.error(MessageFormat.format("Lucene indexing failure for {0}", repositoryName), t);
        }
    }
}
src/com/gitblit/utils/LuceneUtils.java
@@ -7,6 +7,7 @@
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
@@ -47,7 +48,9 @@
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.FS;
import com.gitblit.models.IssueModel;
import com.gitblit.models.IssueModel.Attachment;
@@ -81,6 +84,7 @@
    }
    private static final Version LUCENE_VERSION = Version.LUCENE_35;
    private static final int INDEX_VERSION = 1;
    private static final String FIELD_OBJECT_TYPE = "type";
    private static final String FIELD_OBJECT_ID = "id";
@@ -94,17 +98,21 @@
    private static final String FIELD_LABEL = "label";
    private static final String FIELD_ATTACHMENT = "attachment";
    private static Set<String> excludedExtensions = new TreeSet<String>(
            Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",
                    "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",
                    "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",
                    "xlsx", "zip"));
    private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
            "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
            "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
    private static Set<String> excludedBranches = new TreeSet<String>(
            Arrays.asList("/refs/heads/gb-issues"));
    private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
    private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
    private static final String CONF_FILE = "lucene.conf";
    private static final String CONF_INDEX = "index";
    private static final String CONF_VERSION = "version";
    private static final String CONF_ALIAS = "aliases";
    private static final String CONF_BRANCH = "branches";
    /**
     * Returns the name of the repository.
@@ -121,6 +129,48 @@
    }
    
    /**
     * Construct a keyname from the branch.
     *
     * @param branchName
     * @return a keyname appropriate for the Git config file format
     */
    private static String getBranchKey(String branchName) {
        return StringUtils.getSHA1(branchName);
    }
    /**
     * Returns the Lucene configuration for the specified repository.
     *
     * @param repository
     * @return a config object
     */
    private static FileBasedConfig getConfig(Repository repository) {
        File file = new File(repository.getDirectory(), CONF_FILE);
        FileBasedConfig config = new FileBasedConfig(file, FS.detect());
        return config;
    }
    /**
     * Reads the Lucene config file for the repository to check the index
     * version. If the index version is different, then rebuild the repository
     * index.
     *
     * @param repository
     * @return true of the on-disk index format is different than INDEX_VERSION
     */
    public static boolean shouldReindex(Repository repository) {
        try {
            FileBasedConfig config = getConfig(repository);
            config.load();
            int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
            // reindex if versions do not match
            return indexVersion != INDEX_VERSION;
        } catch (Throwable t) {
        }
        return true;
    }
    /**
     * Deletes the Lucene index for the specified repository.
     * 
     * @param repository
@@ -132,6 +182,10 @@
            if (luceneIndex.exists()) {
                org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
                        org.eclipse.jgit.util.FileUtils.RECURSIVE);
            }
            File luceneConfig = new File(repository.getDirectory(), CONF_FILE);
            if (luceneConfig.exists()) {
                luceneConfig.delete();
            }
            return true;
        } catch (IOException e) {
@@ -146,14 +200,22 @@
     * @param repository
     * @return true if the indexing has succeeded
     */
    public static boolean index(Repository repository) {
    public static boolean reindex(Repository repository) {
        if (!LuceneUtils.deleteIndex(repository)) {
            return false;
        }
        try {
            String repositoryName = getName(repository);
            FileBasedConfig config = getConfig(repository);
            Set<String> indexedCommits = new TreeSet<String>();
            IndexWriter writer = getIndexWriter(repository, true);
            // build a quick lookup of tags
            Map<String, List<String>> tags = new HashMap<String, List<String>>();
            for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
                if (!tag.isAnnotatedTag()) {
                    // skip non-annotated tags
                    continue;
                }
                if (!tags.containsKey(tag.getObjectId())) {
                    tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
                }
@@ -170,6 +232,10 @@
                RevWalk revWalk = new RevWalk(repository);
                RevCommit rev = revWalk.parseCommit(branch.getObjectId());
                String keyName = getBranchKey(branchName);
                config.setString(CONF_ALIAS, null, keyName, branchName);
                config.setString(CONF_BRANCH, null, keyName, rev.getName());
                // index the blob contents of the tree
                ByteArrayOutputStream os = new ByteArrayOutputStream();
                byte[] tmp = new byte[32767];
@@ -184,8 +250,7 @@
                            Index.NOT_ANALYZED_NO_NORMS));
                    doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
@@ -233,8 +298,7 @@
                    Document doc = createDocument(rev, tags.get(head));
                    doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
                    writer.addDocument(doc);
                }
@@ -246,8 +310,7 @@
                        Document doc = createDocument(rev, tags.get(hash));
                        doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                                Index.NOT_ANALYZED));
                        doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
                                Index.NOT_ANALYZED));
                        doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
                        writer.addDocument(doc);
                    }
                }
@@ -268,6 +331,8 @@
            }
            // commit all changes and reset the searcher
            config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
            config.save();
            resetIndexSearcher(repository);
            writer.commit();
            return true;
@@ -294,7 +359,16 @@
                    // index an issue
                    String issueId = commit.getShortMessage().substring(2).trim();
                    IssueModel issue = IssueUtils.getIssue(repository, issueId);
                    return index(repository, issue, true);
                    if (issue == null) {
                        // delete the old issue from the index, if exists
                        IndexWriter writer = getIndexWriter(repository, false);
                        writer.deleteDocuments(
                                new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
                                        FIELD_OBJECT_ID, issueId));
                        writer.commit();
                        return true;
                    }
                    return index(repository, issue);
                }
                return false;
            }
@@ -306,8 +380,7 @@
            for (PathChangeModel path : changedPaths) {
                // delete the indexed blob
                writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
                        new Term(FIELD_BRANCH, branch),
                        new Term(FIELD_OBJECT_ID, path.path));
                        new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));
                
                // re-index the blob
                if (!ChangeType.DELETE.equals(path.changeType)) {
@@ -317,8 +390,7 @@
                    doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES,
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
                    doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,
                            Index.NOT_ANALYZED_NO_NORMS));
@@ -336,8 +408,8 @@
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        // read the blob content
                        String str = JGitUtils.getStringContent(repository,
                                commit.getTree(), path.path);
                        String str = JGitUtils.getStringContent(repository, commit.getTree(),
                                path.path);
                        doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
                        writer.addDocument(doc);
                    }
@@ -359,26 +431,84 @@
     * 
     * @param repository
     * @param issue
     * @param reindex
     *            if true, the old index entry for this issue will be deleted.
     *            This is only appropriate for pre-existing/indexed issues.
     * @return true, if successful
     */
    public static boolean index(Repository repository, IssueModel issue, boolean reindex) {
    public static boolean index(Repository repository, IssueModel issue) {
        try {
            Document doc = createDocument(issue);
            if (reindex) {
                // delete the old issue from the index, if exists
                IndexWriter writer = getIndexWriter(repository, false);
                writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()),
                        new Term(FIELD_OBJECT_ID, String.valueOf(issue.id)));
            writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
                    FIELD_OBJECT_ID, String.valueOf(issue.id)));
                writer.commit();
            }
            Document doc = createDocument(issue);
            return index(repository, doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
    /**
     * Updates a repository index incrementally from the last indexed commits.
     *
     * @param repository
     */
    public static boolean updateIndex(Repository repository) {
        boolean success = false;
        try {
            FileBasedConfig config = getConfig(repository);
            config.load();
            // build a quick lookup of annotated tags
            Map<String, List<String>> tags = new HashMap<String, List<String>>();
            for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
                if (!tag.isAnnotatedTag()) {
                    // skip non-annotated tags
                    continue;
                }
                if (!tags.containsKey(tag.getObjectId())) {
                    tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
                }
                tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
            }
            List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
            // TODO detect branch deletion
            // walk through each branch
            for (RefModel branch : branches) {
                // determine last commit
                String branchName = branch.getName();
                String keyName = getBranchKey(branchName);
                String lastCommit = config.getString(CONF_BRANCH, null, keyName);
                List<RevCommit> revs;
                if (StringUtils.isEmpty(lastCommit)) {
                    // new branch/unindexed branch, get all commits on branch
                    revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
                } else {
                    // pre-existing branch, get changes since last commit
                    revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
                }
                // reverse the list of commits so we start with the first commit
                Collections.reverse(revs);
                for (RevCommit commit : revs) {
                    index(repository, branchName, commit);
                }
                // update the config
                config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
                config.setString(CONF_ALIAS, null, keyName, branchName);
                config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
                config.save();
            }
            success = true;
        } catch (Throwable t) {
            t.printStackTrace();
        }
        return success;
    }
    /**
@@ -446,8 +576,7 @@
    private static boolean index(Repository repository, Document doc) {
        try {
            String repositoryName = getName(repository);
            doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
                    Index.NOT_ANALYZED));
            doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
            IndexWriter writer = getIndexWriter(repository, false);
            writer.addDocument(doc);
            resetIndexSearcher(repository);
tests/com/gitblit/tests/IssuesTest.java
@@ -128,7 +128,7 @@
        // build a new Lucene index
        LuceneUtils.deleteIndex(repository);
        for (IssueModel anIssue : allIssues) {
            LuceneUtils.index(repository, anIssue, false);
            LuceneUtils.index(repository, anIssue);
        }
        List<SearchResult> hits = LuceneUtils.search("working", 10, repository);
        assertTrue(hits.size() > 0);
@@ -139,7 +139,7 @@
        change.comment("this is a test of reindexing an issue");
        IssueUtils.updateIssue(repository, issue.id, change);
        issue = IssueUtils.getIssue(repository, issue.id);
        LuceneUtils.index(repository, issue, true);
        LuceneUtils.index(repository, issue);
        // delete all issues
        for (IssueModel anIssue : allIssues) {
tests/com/gitblit/tests/LuceneUtilsTest.java
@@ -37,17 +37,17 @@
    public void testFullIndex() throws Exception {
        // reindex helloworld
        Repository repository = GitBlitSuite.getHelloworldRepository();
        LuceneUtils.index(repository);
        LuceneUtils.reindex(repository);
        repository.close();
        // reindex theoretical physics
        repository = GitBlitSuite.getTheoreticalPhysicsRepository();
        LuceneUtils.index(repository);
        LuceneUtils.reindex(repository);
        repository.close();
        // reindex JGit
        repository = GitBlitSuite.getJGitRepository();
        LuceneUtils.index(repository);
        LuceneUtils.reindex(repository);
        repository.close();
        LuceneUtils.close();