| | |
| | | import java.io.File;
|
| | | import java.io.IOException;
|
| | | import java.io.InputStream;
|
| | | import java.lang.reflect.Method;
|
| | | import java.text.MessageFormat;
|
| | | import java.text.ParseException;
|
| | | import java.util.ArrayList;
|
| | |
| | | import com.gitblit.models.IssueModel.Attachment;
|
| | | import com.gitblit.models.PathModel.PathChangeModel;
|
| | | import com.gitblit.models.RefModel;
|
| | | import com.gitblit.models.RepositoryModel;
|
| | | import com.gitblit.models.SearchResult;
|
| | | import com.gitblit.utils.ArrayUtils;
|
| | | import com.gitblit.utils.IssueUtils;
|
| | |
| | | public class LuceneExecutor implements Runnable {
|
| | |
|
| | |
|
| | | private static final int INDEX_VERSION = 1;
|
| | | private static final int INDEX_VERSION = 2;
|
| | |
|
| | | private static final String FIELD_OBJECT_TYPE = "type";
|
| | | private static final String FIELD_ISSUE = "issue";
|
| | | private static final String FIELD_PATH = "path";
|
| | | private static final String FIELD_COMMIT = "commit";
|
| | | private static final String FIELD_BRANCH = "branch";
|
| | | private static final String FIELD_REPOSITORY = "repository";
|
| | | private static final String FIELD_SUMMARY = "summary";
|
| | | private static final String FIELD_CONTENT = "content";
|
| | | private static final String FIELD_AUTHOR = "author";
|
| | |
| | | "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
|
| | | "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
|
| | |
|
| | | private final Set<String> excludedBranches = new TreeSet<String>(
|
| | | Arrays.asList("/refs/heads/gb-issues"));
|
| | | |
| | | public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
|
| | | this.storedSettings = settings;
|
| | | this.repositoriesFolder = repositoriesFolder;
|
| | |
| | | return;
|
| | | }
|
| | |
|
| | | for (String repositoryName : GitBlit.self().getRepositoryList()) {
|
| | | Repository repository = GitBlit.self().getRepository(repositoryName);
|
| | | if (repository == null) {
|
| | | logger.warn(MessageFormat.format(
|
| | | "Lucene executor could not find repository {0}. Skipping.",
|
| | | repositoryName));
|
| | | continue;
|
| | | for (String repositoryName: GitBlit.self().getRepositoryList()) {
|
| | | RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
|
| | | if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
|
| | | Repository repository = GitBlit.self().getRepository(model.name);
|
| | | index(model, repository); |
| | | repository.close();
|
| | | System.gc();
|
| | | }
|
| | | // TODO allow repository to bypass Lucene indexing |
| | | index(repositoryName, repository);
|
| | | repository.close();
|
| | | System.gc();
|
| | | }
|
| | | }
|
| | |
|
| | |
| | | * @param repository
|
| | | * the repository object
|
| | | */
|
| | | protected void index(String name, Repository repository) {
|
| | | protected void index(RepositoryModel model, Repository repository) {
|
| | | try {
|
| | | if (JGitUtils.hasCommits(repository)) {
|
| | | if (shouldReindex(repository)) {
|
| | | // (re)build the entire index |
| | | IndexResult result = reindex(name, repository);
|
| | | |
| | | if (result.success) {
|
| | | if (result.commitCount > 0) {
|
| | | String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
|
| | | logger.info(MessageFormat.format(msg, name,
|
| | | result.commitCount, result.blobCount, result.branchCount, result.duration()));
|
| | | }
|
| | | } else {
|
| | | String msg = "Could not build {0} Lucene index!";
|
| | | logger.error(MessageFormat.format(msg, name));
|
| | | if (shouldReindex(repository)) {
|
| | | // (re)build the entire index
|
| | | IndexResult result = reindex(model, repository);
|
| | |
|
| | | if (result.success) {
|
| | | if (result.commitCount > 0) {
|
| | | String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
|
| | | logger.info(MessageFormat.format(msg, model.name, result.commitCount,
|
| | | result.blobCount, result.branchCount, result.duration()));
|
| | | }
|
| | | } else {
|
| | | // update the index with latest commits |
| | | IndexResult result = updateIndex(name, repository);
|
| | | if (result.success) {
|
| | | if (result.commitCount > 0) {
|
| | | String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
|
| | | logger.info(MessageFormat.format(msg, name,
|
| | | result.commitCount, result.blobCount, result.branchCount, result.duration()));
|
| | | }
|
| | | } else {
|
| | | String msg = "Could not update {0} Lucene index!";
|
| | | logger.error(MessageFormat.format(msg, name));
|
| | | }
|
| | | String msg = "Could not build {0} Lucene index!";
|
| | | logger.error(MessageFormat.format(msg, model.name));
|
| | | }
|
| | | } else {
|
| | | logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",
|
| | | name));
|
| | | // update the index with latest commits
|
| | | IndexResult result = updateIndex(model, repository);
|
| | | if (result.success) {
|
| | | if (result.commitCount > 0) {
|
| | | String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
|
| | | logger.info(MessageFormat.format(msg, model.name, result.commitCount,
|
| | | result.blobCount, result.branchCount, result.duration()));
|
| | | }
|
| | | } else {
|
| | | String msg = "Could not update {0} Lucene index!";
|
| | | logger.error(MessageFormat.format(msg, model.name));
|
| | | }
|
| | | }
|
| | | } catch (Throwable t) {
|
| | | logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t);
|
| | | logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
|
| | | }
|
| | | }
|
| | | |
| | | /**
|
| | | * Close the writer/searcher objects for a repository.
|
| | | * |
| | | * @param repositoryName
|
| | | */
|
| | | public void close(String repositoryName) {
|
| | | try {
|
| | | IndexWriter writer = writers.remove(repositoryName);
|
| | | if (writer != null) {
|
| | | writer.close();
|
| | | }
|
| | | } catch (Exception e) {
|
| | | logger.error("Failed to close index writer for " + repositoryName, e);
|
| | | }
|
| | |
|
| | | try {
|
| | | IndexSearcher searcher = searchers.remove(repositoryName);
|
| | | if (searcher != null) {
|
| | | searcher.close();
|
| | | }
|
| | | } catch (Exception e) {
|
| | | logger.error("Failed to close index searcher for " + repositoryName, e);
|
| | | }
|
| | | }
|
| | |
|
| | |
| | | }
|
| | | return name;
|
| | | }
|
| | | |
| | | /**
|
| | | * Get the tree associated with the given commit.
|
| | | *
|
| | | * @param walk
|
| | | * @param commit
|
| | | * @return tree
|
| | | * @throws IOException
|
| | | */
|
| | | protected RevTree getTree(final RevWalk walk, final RevCommit commit)
|
| | | throws IOException {
|
| | | final RevTree tree = commit.getTree();
|
| | | if (tree != null) {
|
| | | return tree;
|
| | | }
|
| | | walk.parseHeaders(commit);
|
| | | return commit.getTree();
|
| | | }
|
| | |
|
| | | /**
|
| | | * Construct a keyname from the branch.
|
| | |
| | | * @param repository
|
| | | * @return IndexResult
|
| | | */
|
| | | public IndexResult reindex(String repositoryName, Repository repository) {
|
| | | public IndexResult reindex(RepositoryModel model, Repository repository) {
|
| | | IndexResult result = new IndexResult();
|
| | | if (!deleteIndex(repositoryName)) {
|
| | | if (!deleteIndex(model.name)) {
|
| | | return result;
|
| | | }
|
| | | try {
|
| | | FileBasedConfig config = getConfig(repository);
|
| | | Set<String> indexedCommits = new TreeSet<String>();
|
| | | IndexWriter writer = getIndexWriter(repositoryName);
|
| | | IndexWriter writer = getIndexWriter(model.name);
|
| | | // build a quick lookup of tags
|
| | | Map<String, List<String>> tags = new HashMap<String, List<String>>();
|
| | | for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
|
| | |
| | |
|
| | | // walk through each branch
|
| | | for (RefModel branch : branches) {
|
| | | if (excludedBranches.contains(branch.getName())) {
|
| | |
|
| | | // if this branch is not specifically indexed then skip
|
| | | if (!model.indexedBranches.contains(branch.getName())) {
|
| | | continue;
|
| | | }
|
| | |
|
| | |
| | |
|
| | | Document doc = new Document();
|
| | | doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
|
| | | doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
|
| | |
| | | // index the tip commit object
|
| | | if (indexedCommits.add(tipId)) {
|
| | | Document doc = createDocument(tip, tags.get(tipId));
|
| | | doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
|
| | | writer.addDocument(doc);
|
| | | result.commitCount += 1;
|
| | |
| | | String hash = rev.getId().getName();
|
| | | if (indexedCommits.add(hash)) {
|
| | | Document doc = createDocument(rev, tags.get(hash));
|
| | | doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
|
| | | writer.addDocument(doc);
|
| | | result.commitCount += 1;
|
| | |
| | | for (IssueModel issue : issues) {
|
| | | result.issueCount++;
|
| | | Document doc = createDocument(issue);
|
| | | doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
|
| | | writer.addDocument(doc);
|
| | | }
|
| | | }
|
| | |
| | | // commit all changes and reset the searcher
|
| | | config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
|
| | | config.save();
|
| | | resetIndexSearcher(repositoryName);
|
| | | resetIndexSearcher(model.name);
|
| | | writer.commit();
|
| | | result.success();
|
| | | } catch (Exception e) {
|
| | | logger.error("Exception while reindexing " + repositoryName, e);
|
| | | logger.error("Exception while reindexing " + model.name, e);
|
| | | }
|
| | | return result;
|
| | | }
|
| | |
|
| | | /**
|
| | | * Get the tree associated with the given commit.
|
| | | *
|
| | | * @param walk
|
| | | * @param commit
|
| | | * @return tree
|
| | | * @throws IOException
|
| | | */
|
| | | protected RevTree getTree(final RevWalk walk, final RevCommit commit)
|
| | | throws IOException {
|
| | | final RevTree tree = commit.getTree();
|
| | | if (tree != null) {
|
| | | return tree;
|
| | | }
|
| | | walk.parseHeaders(commit);
|
| | | return commit.getTree();
|
| | | }
|
| | |
|
| | | /**
|
| | | * Incrementally update the index with the specified commit for the
|
| | | * repository.
|
| | |
| | | String branch, RevCommit commit) {
|
| | | IndexResult result = new IndexResult();
|
| | | try {
|
| | | if (excludedBranches.contains(branch)) {
|
| | | if (IssueUtils.GB_ISSUES.equals(branch)) {
|
| | | // index an issue
|
| | | String issueId = commit.getShortMessage().substring(2).trim();
|
| | | IssueModel issue = IssueUtils.getIssue(repository, issueId);
|
| | | if (issue == null) {
|
| | | // issue was deleted, remove from index
|
| | | deleteIssue(repositoryName, issueId);
|
| | | result.success = true;
|
| | | return result;
|
| | | }
|
| | | result.success = index(repositoryName, issue);
|
| | | result.issueCount++;
|
| | | return result;
|
| | | |
| | | }
|
| | | return result;
|
| | | }
|
| | | List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
|
| | | String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
|
| | | Resolution.MINUTE);
|
| | |
| | | Document doc = new Document();
|
| | | doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
|
| | | Index.NOT_ANALYZED));
|
| | | doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
|
| | | doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
|
| | |
| | | writer.commit();
|
| | |
|
| | | Document doc = createDocument(commit, null);
|
| | | doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
|
| | | result.commitCount++;
|
| | | result.success = index(repositoryName, doc);
|
| | | } catch (Exception e) {
|
| | |
| | | /**
|
| | | * Updates a repository index incrementally from the last indexed commits.
|
| | | *
|
| | | * @param repositoryName
|
| | | * @param model
|
| | | * @param repository
|
| | | * @return IndexResult
|
| | | */
|
| | | protected IndexResult updateIndex(String repositoryName, Repository repository) {
|
| | | protected IndexResult updateIndex(RepositoryModel model, Repository repository) {
|
| | | IndexResult result = new IndexResult();
|
| | | try {
|
| | | FileBasedConfig config = getConfig(repository);
|
| | |
| | | for (RefModel branch : branches) {
|
| | | String branchName = branch.getName();
|
| | |
|
| | | // determine if we should skip this branch
|
| | | if (!IssueUtils.GB_ISSUES.equals(branch)
|
| | | && !model.indexedBranches.contains(branch.getName())) {
|
| | | continue;
|
| | | }
|
| | | |
| | | // remove this branch from the deletedBranches set
|
| | | deletedBranches.remove(branchName);
|
| | |
|
| | |
| | | result.branchCount += 1;
|
| | | }
|
| | |
|
| | | // track the issue ids that we have already indexed
|
| | | Set<String> indexedIssues = new TreeSet<String>();
|
| | | |
| | | // reverse the list of commits so we start with the first commit
|
| | | Collections.reverse(revs);
|
| | | for (RevCommit commit : revs) {
|
| | | result.add(index(repositoryName, repository, branchName, commit)); |
| | | for (RevCommit commit : revs) { |
| | | if (IssueUtils.GB_ISSUES.equals(branch)) {
|
| | | // only index an issue once during updateIndex
|
| | | String issueId = commit.getShortMessage().substring(2).trim();
|
| | | if (indexedIssues.contains(issueId)) {
|
| | | continue;
|
| | | }
|
| | | indexedIssues.add(issueId);
|
| | | |
| | | IssueModel issue = IssueUtils.getIssue(repository, issueId);
|
| | | if (issue == null) {
|
| | | // issue was deleted, remove from index
|
| | | deleteIssue(model.name, issueId);
|
| | | } else {
|
| | | // issue was updated
|
| | | index(model.name, issue);
|
| | | result.issueCount++;
|
| | | }
|
| | | } else {
|
| | | // index a commit
|
| | | result.add(index(model.name, repository, branchName, commit));
|
| | | }
|
| | | }
|
| | |
|
| | | // update the config
|
| | |
| | | // unless a branch really was deleted and no longer exists
|
| | | if (deletedBranches.size() > 0) {
|
| | | for (String branch : deletedBranches) {
|
| | | IndexWriter writer = getIndexWriter(repositoryName);
|
| | | IndexWriter writer = getIndexWriter(model.name);
|
| | | writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
|
| | | writer.commit();
|
| | | }
|
| | | }
|
| | | result.success = true;
|
| | | } catch (Throwable t) {
|
| | | logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t);
|
| | | logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
|
| | | }
|
| | | return result;
|
| | | }
|
| | |
|
| | | |
| | | /**
|
| | | * Creates a Lucene document from an issue.
|
| | | *
|
| | |
| | | */
|
| | | private boolean index(String repositoryName, Document doc) {
|
| | | try {
|
| | | doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
|
| | | IndexWriter writer = getIndexWriter(repositoryName);
|
| | | writer.addDocument(doc);
|
| | | resetIndexSearcher(repositoryName);
|
| | |
| | | return false;
|
| | | }
|
| | |
|
| | | private SearchResult createSearchResult(Document doc, float score) throws ParseException {
|
| | | private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
|
| | | SearchResult result = new SearchResult();
|
| | | result.hitId = hitId;
|
| | | result.totalHits = totalHits;
|
| | | result.score = score;
|
| | | result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
|
| | | result.summary = doc.get(FIELD_SUMMARY);
|
| | | result.author = doc.get(FIELD_AUTHOR);
|
| | | result.committer = doc.get(FIELD_COMMITTER);
|
| | | result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
|
| | | result.repository = doc.get(FIELD_REPOSITORY);
|
| | | result.branch = doc.get(FIELD_BRANCH);
|
| | | result.commitId = doc.get(FIELD_COMMIT);
|
| | | result.issueId = doc.get(FIELD_ISSUE);
|
| | |
| | | *
|
| | | * @param text
|
| | | * if the text is null or empty, null is returned
|
| | | * @param maximumHits
|
| | | * the maximum number of hits to collect
|
| | | * @param page
|
| | | * the page number to retrieve. page is 1-indexed.
|
| | | * @param pageSize
|
| | | * the number of elements to return for this page
|
| | | * @param repositories
|
| | | * a list of repositories to search. if no repositories are
|
| | | * specified null is returned.
|
| | | * @return a list of SearchResults in order from highest to the lowest score
|
| | | *
|
| | | */
|
| | | public List<SearchResult> search(String text, int maximumHits, List<String> repositories) {
|
| | | public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
|
| | | if (ArrayUtils.isEmpty(repositories)) {
|
| | | return null;
|
| | | }
|
| | | return search(text, maximumHits, repositories.toArray(new String[0]));
|
| | | return search(text, page, pageSize, repositories.toArray(new String[0]));
|
| | | }
|
| | |
|
| | | /**
|
| | |
| | | *
|
| | | * @param text
|
| | | * if the text is null or empty, null is returned
|
| | | * @param maximumHits
|
| | | * the maximum number of hits to collect
|
| | | * @param page
|
| | | * the page number to retrieve. page is 1-indexed.
|
| | | * @param pageSize
|
| | | * the number of elements to return for this page
|
| | | * @param repositories
|
| | | * a list of repositories to search. if no repositories are
|
| | | * specified null is returned.
|
| | | * @return a list of SearchResults in order from highest to the lowest score
|
| | | *
|
| | | */ |
| | | public List<SearchResult> search(String text, int maximumHits, String... repositories) {
|
| | | */
|
| | | public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
|
| | | if (StringUtils.isEmpty(text)) {
|
| | | return null;
|
| | | }
|
| | |
| | | readers.add(repositoryIndex.getIndexReader());
|
| | | }
|
| | | IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
|
| | | MultiReader reader = new MultiReader(rdrs);
|
| | | MultiSourceReader reader = new MultiSourceReader(rdrs);
|
| | | searcher = new IndexSearcher(reader);
|
| | | }
|
| | | Query rewrittenQuery = searcher.rewrite(query);
|
| | | TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
|
| | | TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
|
| | | searcher.search(rewrittenQuery, collector);
|
| | | ScoreDoc[] hits = collector.topDocs().scoreDocs;
|
| | | int offset = Math.max(0, (page - 1) * pageSize);
|
| | | ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
|
| | | int totalHits = collector.getTotalHits();
|
| | | for (int i = 0; i < hits.length; i++) {
|
| | | int docId = hits[i].doc;
|
| | | Document doc = searcher.doc(docId);
|
| | | // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY
|
| | | SearchResult result = createSearchResult(doc, hits[i].score);
|
| | | SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
|
| | | if (repositories.length == 1) {
|
| | | // single repository search
|
| | | result.repository = repositories[0];
|
| | | } else {
|
| | | // multi-repository search
|
| | | MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
|
| | | int index = reader.getSourceIndex(docId);
|
| | | result.repository = repositories[index];
|
| | | }
|
| | | String content = doc.get(FIELD_CONTENT);
|
| | | result.fragment = getHighlightedFragment(analyzer, query, content, result);
|
| | | results.add(result);
|
| | |
| | | return (endTime - startTime)/1000f;
|
| | | }
|
| | | }
|
| | | |
| | | /**
|
| | | * Custom subclass of MultiReader to identify the source index for a given
|
| | | * doc id. This would not be necessary of there was a public method to
|
| | | * obtain this information.
|
| | | * |
| | | */
|
| | | private class MultiSourceReader extends MultiReader {
|
| | | |
| | | final Method method;
|
| | | |
| | | MultiSourceReader(IndexReader[] subReaders) {
|
| | | super(subReaders);
|
| | | Method m = null;
|
| | | try {
|
| | | m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
|
| | | m.setAccessible(true);
|
| | | } catch (Exception e) {
|
| | | logger.error("Error getting readerIndex method", e);
|
| | | }
|
| | | method = m;
|
| | | }
|
| | | |
| | | int getSourceIndex(int docId) {
|
| | | int index = -1;
|
| | | try {
|
| | | Object o = method.invoke(this, docId);
|
| | | index = (Integer) o;
|
| | | } catch (Exception e) {
|
| | | logger.error("Error getting source index", e);
|
| | | }
|
| | | return index;
|
| | | }
|
| | | }
|
| | | }
|