From 40538c57dd574d831d044cda50a8999941dc0a24 Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Sat, 25 Feb 2012 08:18:22 -0500 Subject: [PATCH] Use proper timezone to generate all metrics --- src/com/gitblit/utils/LuceneUtils.java | 164 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 148 insertions(+), 16 deletions(-) diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java index 4ca72f0..738382a 100644 --- a/src/com/gitblit/utils/LuceneUtils.java +++ b/src/com/gitblit/utils/LuceneUtils.java @@ -8,7 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; @@ -37,6 +38,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; @@ -49,6 +51,7 @@ import com.gitblit.models.IssueModel; import com.gitblit.models.IssueModel.Attachment; +import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; import com.gitblit.models.SearchResult; @@ -81,6 +84,7 @@ private static final String FIELD_OBJECT_TYPE = "type"; private static final String FIELD_OBJECT_ID = "id"; + private static final String FIELD_BRANCH = "branch"; private static final String FIELD_REPOSITORY = "repository"; private static final String FIELD_SUMMARY = 
"summary"; private static final String FIELD_CONTENT = "content"; @@ -90,13 +94,32 @@ private static final String FIELD_LABEL = "label"; private static final String FIELD_ATTACHMENT = "attachment"; - private static Set<String> excludes = new TreeSet<String>(Arrays.asList("7z", "arc", "arj", - "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", - "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); + private static Set<String> excludedExtensions = new TreeSet<String>( + Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc", + "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", + "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", + "xlsx", "zip")); + + private static Set<String> excludedBranches = new TreeSet<String>( + Arrays.asList(IssueUtils.GB_ISSUES)); /* same constant index() compares the branch against, so the exclusion and the issue check cannot drift apart */ private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>(); private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>(); + /** + * Returns the name of the repository. + * + * @param repository + * @return the repository name + */ + private static String getName(Repository repository) { + if (repository.isBare()) { + return repository.getDirectory().getName(); + } else { + return repository.getDirectory().getParentFile().getName(); + } + } + /** * Deletes the Lucene index for the specified repository. 
 * @@ -125,6 +148,7 @@ */ public static boolean index(Repository repository) { try { + String repositoryName = getName(repository); Set<String> indexedCommits = new TreeSet<String>(); IndexWriter writer = getIndexWriter(repository, true); // build a quick lookup of tags @@ -139,6 +163,10 @@ // walk through each branch List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); for (RefModel branch : branches) { + if (excludedBranches.contains(branch.getName())) { + continue; + } + String branchName = branch.getName(); RevWalk revWalk = new RevWalk(repository); RevCommit rev = revWalk.parseCommit(branch.getObjectId()); @@ -154,6 +182,10 @@ Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, + Index.NOT_ANALYZED)); doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); @@ -171,7 +203,7 @@ ext = name.substring(name.lastIndexOf('.') + 1); } - if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) { + if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { // read the blob content ObjectId entid = treeWalk.getObjectId(0); FileMode entmode = treeWalk.getFileMode(0); @@ -199,6 +231,10 @@ String head = rev.getId().getName(); if (indexedCommits.add(head)) { Document doc = createDocument(rev, tags.get(head)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, + Index.NOT_ANALYZED)); writer.addDocument(doc); } @@ -208,6 +244,10 @@ String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + 
Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, + Index.NOT_ANALYZED)); writer.addDocument(doc); } } @@ -221,6 +261,8 @@ List<IssueModel> issues = IssueUtils.getIssues(repository, null); for (IssueModel issue : issues) { Document doc = createDocument(issue); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); writer.addDocument(doc); } } @@ -240,11 +282,72 @@ * repository. * * @param repository + * @param branch + * the fully qualified branch name (e.g. refs/heads/master) * @param commit * @return true, if successful */ - public static boolean index(Repository repository, RevCommit commit) { - try { + public static boolean index(Repository repository, String branch, RevCommit commit) { + try { + if (excludedBranches.contains(branch)) { + if (IssueUtils.GB_ISSUES.equals(branch)) { + // index an issue + String issueId = commit.getShortMessage().substring(2).trim(); + IssueModel issue = IssueUtils.getIssue(repository, issueId); + return index(repository, issue, true); + } + return false; + } + List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); + String repositoryName = getName(repository); + String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, + Resolution.MINUTE); + IndexWriter writer = getIndexWriter(repository, false); + for (PathChangeModel path : changedPaths) { + // delete the indexed blob. deleteDocuments(Term...) 
removes documents that + // match ANY term, so all three terms are required through a BooleanQuery + org.apache.lucene.search.BooleanQuery blobQuery = new org.apache.lucene.search.BooleanQuery(); + blobQuery.add(new org.apache.lucene.search.TermQuery(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name())), Occur.MUST); + blobQuery.add(new org.apache.lucene.search.TermQuery(new Term(FIELD_BRANCH, branch)), Occur.MUST); + blobQuery.add(new org.apache.lucene.search.TermQuery(new Term(FIELD_OBJECT_ID, path.path)), Occur.MUST); + writer.deleteDocuments(blobQuery); + + // re-index the blob + if (!ChangeType.DELETE.equals(path.changeType)) { + Document doc = new Document(); + doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED)); + doc.add(new 
Field(FIELD_OBJECT_ID, path.path, Store.YES, + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); + doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_COMMITTER, commit.getCommitterIdent().getName(), + Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_LABEL, branch, Store.YES, Index.ANALYZED)); + + // determine extension to compare to the extension + // blacklist + String ext = null; + String name = path.name.toLowerCase(); + if (name.indexOf('.') > -1) { + ext = name.substring(name.lastIndexOf('.') + 1); + } + + if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { + // read the blob content + String str = JGitUtils.getStringContent(repository, + commit.getTree(), path.path); + doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED)); + writer.addDocument(doc); + } + } + } + writer.commit(); + Document doc = createDocument(commit, null); return index(repository, doc); } catch (Exception e) { @@ -292,6 +395,7 @@ doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.NOT_ANALYZED)); doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE), Store.YES, Field.Index.NO)); doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); @@ -344,6 +448,9 @@ */ private static boolean index(Repository repository, Document doc) { try { + String repositoryName = getName(repository); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); IndexWriter writer = getIndexWriter(repository, false); writer.addDocument(doc); resetIndexSearcher(repository); @@ -363,6 +470,8 @@ result.author = doc.get(FIELD_AUTHOR); result.committer = 
doc.get(FIELD_COMMITTER); result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); + result.repository = doc.get(FIELD_REPOSITORY); + result.branch = doc.get(FIELD_BRANCH); result.id = doc.get(FIELD_OBJECT_ID); if (doc.get(FIELD_LABEL) != null) { result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL)); @@ -437,17 +546,27 @@ } /** - * Search the repository for the given text or query + * Searches the specified repositories for the given text or query * - * @param repository * @param text - * @return a list of SearchResults + * if the text is null or empty, null is returned + * @param maximumHits + * the maximum number of hits to collect + * @param repositories + * a list of repositories to search. if no repositories are + * specified null is returned. + * @return a list of SearchResults in order from highest to the lowest score + * */ - public static List<SearchResult> search(Repository repository, String text) { + public static List<SearchResult> search(String text, int maximumHits, + Repository... 
repositories) { if (StringUtils.isEmpty(text)) { return null; } - Set<SearchResult> results = new HashSet<SearchResult>(); + if (repositories.length == 0) { + return null; + } + Set<SearchResult> results = new LinkedHashSet<SearchResult>(); StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); try { // default search checks summary and content @@ -461,10 +580,23 @@ qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); - IndexSearcher searcher = getIndexSearcher(repository); + IndexSearcher searcher; + if (repositories.length == 1) { + // single repository search + searcher = getIndexSearcher(repositories[0]); + } else { + // multiple repository search + List<IndexReader> readers = new ArrayList<IndexReader>(); + for (Repository repository : repositories) { + IndexSearcher repositoryIndex = getIndexSearcher(repository); + readers.add(repositoryIndex.getIndexReader()); + } + IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]); + MultiReader reader = new MultiReader(rdrs); + searcher = new IndexSearcher(reader); + } Query rewrittenQuery = searcher.rewrite(query); - - TopScoreDocCollector collector = TopScoreDocCollector.create(200, true); + TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true); searcher.search(rewrittenQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (int i = 0; i < hits.length; i++) { @@ -477,7 +609,7 @@ e.printStackTrace(); } return new ArrayList<SearchResult>(results); - } + } /** * Close all the index writers and searchers -- Gitblit v1.9.1