James Moger
2012-03-12 98b4edf75ffe0fd57caece27701250e42c901ff3
Revise indexed fields and improve Lucene page usability
7 files modified
164 ■■■■ changed files
resources/gitblit.css 28 ●●●●● patch | view | raw | blame | history
src/com/gitblit/models/SearchResult.java 22 ●●●● patch | view | raw | blame | history
src/com/gitblit/utils/IssueUtils.java 4 ●●●● patch | view | raw | blame | history
src/com/gitblit/utils/LuceneUtils.java 76 ●●●● patch | view | raw | blame | history
src/com/gitblit/wicket/pages/LucenePage.html 13 ●●●●● patch | view | raw | blame | history
src/com/gitblit/wicket/pages/LucenePage.java 9 ●●●●● patch | view | raw | blame | history
tests/com/gitblit/tests/LuceneUtilsTest.java 12 ●●●● patch | view | raw | blame | history
resources/gitblit.css
@@ -286,6 +286,34 @@
    color:#999;
}
/* Search result body: indented beneath the result's summary line. */
div.searchResult .body {
    padding-left:20px;
}

/* Container for the highlighted content fragment(s) of a search result. */
div.searchResult .fragment {
    padding: 7px 0;
}

/* Matched search term inside a fragment: yellow marker-style box. */
div.searchResult .highlight {
    background-color: #ffffaa;
    border: 1px solid #ffcc00;
    padding: 0 2px;
}

/* Small gray "..." badge used as the separator between fragments. */
div.searchResult .ellipses {
    font-family: sans-serif;
    font-size: 9px;
    font-weight: normal;
    background-color: #eee;
    border: 1px solid #ccc;
    padding: 0 3px;
    margin: 0px;
}

/* Keep preformatted fragment blocks vertically tight. */
div.searchResult pre {
    margin: 1px 0px;
}
div.header, div.commitHeader, table.repositories th {
    background-color:#e0e0e0;
    background-repeat:repeat-x;
src/com/gitblit/models/SearchResult.java
@@ -26,13 +26,17 @@
    public String summary;
    
    public String content;
    public String fragment;
    
    public String repository;
    
    public String branch;
    public String id;
    public String commitId;
    public String path;
    public String issueId;
    public List<String> tags;
    
@@ -42,9 +46,21 @@
    /**
     * Creates an empty search result. No fields are initialized here; the
     * public fields are expected to be assigned by whoever builds the result.
     */
    public SearchResult() {
    }
    public String getId() {
        switch (type) {
        case blob:
            return path;
        case commit:
            return commitId;
        case issue:
            return issueId;
        }
        return commitId;
    }
    @Override
    public String toString() {
        return  score + " : " + type.name() + " : " + repository + " : " + id + " (" + branch + ")";
        return  score + " : " + type.name() + " : " + repository + " : " + getId() + " (" + branch + ")";
    }
}
src/com/gitblit/utils/IssueUtils.java
@@ -425,7 +425,7 @@
     * Updates an issue in the gb-issues branch of the repository.
     * 
     * @param repository
     * @param issue
     * @param issueId
     * @param change
     * @return true if successful
     */
@@ -619,7 +619,7 @@
     * distributed merging.
     * 
     * @param repository
     * @param issue
     * @param issueId
     * @param change
     * @return true, if the change was committed
     */
src/com/gitblit/utils/LuceneUtils.java
@@ -35,6 +35,8 @@
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
@@ -55,6 +57,13 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
@@ -107,7 +116,9 @@
    private static final int INDEX_VERSION = 1;
    private static final String FIELD_OBJECT_TYPE = "type";
    private static final String FIELD_OBJECT_ID = "id";
    private static final String FIELD_ISSUE = "issue";
    private static final String FIELD_PATH = "path";
    private static final String FIELD_COMMIT = "commit";
    private static final String FIELD_BRANCH = "branch";
    private static final String FIELD_REPOSITORY = "repository";
    private static final String FIELD_SUMMARY = "summary";
@@ -361,7 +372,8 @@
                        doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
                        doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
                        doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                        doc.add(new Field(FIELD_OBJECT_ID, path, Store.YES, Index.ANALYZED));
                        doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                        doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
                        doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
                        doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
                        doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));                    
@@ -385,7 +397,7 @@
                            in.close();
                            byte[] content = os.toByteArray();
                            String str = new String(content, Constants.CHARACTER_ENCODING);
                            doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
                            doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                            os.reset();
                        }                            
                        
@@ -462,8 +474,9 @@
    protected static RevTree getTree(final RevWalk walk, final RevCommit commit)
            throws IOException {
        final RevTree tree = commit.getTree();
        if (tree != null)
        if (tree != null) {
            return tree;
        }
        walk.parseHeaders(commit);
        return commit.getTree();
    }
@@ -493,7 +506,7 @@
                        IndexWriter writer = getIndexWriter(repository, false);
                        writer.deleteDocuments(
                                new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
                                        FIELD_OBJECT_ID, issueId));
                                        FIELD_ISSUE, issueId));
                        writer.commit();
                        result.success = true;
                        return result;
@@ -512,7 +525,7 @@
            for (PathChangeModel path : changedPaths) {
                // delete the indexed blob
                writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
                        new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));
                        new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path));
                // re-index the blob
                if (!ChangeType.DELETE.equals(path.changeType)) {
@@ -522,7 +535,8 @@
                            Index.NOT_ANALYZED));
                    doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
                    doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
@@ -539,7 +553,7 @@
                        // read the blob content
                        String str = JGitUtils.getStringContent(repository, commit.getTree(),
                                path.path);
                        doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        writer.addDocument(doc);
                    }
                }
@@ -568,7 +582,7 @@
            // delete the old issue from the index, if exists
            IndexWriter writer = getIndexWriter(repository, false);
            writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
                    FIELD_OBJECT_ID, String.valueOf(issue.id)));
                    FIELD_ISSUE, String.valueOf(issue.id)));
            writer.commit();
            Document doc = createDocument(issue);
@@ -678,7 +692,7 @@
        Document doc = new Document();
        doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.ANALYZED));
        doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
        doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
        doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
                Store.YES, Field.Index.NO));
@@ -707,7 +721,7 @@
        Document doc = new Document();
        doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,
                Index.NOT_ANALYZED));
        doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
                Resolution.MINUTE), Store.YES, Index.NO));
        doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
@@ -746,14 +760,15 @@
        SearchResult result = new SearchResult();
        result.score = score;
        result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
        result.summary = doc.get(FIELD_SUMMARY);
        result.content = doc.get(FIELD_CONTENT);
        result.summary = doc.get(FIELD_SUMMARY);
        result.author = doc.get(FIELD_AUTHOR);
        result.committer = doc.get(FIELD_COMMITTER);
        result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
        result.repository = doc.get(FIELD_REPOSITORY);
        result.branch = doc.get(FIELD_BRANCH);
        result.id = doc.get(FIELD_OBJECT_ID);
        result.commitId = doc.get(FIELD_COMMIT);
        result.issueId = doc.get(FIELD_ISSUE);
        result.path = doc.get(FIELD_PATH);
        if (doc.get(FIELD_TAG) != null) {
            result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
        }
@@ -887,6 +902,8 @@
                int docId = hits[i].doc;
                Document doc = searcher.doc(docId);
                SearchResult result = createSearchResult(doc, hits[i].score);
                String content = doc.get(FIELD_CONTENT);
                result.fragment = getHighlightedFragment(analyzer, query, content);
                results.add(result);
            }
        } catch (Exception e) {
@@ -894,6 +911,37 @@
        }
        return new ArrayList<SearchResult>(results);
    }
    private static String getHighlightedFragment(Analyzer analyzer, Query query,
            String content) throws IOException, InvalidTokenOffsetsException {
        content = content == null ? "":StringUtils.escapeForHtml(content, false);
        TokenStream stream = TokenSources.getTokenStream("content", content, analyzer);
        QueryScorer scorer = new QueryScorer(query, "content");
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 150);
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(fragmenter);
        String [] fragments = highlighter.getBestFragments(stream, content, 5);
        if (ArrayUtils.isEmpty(fragments)) {
            return content;
        }
        if (fragments.length == 1) {
            return "<pre>" + fragments[0] + "</pre>";
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0, len = fragments.length; i < len; i++) {
            String fragment = fragments[i].trim();
            sb.append("<pre>");
            sb.append(fragment);
            sb.append("</pre>");
            if (i < len - 1) {
                sb.append("<span class=\"ellipses\">...</span><br/>");
            }
        }
        return sb.toString();
    }
    /**
     * Close all the index writers and searchers
src/com/gitblit/wicket/pages/LucenePage.html
@@ -24,8 +24,9 @@
                    <div style="margin-left:0px;" class="span4">
                        <div class="alert alert">
                            <b>type:</b> commit or blob<br/>
                            <b>id:</b> commit id or file path<br/>
                            <b>branch:</b><br/>
                            <b>commit:</b> commit id<br/>
                            <b>path:</b> blob path<br/>
                            <b>branch:</b> refs/heads/master<br/>
                            <b>author:</b><br/>
                            <b>committer:</b><br/>
                            <b>tag:</b> tag<br/>
@@ -42,8 +43,12 @@
    <div class="row-fluid">
    <div class="searchResult" wicket:id="searchResults">
        <div><i wicket:id="type"></i><span class="summary" wicket:id="summary"></span></div>
        <span class="author" wicket:id="author"></span> committed to <span class="repository" wicket:id="repository"></span>:<span class="branch" wicket:id="branch"></span><br/>
        <span class="date" wicket:id="date"></span>
        <div class="body">
            <div class="fragment" wicket:id="fragment"></div>
            <span class="author" wicket:id="author"></span> committed to <span class="repository" wicket:id="repository"></span>:<span class="branch" wicket:id="branch"></span><br/>
            <span class="date" wicket:id="date"></span>
            <hr/>
        </div>
    </div>
    </div>
</wicket:extend>
src/com/gitblit/wicket/pages/LucenePage.java
@@ -75,24 +75,25 @@
                    Label icon = WicketUtils.newIcon("type", "icon-refresh");
                    WicketUtils.setHtmlTooltip(icon, "commit");
                    item.add(icon);
                    item.add(new LinkPanel("summary", null, sr.summary, CommitPage.class, WicketUtils.newObjectParameter(sr.repository, sr.id)));
                    item.add(new LinkPanel("summary", null, sr.summary, CommitPage.class, WicketUtils.newObjectParameter(sr.repository, sr.commitId)));
                    break;
                }
                case blob: {
                    Label icon = WicketUtils.newIcon("type", "icon-file");
                    WicketUtils.setHtmlTooltip(icon, "blob");
                    item.add(icon);
                    item.add(new LinkPanel("summary", null, sr.id, BlobPage.class, WicketUtils.newPathParameter(sr.repository, sr.branch, sr.id)));
                    item.add(new LinkPanel("summary", null, sr.path, BlobPage.class, WicketUtils.newPathParameter(sr.repository, sr.branch, sr.path)));
                    break;
                }
                case issue: {
                    Label icon = WicketUtils.newIcon("type", "icon-file");
                    WicketUtils.setHtmlTooltip(icon, "issue");
                    item.add(icon);
                    item.add(new Label("summary", "issue: " + sr.id));
                    item.add(new Label("summary", "issue: " + sr.issueId));
                    break;
                }
                }
                item.add(new Label("fragment", sr.fragment).setEscapeModelStrings(false).setVisible(!StringUtils.isEmpty(sr.fragment)));
                item.add(new LinkPanel("repository", null, sr.repository, SummaryPage.class, WicketUtils.newRepositoryParameter(sr.repository)));
                item.add(new LinkPanel("branch", "branch", StringUtils.getRelativePath(Constants.R_HEADS, sr.branch), LogPage.class, WicketUtils.newObjectParameter(sr.repository, sr.branch)));
                item.add(new Label("author", sr.author));
@@ -140,7 +141,7 @@
            }
        };
        ListMultipleChoice<String> selections = new ListMultipleChoice<String>("repositories", repositories, GitBlit.self().getRepositoryList());
        selections.setMaxRows(11);
        selections.setMaxRows(10);
        form.add(selections);
        form.add(new TextField<String>("fragment", fragment));
        add(form);
tests/com/gitblit/tests/LuceneUtilsTest.java
@@ -41,9 +41,9 @@
        String name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),
                repository.getDirectory().getAbsolutePath());
        LuceneUtils.reindex(name, repository);
        SearchResult result = LuceneUtils.search("type:blob AND id:bit.bit", 1, repository).get(0);
        SearchResult result = LuceneUtils.search("type:blob AND path:bit.bit", 1, repository).get(0);
        assertEquals("Mike Donaghy", result.author);
        result = LuceneUtils.search("type:blob AND id:clipper.prg", 1, repository).get(0);
        result = LuceneUtils.search("type:blob AND path:clipper.prg", 1, repository).get(0);
        assertEquals("tinogomes", result.author);
        repository.close();
@@ -82,24 +82,24 @@
        // blob test
        results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);
        assertEquals(1, results.size());
        assertEquals("d.D", results.get(0).id);
        assertEquals("d.D", results.get(0).path);
        
        // 1 occurrence on the gh-pages branch
        repository = GitBlitSuite.getTheoreticalPhysicsRepository();
        results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);
        assertEquals(1, results.size());
        assertEquals("Ondrej Certik", results.get(0).author);
        assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);
        assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).commitId);
        assertEquals("refs/heads/gh-pages", results.get(0).branch);
        
        results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);
        assertEquals(4, results.size());
        
        // hash id tests
        results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);
        results = LuceneUtils.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);
        assertEquals(1, results.size());
        results = LuceneUtils.search("id:57c4f26f157*", 10, repository);
        results = LuceneUtils.search("commit:57c4f26f157*", 10, repository);
        assertEquals(1, results.size());
        repository.close();