From 98b4edf75ffe0fd57caece27701250e42c901ff3 Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Mon, 12 Mar 2012 22:57:13 -0400
Subject: [PATCH] Revise indexed fields and improve Lucene page usability

---
 src/com/gitblit/models/SearchResult.java     |   22 ++++++-
 src/com/gitblit/utils/IssueUtils.java        |    4 
 src/com/gitblit/wicket/pages/LucenePage.html |   13 +++-
 src/com/gitblit/utils/LuceneUtils.java       |   76 ++++++++++++++++++++----
 tests/com/gitblit/tests/LuceneUtilsTest.java |   12 ++--
 resources/gitblit.css                        |   28 +++++++++
 src/com/gitblit/wicket/pages/LucenePage.java |    9 +-
 7 files changed, 131 insertions(+), 33 deletions(-)

diff --git a/resources/gitblit.css b/resources/gitblit.css
index 8f65986..0e8e753 100644
--- a/resources/gitblit.css
+++ b/resources/gitblit.css
@@ -286,6 +286,34 @@
 	color:#999;
 }
 
+div.searchResult .body { /* details block, inset below the type/summary line */
+	padding-left: 20px;
+}
+
+div.searchResult .fragment { /* highlighted snippet of the matched content */
+	padding: 7px 0;
+}
+
+div.searchResult .highlight { /* span wrapped around each matched term */
+	padding: 0 2px;
+	border: 1px solid #ffcc00;
+	background-color: #ffffaa;
+}
+
+div.searchResult .ellipses { /* "..." marker placed between fragments */
+	margin: 0;
+	padding: 0 3px;
+	border: 1px solid #ccc;
+	background-color: #eee;
+	font-family: sans-serif;
+	font-size: 9px;
+	font-weight: normal;
+}
+
+div.searchResult pre { /* keep stacked fragments tight */
+	margin: 1px 0;
+}
+
 div.header, div.commitHeader, table.repositories th {
 	background-color:#e0e0e0;
 	background-repeat:repeat-x;
diff --git a/src/com/gitblit/models/SearchResult.java b/src/com/gitblit/models/SearchResult.java
index c74229a..181eb0e 100644
--- a/src/com/gitblit/models/SearchResult.java
+++ b/src/com/gitblit/models/SearchResult.java
@@ -26,13 +26,17 @@
 
 	public String summary;
 	
-	public String content;
+	public String fragment;
 	
 	public String repository;
 	
 	public String branch;
 
-	public String id;
+	public String commitId;
+	
+	public String path;
+	
+	public String issueId;
 
 	public List<String> tags;
 	
@@ -42,9 +46,21 @@
 
 	public SearchResult() {
 	}
+	
+	/** Returns the id for the result type: path (blob), issueId (issue), else commitId. */
+	public String getId() {
+		switch (type) {
+		case blob:
+			return path;
+		case issue:
+			return issueId;
+		default:
+			return commitId;
+		}
+	}
 
 	@Override
 	public String toString() {
-		return  score + " : " + type.name() + " : " + repository + " : " + id + " (" + branch + ")";
+		return  score + " : " + type.name() + " : " + repository + " : " + getId() + " (" + branch + ")";
 	}
 }
\ No newline at end of file
diff --git a/src/com/gitblit/utils/IssueUtils.java b/src/com/gitblit/utils/IssueUtils.java
index eb3b347..cfd6200 100644
--- a/src/com/gitblit/utils/IssueUtils.java
+++ b/src/com/gitblit/utils/IssueUtils.java
@@ -425,7 +425,7 @@
 	 * Updates an issue in the gb-issues branch of the repository.
 	 * 
 	 * @param repository
-	 * @param issue
+	 * @param issueId
 	 * @param change
 	 * @return true if successful
 	 */
@@ -619,7 +619,7 @@
 	 * distributed merging.
 	 * 
 	 * @param repository
-	 * @param issue
+	 * @param issueId
 	 * @param change
 	 * @return true, if the change was committed
 	 */
diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java
index 3c2606b..1c24f28 100644
--- a/src/com/gitblit/utils/LuceneUtils.java
+++ b/src/com/gitblit/utils/LuceneUtils.java
@@ -35,6 +35,8 @@
 import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.DateTools.Resolution;
@@ -55,6 +57,13 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
@@ -107,7 +116,9 @@
 	private static final int INDEX_VERSION = 1;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
-	private static final String FIELD_OBJECT_ID = "id";
+	private static final String FIELD_ISSUE = "issue";
+	private static final String FIELD_PATH = "path";
+	private static final String FIELD_COMMIT = "commit";
 	private static final String FIELD_BRANCH = "branch";
 	private static final String FIELD_REPOSITORY = "repository";
 	private static final String FIELD_SUMMARY = "summary";
@@ -361,7 +372,8 @@
 						doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
 						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
-						doc.add(new Field(FIELD_OBJECT_ID, path, Store.YES, Index.ANALYZED));
+						doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
+						doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
 						doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
 						doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));					
@@ -385,7 +397,7 @@
 							in.close();
 							byte[] content = os.toByteArray();
 							String str = new String(content, Constants.CHARACTER_ENCODING);
-							doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
+							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
 							os.reset();
 						}							
 						
@@ -462,8 +474,9 @@
 	protected static RevTree getTree(final RevWalk walk, final RevCommit commit)
 			throws IOException {
 		final RevTree tree = commit.getTree();
-		if (tree != null)
+		if (tree != null) {
 			return tree;
+		}
 		walk.parseHeaders(commit);
 		return commit.getTree();
 	}
@@ -493,7 +506,7 @@
 						IndexWriter writer = getIndexWriter(repository, false);
 						writer.deleteDocuments(
 								new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
-										FIELD_OBJECT_ID, issueId));
+										FIELD_ISSUE, issueId));
 						writer.commit();
 						result.success = true;
 						return result;
@@ -512,7 +525,7 @@
 			for (PathChangeModel path : changedPaths) {
 				// delete the indexed blob
 				writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
-						new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));
+						new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path));
 
 				// re-index the blob
 				if (!ChangeType.DELETE.equals(path.changeType)) {
@@ -522,7 +535,8 @@
 							Index.NOT_ANALYZED));
 					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
-					doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
 					doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
 					doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
@@ -539,7 +553,7 @@
 						// read the blob content
 						String str = JGitUtils.getStringContent(repository, commit.getTree(),
 								path.path);
-						doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
+						doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
 						writer.addDocument(doc);
 					}
 				}
@@ -568,7 +582,7 @@
 			// delete the old issue from the index, if exists
 			IndexWriter writer = getIndexWriter(repository, false);
 			writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
-					FIELD_OBJECT_ID, String.valueOf(issue.id)));
+					FIELD_ISSUE, String.valueOf(issue.id)));
 			writer.commit();
 
 			Document doc = createDocument(issue);
@@ -678,7 +692,7 @@
 		Document doc = new Document();
 		doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
 				Field.Index.NOT_ANALYZED));
-		doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.ANALYZED));
+		doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
 		doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
 		doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
 				Store.YES, Field.Index.NO));
@@ -707,7 +721,7 @@
 		Document doc = new Document();
 		doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,
 				Index.NOT_ANALYZED));
-		doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.ANALYZED));
+		doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
 		doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
 				Resolution.MINUTE), Store.YES, Index.NO));
 		doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
@@ -746,14 +760,15 @@
 		SearchResult result = new SearchResult();
 		result.score = score;
 		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
-		result.summary = doc.get(FIELD_SUMMARY);
-		result.content = doc.get(FIELD_CONTENT);
+		result.summary = doc.get(FIELD_SUMMARY);		
 		result.author = doc.get(FIELD_AUTHOR);
 		result.committer = doc.get(FIELD_COMMITTER);
 		result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
 		result.repository = doc.get(FIELD_REPOSITORY);
 		result.branch = doc.get(FIELD_BRANCH);
-		result.id = doc.get(FIELD_OBJECT_ID);
+		result.commitId = doc.get(FIELD_COMMIT);
+		result.issueId = doc.get(FIELD_ISSUE);
+		result.path = doc.get(FIELD_PATH);
 		if (doc.get(FIELD_TAG) != null) {
 			result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
 		}
@@ -887,6 +902,8 @@
 				int docId = hits[i].doc;
 				Document doc = searcher.doc(docId);
 				SearchResult result = createSearchResult(doc, hits[i].score);
+				String content = doc.get(FIELD_CONTENT);
+				result.fragment = getHighlightedFragment(analyzer, query, content);
 				results.add(result);
 			}
 		} catch (Exception e) {
@@ -894,6 +911,37 @@
 		}
 		return new ArrayList<SearchResult>(results);
 	}
+	
+	/**
+	 * Builds the html snippet displayed for a search hit: up to 5 fragments of
+	 * the content that best match the query, with matches in "highlight" spans.
+	 * @return the escaped content if nothing matches, otherwise every fragment
+	 *         in its own pre element, separated by "ellipses" spans
+	 */
+	private static String getHighlightedFragment(Analyzer analyzer, Query query,
+			String content) throws IOException, InvalidTokenOffsetsException {
+		// escape before highlighting; the page renders the fragment as raw html
+		content = content == null ? "" : StringUtils.escapeForHtml(content, false);
+		TokenStream stream = TokenSources.getTokenStream("content", content, analyzer);
+		QueryScorer scorer = new QueryScorer(query, "content");
+		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 150);
+		Highlighter highlighter = new Highlighter(
+				new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), scorer);
+		highlighter.setTextFragmenter(fragmenter);
+		String[] fragments = highlighter.getBestFragments(stream, content, 5);
+		if (ArrayUtils.isEmpty(fragments)) {
+			return content;
+		}
+		StringBuilder sb = new StringBuilder();
+		for (int i = 0; i < fragments.length; i++) {
+			if (i > 0) {
+				sb.append("<span class=\"ellipses\">...</span><br/>");
+			}
+			// trim every fragment, a lone one included, so all render alike
+			sb.append("<pre>").append(fragments[i].trim()).append("</pre>");
+		}
+		return sb.toString();
+	}
 
 	/**
 	 * Close all the index writers and searchers
diff --git a/src/com/gitblit/wicket/pages/LucenePage.html b/src/com/gitblit/wicket/pages/LucenePage.html
index ff91116..6999d78 100644
--- a/src/com/gitblit/wicket/pages/LucenePage.html
+++ b/src/com/gitblit/wicket/pages/LucenePage.html
@@ -24,8 +24,9 @@
 					<div style="margin-left:0px;" class="span4">
 						<div class="alert alert">
 							<b>type:</b> commit or blob<br/>
-							<b>id:</b> commit id or file path<br/>
-							<b>branch:</b><br/>
+							<b>commit:</b> commit id<br/>
+							<b>path:</b> blob path<br/>
+							<b>branch:</b> refs/heads/master<br/>
 							<b>author:</b><br/>
 							<b>committer:</b><br/>
 							<b>tag:</b> tag<br/>
@@ -42,8 +43,12 @@
 	<div class="row-fluid">
 	<div class="searchResult" wicket:id="searchResults">
 		<div><i wicket:id="type"></i><span class="summary" wicket:id="summary"></span></div>
-		<span class="author" wicket:id="author"></span> committed to <span class="repository" wicket:id="repository"></span>:<span class="branch" wicket:id="branch"></span><br/>
-		<span class="date" wicket:id="date"></span>
+		<div class="body">
+			<div class="fragment" wicket:id="fragment"></div>
+			<span class="author" wicket:id="author"></span> committed to <span class="repository" wicket:id="repository"></span>:<span class="branch" wicket:id="branch"></span><br/>
+			<span class="date" wicket:id="date"></span>
+			<hr/>
+		</div>
 	</div>
 	</div>
 </wicket:extend>
diff --git a/src/com/gitblit/wicket/pages/LucenePage.java b/src/com/gitblit/wicket/pages/LucenePage.java
index c269c8e..f72aeda 100644
--- a/src/com/gitblit/wicket/pages/LucenePage.java
+++ b/src/com/gitblit/wicket/pages/LucenePage.java
@@ -75,24 +75,25 @@
 					Label icon = WicketUtils.newIcon("type", "icon-refresh");
 					WicketUtils.setHtmlTooltip(icon, "commit");
 					item.add(icon);
-					item.add(new LinkPanel("summary", null, sr.summary, CommitPage.class, WicketUtils.newObjectParameter(sr.repository, sr.id)));
+					item.add(new LinkPanel("summary", null, sr.summary, CommitPage.class, WicketUtils.newObjectParameter(sr.repository, sr.commitId)));
 					break;
 				}
 				case blob: {
 					Label icon = WicketUtils.newIcon("type", "icon-file");
 					WicketUtils.setHtmlTooltip(icon, "blob");
 					item.add(icon);
-					item.add(new LinkPanel("summary", null, sr.id, BlobPage.class, WicketUtils.newPathParameter(sr.repository, sr.branch, sr.id)));
+					item.add(new LinkPanel("summary", null, sr.path, BlobPage.class, WicketUtils.newPathParameter(sr.repository, sr.branch, sr.path)));
 					break;
 				}
 				case issue: {
 					Label icon = WicketUtils.newIcon("type", "icon-file");
 					WicketUtils.setHtmlTooltip(icon, "issue");
 					item.add(icon);
-					item.add(new Label("summary", "issue: " + sr.id));
+					item.add(new Label("summary", "issue: " + sr.issueId));
 					break;
 				}
 				}
+				item.add(new Label("fragment", sr.fragment).setEscapeModelStrings(false).setVisible(!StringUtils.isEmpty(sr.fragment)));
 				item.add(new LinkPanel("repository", null, sr.repository, SummaryPage.class, WicketUtils.newRepositoryParameter(sr.repository)));
 				item.add(new LinkPanel("branch", "branch", StringUtils.getRelativePath(Constants.R_HEADS, sr.branch), LogPage.class, WicketUtils.newObjectParameter(sr.repository, sr.branch)));
 				item.add(new Label("author", sr.author));
@@ -140,7 +141,7 @@
 			}
 		};
 		ListMultipleChoice<String> selections = new ListMultipleChoice<String>("repositories", repositories, GitBlit.self().getRepositoryList());
-		selections.setMaxRows(11);
+		selections.setMaxRows(10);
 		form.add(selections);
 		form.add(new TextField<String>("fragment", fragment));
 		add(form);
diff --git a/tests/com/gitblit/tests/LuceneUtilsTest.java b/tests/com/gitblit/tests/LuceneUtilsTest.java
index e775458..01858f5 100644
--- a/tests/com/gitblit/tests/LuceneUtilsTest.java
+++ b/tests/com/gitblit/tests/LuceneUtilsTest.java
@@ -41,9 +41,9 @@
 		String name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),
 				repository.getDirectory().getAbsolutePath());
 		LuceneUtils.reindex(name, repository);
-		SearchResult result = LuceneUtils.search("type:blob AND id:bit.bit", 1, repository).get(0);		
+		SearchResult result = LuceneUtils.search("type:blob AND path:bit.bit", 1, repository).get(0);		
 		assertEquals("Mike Donaghy", result.author);
-		result = LuceneUtils.search("type:blob AND id:clipper.prg", 1, repository).get(0);		
+		result = LuceneUtils.search("type:blob AND path:clipper.prg", 1, repository).get(0);		
 		assertEquals("tinogomes", result.author);
 		repository.close();
 
@@ -82,24 +82,24 @@
 		// blob test
 		results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);
 		assertEquals(1, results.size());
-		assertEquals("d.D", results.get(0).id);
+		assertEquals("d.D", results.get(0).path);
 		
 		// 1 occurrence on the gh-pages branch
 		repository = GitBlitSuite.getTheoreticalPhysicsRepository();
 		results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);
 		assertEquals(1, results.size());
 		assertEquals("Ondrej Certik", results.get(0).author);
-		assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);
+		assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).commitId);
 		assertEquals("refs/heads/gh-pages", results.get(0).branch);
 		
 		results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);
 		assertEquals(4, results.size());
 		
 		// hash id tests
-		results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);
+		results = LuceneUtils.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);
 		assertEquals(1, results.size());
 
-		results = LuceneUtils.search("id:57c4f26f157*", 10, repository);
+		results = LuceneUtils.search("commit:57c4f26f157*", 10, repository);
 		assertEquals(1, results.size());
 
 		repository.close();

--
Gitblit v1.9.1