From f22a0633d08e38ac4bf92b5165a708e11b4d6598 Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Wed, 03 Oct 2012 17:31:37 -0400
Subject: [PATCH] Implemented support for toggling User.canFork in Manager

---
 src/com/gitblit/LuceneExecutor.java |  528 +++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 320 insertions(+), 208 deletions(-)

diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index 0b90b74..42155f4 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -25,7 +25,6 @@
 import java.text.MessageFormat;
 import java.text.ParseException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -70,10 +69,12 @@
 import org.apache.lucene.util.Version;
 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
 import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.ObjectReader;
 import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.lib.RepositoryCache.FileKey;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevTree;
 import org.eclipse.jgit.revwalk.RevWalk;
@@ -89,6 +90,7 @@
 import com.gitblit.models.IssueModel.Attachment;
 import com.gitblit.models.PathModel.PathChangeModel;
 import com.gitblit.models.RefModel;
+import com.gitblit.models.RepositoryModel;
 import com.gitblit.models.SearchResult;
 import com.gitblit.utils.ArrayUtils;
 import com.gitblit.utils.IssueUtils;
@@ -104,7 +106,7 @@
 public class LuceneExecutor implements Runnable {
 	
 		
-	private static final int INDEX_VERSION = 2;
+	private static final int INDEX_VERSION = 5;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_ISSUE = "issue";
@@ -137,51 +139,47 @@
 	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
 	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
 	
-	private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
-			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
-			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
-
-	private final Set<String> excludedBranches = new TreeSet<String>(
-			Arrays.asList("/refs/heads/gb-issues"));
+	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
+	private Set<String> excludedExtensions;
 	
 	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
 		this.storedSettings = settings;
 		this.repositoriesFolder = repositoriesFolder;
+		String exts = luceneIgnoreExtensions;
+		if (settings != null) {
+			exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
+		}
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 	}
 
 	/**
-	 * Indicates if the Lucene executor can index repositories.
-	 * 
-	 * @return true if the Lucene executor is ready to index repositories
-	 */
-	public boolean isReady() {
-		return storedSettings.getBoolean(Keys.lucene.enable, false);
-	}
-
-	/**
-	 * Run is executed by the gitblit executor service at whatever frequency
-	 * is specified in the settings.  Because this is called by an executor
-	 * service, calls will queue - i.e. there can never be concurrent execution
-	 * of repository index updates.
+	 * Run is executed by the Gitblit executor service.  Because this is called 
+	 * by an executor service, calls will queue - i.e. there can never be
+	 * concurrent execution of repository index updates.
 	 */
 	@Override
 	public void run() {
-		if (!isReady()) {
+		if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
+			// Lucene indexing is disabled
 			return;
 		}
+		// reload the excluded extensions
+		String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 
-		for (String repositoryName : GitBlit.self().getRepositoryList()) {
-			Repository repository = GitBlit.self().getRepository(repositoryName);
-			if (repository == null) {
-				logger.warn(MessageFormat.format(
-						"Lucene executor could not find repository {0}. Skipping.",
-						repositoryName));
-				continue;
+		for (String repositoryName: GitBlit.self().getRepositoryList()) {
+			RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
+			if (model != null && model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
+				Repository repository = GitBlit.self().getRepository(model.name);
+				if (repository == null) {
+					// the repository could not be opened; skip it rather than fail with an NPE
+					logger.warn(MessageFormat.format("Lucene executor could not find repository {0}. Skipping.", model.name));
+					continue;
+				}
+				index(model, repository);
+				repository.close();
+				System.gc();
 			}
-			// TODO allow repository to bypass Lucene indexing				
-			index(repositoryName, repository);
-			repository.close();
-			System.gc();
 		}
 	}
 
@@ -194,43 +187,38 @@
 	 * @param repository
 	 *            the repository object
 	 */
-	protected void index(String name, Repository repository) {
+	private void index(RepositoryModel model, Repository repository) {
 		try {
-			if (JGitUtils.hasCommits(repository)) {
-				if (shouldReindex(repository)) {
-					// (re)build the entire index					
-					IndexResult result = reindex(name, repository);
-					
-					if (result.success) {
-						if (result.commitCount > 0) {
-							String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
-							logger.info(MessageFormat.format(msg, name,
-									result.commitCount, result.blobCount, result.branchCount, result.duration()));
-						}
-					} else {
-						String msg = "Could not build {0} Lucene index!";
-						logger.error(MessageFormat.format(msg, name));
+			if (shouldReindex(repository)) {
+				// (re)build the entire index
+				IndexResult result = reindex(model, repository);
+
+				if (result.success) {
+					if (result.commitCount > 0) {
+						String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
+						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
+								result.blobCount, result.branchCount, result.duration()));
 					}
 				} else {
-					// update the index with latest commits					
-					IndexResult result = updateIndex(name, repository);
-					if (result.success) {
-						if (result.commitCount > 0) {
-							String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
-							logger.info(MessageFormat.format(msg, name,
-									result.commitCount, result.blobCount, result.branchCount, result.duration()));
-						}
-					} else {
-						String msg = "Could not update {0} Lucene index!";
-						logger.error(MessageFormat.format(msg, name));
-					}
+					String msg = "Could not build {0} Lucene index!";
+					logger.error(MessageFormat.format(msg, model.name));
 				}
 			} else {
-				logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",
-						name));
+				// update the index with latest commits
+				IndexResult result = updateIndex(model, repository);
+				if (result.success) {
+					if (result.commitCount > 0) {
+						String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
+						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
+								result.blobCount, result.branchCount, result.duration()));
+					}
+				} else {
+					String msg = "Could not update {0} Lucene index!";
+					logger.error(MessageFormat.format(msg, model.name));
+				}
 			}
 		} catch (Throwable t) {
-			logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t);
+			logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
 		}
 	}
 	
@@ -239,7 +227,16 @@
 	 * 
 	 * @param repositoryName
 	 */
-	public void close(String repositoryName) {
+	public synchronized void close(String repositoryName) {
+		try {
+			IndexSearcher searcher = searchers.remove(repositoryName);
+			if (searcher != null) {
+				searcher.getIndexReader().close();
+			}
+		} catch (Exception e) {
+			logger.error("Failed to close index searcher for " + repositoryName, e);
+		}
+		
 		try {
 			IndexWriter writer = writers.remove(repositoryName);
 			if (writer != null) {
@@ -247,23 +244,14 @@
 			}
 		} catch (Exception e) {
 			logger.error("Failed to close index writer for " + repositoryName, e);
-		}
-
-		try {
-			IndexSearcher searcher = searchers.remove(repositoryName);
-			if (searcher != null) {
-				searcher.close();
-			}
-		} catch (Exception e) {
-			logger.error("Failed to close index searcher for " + repositoryName, e);
-		}
+		}		
 	}
 
 	/**
 	 * Close all Lucene indexers.
 	 * 
 	 */
-	public void close() {
+	public synchronized void close() {
 		// close all writers
 		for (String writer : writers.keySet()) {
 			try {
@@ -277,7 +265,7 @@
 		// close all searchers
 		for (String searcher : searchers.keySet()) {
 			try {
-				searchers.get(searcher).close();
+				searchers.get(searcher).getIndexReader().close();
 			} catch (Throwable t) {
 				logger.error("Failed to close Lucene searcher for " + searcher, t);
 			}
@@ -294,20 +282,11 @@
 	 */
 	public boolean deleteIndex(String repositoryName) {
 		try {
-			// remove the repository index writer from the cache and close it
-			IndexWriter writer = writers.remove(repositoryName);
-			if (writer != null) {
-				writer.close();
-				writer = null;
-			}
-			// remove the repository index searcher from the cache and close it
-			IndexSearcher searcher = searchers.remove(repositoryName);
-			if (searcher != null) {
-				searcher.close();
-				searcher = null;
-			}
+			// close any open writer/searcher
+			close(repositoryName);
+
 			// delete the index folder
-			File repositoryFolder = new File(repositoriesFolder, repositoryName);
+			File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
 			File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
 			if (luceneIndex.exists()) {
 				org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
@@ -323,7 +302,6 @@
 			throw new RuntimeException(e);
 		}
 	}
-
 	
 	/**
 	 * Returns the author for the commit, if this information is available.
@@ -369,7 +347,7 @@
 	 * @return tree
 	 * @throws IOException
 	 */
-	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
+	private RevTree getTree(final RevWalk walk, final RevCommit commit)
 			throws IOException {
 		final RevTree tree = commit.getTree();
 		if (tree != null) {
@@ -409,7 +387,7 @@
 	 * @param repository
 	 * @return true of the on-disk index format is different than INDEX_VERSION
 	 */
-	protected boolean shouldReindex(Repository repository) {
+	private boolean shouldReindex(Repository repository) {
 		try {
 			FileBasedConfig config = getConfig(repository);
 			config.load();
@@ -430,15 +408,16 @@
 	 * @param repository
 	 * @return IndexResult
 	 */
-	public IndexResult reindex(String repositoryName, Repository repository) {
-		IndexResult result = new IndexResult();
-		if (!deleteIndex(repositoryName)) {
+	public IndexResult reindex(RepositoryModel model, Repository repository) {
+		IndexResult result = new IndexResult();		
+		if (!deleteIndex(model.name)) {
 			return result;
 		}
-		try {			
+		try {
+			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
 			FileBasedConfig config = getConfig(repository);
 			Set<String> indexedCommits = new TreeSet<String>();
-			IndexWriter writer = getIndexWriter(repositoryName);
+			IndexWriter writer = getIndexWriter(model.name);
 			// build a quick lookup of tags
 			Map<String, List<String>> tags = new HashMap<String, List<String>>();
 			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
@@ -470,7 +449,7 @@
 			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
 			for (RefModel branch :  branches) {
 				if (branch.getObjectId().equals(defaultBranchId)) {
-					defaultBranch = branch;					
+					defaultBranch = branch;
 					break;
 				}
 			}
@@ -479,7 +458,23 @@
 			
 			// walk through each branch
 			for (RefModel branch : branches) {
-				if (excludedBranches.contains(branch.getName())) {
+
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch.getName())) {
+					// skip the GB_ISSUES branch because it is indexed later
+					// note: this is different than updateIndex
+					indexBranch = false;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
+				// if this branch is not specifically indexed then skip
+				if (!indexBranch) {
 					continue;
 				}
 
@@ -499,7 +494,10 @@
 				
 				Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
 				while (treeWalk.next()) {
-					paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					// ensure path is not in a submodule
+					if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
+						paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					}
 				}				
 
 				ByteArrayOutputStream os = new ByteArrayOutputStream();
@@ -562,14 +560,14 @@
 						// index the blob content
 						if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {							
 							ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
-							InputStream in = ldr.openStream();							
+							InputStream in = ldr.openStream();						
 							int n;
 							while ((n = in.read(tmp)) > 0) {
 								os.write(tmp, 0, n);
 							}
 							in.close();
 							byte[] content = os.toByteArray();
-							String str = new String(content, Constants.CHARACTER_ENCODING);
+							String str = StringUtils.decodeString(content, encodings);							
 							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
 							os.reset();
 						}							
@@ -624,11 +622,11 @@
 			// commit all changes and reset the searcher
 			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
 			config.save();
-			resetIndexSearcher(repositoryName);
 			writer.commit();
+			resetIndexSearcher(model.name);
 			result.success();
 		} catch (Exception e) {
-			logger.error("Exception while reindexing " + repositoryName, e);
+			logger.error("Exception while reindexing " + model.name, e);
 		}
 		return result;
 	}
@@ -648,31 +646,17 @@
 			String branch, RevCommit commit) {
 		IndexResult result = new IndexResult();
 		try {
-			if (excludedBranches.contains(branch)) {
-				if (IssueUtils.GB_ISSUES.equals(branch)) {
-					// index an issue
-					String issueId = commit.getShortMessage().substring(2).trim();
-					IssueModel issue = IssueUtils.getIssue(repository, issueId);
-					if (issue == null) {
-						// issue was deleted, remove from index
-						deleteIssue(repositoryName, issueId);
-						result.success = true;
-						return result;
-					}
-					result.success = index(repositoryName, issue);
-					result.issueCount++;
-					return result;
-					
-				}
-				return result;
-			}
+			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
 			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
 			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
 					Resolution.MINUTE);
 			IndexWriter writer = getIndexWriter(repositoryName);
 			for (PathChangeModel path : changedPaths) {
+				if (path.isSubmodule()) {
+					continue;
+				}
 				// delete the indexed blob
-				deleteBlob(repositoryName, branch, path.path);
+				deleteBlob(repositoryName, branch, path.name);
 
 				// re-index the blob
 				if (!ChangeType.DELETE.equals(path.changeType)) {
@@ -698,15 +682,27 @@
 					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
 						// read the blob content
 						String str = JGitUtils.getStringContent(repository, commit.getTree(),
-								path.path);
-						doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
-						writer.addDocument(doc);
+								path.path, encodings);
+						if (str != null) {
+							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
+							writer.addDocument(doc);
+						}
 					}
 				}
 			}
 			writer.commit();
-
-			Document doc = createDocument(commit, null);
+			
+			// get any annotated commit tags
+			List<String> commitTags = new ArrayList<String>();
+			for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
+				if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
+					commitTags.add(ref.displayName);
+				}
+			}
+			
+			// create and write the Lucene document
+			Document doc = createDocument(commit, commitTags);
+			doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
 			result.commitCount++;
 			result.success = index(repositoryName, doc);
 		} catch (Exception e) {
@@ -741,8 +737,9 @@
 	 * @param repositoryName
 	 * @param issueId
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteIssue(String repositoryName, String issueId) throws Exception {
+	private boolean deleteIssue(String repositoryName, String issueId) throws Exception {
 		BooleanQuery query = new BooleanQuery();
 		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
 		query.add(new TermQuery(objectTerm), Occur.MUST);
@@ -750,8 +747,17 @@
 		query.add(new TermQuery(issueidTerm), Occur.MUST);
 		
 		IndexWriter writer = getIndexWriter(repositoryName);
+		int numDocsBefore = writer.numDocs();
 		writer.deleteDocuments(query);
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 	
 	/**
@@ -761,29 +767,39 @@
 	 * @param branch
 	 * @param path
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
-		BooleanQuery query = new BooleanQuery();
-		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());
-		query.add(new TermQuery(objectTerm), Occur.MUST);
-		Term branchTerm = new Term(FIELD_BRANCH, branch);
-		query.add(new TermQuery(branchTerm), Occur.MUST);
-		Term pathTerm = new Term(FIELD_PATH, path);
-		query.add(new TermQuery(pathTerm), Occur.MUST);
+	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
+		String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
+		String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
 		
+		BooleanQuery query = new BooleanQuery();
+		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
+		QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
+		query.add(qp.parse(q), Occur.MUST);
+
 		IndexWriter writer = getIndexWriter(repositoryName);
-		writer.deleteDocuments(query);
+		int numDocsBefore = writer.numDocs();
+		writer.deleteDocuments(query);		
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 
 	/**
 	 * Updates a repository index incrementally from the last indexed commits.
 	 * 
-	 * @param repositoryName
+	 * @param model
 	 * @param repository
 	 * @return IndexResult
 	 */
-	protected IndexResult updateIndex(String repositoryName, Repository repository) {
+	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
 		IndexResult result = new IndexResult();
 		try {
 			FileBasedConfig config = getConfig(repository);
@@ -811,14 +827,55 @@
 				deletedBranches.add(branch);
 			}
 
-			// walk through each branches
+			// get the local branches
 			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+			
+			// sort them by most recently updated
+			Collections.sort(branches, new Comparator<RefModel>() {
+				@Override
+				public int compare(RefModel ref1, RefModel ref2) {
+					return ref2.getDate().compareTo(ref1.getDate());
+				}
+			});
+						
+			// reorder default branch to first position
+			RefModel defaultBranch = null;
+			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
+			for (RefModel branch :  branches) {
+				if (branch.getObjectId().equals(defaultBranchId)) {
+					defaultBranch = branch;
+					break;
+				}
+			}
+			if (defaultBranch != null && branches.remove(defaultBranch)) {
+				branches.add(0, defaultBranch);
+			}
+			// walk through each branch
 			for (RefModel branch : branches) {
 				String branchName = branch.getName();
 
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branchName)) {
+					// update issues modified on the GB_ISSUES branch
+					// note: this is different than reindex
+					indexBranch = true;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
+				// if this branch is not specifically indexed then skip
+				if (!indexBranch) {
+					continue;
+				}
+				
 				// remove this branch from the deletedBranches set
 				deletedBranches.remove(branchName);
-
+				
 				// determine last commit
 				String keyName = getBranchKey(branchName);
 				String lastCommit = config.getString(CONF_BRANCH, null, keyName);
@@ -836,10 +893,35 @@
 					result.branchCount += 1;
 				}
 				
+				// track the issue ids that we have already indexed
+				Set<String> indexedIssues = new TreeSet<String>();
+				
 				// reverse the list of commits so we start with the first commit				
 				Collections.reverse(revs);
-				for (RevCommit commit : revs) {
-					result.add(index(repositoryName, repository, branchName, commit));					
+				for (RevCommit commit : revs) {					
+					if (IssueUtils.GB_ISSUES.equals(branchName)) {
+						// only index an issue once during updateIndex
+						String issueId = commit.getShortMessage().substring(2).trim();
+						if (indexedIssues.contains(issueId)) {
+							continue;
+						}
+						indexedIssues.add(issueId);
+						
+						IssueModel issue = IssueUtils.getIssue(repository, issueId);
+						if (issue == null) {
+							// issue was deleted, remove from index
+							if (!deleteIssue(model.name, issueId)) {
+								logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId));
+							}
+						} else {
+							// issue was updated
+							index(model.name, issue);
+							result.issueCount++;
+						}
+					} else {
+						// index a commit
+						result.add(index(model.name, repository, branchName, commit));
+					}
 				}
 
 				// update the config
@@ -853,14 +935,14 @@
 			// unless a branch really was deleted and no longer exists
 			if (deletedBranches.size() > 0) {
 				for (String branch : deletedBranches) {
-					IndexWriter writer = getIndexWriter(repositoryName);
+					IndexWriter writer = getIndexWriter(model.name);
 					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
 					writer.commit();
 				}
 			}
 			result.success = true;
 		} catch (Throwable t) {
-			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t);
+			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
 		}
 		return result;
 	}
@@ -928,8 +1010,8 @@
 		try {			
 			IndexWriter writer = getIndexWriter(repositoryName);
 			writer.addDocument(doc);
-			resetIndexSearcher(repositoryName);
 			writer.commit();
+			resetIndexSearcher(repositoryName);
 			return true;
 		} catch (Exception e) {
 			logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
@@ -937,8 +1019,10 @@
 		return false;
 	}
 
-	private SearchResult createSearchResult(Document doc, float score) throws ParseException {
+	private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
 		SearchResult result = new SearchResult();
+		result.hitId = hitId;
+		result.totalHits = totalHits;
 		result.score = score;
 		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
 		result.summary = doc.get(FIELD_SUMMARY);		
@@ -961,7 +1045,7 @@
 	private synchronized void resetIndexSearcher(String repository) throws IOException {
 		IndexSearcher searcher = searchers.remove(repository);
 		if (searcher != null) {
-			searcher.close();
+			searcher.getIndexReader().close();
 		}
 	}
 
@@ -991,8 +1075,8 @@
 	 * @throws IOException
 	 */
 	private IndexWriter getIndexWriter(String repository) throws IOException {
-		IndexWriter indexWriter = writers.get(repository);		
-		File repositoryFolder = new File(repositoriesFolder, repository);
+		IndexWriter indexWriter = writers.get(repository);				
+		File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
 		File indexFolder = new File(repositoryFolder, LUCENE_DIR);
 		Directory directory = FSDirectory.open(indexFolder);		
 
@@ -1014,19 +1098,21 @@
 	 * 
 	 * @param text
 	 *            if the text is null or empty, null is returned
-	 * @param maximumHits
-	 *            the maximum number of hits to collect
+	 * @param page
+	 *            the page number to retrieve. page is 1-indexed.
+	 * @param pageSize
+	 *            the number of elements to return for this page
 	 * @param repositories
 	 *            a list of repositories to search. if no repositories are
 	 *            specified null is returned.
 	 * @return a list of SearchResults in order from highest to the lowest score
 	 * 
 	 */
-	public List<SearchResult> search(String text, int maximumHits, List<String> repositories) {
+	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
 		if (ArrayUtils.isEmpty(repositories)) {
 			return null;
 		}
-		return search(text, maximumHits, repositories.toArray(new String[0]));
+		return search(text, page, pageSize, repositories.toArray(new String[0]));
 	}
 	
 	/**
@@ -1034,15 +1120,17 @@
 	 * 
 	 * @param text
 	 *            if the text is null or empty, null is returned
-	 * @param maximumHits
-	 *            the maximum number of hits to collect
+	 * @param page
+	 *            the page number to retrieve. page is 1-indexed.
+	 * @param pageSize
+	 *            the number of elements to return for this page
 	 * @param repositories
 	 *            a list of repositories to search. if no repositories are
 	 *            specified null is returned.
 	 * @return a list of SearchResults in order from highest to the lowest score
 	 * 
-	 */	
-	public List<SearchResult> search(String text, int maximumHits, String... repositories) {
+	 */
+	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
 		if (StringUtils.isEmpty(text)) {
 			return null;
 		}
@@ -1062,7 +1150,7 @@
 			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
 			qp.setAllowLeadingWildcard(true);
 			query.add(qp.parse(text), Occur.SHOULD);
-
+			
 			IndexSearcher searcher;
 			if (repositories.length == 1) {
 				// single repository search
@@ -1078,15 +1166,19 @@
 				MultiSourceReader reader = new MultiSourceReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
+			
 			Query rewrittenQuery = searcher.rewrite(query);
-			TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
+			logger.debug(rewrittenQuery.toString());
+
+			TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
 			searcher.search(rewrittenQuery, collector);
-			ScoreDoc[] hits = collector.topDocs().scoreDocs;
+			int offset = Math.max(0, (page - 1) * pageSize);
+			ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
+			int totalHits = collector.getTotalHits();
 			for (int i = 0; i < hits.length; i++) {
 				int docId = hits[i].doc;
 				Document doc = searcher.doc(docId);
-				// TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY				
-				SearchResult result = createSearchResult(doc, hits[i].score);
+				SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
 				if (repositories.length == 1) {
 					// single repository search
 					result.repository = repositories[0];
@@ -1118,42 +1210,86 @@
 	 */
 	private String getHighlightedFragment(Analyzer analyzer, Query query,
 			String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
-		content = content == null ? "":StringUtils.escapeForHtml(content, false);
-		
+		if (content == null) {
+			content = "";
+		}		
+
+		int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
+
 		QueryScorer scorer = new QueryScorer(query, "content");
-		Fragmenter fragmenter;
-		
-		// TODO improve the fragmenter - hopefully on line breaks
-		if (SearchObjectType.commit == result.type) {
-			fragmenter = new SimpleSpanFragmenter(scorer, 1024); 
-		} else {
-			fragmenter = new SimpleSpanFragmenter(scorer, 150);
-		}
+		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); 
 
 		// use an artificial delimiter for the token
-		String termTag = "<!--[";
-		String termTagEnd = "]-->";
+		String termTag = "!!--[";
+		String termTagEnd = "]--!!";
 		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
 		Highlighter highlighter = new Highlighter(formatter, scorer);		
 		highlighter.setTextFragmenter(fragmenter);
-		
-		String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 5);
+
+		String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
 		if (ArrayUtils.isEmpty(fragments)) {
 			if (SearchObjectType.blob  == result.type) {
 				return "";
 			}
-			return "<pre class=\"text\">" + content + "</pre>";
+			// clip commit message
+			String fragment = content;
+			if (fragment.length() > fragmentLength) {
+				fragment = fragment.substring(0, fragmentLength) + "...";
+			}
+			return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
 		}
+		
+		// make sure we have unique fragments
+		Set<String> uniqueFragments = new LinkedHashSet<String>();
+		for (String fragment : fragments) {
+			uniqueFragments.add(fragment);
+		}
+		fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
+		
 		StringBuilder sb = new StringBuilder();
 		for (int i = 0, len = fragments.length; i < len; i++) {
 			String fragment = fragments[i];
-			
+			String tag = "<pre class=\"text\">";
+
 			// resurrect the raw fragment from removing the artificial delimiters
-			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");			
-			sb.append(getPreTag(result, raw, content));
+			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
+
+			// determine position of the raw fragment in the content
+			int pos = content.indexOf(raw);
+				
+			// restore complete first line of fragment
+			int c = pos;
+			while (c > 0) {
+				c--;
+				if (content.charAt(c) == '\n') {
+					break;
+				}
+			}
+			if (c > 0) {
+				// inject leading chunk of first fragment line
+				fragment = content.substring(c + 1, pos) + fragment;
+			}
+				
+			if (SearchObjectType.blob  == result.type) {
+				// count lines as offset into the content for this fragment
+				int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
+				
+				// create fragment tag with line number and language
+				String lang = "";
+				String ext = StringUtils.getFileExtension(result.path).toLowerCase();
+				if (!StringUtils.isEmpty(ext)) {
+					// maintain leading space!
+					lang = " lang-" + ext;
+				}
+				tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
+								
+			}
 			
+			sb.append(tag);
+
 			// replace the artificial delimiter with html tags
-			String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
+			String html = StringUtils.escapeForHtml(fragment, false);
+			html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
 			sb.append(html);
 			sb.append("</pre>");
 			if (i < len - 1) {
@@ -1161,31 +1297,7 @@
 			}
 		}
 		return sb.toString();
-	}
-	
-	/**
-	 * Returns the appropriate tag for a fragment. Commit messages are visually
-	 * differentiated from blob fragments.
-	 * 
-	 * @param result
-	 * @param fragment
-	 * @param content
-	 * @return an html tag appropriate for the fragment
-	 */
-	private String getPreTag(SearchResult result, String fragment, String content) {
-		String pre = "<pre class=\"text\">";
-		if (SearchObjectType.blob  == result.type) {
-			int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment)));			
-			int lastDot = result.path.lastIndexOf('.');
-			if (lastDot > -1) {
-				String ext = result.path.substring(lastDot + 1).toLowerCase();
-				pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext);	
-			} else {
-				pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);
-			}
-		}
-		return pre;
-	}
+	}	
 	
 	/**
 	 * Simple class to track the results of an index update. 

--
Gitblit v1.9.1