diff options
-rw-r--r-- | distrib/gitblit.properties | 23 | ||||
-rw-r--r-- | src/com/gitblit/Constants.java | 17 | ||||
-rw-r--r-- | src/com/gitblit/GitBlit.java | 38 | ||||
-rw-r--r-- | src/com/gitblit/GitServlet.java | 3 | ||||
-rw-r--r-- | src/com/gitblit/LuceneExecutor.java | 1178 | ||||
-rw-r--r-- | src/com/gitblit/models/SearchResult.java | 4 | ||||
-rw-r--r-- | src/com/gitblit/utils/LuceneUtils.java | 1018 | ||||
-rw-r--r-- | src/com/gitblit/wicket/pages/LucenePage.java | 23 | ||||
-rw-r--r-- | tests/com/gitblit/tests/GitBlitSuite.java | 5 | ||||
-rw-r--r-- | tests/com/gitblit/tests/IssuesTest.java | 15 | ||||
-rw-r--r-- | tests/com/gitblit/tests/LuceneExecutorTest.java (renamed from tests/com/gitblit/tests/LuceneUtilsTest.java) | 100 |
11 files changed, 1222 insertions, 1202 deletions
diff --git a/distrib/gitblit.properties b/distrib/gitblit.properties index ede5f596..18eafca9 100644 --- a/distrib/gitblit.properties +++ b/distrib/gitblit.properties @@ -92,24 +92,29 @@ groovy.preReceiveScripts = groovy.postReceiveScripts =
# If true, a Lucene index will be generated and maintained for each repository.
-# Lucene search replaces brute-force Git repository traversal.
+# Lucene search replaces brute-force Git repository traversal. Initial indexing
+# or reindexing of a repository can be memory intensive so be advised that you
+# may need to adjust your JVM heap setting accordingly (e.g. -Xmx1024M)
#
# SINCE 0.9.0
# RESTART REQUIRED
lucene.enable = false
-# If *lucene.pollingMode* = true, Gitblit will periodically check all repositories
-# for branch updates.
-# If *lucene.pollingMode* = false, repositories will only be indexed on pushes
-# to Gitblit.
+# This value specifies the idle period for the Lucene executor to wait between
+# repository ref checks. If refs have been changed since the last check, the
+# executor will incrementally index the changes.
#
-# Regardless of this setting, Gitblit will check all repositories for branch
-# updates 1 minute after startup. Indexes will automatically be built for any
-# repository that is missing its index or if an index version change is detected.
+# Gitblit will check all repositories for branch updates 1 minute after startup.
+# Indexes will automatically be built for any repository that is missing its index
+# or if an index version change is detected.
+#
+# The shortest interval allowed is 2 minutes.
+# Decimal frequency values are cast to integers.
+# Frequency values may be specified in mins, hours, or days (e.g. 2 mins).
#
# SINCE 0.9.0
# RESTART REQUIRED
-lucene.pollingMode = false
+lucene.frequency = 2 mins
#
# Authentication Settings
diff --git a/src/com/gitblit/Constants.java b/src/com/gitblit/Constants.java index 3f823de1..54a6db3f 100644 --- a/src/com/gitblit/Constants.java +++ b/src/com/gitblit/Constants.java @@ -15,6 +15,7 @@ */
package com.gitblit;
+
/**
* Constant values used by Gitblit.
*
@@ -258,4 +259,20 @@ public class Constants { return name().toLowerCase();
}
}
+
+ /**
+ * The types of objects that can be indexed and queried.
+ */
+ public static enum SearchObjectType {
+ commit, blob, issue;
+
+ static SearchObjectType fromName(String name) {
+ for (SearchObjectType value : values()) {
+ if (value.name().equals(name)) {
+ return value;
+ }
+ }
+ return null;
+ }
+ }
}
diff --git a/src/com/gitblit/GitBlit.java b/src/com/gitblit/GitBlit.java index e2240256..3bda06fb 100644 --- a/src/com/gitblit/GitBlit.java +++ b/src/com/gitblit/GitBlit.java @@ -73,6 +73,7 @@ import com.gitblit.models.FederationProposal; import com.gitblit.models.FederationSet;
import com.gitblit.models.Metric;
import com.gitblit.models.RepositoryModel;
+import com.gitblit.models.SearchResult;
import com.gitblit.models.ServerSettings;
import com.gitblit.models.ServerStatus;
import com.gitblit.models.SettingModel;
@@ -86,6 +87,7 @@ import com.gitblit.utils.JsonUtils; import com.gitblit.utils.MetricUtils;
import com.gitblit.utils.ObjectCache;
import com.gitblit.utils.StringUtils;
+import com.gitblit.utils.TimeUtils;
/**
* GitBlit is the servlet context listener singleton that acts as the core for
@@ -1646,6 +1648,19 @@ public class GitBlit implements ServletContextListener { }
return scripts;
}
+
+	/**
+	 * Runs a Lucene query against the given repositories by delegating to the
+	 * Lucene executor.
+	 *
+	 * @param query
+	 *            the Lucene query
+	 * @param maximumHits
+	 *            passed through to the executor as the hit limit
+	 * @param repositories
+	 *            the names of the repositories to search
+	 * @return the search results produced by the Lucene executor
+	 */
+	public List<SearchResult> search(String query, int maximumHits, List<String> repositories) {
+		return luceneExecutor.search(query, maximumHits, repositories);
+	}
/**
* Notify the administrators by email.
@@ -1698,15 +1713,6 @@ public class GitBlit implements ServletContextListener { }
/**
- * Update the Lucene index of a repository.
- *
- * @param repository
- */
- public void updateLuceneIndex(RepositoryModel repository) {
- luceneExecutor.queue(repository);
- }
-
- /**
* Returns the descriptions/comments of the Gitblit config settings.
*
* @return SettingsModel
@@ -1823,12 +1829,18 @@ public class GitBlit implements ServletContextListener { } else {
logger.warn("Mail server is not properly configured. Mail services disabled.");
}
- luceneExecutor = new LuceneExecutor(settings);
+ luceneExecutor = new LuceneExecutor(settings, repositoriesFolder);
if (luceneExecutor.isReady()) {
- logger.info("Lucene executor is scheduled to process the repository queue every 2 minutes.");
- scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, 2, TimeUnit.MINUTES);
+ String idle = settings.getString(Keys.lucene.frequency, "2 mins");
+ int mins = TimeUtils.convertFrequencyToMinutes(idle);
+ if (mins <= 2) {
+ mins = 2;
+ idle = mins + " mins";
+ }
+ logger.info("Lucene executor is scheduled to process ref changes every " + idle);
+ scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins, TimeUnit.MINUTES);
} else {
- logger.warn("Lucene executor is disabled.");
+ logger.warn("Lucene integration is disabled.");
}
if (startFederation) {
configureFederation();
diff --git a/src/com/gitblit/GitServlet.java b/src/com/gitblit/GitServlet.java index 473e4c90..3b60e9f1 100644 --- a/src/com/gitblit/GitServlet.java +++ b/src/com/gitblit/GitServlet.java @@ -197,9 +197,6 @@ public class GitServlet extends org.eclipse.jgit.http.server.GitServlet { // Experimental
// runNativeScript(rp, "hooks/post-receive", commands);
-
- // Update the Lucene search index
- GitBlit.self().updateLuceneIndex(repository);
}
/**
diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java index c9e4c73e..527609e9 100644 --- a/src/com/gitblit/LuceneExecutor.java +++ b/src/com/gitblit/LuceneExecutor.java @@ -15,47 +15,138 @@ */
package com.gitblit;
+import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
import java.text.MessageFormat;
-import java.util.HashSet;
-import java.util.Queue;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.DateTools.Resolution;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.revwalk.RevTree;
+import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.storage.file.FileBasedConfig;
+import org.eclipse.jgit.treewalk.EmptyTreeIterator;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.util.FS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.gitblit.models.RepositoryModel;
+import com.gitblit.Constants.SearchObjectType;
+import com.gitblit.models.IssueModel;
+import com.gitblit.models.IssueModel.Attachment;
+import com.gitblit.models.PathModel.PathChangeModel;
+import com.gitblit.models.RefModel;
+import com.gitblit.models.SearchResult;
+import com.gitblit.utils.ArrayUtils;
+import com.gitblit.utils.IssueUtils;
import com.gitblit.utils.JGitUtils;
-import com.gitblit.utils.LuceneUtils;
-import com.gitblit.utils.LuceneUtils.IndexResult;
+import com.gitblit.utils.StringUtils;
/**
- * The Lucene executor handles indexing repositories synchronously and
- * asynchronously from a queue.
+ * The Lucene executor handles indexing and searching repositories.
*
* @author James Moger
*
*/
public class LuceneExecutor implements Runnable {
+
+
+ private static final int INDEX_VERSION = 1;
- private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
-
- private final Queue<String> queue = new ConcurrentLinkedQueue<String>();
-
- private final IStoredSettings settings;
-
- private final boolean isLuceneEnabled;
+ private static final String FIELD_OBJECT_TYPE = "type";
+ private static final String FIELD_ISSUE = "issue";
+ private static final String FIELD_PATH = "path";
+ private static final String FIELD_COMMIT = "commit";
+ private static final String FIELD_BRANCH = "branch";
+ private static final String FIELD_REPOSITORY = "repository";
+ private static final String FIELD_SUMMARY = "summary";
+ private static final String FIELD_CONTENT = "content";
+ private static final String FIELD_AUTHOR = "author";
+ private static final String FIELD_COMMITTER = "committer";
+ private static final String FIELD_DATE = "date";
+ private static final String FIELD_TAG = "tag";
+ private static final String FIELD_LABEL = "label";
+ private static final String FIELD_ATTACHMENT = "attachment";
- private final boolean isPollingMode;
-
- private final AtomicBoolean firstRun = new AtomicBoolean(true);
+ private static final String CONF_FILE = "lucene.conf";
+ private static final String LUCENE_DIR = "lucene";
+ private static final String CONF_INDEX = "index";
+ private static final String CONF_VERSION = "version";
+ private static final String CONF_ALIAS = "aliases";
+ private static final String CONF_BRANCH = "branches";
+
+ private static final Version LUCENE_VERSION = Version.LUCENE_35;
+
+ private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
+
+ private final IStoredSettings storedSettings;
+ private final File repositoriesFolder;
+
+ private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
+ private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
+
+ private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
+ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
+ "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
- public LuceneExecutor(IStoredSettings settings) {
- this.settings = settings;
- this.isLuceneEnabled = settings.getBoolean(Keys.lucene.enable, false);
- this.isPollingMode = settings.getBoolean(Keys.lucene.pollingMode, false);
+	// Branches that must not be indexed as ordinary source branches. Branch
+	// names are compared as fully qualified refs (e.g. refs/heads/master),
+	// which never start with a slash, so the entry must not have one either.
+	private final Set<String> excludedBranches = new TreeSet<String>(
+			Arrays.asList("refs/heads/gb-issues"));
+
+ public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
+ this.storedSettings = settings;
+ this.repositoriesFolder = repositoriesFolder;
}
/**
@@ -64,70 +155,33 @@ public class LuceneExecutor implements Runnable { * @return true if the Lucene executor is ready to index repositories
*/
public boolean isReady() {
- return isLuceneEnabled;
- }
-
- /**
- * Returns the status of the Lucene queue.
- *
- * @return true, if the queue is empty
- */
- public boolean hasEmptyQueue() {
- return queue.isEmpty();
+ return storedSettings.getBoolean(Keys.lucene.enable, false);
}
/**
- * Queues a repository to be asynchronously indexed.
- *
- * @param repository
- * @return true if the repository was queued
+ * Run is executed by the gitblit executor service at whatever frequency
+ * is specified in the settings. Because this is called by an executor
+ * service, calls will queue - i.e. there can never be concurrent execution
+ * of repository index updates.
*/
- public boolean queue(RepositoryModel repository) {
- if (!isReady()) {
- return false;
- }
- queue.add(repository.name);
- return true;
- }
-
@Override
public void run() {
- if (!isLuceneEnabled) {
+ if (!isReady()) {
return;
}
- if (firstRun.get() || isPollingMode) {
- // update all indexes on first run or if polling mode
- firstRun.set(false);
- queue.addAll(GitBlit.self().getRepositoryList());
- }
-
- Set<String> processed = new HashSet<String>();
- if (!queue.isEmpty()) {
- // update the repository Lucene index
- String name = null;
- while ((name = queue.poll()) != null) {
- if (processed.contains(name)) {
- // skipping multi-queued repository
- continue;
- }
- try {
- Repository repository = GitBlit.self().getRepository(name);
- if (repository == null) {
- logger.warn(MessageFormat.format(
- "Lucene executor could not find repository {0}. Skipping.",
- name));
- continue;
- }
- index(name, repository);
- repository.close();
- System.gc();
- processed.add(name);
- } catch (Throwable e) {
- logger.error(MessageFormat.format("Failed to update {0} Lucene index",
- name), e);
- }
+ for (String repositoryName : GitBlit.self().getRepositoryList()) {
+ Repository repository = GitBlit.self().getRepository(repositoryName);
+ if (repository == null) {
+ logger.warn(MessageFormat.format(
+ "Lucene executor could not find repository {0}. Skipping.",
+ repositoryName));
+ continue;
}
+ // TODO allow repository to bypass Lucene indexing
+ index(repositoryName, repository);
+ repository.close();
+ System.gc();
}
}
@@ -140,34 +194,31 @@ public class LuceneExecutor implements Runnable { * @param repository
* the repository object
*/
- public void index(String name, Repository repository) {
+ protected void index(String name, Repository repository) {
try {
if (JGitUtils.hasCommits(repository)) {
- if (LuceneUtils.shouldReindex(repository)) {
- // (re)build the entire index
- long start = System.currentTimeMillis();
- IndexResult result = LuceneUtils.reindex(name, repository);
- float duration = (System.currentTimeMillis() - start)/1000f;
+ if (shouldReindex(repository)) {
+ // (re)build the entire index
+ IndexResult result = reindex(name, repository);
+
if (result.success) {
if (result.commitCount > 0) {
String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
logger.info(MessageFormat.format(msg, name,
- result.commitCount, result.blobCount, result.branchCount, duration));
+ result.commitCount, result.blobCount, result.branchCount, result.duration()));
}
} else {
String msg = "Could not build {0} Lucene index!";
logger.error(MessageFormat.format(msg, name));
}
} else {
- // update the index with latest commits
- long start = System.currentTimeMillis();
- IndexResult result = LuceneUtils.updateIndex(name, repository);
- float duration = (System.currentTimeMillis() - start)/1000f;
+ // update the index with latest commits
+ IndexResult result = updateIndex(name, repository);
if (result.success) {
if (result.commitCount > 0) {
String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
logger.info(MessageFormat.format(msg, name,
- result.commitCount, result.blobCount, result.branchCount, duration));
+ result.commitCount, result.blobCount, result.branchCount, result.duration()));
}
} else {
String msg = "Could not update {0} Lucene index!";
@@ -188,6 +239,953 @@ public class LuceneExecutor implements Runnable { *
*/
public void close() {
- LuceneUtils.close();
+ // close all writers
+ for (String writer : writers.keySet()) {
+ try {
+ writers.get(writer).close(true);
+ } catch (Throwable t) {
+ logger.error("Failed to close Lucene writer for " + writer, t);
+ }
+ }
+ writers.clear();
+
+ // close all searchers
+ for (String searcher : searchers.keySet()) {
+ try {
+ searchers.get(searcher).close();
+ } catch (Throwable t) {
+ logger.error("Failed to close Lucene searcher for " + searcher, t);
+ }
+ }
+ searchers.clear();
+ }
+
+
+	/**
+	 * Removes the on-disk Lucene index of a repository, together with any
+	 * cached index writer and searcher for it and its Lucene config file.
+	 *
+	 * @param repositoryName
+	 * @return true once the index has been removed; an IOException raised
+	 *         along the way is rethrown wrapped in a RuntimeException
+	 */
+	public boolean deleteIndex(String repositoryName) {
+		try {
+			// evict the cached writer for this repository and close it
+			IndexWriter cachedWriter = writers.remove(repositoryName);
+			if (cachedWriter != null) {
+				cachedWriter.close();
+			}
+			// evict the cached searcher for this repository and close it
+			IndexSearcher cachedSearcher = searchers.remove(repositoryName);
+			if (cachedSearcher != null) {
+				cachedSearcher.close();
+			}
+			File repositoryFolder = new File(repositoriesFolder, repositoryName);
+			// wipe the index folder
+			File indexFolder = new File(repositoryFolder, LUCENE_DIR);
+			if (indexFolder.exists()) {
+				org.eclipse.jgit.util.FileUtils.delete(indexFolder,
+						org.eclipse.jgit.util.FileUtils.RECURSIVE);
+			}
+			// remove the Lucene config file
+			File configFile = new File(repositoryFolder, CONF_FILE);
+			if (configFile.exists()) {
+				configFile.delete();
+			}
+			return true;
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+
+	/**
+	 * Determines a display name for the author of a commit: the author name,
+	 * or the author email address when the name is empty.
+	 *
+	 * @param commit
+	 * @return the author, or "unknown" if the author identity cannot be read
+	 */
+	private String getAuthor(RevCommit commit) {
+		try {
+			String authorName = commit.getAuthorIdent().getName();
+			if (!StringUtils.isEmpty(authorName)) {
+				return authorName;
+			}
+			// no usable name, fall back to the email address
+			return commit.getAuthorIdent().getEmailAddress();
+		} catch (NullPointerException n) {
+			// no author identity available for this commit
+			return "unknown";
+		}
+	}
+
+	/**
+	 * Determines a display name for the committer of a commit: the committer
+	 * name, or the committer email address when the name is empty.
+	 *
+	 * @param commit
+	 * @return the committer, or "unknown" if the committer identity cannot be
+	 *         read
+	 */
+	private String getCommitter(RevCommit commit) {
+		try {
+			String committerName = commit.getCommitterIdent().getName();
+			if (!StringUtils.isEmpty(committerName)) {
+				return committerName;
+			}
+			// no usable name, fall back to the email address
+			return commit.getCommitterIdent().getEmailAddress();
+		} catch (NullPointerException n) {
+			// no committer identity available for this commit
+			return "unknown";
+		}
+	}
+
+ /**
+ * Construct a keyname from the branch.
+ *
+ * @param branchName
+ * @return a keyname appropriate for the Git config file format
+ */
+ private String getBranchKey(String branchName) {
+ return StringUtils.getSHA1(branchName);
+ }
+
+	/**
+	 * Creates the config object backed by the Lucene config file located in
+	 * the repository directory. The file is not loaded here.
+	 *
+	 * @param repository
+	 * @return a config object
+	 */
+	private FileBasedConfig getConfig(Repository repository) {
+		return new FileBasedConfig(new File(repository.getDirectory(), CONF_FILE), FS.detect());
+	}
+
+	/**
+	 * Decides whether the repository index has to be rebuilt by comparing the
+	 * index version recorded in the Lucene config file with INDEX_VERSION.
+	 *
+	 * @param repository
+	 * @return true if the recorded index version differs from INDEX_VERSION
+	 *         or if the config file cannot be read
+	 */
+	protected boolean shouldReindex(Repository repository) {
+		try {
+			FileBasedConfig luceneConfig = getConfig(repository);
+			luceneConfig.load();
+			int recordedVersion = luceneConfig.getInt(CONF_INDEX, CONF_VERSION, 0);
+			// a version mismatch means the index must be rebuilt
+			return recordedVersion != INDEX_VERSION;
+		} catch (Throwable t) {
+			// an unreadable config is treated like a missing index
+			return true;
+		}
+	}
+
+
+	/**
+	 * Rebuilds the Lucene index of a repository from scratch. Any existing
+	 * index is deleted first.
+	 * <p>
+	 * For every local branch that is not excluded, each blob in the tree of
+	 * the branch tip is indexed together with the first non-merge commit,
+	 * walking back from the tip, whose diff against its parent contains the
+	 * blob path. The commits of the branch history are indexed afterwards. If
+	 * the repository has an issues branch, its issues are indexed as well.
+	 * Finally the index version is written to the Lucene config file.
+	 *
+	 * @param repositoryName
+	 * @param repository
+	 * @return IndexResult, marked successful only if the whole rebuild
+	 *         completed; exceptions raised while indexing are logged
+	 */
+	public IndexResult reindex(String repositoryName, Repository repository) {
+		IndexResult result = new IndexResult();
+		if (!deleteIndex(repositoryName)) {
+			return result;
+		}
+		try {
+			FileBasedConfig config = getConfig(repository);
+			Set<String> indexedCommits = new TreeSet<String>();
+			IndexWriter writer = getIndexWriter(repositoryName);
+			// build a quick lookup of annotated tags, keyed by the id of the
+			// object each tag references
+			Map<String, List<String>> tags = new HashMap<String, List<String>>();
+			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+				if (!tag.isAnnotatedTag()) {
+					// skip non-annotated tags
+					continue;
+				}
+				// look up with the same String key that is used to store,
+				// otherwise every tag replaces the list of its predecessors
+				String taggedId = tag.getReferencedObjectId().getName();
+				List<String> tagNames = tags.get(taggedId);
+				if (tagNames == null) {
+					tagNames = new ArrayList<String>();
+					tags.put(taggedId, tagNames);
+				}
+				tagNames.add(tag.displayName);
+			}
+
+			ObjectReader reader = repository.newObjectReader();
+			try {
+				// get the local branches
+				List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+
+				// sort them by most recently updated
+				Collections.sort(branches, new Comparator<RefModel>() {
+					@Override
+					public int compare(RefModel ref1, RefModel ref2) {
+						return ref2.getDate().compareTo(ref1.getDate());
+					}
+				});
+
+				// reorder default branch to first position
+				RefModel defaultBranch = null;
+				ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
+				for (RefModel branch : branches) {
+					if (branch.getObjectId().equals(defaultBranchId)) {
+						defaultBranch = branch;
+						break;
+					}
+				}
+				if (defaultBranch != null) {
+					// only reorder when a match was found; inserting null
+					// would break the branch walk below
+					branches.remove(defaultBranch);
+					branches.add(0, defaultBranch);
+				}
+
+				// walk through each branch
+				for (RefModel branch : branches) {
+					if (excludedBranches.contains(branch.getName())) {
+						continue;
+					}
+
+					String branchName = branch.getName();
+					RevWalk revWalk = new RevWalk(reader);
+					RevCommit tip = revWalk.parseCommit(branch.getObjectId());
+					String tipId = tip.getId().getName();
+
+					// remember the indexed tip of this branch in the config
+					String keyName = getBranchKey(branchName);
+					config.setString(CONF_ALIAS, null, keyName, branchName);
+					config.setString(CONF_BRANCH, null, keyName, tipId);
+
+					// index the blob contents of the tree
+					TreeWalk treeWalk = new TreeWalk(repository);
+					treeWalk.addTree(tip.getTree());
+					treeWalk.setRecursive(true);
+
+					// paths still waiting to be matched with a commit
+					Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
+					while (treeWalk.next()) {
+						paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					}
+
+					ByteArrayOutputStream os = new ByteArrayOutputStream();
+					byte[] tmp = new byte[32767];
+
+					RevWalk commitWalk = new RevWalk(reader);
+					commitWalk.markStart(tip);
+
+					RevCommit commit;
+					while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
+						TreeWalk diffWalk = new TreeWalk(reader);
+						int parentCount = commit.getParentCount();
+						switch (parentCount) {
+						case 0:
+							diffWalk.addTree(new EmptyTreeIterator());
+							break;
+						case 1:
+							diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
+							break;
+						default:
+							// skip merge commits
+							continue;
+						}
+						diffWalk.addTree(getTree(commitWalk, commit));
+						diffWalk.setFilter(ANY_DIFF);
+						diffWalk.setRecursive(true);
+						while ((paths.size() > 0) && diffWalk.next()) {
+							String path = diffWalk.getPathString();
+							if (!paths.containsKey(path)) {
+								continue;
+							}
+
+							// remove path from set
+							ObjectId blobId = paths.remove(path);
+							result.blobCount++;
+
+							// index the blob metadata
+							String blobAuthor = getAuthor(commit);
+							String blobCommitter = getCommitter(commit);
+							String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
+									Resolution.MINUTE);
+
+							Document doc = new Document();
+							doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
+							doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
+							doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
+							doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
+							doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
+							doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
+							doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
+							doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
+
+							// determine extension to compare to the extension
+							// blacklist
+							String ext = null;
+							String name = path.toLowerCase();
+							if (name.indexOf('.') > -1) {
+								ext = name.substring(name.lastIndexOf('.') + 1);
+							}
+
+							// index the blob content
+							if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
+								ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
+								InputStream in = ldr.openStream();
+								try {
+									int n;
+									while ((n = in.read(tmp)) > 0) {
+										os.write(tmp, 0, n);
+									}
+								} finally {
+									// do not leak the stream if reading fails
+									in.close();
+								}
+								byte[] content = os.toByteArray();
+								String str = new String(content, Constants.CHARACTER_ENCODING);
+								doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
+								os.reset();
+							}
+
+							// add the blob to the index
+							writer.addDocument(doc);
+						}
+					}
+
+					os.close();
+
+					// index the tip commit object
+					if (indexedCommits.add(tipId)) {
+						Document doc = createDocument(tip, tags.get(tipId));
+						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
+						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
+						writer.addDocument(doc);
+						result.commitCount += 1;
+						result.branchCount += 1;
+					}
+
+					// traverse the log and index the previous commit objects
+					RevWalk historyWalk = new RevWalk(reader);
+					historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
+					RevCommit rev;
+					while ((rev = historyWalk.next()) != null) {
+						String hash = rev.getId().getName();
+						if (indexedCommits.add(hash)) {
+							Document doc = createDocument(rev, tags.get(hash));
+							doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
+							doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
+							writer.addDocument(doc);
+							result.commitCount += 1;
+						}
+					}
+				}
+			} finally {
+				// finished with the object reader, also when a branch failed
+				reader.release();
+			}
+
+			// this repository has a gb-issues branch, index all issues
+			if (IssueUtils.getIssuesBranch(repository) != null) {
+				List<IssueModel> issues = IssueUtils.getIssues(repository, null);
+				if (issues.size() > 0) {
+					result.branchCount += 1;
+				}
+				for (IssueModel issue : issues) {
+					result.issueCount++;
+					Document doc = createDocument(issue);
+					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
+					writer.addDocument(doc);
+				}
+			}
+
+			// commit all changes and reset the searcher
+			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+			config.save();
+			resetIndexSearcher(repositoryName);
+			writer.commit();
+			result.success();
+		} catch (Exception e) {
+			logger.error("Exception while reindexing " + repositoryName, e);
+		}
+		return result;
+	}
+
+	/**
+	 * Returns the tree of a commit, parsing the commit headers through the
+	 * given walk first when the tree is not yet available.
+	 *
+	 * @param walk
+	 *            the walk used to parse the commit headers if needed
+	 * @param commit
+	 * @return tree
+	 * @throws IOException
+	 */
+	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
+			throws IOException {
+		if (commit.getTree() == null) {
+			// tree not populated yet, parse the headers to obtain it
+			walk.parseHeaders(commit);
+		}
+		return commit.getTree();
+	}
+
+	/**
+	 * Incrementally update the index with the specified commit for the
+	 * repository.
+	 * <p>
+	 * A commit on an excluded branch is not indexed as source. If that branch
+	 * is the issues branch, the issue named in the commit message is
+	 * re-indexed, or removed from the index when it no longer exists. For any
+	 * other branch every path changed by the commit is removed from the index
+	 * and, unless the change is a deletion, indexed again, followed by the
+	 * commit itself.
+	 *
+	 * @param repositoryName
+	 * @param repository
+	 * @param branch
+	 *            the fully qualified branch name (e.g. refs/heads/master)
+	 * @param commit
+	 * @return IndexResult carrying the success flag and the number of blobs,
+	 *         commits and issues processed; exceptions are logged
+	 */
+	private IndexResult index(String repositoryName, Repository repository,
+			String branch, RevCommit commit) {
+		IndexResult result = new IndexResult();
+		try {
+			if (excludedBranches.contains(branch)) {
+				if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// index an issue
+					String issueId = commit.getShortMessage().substring(2).trim();
+					IssueModel issue = IssueUtils.getIssue(repository, issueId);
+					if (issue == null) {
+						// issue was deleted, remove from index
+						deleteIssue(repositoryName, issueId);
+						result.success = true;
+						return result;
+					}
+					result.success = index(repositoryName, issue);
+					result.issueCount++;
+					return result;
+
+				}
+				return result;
+			}
+			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
+			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
+					Resolution.MINUTE);
+			IndexWriter writer = getIndexWriter(repositoryName);
+			for (PathChangeModel path : changedPaths) {
+				// delete the indexed blob
+				deleteBlob(repositoryName, branch, path.path);
+
+				// re-index the blob
+				if (!ChangeType.DELETE.equals(path.changeType)) {
+					result.blobCount++;
+					Document doc = new Document();
+					// same field options as the full reindex uses for the type
+					doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
+							Index.NOT_ANALYZED_NO_NORMS));
+					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
+					doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
+					doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
+
+					// determine extension to compare to the extension
+					// blacklist
+					String ext = null;
+					String name = path.name.toLowerCase();
+					if (name.indexOf('.') > -1) {
+						ext = name.substring(name.lastIndexOf('.') + 1);
+					}
+
+					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
+						// read the blob content
+						String str = JGitUtils.getStringContent(repository, commit.getTree(),
+								path.path);
+						// a Field cannot hold a null value; index the metadata
+						// only if no content could be read
+						if (str != null) {
+							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
+						}
+					}
+
+					// always add the blob: excluded extensions are indexed
+					// without content, exactly like the full reindex does
+					writer.addDocument(doc);
+				}
+			}
+			writer.commit();
+
+			Document doc = createDocument(commit, null);
+			result.commitCount++;
+			result.success = index(repositoryName, doc);
+		} catch (Exception e) {
+			logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
+		}
+		return result;
+	}
+
+ /**
+ * Incrementally update the index with the specified issue for the
+ * repository.
+ *
+ * @param repositoryName
+ * @param issue
+ * @return true, if successful
+ */
+ public boolean index(String repositoryName, IssueModel issue) {
+ try {
+ // delete the old issue from the index, if exists
+ deleteIssue(repositoryName, issue.id);
+ Document doc = createDocument(issue);
+ return index(repositoryName, doc);
+ } catch (Exception e) {
+ logger.error(MessageFormat.format("Error while indexing issue {0} in {1}", issue.id, repositoryName), e);
+ }
+ return false;
+ }
+
+	/**
+	 * Removes an issue from the repository index. The documents to delete are
+	 * selected by object type and issue id, and the deletion is committed
+	 * immediately.
+	 *
+	 * @param repositoryName
+	 * @param issueId
+	 * @throws Exception
+	 */
+	private void deleteIssue(String repositoryName, String issueId) throws Exception {
+		BooleanQuery issueQuery = new BooleanQuery();
+		// must be an issue document ...
+		issueQuery.add(new TermQuery(new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name())),
+				Occur.MUST);
+		// ... with exactly this issue id
+		issueQuery.add(new TermQuery(new Term(FIELD_ISSUE, issueId)), Occur.MUST);
+
+		IndexWriter indexWriter = getIndexWriter(repositoryName);
+		indexWriter.deleteDocuments(issueQuery);
+		indexWriter.commit();
+	}
+
+ /**
+  * Removes the document(s) of one blob, identified by branch and path, from
+  * the repository index and commits the deletion.
+  *
+  * NOTE(review): the blob documents built in this class add FIELD_PATH with
+  * Index.ANALYZED, while a TermQuery matches a single un-analyzed term.
+  * Confirm that a complete path (and branch name) survives analysis as one
+  * token, otherwise this delete will not match anything.
+  *
+  * @param repositoryName
+  * @param branch
+  * @param path
+  * @throws Exception
+  */
+ private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
+     // all three clauses are mandatory: type "blob", this branch, this path
+     BooleanQuery blobQuery = new BooleanQuery();
+     blobQuery.add(new TermQuery(new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name())), Occur.MUST);
+     blobQuery.add(new TermQuery(new Term(FIELD_BRANCH, branch)), Occur.MUST);
+     blobQuery.add(new TermQuery(new Term(FIELD_PATH, path)), Occur.MUST);
+
+     IndexWriter indexWriter = getIndexWriter(repositoryName);
+     indexWriter.deleteDocuments(blobQuery);
+     indexWriter.commit();
+ }
+
+ /**
+  * Updates a repository index incrementally from the last indexed commits.
+  * <p>
+  * For each local branch the id of the last indexed commit is read from the
+  * Lucene config file of the repository. If no id is recorded the complete
+  * revision log of the branch is indexed, otherwise only the log between
+  * the recorded commit and the branch. The config file is saved after each
+  * branch. Branches that are recorded in the config but no longer exist
+  * have their documents removed from the index.
+  *
+  * @param repositoryName
+  * @param repository
+  * @return IndexResult; its success flag is only set when no exception
+  *         (or error) was thrown
+  */
+ protected IndexResult updateIndex(String repositoryName, Repository repository) {
+     IndexResult result = new IndexResult();
+     try {
+         FileBasedConfig config = getConfig(repository);
+         config.load();
+
+         // build a quick lookup of annotated tags, keyed by the name of the
+         // object each tag references. The key that is tested must be the
+         // same key that is inserted, otherwise a second tag on the same
+         // object would replace the list holding the first one.
+         // NOTE(review): this lookup is not consumed anywhere in this
+         // method; confirm whether it should be handed to commit indexing.
+         Map<String, List<String>> tags = new HashMap<String, List<String>>();
+         for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+             if (!tag.isAnnotatedTag()) {
+                 // skip non-annotated tags
+                 continue;
+             }
+             String taggedId = tag.getReferencedObjectId().getName();
+             List<String> tagNames = tags.get(taggedId);
+             if (tagNames == null) {
+                 tagNames = new ArrayList<String>();
+                 tags.put(taggedId, tagNames);
+             }
+             tagNames.add(tag.displayName);
+         }
+
+         // detect branch deletion
+         // first assume all branches are deleted and then remove each
+         // existing branch from deletedBranches during indexing
+         Set<String> deletedBranches = new TreeSet<String>();
+         for (String alias : config.getNames(CONF_ALIAS)) {
+             String branch = config.getString(CONF_ALIAS, null, alias);
+             deletedBranches.add(branch);
+         }
+
+         // walk through each branch
+         List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+         for (RefModel branch : branches) {
+             String branchName = branch.getName();
+
+             // remove this branch from the deletedBranches set
+             deletedBranches.remove(branchName);
+
+             // determine last commit
+             String keyName = getBranchKey(branchName);
+             String lastCommit = config.getString(CONF_BRANCH, null, keyName);
+
+             List<RevCommit> revs;
+             if (StringUtils.isEmpty(lastCommit)) {
+                 // new branch/unindexed branch, get all commits on branch
+                 revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
+             } else {
+                 // pre-existing branch, get changes since last commit
+                 revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
+             }
+
+             if (revs.size() > 0) {
+                 result.branchCount += 1;
+             }
+
+             // reverse the list of commits so we start with the first commit
+             Collections.reverse(revs);
+             for (RevCommit commit : revs) {
+                 result.add(index(repositoryName, repository, branchName, commit));
+             }
+
+             // update the config; saving per branch records progress even
+             // if a later branch fails
+             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+             config.setString(CONF_ALIAS, null, keyName, branchName);
+             config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
+             config.save();
+         }
+
+         // the deletedBranches set will normally be empty by this point
+         // unless a branch really was deleted and no longer exists
+         if (!deletedBranches.isEmpty()) {
+             // one writer lookup and one commit cover all deleted branches
+             IndexWriter writer = getIndexWriter(repositoryName);
+             for (String branch : deletedBranches) {
+                 writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
+             }
+             writer.commit();
+             // drop the cached searcher so the next search opens a reader
+             // that reflects the deletions
+             resetIndexSearcher(repositoryName);
+         }
+         result.success = true;
+     } catch (Throwable t) {
+         logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t);
+     }
+     return result;
+ }
+
+ /**
+  * Builds the Lucene document that represents an issue. Every field is
+  * stored; the object type is indexed un-analyzed, the date is stored only,
+  * and all remaining fields are analyzed.
+  *
+  * @param issue
+  * @return a Lucene document
+  */
+ private Document createDocument(IssueModel issue) {
+     Document document = new Document();
+     document.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.issue.name(), Store.YES, Index.NOT_ANALYZED));
+     document.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
+     document.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
+
+     // creation date at minute resolution, stored but not searchable
+     String created = DateTools.dateToString(issue.created, Resolution.MINUTE);
+     document.add(new Field(FIELD_DATE, created, Store.YES, Index.NO));
+     document.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
+
+     // attachment names are lower-cased and flattened into one field value
+     List<String> attachmentNames = new ArrayList<String>();
+     for (Attachment attachment : issue.getAttachments()) {
+         attachmentNames.add(attachment.name.toLowerCase());
+     }
+     String flattenedAttachments = StringUtils.flattenStrings(attachmentNames);
+     document.add(new Field(FIELD_ATTACHMENT, flattenedAttachments, Store.YES, Index.ANALYZED));
+
+     document.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
+     document.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
+
+     String flattenedLabels = StringUtils.flattenStrings(issue.getLabels());
+     document.add(new Field(FIELD_LABEL, flattenedLabels, Store.YES, Index.ANALYZED));
+     return document;
+ }
+
+ /**
+  * Builds the Lucene document that represents a commit. The short message
+  * becomes the summary and the full message becomes the content. A tag
+  * field is only added when tags are supplied.
+  *
+  * @param commit
+  * @param tags
+  *            names of the tags to record for the commit; may be null or
+  *            empty
+  * @return a Lucene document
+  */
+ private Document createDocument(RevCommit commit, List<String> tags) {
+     Document document = new Document();
+     document.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES, Index.NOT_ANALYZED));
+     document.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
+
+     // getCommitTime() is multiplied by 1000 to obtain the millisecond
+     // value handed to DateTools
+     String commitDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
+     document.add(new Field(FIELD_DATE, commitDate, Store.YES, Index.NO));
+
+     document.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
+     document.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
+     document.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
+     document.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
+
+     boolean hasTags = !ArrayUtils.isEmpty(tags);
+     if (hasTags) {
+         document.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
+     }
+     return document;
+ }
+
+ /**
+  * Adds one prepared document to the index of the repository. The
+  * repository name is attached to the document as an un-analyzed field,
+  * the cached searcher of the repository is discarded and the writer is
+  * committed.
+  *
+  * @param repositoryName
+  * @param doc
+  * @return true, if successful
+  */
+ private boolean index(String repositoryName, Document doc) {
+     try {
+         Field repositoryField = new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED);
+         doc.add(repositoryField);
+
+         IndexWriter indexWriter = getIndexWriter(repositoryName);
+         indexWriter.addDocument(doc);
+         // the cached searcher predates this document, so discard it
+         resetIndexSearcher(repositoryName);
+         indexWriter.commit();
+     } catch (Exception e) {
+         logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
+         return false;
+     }
+     return true;
+ }
+
+ /**
+  * Converts a stored Lucene document into a SearchResult. Tags and labels
+  * are only populated when the document carries the respective field.
+  *
+  * @param doc
+  * @param score
+  *            the score of the hit
+  * @return a search result
+  * @throws ParseException
+  *             if the stored date value cannot be parsed
+  */
+ private SearchResult createSearchResult(Document doc, float score) throws ParseException {
+     SearchResult hit = new SearchResult();
+     hit.score = score;
+     hit.date = DateTools.stringToDate(doc.get(FIELD_DATE));
+
+     // descriptive fields
+     hit.summary = doc.get(FIELD_SUMMARY);
+     hit.author = doc.get(FIELD_AUTHOR);
+     hit.committer = doc.get(FIELD_COMMITTER);
+     hit.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
+
+     // location fields
+     hit.repository = doc.get(FIELD_REPOSITORY);
+     hit.branch = doc.get(FIELD_BRANCH);
+     hit.commitId = doc.get(FIELD_COMMIT);
+     hit.issueId = doc.get(FIELD_ISSUE);
+     hit.path = doc.get(FIELD_PATH);
+
+     // optional multi-value fields, stored as one flattened string
+     String flattenedTags = doc.get(FIELD_TAG);
+     if (flattenedTags != null) {
+         hit.tags = StringUtils.getStringsFromValue(flattenedTags);
+     }
+     String flattenedLabels = doc.get(FIELD_LABEL);
+     if (flattenedLabels != null) {
+         hit.labels = StringUtils.getStringsFromValue(flattenedLabels);
+     }
+     return hit;
+ }
+
+ /**
+  * Discards the cached searcher of the repository, if there is one, so that
+  * the next call to getIndexSearcher opens a fresh one.
+  * <p>
+  * The cached searcher is constructed from an IndexReader. A searcher built
+  * that way does not close its reader when it is closed itself, so the
+  * reader is closed explicitly here to avoid leaking it on every reset.
+  *
+  * @param repository
+  * @throws IOException
+  */
+ private synchronized void resetIndexSearcher(String repository) throws IOException {
+     IndexSearcher searcher = searchers.remove(repository);
+     if (searcher != null) {
+         IndexReader reader = searcher.getIndexReader();
+         try {
+             searcher.close();
+         } finally {
+             // release the underlying reader even if closing the searcher failed
+             reader.close();
+         }
+     }
+ }
+
+ /**
+  * Returns the cached index searcher of the repository. If none is cached,
+  * a reader is opened from the repository's index writer, wrapped in a new
+  * searcher, and that searcher is cached.
+  *
+  * @param repository
+  * @return an IndexSearcher
+  * @throws IOException
+  */
+ private IndexSearcher getIndexSearcher(String repository) throws IOException {
+     IndexSearcher cached = searchers.get(repository);
+     if (cached != null) {
+         return cached;
+     }
+     IndexReader reader = IndexReader.open(getIndexWriter(repository), true);
+     IndexSearcher created = new IndexSearcher(reader);
+     searchers.put(repository, created);
+     return created;
+ }
+
+ /**
+  * Gets the index writer for the repository. Writers are cached per
+  * repository; on first use the index folder is created if necessary and a
+  * writer is opened in CREATE_OR_APPEND mode, i.e. the index is created if
+  * it does not already exist.
+  *
+  * @param repository
+  * @return an IndexWriter
+  * @throws IOException
+  */
+ private IndexWriter getIndexWriter(String repository) throws IOException {
+     IndexWriter indexWriter = writers.get(repository);
+     if (indexWriter == null) {
+         // resolve and open the index directory only when a writer has to
+         // be created; a cached writer already owns its directory
+         File repositoryFolder = new File(repositoriesFolder, repository);
+         File indexFolder = new File(repositoryFolder, LUCENE_DIR);
+         Directory directory = FSDirectory.open(indexFolder);
+         if (!indexFolder.exists()) {
+             indexFolder.mkdirs();
+         }
+         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
+         IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
+         config.setOpenMode(OpenMode.CREATE_OR_APPEND);
+         indexWriter = new IndexWriter(directory, config);
+         writers.put(repository, indexWriter);
+     }
+     return indexWriter;
+ }
+
+ /**
+  * Searches the specified repositories for the given text or query. This
+  * is a convenience overload that converts the list to an array and
+  * delegates to the varargs variant.
+  *
+  * @param text
+  *            if the text is null or empty, null is returned
+  * @param maximumHits
+  *            the maximum number of hits to collect
+  * @param repositories
+  *            a list of repositories to search. if no repositories are
+  *            specified null is returned.
+  * @return a list of SearchResults in order from highest to the lowest score
+  *
+  */
+ public List<SearchResult> search(String text, int maximumHits, List<String> repositories) {
+     boolean nothingToSearch = ArrayUtils.isEmpty(repositories);
+     return nothingToSearch ? null : search(text, maximumHits, repositories.toArray(new String[0]));
+ }
+
+ /**
+  * Searches the specified repositories for the given text or query. The
+  * text is parsed once against the summary field and once against the
+  * content field; a document matching either is a hit. If an exception
+  * occurs it is logged and the results collected so far are returned.
+  *
+  * @param text
+  *            if the text is null or empty, null is returned
+  * @param maximumHits
+  *            the maximum number of hits to collect
+  * @param repositories
+  *            a list of repositories to search. if no repositories are
+  *            specified null is returned.
+  * @return a list of SearchResults in order from highest to the lowest score
+  *
+  */
+ public List<SearchResult> search(String text, int maximumHits, String... repositories) {
+     if (StringUtils.isEmpty(text) || ArrayUtils.isEmpty(repositories)) {
+         return null;
+     }
+     Set<SearchResult> matches = new LinkedHashSet<SearchResult>();
+     StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
+     try {
+         // default search checks summary and content
+         BooleanQuery query = new BooleanQuery();
+         for (String defaultField : new String[] { FIELD_SUMMARY, FIELD_CONTENT }) {
+             QueryParser parser = new QueryParser(LUCENE_VERSION, defaultField, analyzer);
+             parser.setAllowLeadingWildcard(true);
+             query.add(parser.parse(text), Occur.SHOULD);
+         }
+
+         IndexSearcher searcher;
+         if (repositories.length == 1) {
+             // single repository search
+             searcher = getIndexSearcher(repositories[0]);
+         } else {
+             // multiple repository search: combine the reader of every
+             // repository searcher into one MultiReader
+             IndexReader[] readers = new IndexReader[repositories.length];
+             for (int i = 0; i < repositories.length; i++) {
+                 readers[i] = getIndexSearcher(repositories[i]).getIndexReader();
+             }
+             searcher = new IndexSearcher(new MultiReader(readers));
+         }
+
+         Query rewrittenQuery = searcher.rewrite(query);
+         TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
+         searcher.search(rewrittenQuery, collector);
+         for (ScoreDoc hit : collector.topDocs().scoreDocs) {
+             Document doc = searcher.doc(hit.doc);
+             // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY
+             SearchResult result = createSearchResult(doc, hit.score);
+             result.fragment = getHighlightedFragment(analyzer, query, doc.get(FIELD_CONTENT), result);
+             matches.add(result);
+         }
+     } catch (Exception e) {
+         logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
+     }
+     return new ArrayList<SearchResult>(matches);
+ }
+
+ /**
+  * Produces the html snippet shown for a search hit. The content is
+  * html-escaped, up to five best fragments are requested from the Lucene
+  * highlighter, and each fragment is wrapped in a pre tag with the matched
+  * terms enclosed in highlight spans. Fragments are separated by an
+  * ellipses marker.
+  *
+  * @param analyzer
+  * @param query
+  * @param content
+  *            the stored content of the hit; null is treated as empty
+  * @param result
+  *            the hit; its type selects the fragment size and the pre tag
+  * @return the html snippet. If no fragment is found, the empty string for
+  *         blobs and the complete escaped content in a pre tag otherwise.
+  * @throws IOException
+  * @throws InvalidTokenOffsetsException
+  */
+ private String getHighlightedFragment(Analyzer analyzer, Query query,
+         String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
+     if (content == null) {
+         content = "";
+     } else {
+         content = StringUtils.escapeForHtml(content, false);
+     }
+
+     QueryScorer scorer = new QueryScorer(query, "content");
+
+     // TODO improve the fragmenter - hopefully on line breaks
+     // commit hits get a larger fragment size than all other hits
+     int fragmentSize = SearchObjectType.commit == result.type ? 1024 : 150;
+     Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize);
+
+     // use an artificial delimiter for the token; it is swapped for real
+     // html below, after the raw fragment text has been recovered
+     final String termTag = "<!--[";
+     final String termTagEnd = "]-->";
+     Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(termTag, termTagEnd), scorer);
+     highlighter.setTextFragmenter(fragmenter);
+
+     String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 5);
+     if (ArrayUtils.isEmpty(fragments)) {
+         if (SearchObjectType.blob == result.type) {
+             return "";
+         }
+         return "<pre class=\"text\">" + content + "</pre>";
+     }
+
+     StringBuilder snippet = new StringBuilder();
+     int lastIndex = fragments.length - 1;
+     for (int i = 0; i <= lastIndex; i++) {
+         String fragment = fragments[i];
+
+         // resurrect the raw fragment by removing the artificial delimiters
+         String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
+         snippet.append(getPreTag(result, raw, content));
+
+         // replace the artificial delimiter with html tags
+         snippet.append(fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"));
+         snippet.append("</pre>");
+         if (i < lastIndex) {
+             snippet.append("<span class=\"ellipses\">...</span><br/>");
+         }
+     }
+     return snippet.toString();
+ }
+
+ /**
+  * Returns the appropriate tag for a fragment. Commit messages are visually
+  * differentiated from blob fragments: blob fragments get a prettyprint
+  * pre tag carrying a line number and, if the path has an extension, a
+  * language class; everything else gets a plain text pre tag.
+  *
+  * @param result
+  * @param fragment
+  *            the raw fragment text, without highlight delimiters
+  * @param content
+  *            the content the fragment was taken from
+  * @return an html tag appropriate for the fragment
+  */
+ private String getPreTag(SearchResult result, String fragment, String content) {
+     String pre = "<pre class=\"text\">";
+     if (SearchObjectType.blob == result.type) {
+         // indexOf returns -1 when the fragment is not a verbatim substring
+         // of the content; substring(0, -1) would then throw and abort the
+         // whole search, so fall back to the start of the content
+         int offset = Math.max(0, content.indexOf(fragment));
+         int line = StringUtils.countLines(content.substring(0, offset));
+
+         // a missing path is treated like a path without extension
+         int lastDot = result.path == null ? -1 : result.path.lastIndexOf('.');
+         if (lastDot > -1) {
+             String ext = result.path.substring(lastDot + 1).toLowerCase();
+             pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext);
+         } else {
+             pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);
+         }
+     }
+     return pre;
+ }
+
+ /**
+  * Mutable tally of what an index update did: whether it succeeded, how
+  * many branches, commits, blobs and issues it counted, and when it
+  * started and ended.
+  */
+ private class IndexResult {
+     // both timestamps start out as the creation time; endTime only moves
+     // when success() is called
+     long startTime = System.currentTimeMillis();
+     long endTime = startTime;
+     boolean success;
+
+     // counters
+     int branchCount;
+     int commitCount;
+     int blobCount;
+     int issueCount;
+
+     /**
+      * Adds the counters of another result to this one. The success flag
+      * and the timestamps are not merged.
+      */
+     void add(IndexResult other) {
+         branchCount += other.branchCount;
+         commitCount += other.commitCount;
+         blobCount += other.blobCount;
+         issueCount += other.issueCount;
+     }
+
+     /**
+      * Marks the result as successful and records the end time.
+      */
+     void success() {
+         endTime = System.currentTimeMillis();
+         success = true;
+     }
+
+     /**
+      * @return the time between start and end, in seconds
+      */
+     float duration() {
+         long elapsedMillis = endTime - startTime;
+         return elapsedMillis / 1000f;
+     }
}
}
diff --git a/src/com/gitblit/models/SearchResult.java b/src/com/gitblit/models/SearchResult.java index 181eb0e0..56623075 100644 --- a/src/com/gitblit/models/SearchResult.java +++ b/src/com/gitblit/models/SearchResult.java @@ -4,7 +4,7 @@ import java.io.Serializable; import java.util.Date;
import java.util.List;
-import com.gitblit.utils.LuceneUtils.ObjectType;
+import com.gitblit.Constants.SearchObjectType;
/**
* Model class that represents a search result.
@@ -42,7 +42,7 @@ public class SearchResult implements Serializable { public List<String> labels;
- public ObjectType type;
+ public SearchObjectType type;
public SearchResult() {
}
diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java deleted file mode 100644 index ca1bbf1d..00000000 --- a/src/com/gitblit/utils/LuceneUtils.java +++ /dev/null @@ -1,1018 +0,0 @@ -/*
- * Copyright 2012 gitblit.com.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.gitblit.utils;
-
-import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.text.MessageFormat;
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.DateTools;
-import org.apache.lucene.document.DateTools.Resolution;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.index.MultiReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.search.highlight.Fragmenter;
-import org.apache.lucene.search.highlight.Highlighter;
-import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
-import org.apache.lucene.search.highlight.QueryScorer;
-import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
-import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
-import org.apache.lucene.search.highlight.TokenSources;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
-import org.eclipse.jgit.diff.DiffEntry.ChangeType;
-import org.eclipse.jgit.lib.Constants;
-import org.eclipse.jgit.lib.ObjectId;
-import org.eclipse.jgit.lib.ObjectLoader;
-import org.eclipse.jgit.lib.ObjectReader;
-import org.eclipse.jgit.lib.Repository;
-import org.eclipse.jgit.revwalk.RevCommit;
-import org.eclipse.jgit.revwalk.RevTree;
-import org.eclipse.jgit.revwalk.RevWalk;
-import org.eclipse.jgit.storage.file.FileBasedConfig;
-import org.eclipse.jgit.treewalk.EmptyTreeIterator;
-import org.eclipse.jgit.treewalk.TreeWalk;
-import org.eclipse.jgit.util.FS;
-
-import com.gitblit.models.IssueModel;
-import com.gitblit.models.IssueModel.Attachment;
-import com.gitblit.models.PathModel.PathChangeModel;
-import com.gitblit.models.RefModel;
-import com.gitblit.models.SearchResult;
-
-/**
- * A collection of utility methods for indexing and querying a Lucene repository
- * index.
- *
- * @author James Moger
- *
- */
-public class LuceneUtils {
-
- /**
- * The types of objects that can be indexed and queried.
- */
- public static enum ObjectType {
- commit, blob, issue;
-
- static ObjectType fromName(String name) {
- for (ObjectType value : values()) {
- if (value.name().equals(name)) {
- return value;
- }
- }
- return null;
- }
- }
-
- private static final Version LUCENE_VERSION = Version.LUCENE_35;
- private static final int INDEX_VERSION = 1;
-
- private static final String FIELD_OBJECT_TYPE = "type";
- private static final String FIELD_ISSUE = "issue";
- private static final String FIELD_PATH = "path";
- private static final String FIELD_COMMIT = "commit";
- private static final String FIELD_BRANCH = "branch";
- private static final String FIELD_REPOSITORY = "repository";
- private static final String FIELD_SUMMARY = "summary";
- private static final String FIELD_CONTENT = "content";
- private static final String FIELD_AUTHOR = "author";
- private static final String FIELD_COMMITTER = "committer";
- private static final String FIELD_DATE = "date";
- private static final String FIELD_TAG = "tag";
- private static final String FIELD_LABEL = "label";
- private static final String FIELD_ATTACHMENT = "attachment";
-
- private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
- "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
- "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
-
- private static Set<String> excludedBranches = new TreeSet<String>(
- Arrays.asList("/refs/heads/gb-issues"));
-
- private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
- private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
-
- private static final String LUCENE_DIR = "lucene";
- private static final String CONF_FILE = "lucene.conf";
- private static final String CONF_INDEX = "index";
- private static final String CONF_VERSION = "version";
- private static final String CONF_ALIAS = "aliases";
- private static final String CONF_BRANCH = "branches";
-
- /**
- * Returns the author for the commit, if this information is available.
- *
- * @param commit
- * @return an author or unknown
- */
- private static String getAuthor(RevCommit commit) {
- String name = "unknown";
- try {
- name = commit.getAuthorIdent().getName();
- if (StringUtils.isEmpty(name)) {
- name = commit.getAuthorIdent().getEmailAddress();
- }
- } catch (NullPointerException n) {
- }
- return name;
- }
-
- /**
- * Returns the committer for the commit, if this information is available.
- *
- * @param commit
- * @return an committer or unknown
- */
- private static String getCommitter(RevCommit commit) {
- String name = "unknown";
- try {
- name = commit.getCommitterIdent().getName();
- if (StringUtils.isEmpty(name)) {
- name = commit.getCommitterIdent().getEmailAddress();
- }
- } catch (NullPointerException n) {
- }
- return name;
- }
-
- /**
- * Construct a keyname from the branch.
- *
- * @param branchName
- * @return a keyname appropriate for the Git config file format
- */
- private static String getBranchKey(String branchName) {
- return StringUtils.getSHA1(branchName);
- }
-
- /**
- * Returns the Lucene configuration for the specified repository.
- *
- * @param repository
- * @return a config object
- */
- private static FileBasedConfig getConfig(Repository repository) {
- File file = new File(repository.getDirectory(), CONF_FILE);
- FileBasedConfig config = new FileBasedConfig(file, FS.detect());
- return config;
- }
-
- /**
- * Reads the Lucene config file for the repository to check the index
- * version. If the index version is different, then rebuild the repository
- * index.
- *
- * @param repository
- * @return true of the on-disk index format is different than INDEX_VERSION
- */
- public static boolean shouldReindex(Repository repository) {
- try {
- FileBasedConfig config = getConfig(repository);
- config.load();
- int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
- // reindex if versions do not match
- return indexVersion != INDEX_VERSION;
- } catch (Throwable t) {
- }
- return true;
- }
-
- /**
- * Deletes the Lucene index for the specified repository.
- *
- * @param repository
- * @return true, if successful
- */
- public static boolean deleteIndex(Repository repository) {
- try {
- File luceneIndex = new File(repository.getDirectory(), LUCENE_DIR);
- if (luceneIndex.exists()) {
- org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
- org.eclipse.jgit.util.FileUtils.RECURSIVE);
- }
- File luceneConfig = new File(repository.getDirectory(), CONF_FILE);
- if (luceneConfig.exists()) {
- luceneConfig.delete();
- }
- return true;
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- /**
- * This completely indexes the repository and will destroy any existing
- * index.
- *
- * @param repositoryName
- * @param repository
- * @return IndexResult
- */
- public static IndexResult reindex(String repositoryName, Repository repository) {
- IndexResult result = new IndexResult();
- if (!LuceneUtils.deleteIndex(repository)) {
- return result;
- }
- try {
- FileBasedConfig config = getConfig(repository);
- Set<String> indexedCommits = new TreeSet<String>();
- IndexWriter writer = getIndexWriter(repository, true);
- // build a quick lookup of tags
- Map<String, List<String>> tags = new HashMap<String, List<String>>();
- for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
- if (!tag.isAnnotatedTag()) {
- // skip non-annotated tags
- continue;
- }
- if (!tags.containsKey(tag.getObjectId())) {
- tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
- }
- tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
- }
-
- ObjectReader reader = repository.newObjectReader();
-
- // get the local branches
- List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
-
- // sort them by most recently updated
- Collections.sort(branches, new Comparator<RefModel>() {
- @Override
- public int compare(RefModel ref1, RefModel ref2) {
- return ref2.getDate().compareTo(ref1.getDate());
- }
- });
-
- // reorder default branch to first position
- RefModel defaultBranch = null;
- ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
- for (RefModel branch : branches) {
- if (branch.getObjectId().equals(defaultBranchId)) {
- defaultBranch = branch;
- break;
- }
- }
- branches.remove(defaultBranch);
- branches.add(0, defaultBranch);
-
- // walk through each branch
- for (RefModel branch : branches) {
- if (excludedBranches.contains(branch.getName())) {
- continue;
- }
-
- String branchName = branch.getName();
- RevWalk revWalk = new RevWalk(reader);
- RevCommit tip = revWalk.parseCommit(branch.getObjectId());
- String tipId = tip.getId().getName();
-
- String keyName = getBranchKey(branchName);
- config.setString(CONF_ALIAS, null, keyName, branchName);
- config.setString(CONF_BRANCH, null, keyName, tipId);
-
- // index the blob contents of the tree
- TreeWalk treeWalk = new TreeWalk(repository);
- treeWalk.addTree(tip.getTree());
- treeWalk.setRecursive(true);
-
- Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
- while (treeWalk.next()) {
- paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
- }
-
- ByteArrayOutputStream os = new ByteArrayOutputStream();
- byte[] tmp = new byte[32767];
-
- RevWalk commitWalk = new RevWalk(reader);
- commitWalk.markStart(tip);
-
- RevCommit commit;
- while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
- TreeWalk diffWalk = new TreeWalk(reader);
- int parentCount = commit.getParentCount();
- switch (parentCount) {
- case 0:
- diffWalk.addTree(new EmptyTreeIterator());
- break;
- case 1:
- diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
- break;
- default:
- // skip merge commits
- continue;
- }
- diffWalk.addTree(getTree(commitWalk, commit));
- diffWalk.setFilter(ANY_DIFF);
- diffWalk.setRecursive(true);
- while ((paths.size() > 0) && diffWalk.next()) {
- String path = diffWalk.getPathString();
- if (!paths.containsKey(path)) {
- continue;
- }
-
- // remove path from set
- ObjectId blobId = paths.remove(path);
- result.blobCount++;
-
- // index the blob metadata
- String blobAuthor = getAuthor(commit);
- String blobCommitter = getCommitter(commit);
- String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
- Resolution.MINUTE);
-
- Document doc = new Document();
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
- doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
-
- // determine extension to compare to the extension
- // blacklist
- String ext = null;
- String name = path.toLowerCase();
- if (name.indexOf('.') > -1) {
- ext = name.substring(name.lastIndexOf('.') + 1);
- }
-
- // index the blob content
- if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
- ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
- InputStream in = ldr.openStream();
- int n;
- while ((n = in.read(tmp)) > 0) {
- os.write(tmp, 0, n);
- }
- in.close();
- byte[] content = os.toByteArray();
- String str = new String(content, Constants.CHARACTER_ENCODING);
- doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
- os.reset();
- }
-
- // add the blob to the index
- writer.addDocument(doc);
- }
- }
-
- os.close();
-
- // index the tip commit object
- if (indexedCommits.add(tipId)) {
- Document doc = createDocument(tip, tags.get(tipId));
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
- writer.addDocument(doc);
- result.commitCount += 1;
- result.branchCount += 1;
- }
-
- // traverse the log and index the previous commit objects
- RevWalk historyWalk = new RevWalk(reader);
- historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
- RevCommit rev;
- while ((rev = historyWalk.next()) != null) {
- String hash = rev.getId().getName();
- if (indexedCommits.add(hash)) {
- Document doc = createDocument(rev, tags.get(hash));
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
- writer.addDocument(doc);
- result.commitCount += 1;
- }
- }
- }
-
- // finished
- reader.release();
-
- // this repository has a gb-issues branch, index all issues
- if (IssueUtils.getIssuesBranch(repository) != null) {
- List<IssueModel> issues = IssueUtils.getIssues(repository, null);
- if (issues.size() > 0) {
- result.branchCount += 1;
- }
- for (IssueModel issue : issues) {
- result.issueCount++;
- Document doc = createDocument(issue);
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
- writer.addDocument(doc);
- }
- }
-
- // commit all changes and reset the searcher
- config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
- config.save();
- resetIndexSearcher(repository);
- writer.commit();
- result.success = true;
- } catch (Exception e) {
- e.printStackTrace();
- }
- return result;
- }
-
- /**
- * Get the tree associated with the given commit.
- *
- * @param walk
- * @param commit
- * @return tree
- * @throws IOException
- */
- protected static RevTree getTree(final RevWalk walk, final RevCommit commit)
- throws IOException {
- final RevTree tree = commit.getTree();
- if (tree != null) {
- return tree;
- }
- walk.parseHeaders(commit);
- return commit.getTree();
- }
-
- /**
- * Incrementally update the index with the specified commit for the
- * repository.
- *
- * @param repositoryName
- * @param repository
- * @param branch
- * the fully qualified branch name (e.g. refs/heads/master)
- * @param commit
- * @return true, if successful
- */
- private static IndexResult index(String repositoryName, Repository repository,
- String branch, RevCommit commit) {
- IndexResult result = new IndexResult();
- try {
- if (excludedBranches.contains(branch)) {
- if (IssueUtils.GB_ISSUES.equals(branch)) {
- // index an issue
- String issueId = commit.getShortMessage().substring(2).trim();
- IssueModel issue = IssueUtils.getIssue(repository, issueId);
- if (issue == null) {
- // issue was deleted, remove from index
- IndexWriter writer = getIndexWriter(repository, false);
- writer.deleteDocuments(
- new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
- FIELD_ISSUE, issueId));
- writer.commit();
- result.success = true;
- return result;
- }
- result.success = index(repositoryName, repository, issue);
- result.issueCount++;
- return result;
-
- }
- return result;
- }
- List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
- String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
- Resolution.MINUTE);
- IndexWriter writer = getIndexWriter(repository, false);
- for (PathChangeModel path : changedPaths) {
- // delete the indexed blob
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
- new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path));
-
- // re-index the blob
- if (!ChangeType.DELETE.equals(path.changeType)) {
- result.blobCount++;
- Document doc = new Document();
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
- Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
- doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
-
- // determine extension to compare to the extension
- // blacklist
- String ext = null;
- String name = path.name.toLowerCase();
- if (name.indexOf('.') > -1) {
- ext = name.substring(name.lastIndexOf('.') + 1);
- }
-
- if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
- // read the blob content
- String str = JGitUtils.getStringContent(repository, commit.getTree(),
- path.path);
- doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
- writer.addDocument(doc);
- }
- }
- }
- writer.commit();
-
- Document doc = createDocument(commit, null);
- result.commitCount++;
- result.success = index(repositoryName, repository, doc);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return result;
- }
-
- /**
- * Incrementally update the index with the specified issue for the
- * repository.
- *
- * @param repository
- * @param issue
- * @return true, if successful
- */
- public static boolean index(String repositoryName, Repository repository, IssueModel issue) {
- try {
- // delete the old issue from the index, if exists
- IndexWriter writer = getIndexWriter(repository, false);
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
- FIELD_ISSUE, String.valueOf(issue.id)));
- writer.commit();
-
- Document doc = createDocument(issue);
- return index(repositoryName, repository, doc);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return false;
- }
-
- /**
- * Updates a repository index incrementally from the last indexed commits.
- *
- * @param repositoryName
- * @param repository
- * @return IndexResult
- */
- public static IndexResult updateIndex(String repositoryName, Repository repository) {
- IndexResult result = new IndexResult();
- try {
- FileBasedConfig config = getConfig(repository);
- config.load();
-
- // build a quick lookup of annotated tags
- Map<String, List<String>> tags = new HashMap<String, List<String>>();
- for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
- if (!tag.isAnnotatedTag()) {
- // skip non-annotated tags
- continue;
- }
- if (!tags.containsKey(tag.getObjectId())) {
- tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
- }
- tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
- }
-
- // detect branch deletion
- // first assume all branches are deleted and then remove each
- // existing branch from deletedBranches during indexing
- Set<String> deletedBranches = new TreeSet<String>();
- for (String alias : config.getNames(CONF_ALIAS)) {
- String branch = config.getString(CONF_ALIAS, null, alias);
- deletedBranches.add(branch);
- }
-
- // walk through each branches
- List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
- for (RefModel branch : branches) {
- String branchName = branch.getName();
-
- // remove this branch from the deletedBranches set
- deletedBranches.remove(branchName);
-
- // determine last commit
- String keyName = getBranchKey(branchName);
- String lastCommit = config.getString(CONF_BRANCH, null, keyName);
-
- List<RevCommit> revs;
- if (StringUtils.isEmpty(lastCommit)) {
- // new branch/unindexed branch, get all commits on branch
- revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
- } else {
- // pre-existing branch, get changes since last commit
- revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
- }
-
- if (revs.size() > 0) {
- result.branchCount += 1;
- }
-
- // reverse the list of commits so we start with the first commit
- Collections.reverse(revs);
- for (RevCommit commit : revs) {
- result.add(index(repositoryName, repository, branchName, commit));
- }
-
- // update the config
- config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
- config.setString(CONF_ALIAS, null, keyName, branchName);
- config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
- config.save();
- }
-
- // the deletedBranches set will normally be empty by this point
- // unless a branch really was deleted and no longer exists
- if (deletedBranches.size() > 0) {
- for (String branch : deletedBranches) {
- IndexWriter writer = getIndexWriter(repository, false);
- writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
- writer.commit();
- }
- }
- result.success = true;
- } catch (Throwable t) {
- t.printStackTrace();
- }
- return result;
- }
-
- /**
- * Creates a Lucene document from an issue.
- *
- * @param issue
- * @return a Lucene document
- */
- private static Document createDocument(IssueModel issue) {
- Document doc = new Document();
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
- Field.Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
- Store.YES, Field.Index.NO));
- doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
- List<String> attachments = new ArrayList<String>();
- for (Attachment attachment : issue.getAttachments()) {
- attachments.add(attachment.name.toLowerCase());
- }
- doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
- Index.ANALYZED));
- doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
- Index.ANALYZED));
- return doc;
- }
-
- /**
- * Creates a Lucene document for a commit
- *
- * @param commit
- * @param tags
- * @return a Lucene document
- */
- private static Document createDocument(RevCommit commit, List<String> tags) {
- Document doc = new Document();
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,
- Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
- Resolution.MINUTE), Store.YES, Index.NO));
- doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
- doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
- if (!ArrayUtils.isEmpty(tags)) {
- doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
- }
- return doc;
- }
-
- /**
- * Incrementally index an object for the repository.
- *
- * @param repositoryName
- * @param repository
- * @param doc
- * @return true, if successful
- */
- private static boolean index(String repositoryName, Repository repository, Document doc) {
- try {
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
- IndexWriter writer = getIndexWriter(repository, false);
- writer.addDocument(doc);
- resetIndexSearcher(repository);
- writer.commit();
- return true;
- } catch (Exception e) {
- e.printStackTrace();
- }
- return false;
- }
-
- private static SearchResult createSearchResult(Document doc, float score) throws ParseException {
- SearchResult result = new SearchResult();
- result.score = score;
- result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
- result.summary = doc.get(FIELD_SUMMARY);
- result.author = doc.get(FIELD_AUTHOR);
- result.committer = doc.get(FIELD_COMMITTER);
- result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
- result.repository = doc.get(FIELD_REPOSITORY);
- result.branch = doc.get(FIELD_BRANCH);
- result.commitId = doc.get(FIELD_COMMIT);
- result.issueId = doc.get(FIELD_ISSUE);
- result.path = doc.get(FIELD_PATH);
- if (doc.get(FIELD_TAG) != null) {
- result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
- }
- if (doc.get(FIELD_LABEL) != null) {
- result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
- }
- return result;
- }
-
- private static void resetIndexSearcher(Repository repository) throws IOException {
- IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());
- if (searcher != null) {
- SEARCHERS.remove(repository.getDirectory());
- searcher.close();
- }
- }
-
- /**
- * Gets an index searcher for the repository.
- *
- * @param repository
- * @return
- * @throws IOException
- */
- private static IndexSearcher getIndexSearcher(Repository repository) throws IOException {
- IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());
- if (searcher == null) {
- IndexWriter writer = getIndexWriter(repository, false);
- searcher = new IndexSearcher(IndexReader.open(writer, true));
- SEARCHERS.put(repository.getDirectory(), searcher);
- }
- return searcher;
- }
-
- /**
- * Gets an index writer for the repository. The index will be created if it
- * does not already exist or if forceCreate is specified.
- *
- * @param repository
- * @param forceCreate
- * @return an IndexWriter
- * @throws IOException
- */
- private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate)
- throws IOException {
- IndexWriter indexWriter = WRITERS.get(repository.getDirectory());
- File indexFolder = new File(repository.getDirectory(), LUCENE_DIR);
- Directory directory = FSDirectory.open(indexFolder);
- if (forceCreate || !indexFolder.exists()) {
- // if the writer is going to blow away the existing index and create
- // a new one then it should not be cached. instead, close any open
- // writer, create a new one, and return.
- if (indexWriter != null) {
- indexWriter.close();
- indexWriter = null;
- WRITERS.remove(repository.getDirectory());
- }
- indexFolder.mkdirs();
- IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(
- LUCENE_VERSION));
- config.setOpenMode(OpenMode.CREATE);
- IndexWriter writer = new IndexWriter(directory, config);
- writer.close();
- }
-
- if (indexWriter == null) {
- IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(
- LUCENE_VERSION));
- config.setOpenMode(OpenMode.APPEND);
- indexWriter = new IndexWriter(directory, config);
- WRITERS.put(repository.getDirectory(), indexWriter);
- }
- return indexWriter;
- }
-
- /**
- * Searches the specified repositories for the given text or query
- *
- * @param text
- * if the text is null or empty, null is returned
- * @param maximumHits
- * the maximum number of hits to collect
- * @param repositories
- * a list of repositories to search. if no repositories are
- * specified null is returned.
- * @return a list of SearchResults in order from highest to the lowest score
- *
- */
- public static List<SearchResult> search(String text, int maximumHits,
- Repository... repositories) {
- if (StringUtils.isEmpty(text)) {
- return null;
- }
- if (repositories.length == 0) {
- return null;
- }
- Set<SearchResult> results = new LinkedHashSet<SearchResult>();
- StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
- try {
- // default search checks summary and content
- BooleanQuery query = new BooleanQuery();
- QueryParser qp;
- qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
- qp.setAllowLeadingWildcard(true);
- query.add(qp.parse(text), Occur.SHOULD);
-
- qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
- qp.setAllowLeadingWildcard(true);
- query.add(qp.parse(text), Occur.SHOULD);
-
- IndexSearcher searcher;
- if (repositories.length == 1) {
- // single repository search
- searcher = getIndexSearcher(repositories[0]);
- } else {
- // multiple repository search
- List<IndexReader> readers = new ArrayList<IndexReader>();
- for (Repository repository : repositories) {
- IndexSearcher repositoryIndex = getIndexSearcher(repository);
- readers.add(repositoryIndex.getIndexReader());
- }
- IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
- MultiReader reader = new MultiReader(rdrs);
- searcher = new IndexSearcher(reader);
- }
- Query rewrittenQuery = searcher.rewrite(query);
- TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
- searcher.search(rewrittenQuery, collector);
- ScoreDoc[] hits = collector.topDocs().scoreDocs;
- for (int i = 0; i < hits.length; i++) {
- int docId = hits[i].doc;
- Document doc = searcher.doc(docId);
- SearchResult result = createSearchResult(doc, hits[i].score);
- String content = doc.get(FIELD_CONTENT);
-
- result.fragment = getHighlightedFragment(analyzer, query, content, result);
- results.add(result);
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- return new ArrayList<SearchResult>(results);
- }
-
- private static String getHighlightedFragment(Analyzer analyzer, Query query,
- String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
- content = content == null ? "":StringUtils.escapeForHtml(content, false);
-
- TokenStream stream = TokenSources.getTokenStream("content", content, analyzer);
- QueryScorer scorer = new QueryScorer(query, "content");
- Fragmenter fragmenter;
-
- if (ObjectType.commit == result.type) {
- fragmenter = new SimpleSpanFragmenter(scorer, 1024);
- } else {
- fragmenter = new SimpleSpanFragmenter(scorer, 150);
- }
-
- // use an artificial delimiter for the token
- String termTag = "<!--[";
- String termTagEnd = "]-->";
- SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
- Highlighter highlighter = new Highlighter(formatter, scorer);
- highlighter.setTextFragmenter(fragmenter);
-
- String [] fragments = highlighter.getBestFragments(stream, content, 5);
- if (ArrayUtils.isEmpty(fragments)) {
- if (ObjectType.blob == result.type) {
- return "";
- }
- return "<pre class=\"text\">" + content + "</pre>";
- }
- StringBuilder sb = new StringBuilder();
- for (int i = 0, len = fragments.length; i < len; i++) {
- String fragment = fragments[i];
-
- // resurrect the raw fragment from removing the artificial delimiters
- String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
- sb.append(getPreTag(result, raw, content));
-
- // replace the artificial delimiter with html tags
- String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
- sb.append(html);
- sb.append("</pre>");
- if (i < len - 1) {
- sb.append("<span class=\"ellipses\">...</span><br/>");
- }
- }
- return sb.toString();
- }
-
- private static String getPreTag(SearchResult result, String fragment, String content) {
- String pre = "<pre class=\"text\">";
- if (ObjectType.blob == result.type) {
- int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment)));
- int lastDot = result.path.lastIndexOf('.');
- if (lastDot > -1) {
- String ext = result.path.substring(lastDot + 1).toLowerCase();
- pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext);
- } else {
- pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);
- }
- }
- return pre;
- }
-
- /**
- * Close all the index writers and searchers
- */
- public static void close() {
- // close writers
- for (File file : WRITERS.keySet()) {
- try {
- WRITERS.get(file).close(true);
- } catch (Throwable t) {
- t.printStackTrace();
- }
- }
- WRITERS.clear();
-
- // close searchers
- for (File file : SEARCHERS.keySet()) {
- try {
- SEARCHERS.get(file).close();
- } catch (Throwable t) {
- t.printStackTrace();
- }
- }
- SEARCHERS.clear();
- }
-
- public static class IndexResult {
- public boolean success;
- public int branchCount;
- public int commitCount;
- public int blobCount;
- public int issueCount;
-
- public void add(IndexResult result) {
- this.branchCount += result.branchCount;
- this.commitCount += result.commitCount;
- this.blobCount += result.blobCount;
- this.issueCount += result.issueCount;
- }
- }
-}
diff --git a/src/com/gitblit/wicket/pages/LucenePage.java b/src/com/gitblit/wicket/pages/LucenePage.java index d9b4d5ba..51456b12 100644 --- a/src/com/gitblit/wicket/pages/LucenePage.java +++ b/src/com/gitblit/wicket/pages/LucenePage.java @@ -28,7 +28,6 @@ import org.apache.wicket.markup.repeater.data.DataView; import org.apache.wicket.markup.repeater.data.ListDataProvider;
import org.apache.wicket.model.Model;
import org.eclipse.jgit.lib.Constants;
-import org.eclipse.jgit.lib.Repository;
import com.gitblit.Constants.SearchType;
import com.gitblit.GitBlit;
@@ -36,7 +35,6 @@ import com.gitblit.models.RepositoryModel; import com.gitblit.models.SearchResult;
import com.gitblit.models.UserModel;
import com.gitblit.utils.ArrayUtils;
-import com.gitblit.utils.LuceneUtils;
import com.gitblit.utils.StringUtils;
import com.gitblit.wicket.GitBlitWebSession;
import com.gitblit.wicket.StringChoiceRenderer;
@@ -131,7 +129,9 @@ public class LucenePage extends RootPage { // execute search
final List<SearchResult> results = new ArrayList<SearchResult>();
- results.addAll(search(repositories, query));
+ if (!ArrayUtils.isEmpty(repositories) && !StringUtils.isEmpty(query)) {
+ results.addAll(GitBlit.self().search(query, 100, repositories));
+ }
// search results view
ListDataProvider<SearchResult> resultsDp = new ListDataProvider<SearchResult>(results);
@@ -170,20 +170,5 @@ public class LucenePage extends RootPage { }
};
add(resultsView.setVisible(results.size() > 0));
- }
-
- private List<SearchResult> search(List<String> repositories, String query) {
- if (ArrayUtils.isEmpty(repositories) || StringUtils.isEmpty(query)) {
- return new ArrayList<SearchResult>();
- }
- List<Repository> repos = new ArrayList<Repository>();
- for (String r : repositories) {
- repos.add(GitBlit.self().getRepository(r));
- }
- List<SearchResult> srs = LuceneUtils.search(query, 100, repos.toArray(new Repository[repos.size()]));
- for (Repository r : repos) {
- r.close();
- }
- return srs;
- }
+ }
}
diff --git a/tests/com/gitblit/tests/GitBlitSuite.java b/tests/com/gitblit/tests/GitBlitSuite.java index 4a70964b..af18083d 100644 --- a/tests/com/gitblit/tests/GitBlitSuite.java +++ b/tests/com/gitblit/tests/GitBlitSuite.java @@ -52,7 +52,7 @@ import com.gitblit.utils.JGitUtils; ObjectCacheTest.class, UserServiceTest.class, MarkdownUtilsTest.class, JGitUtilsTest.class,
SyndicationUtilsTest.class, DiffUtilsTest.class, MetricUtilsTest.class,
TicgitUtilsTest.class, GitBlitTest.class, FederationTests.class, RpcTests.class,
- GitServletTest.class, GroovyScriptTest.class, LuceneUtilsTest.class, IssuesTest.class })
+ GitServletTest.class, GroovyScriptTest.class, LuceneExecutorTest.class, IssuesTest.class })
public class GitBlitSuite {
public static final File REPOSITORIES = new File("git");
@@ -87,6 +87,7 @@ public class GitBlitSuite { }
public static Repository getIssuesTestRepository() throws Exception {
+ JGitUtils.createRepository(REPOSITORIES, "gb-issues.git").close();
return new FileRepository(new File(REPOSITORIES, "gb-issues.git"));
}
@@ -137,8 +138,6 @@ public class GitBlitSuite { cloneOrFetch("test/theoretical-physics.git", "https://github.com/certik/theoretical-physics.git");
cloneOrFetch("test/gitective.git", "https://github.com/kevinsawicki/gitective.git");
- JGitUtils.createRepository(REPOSITORIES, "gb-issues.git").close();
-
enableTickets("ticgit.git");
enableDocs("ticgit.git");
showRemoteBranches("ticgit.git");
diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java index eb7b66dd..9133f9b1 100644 --- a/tests/com/gitblit/tests/IssuesTest.java +++ b/tests/com/gitblit/tests/IssuesTest.java @@ -26,6 +26,7 @@ import org.bouncycastle.util.Arrays; import org.eclipse.jgit.lib.Repository;
import org.junit.Test;
+import com.gitblit.LuceneExecutor;
import com.gitblit.models.IssueModel;
import com.gitblit.models.IssueModel.Attachment;
import com.gitblit.models.IssueModel.Change;
@@ -35,7 +36,6 @@ import com.gitblit.models.IssueModel.Status; import com.gitblit.models.SearchResult;
import com.gitblit.utils.IssueUtils;
import com.gitblit.utils.IssueUtils.IssueFilter;
-import com.gitblit.utils.LuceneUtils;
import com.gitblit.utils.StringUtils;
/**
@@ -123,17 +123,18 @@ public class IssuesTest { return issue.status.isClosed();
}
});
-
+
assertTrue(allIssues.size() > 0);
assertEquals(1, openIssues.size());
assertEquals(1, closedIssues.size());
// build a new Lucene index
- LuceneUtils.deleteIndex(repository);
+ LuceneExecutor lucene = new LuceneExecutor(null, GitBlitSuite.REPOSITORIES);
+ lucene.deleteIndex(name);
for (IssueModel anIssue : allIssues) {
- LuceneUtils.index(name, repository, anIssue);
+ lucene.index(name, anIssue);
}
- List<SearchResult> hits = LuceneUtils.search("working", 10, repository);
+ List<SearchResult> hits = lucene.search("working", 10, name);
assertTrue(hits.size() > 0);
// reindex an issue
@@ -142,14 +143,14 @@ public class IssuesTest { change.comment("this is a test of reindexing an issue");
IssueUtils.updateIssue(repository, issue.id, change);
issue = IssueUtils.getIssue(repository, issue.id);
- LuceneUtils.index(name, repository, issue);
+ lucene.index(name, issue);
// delete all issues
for (IssueModel anIssue : allIssues) {
assertTrue(IssueUtils.deleteIssue(repository, anIssue.id, "D"));
}
- LuceneUtils.close();
+ lucene.close();
repository.close();
}
diff --git a/tests/com/gitblit/tests/LuceneUtilsTest.java b/tests/com/gitblit/tests/LuceneExecutorTest.java index 01858f50..ddc9bf85 100644 --- a/tests/com/gitblit/tests/LuceneUtilsTest.java +++ b/tests/com/gitblit/tests/LuceneExecutorTest.java @@ -17,13 +17,14 @@ package com.gitblit.tests; import static org.junit.Assert.assertEquals;
+import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.lib.Repository;
import org.junit.Test;
+import com.gitblit.LuceneExecutor;
import com.gitblit.models.SearchResult;
-import com.gitblit.utils.LuceneUtils;
import com.gitblit.utils.StringUtils;
/**
@@ -32,95 +33,118 @@ import com.gitblit.utils.StringUtils; * @author James Moger
*
*/
-public class LuceneUtilsTest {
+public class LuceneExecutorTest {
+ private LuceneExecutor newLuceneExecutor() {
+ return new LuceneExecutor(null, GitBlitSuite.REPOSITORIES);
+ }
+
+ private String getName(Repository repository) {
+ return StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),
+ repository.getDirectory().getAbsolutePath());
+ }
+
@Test
public void testIndex() throws Exception {
+ LuceneExecutor lucene = newLuceneExecutor();
+
// reindex helloworld
Repository repository = GitBlitSuite.getHelloworldRepository();
- String name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),
- repository.getDirectory().getAbsolutePath());
- LuceneUtils.reindex(name, repository);
- SearchResult result = LuceneUtils.search("type:blob AND path:bit.bit", 1, repository).get(0);
- assertEquals("Mike Donaghy", result.author);
- result = LuceneUtils.search("type:blob AND path:clipper.prg", 1, repository).get(0);
- assertEquals("tinogomes", result.author);
+ String name = getName(repository);
+ lucene.reindex(name, repository);
repository.close();
+
+ SearchResult result = lucene.search("type:blob AND path:bit.bit", 1, name).get(0);
+ assertEquals("Mike Donaghy", result.author);
+ result = lucene.search("type:blob AND path:clipper.prg", 1, name).get(0);
+ assertEquals("tinogomes", result.author);
// reindex theoretical physics
repository = GitBlitSuite.getTheoreticalPhysicsRepository();
- name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),
- repository.getDirectory().getAbsolutePath());
- LuceneUtils.reindex(name, repository);
+ name = getName(repository);
+ lucene.reindex(name, repository);
repository.close();
// reindex JGit
repository = GitBlitSuite.getJGitRepository();
- name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),
- repository.getDirectory().getAbsolutePath());
- LuceneUtils.reindex(name, repository);
+ name = getName(repository);
+ lucene.reindex(name, repository);
repository.close();
- LuceneUtils.close();
+ lucene.close();
}
@Test
public void testQuery() throws Exception {
+ LuceneExecutor lucene = new LuceneExecutor(null, GitBlitSuite.REPOSITORIES);
+
// 2 occurrences on the master branch
- Repository repository = GitBlitSuite.getHelloworldRepository();
- List<SearchResult> results = LuceneUtils.search("ada", 10, repository);
+ Repository repository = GitBlitSuite.getHelloworldRepository();
+ String name = getName(repository);
+ repository.close();
+
+ List<SearchResult> results = lucene.search("ada", 10, name);
assertEquals(2, results.size());
for (SearchResult res : results) {
assertEquals("refs/heads/master", res.branch);
}
// author test
- results = LuceneUtils.search("author: tinogomes", 10, repository);
+ results = lucene.search("author: tinogomes AND type:commit", 10, name);
assertEquals(2, results.size());
-
- repository.close();
+
// blob test
- results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);
+ results = lucene.search("type: blob AND \"import std.stdio\"", 10, name);
assertEquals(1, results.size());
assertEquals("d.D", results.get(0).path);
// 1 occurrence on the gh-pages branch
repository = GitBlitSuite.getTheoreticalPhysicsRepository();
- results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);
+ name = getName(repository);
+ repository.close();
+
+ results = lucene.search("\"add the .nojekyll file\"", 10, name);
assertEquals(1, results.size());
assertEquals("Ondrej Certik", results.get(0).author);
assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).commitId);
assertEquals("refs/heads/gh-pages", results.get(0).branch);
- results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);
+ results = lucene.search("type:blob AND \"src/intro.rst\"", 10, name);
assertEquals(4, results.size());
// hash id tests
- results = LuceneUtils.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);
+ results = lucene.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, name);
assertEquals(1, results.size());
- results = LuceneUtils.search("commit:57c4f26f157*", 10, repository);
- assertEquals(1, results.size());
-
- repository.close();
+ results = lucene.search("commit:57c4f26f157*", 10, name);
+ assertEquals(1, results.size());
// annotated tag test
repository = GitBlitSuite.getJGitRepository();
- results = LuceneUtils.search("I663208919f297836a9c16bf458e4a43ffaca4c12", 10, repository);
- assertEquals(1, results.size());
- assertEquals("[v1.3.0.201202151440-r]", results.get(0).labels.toString());
-
+ name = getName(repository);
repository.close();
- LuceneUtils.close();
+ results = lucene.search("I663208919f297836a9c16bf458e4a43ffaca4c12", 10, name);
+ assertEquals(1, results.size());
+ assertEquals("[v1.3.0.201202151440-r]", results.get(0).tags.toString());
+
+ lucene.close();
}
@Test
public void testMultiSearch() throws Exception {
- List<SearchResult> results = LuceneUtils.search("test", 10,
- GitBlitSuite.getHelloworldRepository(),
- GitBlitSuite.getJGitRepository());
- LuceneUtils.close();
+ LuceneExecutor lucene = newLuceneExecutor();
+ List<String> list = new ArrayList<String>();
+ Repository repository = GitBlitSuite.getHelloworldRepository();
+ list.add(getName(repository));
+ repository.close();
+
+ repository = GitBlitSuite.getJGitRepository();
+ list.add(getName(repository));
+ repository.close();
+
+ List<SearchResult> results = lucene.search("test", 10, list);
+ lucene.close();
assertEquals(10, results.size());
}
}
\ No newline at end of file |