summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author James Moger <james.moger@gitblit.com> 2012-02-26 17:40:48 -0500
committer James Moger <james.moger@gitblit.com> 2012-02-26 17:40:48 -0500
commit e31da050c6ab5ece38fb18196948337395ae59e6 (patch)
tree 3f5b785e45a2609713f1eb74ef31279e7270f25c /src
parent 85e0731a160baf33336596c6c8bb1cb042ac85bc (diff)
download gitblit-e31da050c6ab5ece38fb18196948337395ae59e6.tar.gz
gitblit-e31da050c6ab5ece38fb18196948337395ae59e6.zip
Partially working Lucene executor. Needs refactoring. (issue 16)
Diffstat (limited to 'src')
-rw-r--r--src/com/gitblit/GitBlit.java10
-rw-r--r--src/com/gitblit/LuceneExecutor.java177
-rw-r--r--src/com/gitblit/utils/LuceneUtils.java209
3 files changed, 356 insertions, 40 deletions
diff --git a/src/com/gitblit/GitBlit.java b/src/com/gitblit/GitBlit.java
index e6f07e08..580bf628 100644
--- a/src/com/gitblit/GitBlit.java
+++ b/src/com/gitblit/GitBlit.java
@@ -136,6 +136,8 @@ public class GitBlit implements ServletContextListener {
private MailExecutor mailExecutor;
+ private LuceneExecutor luceneExecutor;
+
private TimeZone timezone;
public GitBlit() {
@@ -1806,10 +1808,18 @@ public class GitBlit implements ServletContextListener {
setUserService(loginService);
mailExecutor = new MailExecutor(settings);
if (mailExecutor.isReady()) {
+ logger.info("Mail executor is scheduled to process the message queue every 2 minutes.");
scheduledExecutor.scheduleAtFixedRate(mailExecutor, 1, 2, TimeUnit.MINUTES);
} else {
logger.warn("Mail server is not properly configured. Mail services disabled.");
}
+ luceneExecutor = new LuceneExecutor(settings);
+ if (luceneExecutor.isReady()) {
+ logger.info("Lucene executor is scheduled to process the repository queue every 10 minutes.");
+ scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, 10, TimeUnit.MINUTES);
+ } else {
+ logger.warn("Lucene executor is disabled.");
+ }
if (startFederation) {
configureFederation();
}
diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
new file mode 100644
index 00000000..32004a73
--- /dev/null
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2012 gitblit.com.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gitblit;
+
+import java.text.MessageFormat;
+import java.util.HashSet;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.eclipse.jgit.lib.Repository;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.gitblit.models.RepositoryModel;
+import com.gitblit.utils.JGitUtils;
+import com.gitblit.utils.LuceneUtils;
+
+/**
+ * The Lucene executor handles indexing repositories synchronously and
+ * asynchronously from a queue.
+ * <p>
+ * Repository names are queued with {@link #queue(RepositoryModel)} and drained
+ * by {@link #run()}. On the first run, and on every run when
+ * <code>lucene.pollingMode</code> is enabled, every name returned by
+ * <code>GitBlit.self().getRepositoryList()</code> is queued as well.
+ *
+ * @author James Moger
+ *
+ */
+public class LuceneExecutor implements Runnable {
+
+    private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
+
+    private final Queue<String> queue = new ConcurrentLinkedQueue<String>();
+
+    private final IStoredSettings settings;
+
+    private final boolean isLuceneEnabled;
+
+    private final boolean isPollingMode;
+
+    private final AtomicBoolean firstRun = new AtomicBoolean(true);
+
+    /**
+     * Creates the executor and reads its two switches from the settings:
+     * <code>lucene.enableLucene</code> and <code>lucene.pollingMode</code>,
+     * both defaulting to false.
+     *
+     * @param settings
+     *            the stored settings to read the Lucene switches from
+     */
+    public LuceneExecutor(IStoredSettings settings) {
+        this.settings = settings;
+        this.isLuceneEnabled = settings.getBoolean("lucene.enableLucene", false);
+        this.isPollingMode = settings.getBoolean("lucene.pollingMode", false);
+    }
+
+    /**
+     * Indicates if the Lucene executor can index repositories.
+     *
+     * @return true if the Lucene executor is ready to index repositories
+     */
+    public boolean isReady() {
+        return isLuceneEnabled;
+    }
+
+    /**
+     * Returns the status of the Lucene queue.
+     *
+     * @return true, if the queue is empty
+     */
+    public boolean hasEmptyQueue() {
+        return queue.isEmpty();
+    }
+
+    /**
+     * Queues a repository to be asynchronously indexed.
+     *
+     * @param repository
+     *            the repository whose name is added to the queue
+     * @return true if the repository was queued, false if the executor is not
+     *         ready
+     */
+    public boolean queue(RepositoryModel repository) {
+        if (!isReady()) {
+            return false;
+        }
+        queue.add(repository.name);
+        return true;
+    }
+
+    /**
+     * Drains the queue and indexes each queued repository once. No exception
+     * escapes this method: a scheduled executor stops rescheduling a periodic
+     * task whose run throws.
+     */
+    @Override
+    public void run() {
+        if (!isLuceneEnabled) {
+            return;
+        }
+
+        // flip the first-run flag atomically; it is read before isPollingMode
+        // so that it is always cleared by the first run
+        boolean isFirstRun = firstRun.getAndSet(false);
+        if (isFirstRun || isPollingMode) {
+            // update all indexes on first run or if polling mode
+            queue.addAll(GitBlit.self().getRepositoryList());
+        }
+
+        Set<String> processed = new HashSet<String>();
+        String repositoryName;
+        while ((repositoryName = queue.poll()) != null) {
+            // mark the name before any work so that a multi-queued repository
+            // is attempted (and, on failure, reported) only once per run
+            if (!processed.add(repositoryName)) {
+                // skipping multi-queued repository
+                continue;
+            }
+            try {
+                Repository repository = GitBlit.self().getRepository(repositoryName);
+                if (repository == null) {
+                    logger.warn(MessageFormat.format(
+                            "Lucene executor could not find repository {0}. Skipping.",
+                            repositoryName));
+                    continue;
+                }
+                try {
+                    index(repositoryName, repository);
+                } finally {
+                    // always release the repository handle; a failure while
+                    // closing is reported by the enclosing catch
+                    repository.close();
+                }
+            } catch (Throwable e) {
+                logger.error(MessageFormat.format("Failed to update {0} Lucene index",
+                        repositoryName), e);
+            }
+        }
+    }
+
+    /**
+     * Synchronously indexes a repository. This may build a complete index of a
+     * repository or it may update an existing index. Failures are logged and
+     * not rethrown.
+     *
+     * @param repositoryName
+     *            the name of the repository
+     * @param repository
+     *            the repository object
+     */
+    public void index(String repositoryName, Repository repository) {
+        try {
+            if (!JGitUtils.hasCommits(repository)) {
+                logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",
+                        repositoryName));
+                return;
+            }
+
+            // decide between (re)building the entire index and updating it
+            // with the latest commits; only the chosen operation is timed
+            boolean rebuild = LuceneUtils.shouldReindex(repository);
+            long start = System.currentTimeMillis();
+            boolean success = rebuild ? LuceneUtils.reindex(repository) : LuceneUtils
+                    .updateIndex(repository);
+            long duration = System.currentTimeMillis() - start;
+
+            if (success) {
+                String msg = rebuild ? "Built {0} Lucene index in {1} msecs"
+                        : "Updated {0} Lucene index in {1} msecs";
+                logger.info(MessageFormat.format(msg, repositoryName, duration));
+            } else {
+                String msg = rebuild ? "Could not build {0} Lucene index!"
+                        : "Could not update {0} Lucene index!";
+                logger.error(MessageFormat.format(msg, repositoryName));
+            }
+        } catch (Throwable t) {
+            logger.error(MessageFormat.format("Lucene indexing failure for {0}", repositoryName), t);
+        }
+    }
+}
diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java
index 738382a4..eaf02dfb 100644
--- a/src/com/gitblit/utils/LuceneUtils.java
+++ b/src/com/gitblit/utils/LuceneUtils.java
@@ -7,6 +7,7 @@ import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
@@ -47,7 +48,9 @@ import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.util.FS;
import com.gitblit.models.IssueModel;
import com.gitblit.models.IssueModel.Attachment;
@@ -81,6 +84,7 @@ public class LuceneUtils {
}
private static final Version LUCENE_VERSION = Version.LUCENE_35;
+ private static final int INDEX_VERSION = 1;
private static final String FIELD_OBJECT_TYPE = "type";
private static final String FIELD_OBJECT_ID = "id";
@@ -94,11 +98,9 @@ public class LuceneUtils {
private static final String FIELD_LABEL = "label";
private static final String FIELD_ATTACHMENT = "attachment";
- private static Set<String> excludedExtensions = new TreeSet<String>(
- Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",
- "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",
- "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",
- "xlsx", "zip"));
+ private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
+ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
+ "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
private static Set<String> excludedBranches = new TreeSet<String>(
Arrays.asList("/refs/heads/gb-issues"));
@@ -106,6 +108,12 @@ public class LuceneUtils {
private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
+ private static final String CONF_FILE = "lucene.conf";
+ private static final String CONF_INDEX = "index";
+ private static final String CONF_VERSION = "version";
+ private static final String CONF_ALIAS = "aliases";
+ private static final String CONF_BRANCH = "branches";
+
/**
* Returns the name of the repository.
*
@@ -119,7 +127,49 @@ public class LuceneUtils {
return repository.getDirectory().getParentFile().getName();
}
}
-
+
+ /**
+  * Derives the key under which a branch is stored in the Lucene config
+  * file. The key is the result of StringUtils.getSHA1 for the branch name.
+  *
+  * @param branchName
+  *            the branch name to derive a key from
+  * @return a keyname appropriate for the Git config file format
+  */
+ private static String getBranchKey(String branchName) {
+     String keyName = StringUtils.getSHA1(branchName);
+     return keyName;
+ }
+
+ /**
+  * Creates a config object bound to the CONF_FILE file inside the
+  * repository directory. The file is not read here; callers invoke load()
+  * themselves when they need the stored values.
+  *
+  * @param repository
+  *            the repository whose directory holds the config file
+  * @return a config object
+  */
+ private static FileBasedConfig getConfig(Repository repository) {
+     File configFile = new File(repository.getDirectory(), CONF_FILE);
+     return new FileBasedConfig(configFile, FS.detect());
+ }
+
+ /**
+  * Reads the Lucene config file for the repository to check the index
+  * version. If the index version is different, then rebuild the repository
+  * index.
+  * <p>
+  * If the config cannot be read, the failure is printed and a rebuild is
+  * requested.
+  *
+  * @param repository
+  *            the repository whose Lucene config is inspected
+  * @return true if the on-disk index format is different than INDEX_VERSION
+  *         or if the config could not be read
+  */
+ public static boolean shouldReindex(Repository repository) {
+     try {
+         FileBasedConfig config = getConfig(repository);
+         config.load();
+         // a missing version entry reads as 0
+         int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
+         // reindex if versions do not match
+         return indexVersion != INDEX_VERSION;
+     } catch (Throwable t) {
+         // best effort: an unreadable config falls through to a rebuild, but
+         // report it so that a repeated full reindex can be diagnosed
+         t.printStackTrace();
+     }
+     return true;
+ }
+
/**
* Deletes the Lucene index for the specified repository.
*
@@ -133,6 +183,10 @@ public class LuceneUtils {
org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
org.eclipse.jgit.util.FileUtils.RECURSIVE);
}
+ File luceneConfig = new File(repository.getDirectory(), CONF_FILE);
+ if (luceneConfig.exists()) {
+ luceneConfig.delete();
+ }
return true;
} catch (IOException e) {
throw new RuntimeException(e);
@@ -146,14 +200,22 @@ public class LuceneUtils {
* @param repository
* @return true if the indexing has succeeded
*/
- public static boolean index(Repository repository) {
+ public static boolean reindex(Repository repository) {
+ if (!LuceneUtils.deleteIndex(repository)) {
+ return false;
+ }
try {
String repositoryName = getName(repository);
+ FileBasedConfig config = getConfig(repository);
Set<String> indexedCommits = new TreeSet<String>();
IndexWriter writer = getIndexWriter(repository, true);
// build a quick lookup of tags
Map<String, List<String>> tags = new HashMap<String, List<String>>();
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+ if (!tag.isAnnotatedTag()) {
+ // skip non-annotated tags
+ continue;
+ }
if (!tags.containsKey(tag.getObjectId())) {
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
}
@@ -170,6 +232,10 @@ public class LuceneUtils {
RevWalk revWalk = new RevWalk(repository);
RevCommit rev = revWalk.parseCommit(branch.getObjectId());
+ String keyName = getBranchKey(branchName);
+ config.setString(CONF_ALIAS, null, keyName, branchName);
+ config.setString(CONF_BRANCH, null, keyName, rev.getName());
+
// index the blob contents of the tree
ByteArrayOutputStream os = new ByteArrayOutputStream();
byte[] tmp = new byte[32767];
@@ -184,8 +250,7 @@ public class LuceneUtils {
Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
- Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
Index.NOT_ANALYZED));
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
@@ -233,8 +298,7 @@ public class LuceneUtils {
Document doc = createDocument(rev, tags.get(head));
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
- Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
writer.addDocument(doc);
}
@@ -246,8 +310,7 @@ public class LuceneUtils {
Document doc = createDocument(rev, tags.get(hash));
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,
- Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
writer.addDocument(doc);
}
}
@@ -268,6 +331,8 @@ public class LuceneUtils {
}
// commit all changes and reset the searcher
+ config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+ config.save();
resetIndexSearcher(repository);
writer.commit();
return true;
@@ -288,13 +353,22 @@ public class LuceneUtils {
* @return true, if successful
*/
public static boolean index(Repository repository, String branch, RevCommit commit) {
- try {
+ try {
if (excludedBranches.contains(branch)) {
if (IssueUtils.GB_ISSUES.equals(branch)) {
// index an issue
String issueId = commit.getShortMessage().substring(2).trim();
IssueModel issue = IssueUtils.getIssue(repository, issueId);
- return index(repository, issue, true);
+ if (issue == null) {
+ // delete the old issue from the index, if exists
+ IndexWriter writer = getIndexWriter(repository, false);
+ writer.deleteDocuments(
+ new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
+ FIELD_OBJECT_ID, issueId));
+ writer.commit();
+ return true;
+ }
+ return index(repository, issue);
}
return false;
}
@@ -306,9 +380,8 @@ public class LuceneUtils {
for (PathChangeModel path : changedPaths) {
// delete the indexed blob
writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
- new Term(FIELD_BRANCH, branch),
- new Term(FIELD_OBJECT_ID, path.path));
-
+ new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));
+
// re-index the blob
if (!ChangeType.DELETE.equals(path.changeType)) {
Document doc = new Document();
@@ -317,8 +390,7 @@ public class LuceneUtils {
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
Index.NOT_ANALYZED));
doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));
- doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES,
- Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,
Index.NOT_ANALYZED_NO_NORMS));
@@ -336,15 +408,15 @@ public class LuceneUtils {
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
// read the blob content
- String str = JGitUtils.getStringContent(repository,
- commit.getTree(), path.path);
+ String str = JGitUtils.getStringContent(repository, commit.getTree(),
+ path.path);
doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
writer.addDocument(doc);
}
}
}
writer.commit();
-
+
Document doc = createDocument(commit, null);
return index(repository, doc);
} catch (Exception e) {
@@ -359,21 +431,17 @@ public class LuceneUtils {
*
* @param repository
* @param issue
- * @param reindex
- * if true, the old index entry for this issue will be deleted.
- * This is only appropriate for pre-existing/indexed issues.
* @return true, if successful
*/
- public static boolean index(Repository repository, IssueModel issue, boolean reindex) {
+ public static boolean index(Repository repository, IssueModel issue) {
try {
+ // delete the old issue from the index, if exists
+ IndexWriter writer = getIndexWriter(repository, false);
+ writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
+ FIELD_OBJECT_ID, String.valueOf(issue.id)));
+ writer.commit();
+
Document doc = createDocument(issue);
- if (reindex) {
- // delete the old issue from the index, if exists
- IndexWriter writer = getIndexWriter(repository, false);
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()),
- new Term(FIELD_OBJECT_ID, String.valueOf(issue.id)));
- writer.commit();
- }
return index(repository, doc);
} catch (Exception e) {
e.printStackTrace();
@@ -382,6 +450,68 @@ public class LuceneUtils {
}
/**
+ * Updates a repository index incrementally from the last indexed commits.
+ *
+ * @param repository
+ * @return true if every branch was processed without an exception
+ */
+ public static boolean updateIndex(Repository repository) {
+     // Walks every local branch, indexes the commits added since the commit
+     // recorded for that branch in the Lucene config file, then records the
+     // new branch head. Returns false if any step throws.
+     boolean success = false;
+     try {
+         FileBasedConfig config = getConfig(repository);
+         config.load();
+
+         // build a quick lookup of annotated tags, keyed by the name of the
+         // object each tag references
+         // NOTE(review): this lookup is not passed to the per-commit index
+         // call below, so it is currently unused in this method
+         Map<String, List<String>> tags = new HashMap<String, List<String>>();
+         for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
+             if (!tag.isAnnotatedTag()) {
+                 // skip non-annotated tags
+                 continue;
+             }
+             // look up with the same key that is used for insertion so that
+             // several tags on one object accumulate in a single list
+             String taggedObject = tag.getReferencedObjectId().getName();
+             List<String> tagNames = tags.get(taggedObject);
+             if (tagNames == null) {
+                 tagNames = new ArrayList<String>();
+                 tags.put(taggedObject, tagNames);
+             }
+             tagNames.add(tag.displayName);
+         }
+
+         List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+         // TODO detect branch deletion
+
+         // walk through each branch
+         for (RefModel branch : branches) {
+             // determine last commit
+             String branchName = branch.getName();
+             String keyName = getBranchKey(branchName);
+             String lastCommit = config.getString(CONF_BRANCH, null, keyName);
+
+             List<RevCommit> revs;
+             if (StringUtils.isEmpty(lastCommit)) {
+                 // new branch/unindexed branch, get all commits on branch
+                 revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
+             } else {
+                 // pre-existing branch, get changes since last commit
+                 revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
+             }
+
+             // reverse the list of commits so we start with the first commit
+             Collections.reverse(revs);
+             for (RevCommit commit : revs) {
+                 // NOTE(review): the boolean result of this call is ignored, so
+                 // the branch head is recorded below even if it returns false
+                 index(repository, branchName, commit);
+             }
+
+             // update the config; saved per branch so that completed branches
+             // stay recorded if a later branch fails
+             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
+             config.setString(CONF_ALIAS, null, keyName, branchName);
+             config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
+             config.save();
+         }
+         success = true;
+     } catch (Throwable t) {
+         t.printStackTrace();
+     }
+     return success;
+ }
+
+ /**
* Creates a Lucene document from an issue.
*
* @param issue
@@ -446,8 +576,7 @@ public class LuceneUtils {
private static boolean index(Repository repository, Document doc) {
try {
String repositoryName = getName(repository);
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
- Index.NOT_ANALYZED));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
IndexWriter writer = getIndexWriter(repository, false);
writer.addDocument(doc);
resetIndexSearcher(repository);
@@ -587,9 +716,9 @@ public class LuceneUtils {
for (Repository repository : repositories) {
IndexSearcher repositoryIndex = getIndexSearcher(repository);
readers.add(repositoryIndex.getIndexReader());
- }
- IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]);
- MultiReader reader = new MultiReader(rdrs);
+ }
+ IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
+ MultiReader reader = new MultiReader(rdrs);
searcher = new IndexSearcher(reader);
}
Query rewrittenQuery = searcher.rewrite(query);
@@ -606,7 +735,7 @@ public class LuceneUtils {
e.printStackTrace();
}
return new ArrayList<SearchResult>(results);
- }
+ }
/**
* Close all the index writers and searchers