--- /dev/null
+/*\r
+ * Copyright 2012 gitblit.com.\r
+ *\r
+ * Licensed under the Apache License, Version 2.0 (the "License");\r
+ * you may not use this file except in compliance with the License.\r
+ * You may obtain a copy of the License at\r
+ *\r
+ * http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+package com.gitblit;\r
+\r
+import java.text.MessageFormat;\r
+import java.util.HashSet;\r
+import java.util.Queue;\r
+import java.util.Set;\r
+import java.util.concurrent.ConcurrentLinkedQueue;\r
+import java.util.concurrent.atomic.AtomicBoolean;\r
+\r
+import org.eclipse.jgit.lib.Repository;\r
+import org.slf4j.Logger;\r
+import org.slf4j.LoggerFactory;\r
+\r
+import com.gitblit.models.RepositoryModel;\r
+import com.gitblit.utils.JGitUtils;\r
+import com.gitblit.utils.LuceneUtils;\r
+\r
+/**\r
+ * The Lucene executor handles indexing repositories synchronously and\r
+ * asynchronously from a queue.\r
+ * \r
+ * @author James Moger\r
+ * \r
+ */\r
+public class LuceneExecutor implements Runnable {\r
+\r
+    private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);\r
+\r
+    // names of repositories waiting to be indexed by run()\r
+    private final Queue<String> queue = new ConcurrentLinkedQueue<String>();\r
+\r
+    private final IStoredSettings settings;\r
+\r
+    private final boolean isLuceneEnabled;\r
+\r
+    private final boolean isPollingMode;\r
+\r
+    // true until run() has executed its first enabled pass\r
+    private final AtomicBoolean firstRun = new AtomicBoolean(true);\r
+\r
+    /**\r
+     * Creates the executor and reads the "lucene.enableLucene" and\r
+     * "lucene.pollingMode" settings once; both default to false.\r
+     * \r
+     * @param settings\r
+     *            the settings to read the Lucene flags from\r
+     */\r
+    public LuceneExecutor(IStoredSettings settings) {\r
+        this.settings = settings;\r
+        this.isLuceneEnabled = settings.getBoolean("lucene.enableLucene", false);\r
+        this.isPollingMode = settings.getBoolean("lucene.pollingMode", false);\r
+    }\r
+\r
+    /**\r
+     * Indicates if the Lucene executor can index repositories.\r
+     * \r
+     * @return true if Lucene was enabled in the settings at construction time\r
+     */\r
+    public boolean isReady() {\r
+        return isLuceneEnabled;\r
+    }\r
+\r
+    /**\r
+     * Returns the status of the Lucene queue.\r
+     * \r
+     * @return true, if the queue is empty\r
+     */\r
+    public boolean hasEmptyQueue() {\r
+        return queue.isEmpty();\r
+    }\r
+\r
+    /**\r
+     * Queues a repository to be asynchronously indexed.\r
+     * \r
+     * @param repository\r
+     *            the repository whose name is added to the queue\r
+     * @return true if the repository was queued, false if the executor is not\r
+     *         ready\r
+     */\r
+    public boolean queue(RepositoryModel repository) {\r
+        if (!isReady()) {\r
+            return false;\r
+        }\r
+        queue.add(repository.name);\r
+        return true;\r
+    }\r
+\r
+    /**\r
+     * Drains the queue and indexes each queued repository once per pass. On\r
+     * the first pass, and on every pass in polling mode, all repositories\r
+     * reported by GitBlit are queued first. Does nothing if Lucene is\r
+     * disabled.\r
+     */\r
+    @Override\r
+    public void run() {\r
+        if (!isLuceneEnabled) {\r
+            return;\r
+        }\r
+\r
+        // getAndSet reads and clears the first-run flag in one atomic step; it\r
+        // must stay on the left of || so the flag is always cleared\r
+        if (firstRun.getAndSet(false) || isPollingMode) {\r
+            // update all indexes on first run or if polling mode\r
+            queue.addAll(GitBlit.self().getRepositoryList());\r
+        }\r
+\r
+        Set<String> processed = new HashSet<String>();\r
+        String repositoryName;\r
+        while ((repositoryName = queue.poll()) != null) {\r
+            if (processed.contains(repositoryName)) {\r
+                // skipping multi-queued repository\r
+                continue;\r
+            }\r
+            try {\r
+                Repository repository = GitBlit.self().getRepository(repositoryName);\r
+                if (repository == null) {\r
+                    logger.warn(MessageFormat.format(\r
+                            "Lucene executor could not find repository {0}. Skipping.",\r
+                            repositoryName));\r
+                    continue;\r
+                }\r
+                try {\r
+                    index(repositoryName, repository);\r
+                } finally {\r
+                    // always release the repository, even if indexing throws\r
+                    repository.close();\r
+                }\r
+                processed.add(repositoryName);\r
+            } catch (Throwable e) {\r
+                // one failing repository must not stop the rest of the queue\r
+                logger.error(MessageFormat.format("Failed to update {0} Lucene index",\r
+                        repositoryName), e);\r
+            }\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Synchronously indexes a repository. This may build a complete index of a\r
+     * repository or it may update an existing index. Repositories without\r
+     * commits are skipped. Failures are logged and not rethrown.\r
+     * \r
+     * @param repositoryName\r
+     *            the name of the repository\r
+     * @param repository\r
+     *            the repository object\r
+     */\r
+    public void index(String repositoryName, Repository repository) {\r
+        try {\r
+            if (!JGitUtils.hasCommits(repository)) {\r
+                logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}",\r
+                        repositoryName));\r
+                return;\r
+            }\r
+\r
+            // a full (re)build is needed when LuceneUtils says so; otherwise\r
+            // only the latest commits are added to the existing index\r
+            boolean rebuild = LuceneUtils.shouldReindex(repository);\r
+            long start = System.currentTimeMillis();\r
+            boolean success;\r
+            if (rebuild) {\r
+                success = LuceneUtils.reindex(repository);\r
+            } else {\r
+                success = LuceneUtils.updateIndex(repository);\r
+            }\r
+            long duration = System.currentTimeMillis() - start;\r
+\r
+            if (success) {\r
+                String msg = rebuild ? "Built {0} Lucene index in {1} msecs"\r
+                        : "Updated {0} Lucene index in {1} msecs";\r
+                logger.info(MessageFormat.format(msg, repositoryName, duration));\r
+            } else {\r
+                String msg = rebuild ? "Could not build {0} Lucene index!"\r
+                        : "Could not update {0} Lucene index!";\r
+                logger.error(MessageFormat.format(msg, repositoryName));\r
+            }\r
+        } catch (Throwable t) {\r
+            logger.error(MessageFormat.format("Lucene indexing failure for {0}", repositoryName), t);\r
+        }\r
+    }\r
+}\r
import java.text.ParseException;\r
import java.util.ArrayList;\r
import java.util.Arrays;\r
+import java.util.Collections;\r
import java.util.HashMap;\r
import java.util.LinkedHashSet;\r
import java.util.List;\r
import org.eclipse.jgit.revwalk.RevCommit;\r
import org.eclipse.jgit.revwalk.RevObject;\r
import org.eclipse.jgit.revwalk.RevWalk;\r
+import org.eclipse.jgit.storage.file.FileBasedConfig;\r
import org.eclipse.jgit.treewalk.TreeWalk;\r
+import org.eclipse.jgit.util.FS;\r
\r
import com.gitblit.models.IssueModel;\r
import com.gitblit.models.IssueModel.Attachment;\r
}\r
\r
private static final Version LUCENE_VERSION = Version.LUCENE_35;\r
+ private static final int INDEX_VERSION = 1;\r
\r
private static final String FIELD_OBJECT_TYPE = "type";\r
private static final String FIELD_OBJECT_ID = "id";\r
private static final String FIELD_LABEL = "label";\r
private static final String FIELD_ATTACHMENT = "attachment";\r
\r
- private static Set<String> excludedExtensions = new TreeSet<String>(\r
- Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",\r
- "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", \r
- "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",\r
- "xlsx", "zip"));\r
+ private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",\r
+ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",\r
+ "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));\r
\r
private static Set<String> excludedBranches = new TreeSet<String>(\r
Arrays.asList("/refs/heads/gb-issues"));\r
private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();\r
private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();\r
\r
+ private static final String CONF_FILE = "lucene.conf";\r
+ private static final String CONF_INDEX = "index";\r
+ private static final String CONF_VERSION = "version";\r
+ private static final String CONF_ALIAS = "aliases";\r
+ private static final String CONF_BRANCH = "branches";\r
+\r
/**\r
* Returns the name of the repository.\r
* \r
return repository.getDirectory().getParentFile().getName();\r
}\r
}\r
- \r
+\r
+ /**\r
+  * Derives the key under which a branch is stored in the Lucene config file.\r
+  * The key is the value StringUtils.getSHA1 returns for the branch name.\r
+  * \r
+  * @param branchName\r
+  *            the branch name to derive the key from\r
+  * @return a keyname appropriate for the Git config file format\r
+  */\r
+ private static String getBranchKey(String branchName) {\r
+     String branchKey = StringUtils.getSHA1(branchName);\r
+     return branchKey;\r
+ }\r
+\r
+ /**\r
+  * Creates the Lucene configuration object for the specified repository. The\r
+  * config is backed by the CONF_FILE file inside the repository directory and\r
+  * is not loaded here; callers load it themselves when they need its contents.\r
+  * \r
+  * @param repository\r
+  *            the repository whose directory holds the config file\r
+  * @return a config object\r
+  */\r
+ private static FileBasedConfig getConfig(Repository repository) {\r
+     File confFile = new File(repository.getDirectory(), CONF_FILE);\r
+     return new FileBasedConfig(confFile, FS.detect());\r
+ }\r
+\r
+ /**\r
+  * Reads the Lucene config file for the repository and compares the stored\r
+  * index version with INDEX_VERSION. 0 is passed as the default when reading\r
+  * the stored version. If the versions differ, or the config cannot be read,\r
+  * the repository index should be rebuilt.\r
+  * \r
+  * @param repository\r
+  *            the repository whose Lucene config is checked\r
+  * @return true if the on-disk index version is different than INDEX_VERSION\r
+  *         or could not be determined\r
+  */\r
+ public static boolean shouldReindex(Repository repository) {\r
+     try {\r
+         FileBasedConfig luceneConfig = getConfig(repository);\r
+         luceneConfig.load();\r
+         int storedVersion = luceneConfig.getInt(CONF_INDEX, CONF_VERSION, 0);\r
+         // reindex if versions do not match\r
+         return INDEX_VERSION != storedVersion;\r
+     } catch (Throwable ignored) {\r
+         // deliberately ignored: if the config cannot be read, the state of\r
+         // the index is unknown, so fall back to a full rebuild\r
+         return true;\r
+     }\r
+ }\r
+\r
/**\r
* Deletes the Lucene index for the specified repository.\r
* \r
org.eclipse.jgit.util.FileUtils.delete(luceneIndex,\r
org.eclipse.jgit.util.FileUtils.RECURSIVE);\r
}\r
+ File luceneConfig = new File(repository.getDirectory(), CONF_FILE);\r
+ if (luceneConfig.exists()) {\r
+ luceneConfig.delete();\r
+ }\r
return true;\r
} catch (IOException e) {\r
throw new RuntimeException(e);\r
* @param repository\r
* @return true if the indexing has succeeded\r
*/\r
- public static boolean index(Repository repository) {\r
+ public static boolean reindex(Repository repository) {\r
+ if (!LuceneUtils.deleteIndex(repository)) {\r
+ return false;\r
+ }\r
try {\r
String repositoryName = getName(repository);\r
+ FileBasedConfig config = getConfig(repository);\r
Set<String> indexedCommits = new TreeSet<String>();\r
IndexWriter writer = getIndexWriter(repository, true);\r
// build a quick lookup of tags\r
Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
+ if (!tag.isAnnotatedTag()) {\r
+ // skip non-annotated tags\r
+ continue;\r
+ }\r
if (!tags.containsKey(tag.getObjectId())) {\r
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());\r
}\r
RevWalk revWalk = new RevWalk(repository);\r
RevCommit rev = revWalk.parseCommit(branch.getObjectId());\r
\r
+ String keyName = getBranchKey(branchName);\r
+ config.setString(CONF_ALIAS, null, keyName, branchName);\r
+ config.setString(CONF_BRANCH, null, keyName, rev.getName());\r
+\r
// index the blob contents of the tree\r
ByteArrayOutputStream os = new ByteArrayOutputStream();\r
byte[] tmp = new byte[32767];\r
Index.NOT_ANALYZED_NO_NORMS));\r
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,\r
- Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));\r
doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,\r
Index.NOT_ANALYZED));\r
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
Document doc = createDocument(rev, tags.get(head));\r
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,\r
- Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));\r
writer.addDocument(doc);\r
}\r
\r
Document doc = createDocument(rev, tags.get(hash));\r
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES,\r
- Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));\r
writer.addDocument(doc);\r
}\r
}\r
}\r
\r
// commit all changes and reset the searcher\r
+ config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);\r
+ config.save();\r
resetIndexSearcher(repository);\r
writer.commit();\r
return true;\r
* @return true, if successful\r
*/\r
public static boolean index(Repository repository, String branch, RevCommit commit) {\r
- try { \r
+ try {\r
if (excludedBranches.contains(branch)) {\r
if (IssueUtils.GB_ISSUES.equals(branch)) {\r
// index an issue\r
String issueId = commit.getShortMessage().substring(2).trim();\r
IssueModel issue = IssueUtils.getIssue(repository, issueId);\r
- return index(repository, issue, true);\r
+ if (issue == null) {\r
+ // delete the old issue from the index, if exists\r
+ IndexWriter writer = getIndexWriter(repository, false);\r
+ writer.deleteDocuments(\r
+ new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(\r
+ FIELD_OBJECT_ID, issueId));\r
+ writer.commit();\r
+ return true;\r
+ }\r
+ return index(repository, issue);\r
}\r
return false;\r
}\r
for (PathChangeModel path : changedPaths) {\r
// delete the indexed blob\r
writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),\r
- new Term(FIELD_BRANCH, branch),\r
- new Term(FIELD_OBJECT_ID, path.path));\r
- \r
+ new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));\r
+\r
// re-index the blob\r
if (!ChangeType.DELETE.equals(path.changeType)) {\r
Document doc = new Document();\r
doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
Index.NOT_ANALYZED));\r
doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES,\r
- Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));\r
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,\r
Index.NOT_ANALYZED_NO_NORMS));\r
\r
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
// read the blob content\r
- String str = JGitUtils.getStringContent(repository, \r
- commit.getTree(), path.path);\r
+ String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
+ path.path);\r
doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));\r
writer.addDocument(doc);\r
}\r
}\r
}\r
writer.commit();\r
- \r
+\r
Document doc = createDocument(commit, null);\r
return index(repository, doc);\r
} catch (Exception e) {\r
* \r
* @param repository\r
* @param issue\r
- * @param reindex\r
- * if true, the old index entry for this issue will be deleted.\r
- * This is only appropriate for pre-existing/indexed issues.\r
* @return true, if successful\r
*/\r
- public static boolean index(Repository repository, IssueModel issue, boolean reindex) {\r
+ public static boolean index(Repository repository, IssueModel issue) {\r
try {\r
+ // delete the old issue from the index, if exists\r
+ IndexWriter writer = getIndexWriter(repository, false);\r
+ writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(\r
+ FIELD_OBJECT_ID, String.valueOf(issue.id)));\r
+ writer.commit();\r
+\r
Document doc = createDocument(issue);\r
- if (reindex) {\r
- // delete the old issue from the index, if exists\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()),\r
- new Term(FIELD_OBJECT_ID, String.valueOf(issue.id)));\r
- writer.commit();\r
- }\r
return index(repository, doc);\r
} catch (Exception e) {\r
e.printStackTrace();\r
return false;\r
}\r
\r
+ /**\r
+  * Updates a repository index incrementally from the last indexed commits.\r
+  * For each local branch, the last indexed commit is read from the Lucene\r
+  * config file; branches without a recorded commit are indexed in full. After\r
+  * a branch has been walked, its current head is written back to the config.\r
+  * \r
+  * @param repository\r
+  *            the repository whose index is updated\r
+  * @return true if all branches were walked without an exception being thrown\r
+  */\r
+ public static boolean updateIndex(Repository repository) {\r
+     boolean success = false;\r
+     try {\r
+         FileBasedConfig config = getConfig(repository);\r
+         config.load();\r
+\r
+         // build a quick lookup of annotated tags, keyed by the name of the\r
+         // tagged object\r
+         // NOTE(review): this lookup is not read anywhere later in this\r
+         // method - confirm whether it should be passed on to the indexer\r
+         Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
+         for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
+             if (!tag.isAnnotatedTag()) {\r
+                 // skip non-annotated tags\r
+                 continue;\r
+             }\r
+             // look up with the same String key that is used for put(); probing\r
+             // the map with the object id itself never matches and would drop\r
+             // all but the last tag of a commit\r
+             String taggedId = tag.getReferencedObjectId().getName();\r
+             List<String> tagNames = tags.get(taggedId);\r
+             if (tagNames == null) {\r
+                 tagNames = new ArrayList<String>();\r
+                 tags.put(taggedId, tagNames);\r
+             }\r
+             tagNames.add(tag.displayName);\r
+         }\r
+\r
+         List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
+         // TODO detect branch deletion\r
+\r
+         // walk through each branch\r
+         for (RefModel branch : branches) {\r
+             // determine last commit\r
+             String branchName = branch.getName();\r
+             String keyName = getBranchKey(branchName);\r
+             String lastCommit = config.getString(CONF_BRANCH, null, keyName);\r
+\r
+             List<RevCommit> revs;\r
+             if (StringUtils.isEmpty(lastCommit)) {\r
+                 // new branch/unindexed branch, get all commits on branch\r
+                 revs = JGitUtils.getRevLog(repository, branchName, 0, -1);\r
+             } else {\r
+                 // pre-existing branch, get changes since last commit\r
+                 revs = JGitUtils.getRevLog(repository, lastCommit, branchName);\r
+             }\r
+\r
+             // reverse the list of commits so we start with the first commit\r
+             Collections.reverse(revs);\r
+             for (RevCommit commit : revs) {\r
+                 // NOTE(review): the boolean result of index() is ignored, so\r
+                 // a commit that fails to index does not fail the update\r
+                 index(repository, branchName, commit);\r
+             }\r
+\r
+             // update the config\r
+             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);\r
+             config.setString(CONF_ALIAS, null, keyName, branchName);\r
+             config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());\r
+             config.save();\r
+         }\r
+         success = true;\r
+     } catch (Throwable t) {\r
+         t.printStackTrace();\r
+     }\r
+     return success;\r
+ }\r
+\r
/**\r
* Creates a Lucene document from an issue.\r
* \r
private static boolean index(Repository repository, Document doc) {\r
try {\r
String repositoryName = getName(repository);\r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
- Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));\r
IndexWriter writer = getIndexWriter(repository, false);\r
writer.addDocument(doc);\r
resetIndexSearcher(repository);\r
for (Repository repository : repositories) {\r
IndexSearcher repositoryIndex = getIndexSearcher(repository);\r
readers.add(repositoryIndex.getIndexReader());\r
- } \r
- IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]);\r
- MultiReader reader = new MultiReader(rdrs); \r
+ }\r
+ IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);\r
+ MultiReader reader = new MultiReader(rdrs);\r
searcher = new IndexSearcher(reader);\r
}\r
Query rewrittenQuery = searcher.rewrite(query);\r
e.printStackTrace();\r
}\r
return new ArrayList<SearchResult>(results);\r
- } \r
+ }\r
\r
/**\r
* Close all the index writers and searchers\r