groovy.postReceiveScripts =\r
\r
# If true, a Lucene index will be generated and maintained for each repository.\r
-# Lucene search replaces brute-force Git repository traversal.\r
+# Lucene search replaces brute-force Git repository traversal. Initial indexing\r
+# or reindexing of a repository can be memory intensive, so you may need to\r
+# adjust your JVM heap setting accordingly (e.g. -Xmx1024M).\r
#\r
# SINCE 0.9.0\r
# RESTART REQUIRED\r
lucene.enable = false\r
\r
-# If *lucene.pollingMode* = true, Gitblit will periodically check all repositories\r
-# for branch updates.\r
-# If *lucene.pollingMode* = false, repositories will only be indexed on pushes\r
-# to Gitblit.\r
+# This value specifies the idle period for the Lucene executor to wait between\r
+# repository ref checks. If refs have been changed since the last check, the\r
+# executor will incrementally index the changes.\r
#\r
-# Regardless of this setting, Gitblit will check all repositories for branch\r
-# updates 1 minute after startup. Indexes will automatically be built for any\r
-# repository that is missing its index or if an index version change is detected.\r
+# Gitblit will check all repositories for branch updates 1 minute after startup.\r
+# Indexes will automatically be built for any repository that is missing its index\r
+# or if an index version change is detected.\r
+#\r
+# The shortest frequency allowed is every 2 minutes.\r
+# Decimal frequency values are cast to integers.\r
+# Frequency values may be specified in mins, hours, or days.\r
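+# e.g. lucene.frequency = 30 mins, lucene.frequency = 12 hours, or\r
+# lucene.frequency = 1 day\r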
#\r
# SINCE 0.9.0\r
# RESTART REQUIRED\r
-lucene.pollingMode = false\r
+lucene.frequency = 2 mins\r
\r
#\r
# Authentication Settings\r
*/\r
package com.gitblit;\r
\r
+\r
/**\r
* Constant values used by Gitblit.\r
* \r
return name().toLowerCase();\r
}\r
}\r
+ \r
+ /**\r
+ * The types of objects that can be indexed and queried.\r
+ */\r
+ public static enum SearchObjectType {\r
+ commit, blob, issue;\r
+\r
+ static SearchObjectType fromName(String name) {\r
+ for (SearchObjectType value : values()) {\r
+ if (value.name().equals(name)) {\r
+ return value;\r
+ }\r
+ }\r
+ return null;\r
+ }\r
+ }\r
}\r
import com.gitblit.models.FederationSet;\r
import com.gitblit.models.Metric;\r
import com.gitblit.models.RepositoryModel;\r
+import com.gitblit.models.SearchResult;\r
import com.gitblit.models.ServerSettings;\r
import com.gitblit.models.ServerStatus;\r
import com.gitblit.models.SettingModel;\r
import com.gitblit.utils.MetricUtils;\r
import com.gitblit.utils.ObjectCache;\r
import com.gitblit.utils.StringUtils;\r
+import com.gitblit.utils.TimeUtils;\r
\r
/**\r
* GitBlit is the servlet context listener singleton that acts as the core for\r
}\r
return scripts;\r
}\r
+ \r
+ /**\r
+ * Search the specified repositories using the Lucene query.\r
+ * \r
+ * @param query\r
+ *            the Lucene query\r
+ * @param maximumHits\r
+ *            the maximum number of hits to collect\r
+ * @param repositories\r
+ *            the repositories to search\r
+ * @return a list of search results\r
+ */\r
+ public List<SearchResult> search(String query, int maximumHits, List<String> repositories) {\r
+ List<SearchResult> srs = luceneExecutor.search(query, maximumHits, repositories);\r
+ return srs;\r
+ }\r
\r
/**\r
* Notify the administrators by email.\r
}\r
}\r
\r
- /**\r
- * Update the Lucene index of a repository.\r
- * \r
- * @param repository\r
- */\r
- public void updateLuceneIndex(RepositoryModel repository) {\r
- luceneExecutor.queue(repository);\r
- }\r
-\r
/**\r
* Returns the descriptions/comments of the Gitblit config settings.\r
* \r
} else {\r
logger.warn("Mail server is not properly configured. Mail services disabled.");\r
}\r
- luceneExecutor = new LuceneExecutor(settings);\r
+ luceneExecutor = new LuceneExecutor(settings, repositoriesFolder);\r
if (luceneExecutor.isReady()) {\r
- logger.info("Lucene executor is scheduled to process the repository queue every 2 minutes.");\r
- scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, 2, TimeUnit.MINUTES);\r
+ String idle = settings.getString(Keys.lucene.frequency, "2 mins");\r
+ int mins = TimeUtils.convertFrequencyToMinutes(idle);\r
+ if (mins <= 2) {\r
+ mins = 2;\r
+ idle = mins + " mins";\r
+ }\r
+ logger.info("Lucene executor is scheduled to process ref changes every " + idle);\r
+ scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins, TimeUnit.MINUTES);\r
} else {\r
- logger.warn("Lucene executor is disabled.");\r
+ logger.warn("Lucene integration is disabled.");\r
}\r
if (startFederation) {\r
configureFederation();\r
\r
// Experimental\r
// runNativeScript(rp, "hooks/post-receive", commands);\r
- \r
- // Update the Lucene search index\r
- GitBlit.self().updateLuceneIndex(repository);\r
}\r
\r
/**\r
*/\r
package com.gitblit;\r
\r
+import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;\r
+\r
+import java.io.ByteArrayOutputStream;\r
+import java.io.File;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
import java.text.MessageFormat;\r
-import java.util.HashSet;\r
-import java.util.Queue;\r
+import java.text.ParseException;\r
+import java.util.ArrayList;\r
+import java.util.Arrays;\r
+import java.util.Collections;\r
+import java.util.Comparator;\r
+import java.util.HashMap;\r
+import java.util.LinkedHashSet;\r
+import java.util.List;\r
+import java.util.Map;\r
import java.util.Set;\r
-import java.util.concurrent.ConcurrentLinkedQueue;\r
-import java.util.concurrent.atomic.AtomicBoolean;\r
+import java.util.TreeMap;\r
+import java.util.TreeSet;\r
+import java.util.concurrent.ConcurrentHashMap;\r
\r
+import org.apache.lucene.analysis.Analyzer;\r
+import org.apache.lucene.analysis.standard.StandardAnalyzer;\r
+import org.apache.lucene.document.DateTools;\r
+import org.apache.lucene.document.DateTools.Resolution;\r
+import org.apache.lucene.document.Document;\r
+import org.apache.lucene.document.Field;\r
+import org.apache.lucene.document.Field.Index;\r
+import org.apache.lucene.document.Field.Store;\r
+import org.apache.lucene.index.IndexReader;\r
+import org.apache.lucene.index.IndexWriter;\r
+import org.apache.lucene.index.IndexWriterConfig;\r
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;\r
+import org.apache.lucene.index.MultiReader;\r
+import org.apache.lucene.index.Term;\r
+import org.apache.lucene.queryParser.QueryParser;\r
+import org.apache.lucene.search.BooleanClause.Occur;\r
+import org.apache.lucene.search.BooleanQuery;\r
+import org.apache.lucene.search.IndexSearcher;\r
+import org.apache.lucene.search.Query;\r
+import org.apache.lucene.search.ScoreDoc;\r
+import org.apache.lucene.search.TermQuery;\r
+import org.apache.lucene.search.TopScoreDocCollector;\r
+import org.apache.lucene.search.highlight.Fragmenter;\r
+import org.apache.lucene.search.highlight.Highlighter;\r
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;\r
+import org.apache.lucene.search.highlight.QueryScorer;\r
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;\r
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;\r
+import org.apache.lucene.store.Directory;\r
+import org.apache.lucene.store.FSDirectory;\r
+import org.apache.lucene.util.Version;\r
+import org.eclipse.jgit.diff.DiffEntry.ChangeType;\r
+import org.eclipse.jgit.lib.Constants;\r
+import org.eclipse.jgit.lib.ObjectId;\r
+import org.eclipse.jgit.lib.ObjectLoader;\r
+import org.eclipse.jgit.lib.ObjectReader;\r
import org.eclipse.jgit.lib.Repository;\r
+import org.eclipse.jgit.revwalk.RevCommit;\r
+import org.eclipse.jgit.revwalk.RevTree;\r
+import org.eclipse.jgit.revwalk.RevWalk;\r
+import org.eclipse.jgit.storage.file.FileBasedConfig;\r
+import org.eclipse.jgit.treewalk.EmptyTreeIterator;\r
+import org.eclipse.jgit.treewalk.TreeWalk;\r
+import org.eclipse.jgit.util.FS;\r
import org.slf4j.Logger;\r
import org.slf4j.LoggerFactory;\r
\r
-import com.gitblit.models.RepositoryModel;\r
+import com.gitblit.Constants.SearchObjectType;\r
+import com.gitblit.models.IssueModel;\r
+import com.gitblit.models.IssueModel.Attachment;\r
+import com.gitblit.models.PathModel.PathChangeModel;\r
+import com.gitblit.models.RefModel;\r
+import com.gitblit.models.SearchResult;\r
+import com.gitblit.utils.ArrayUtils;\r
+import com.gitblit.utils.IssueUtils;\r
import com.gitblit.utils.JGitUtils;\r
-import com.gitblit.utils.LuceneUtils;\r
-import com.gitblit.utils.LuceneUtils.IndexResult;\r
+import com.gitblit.utils.StringUtils;\r
\r
/**\r
- * The Lucene executor handles indexing repositories synchronously and\r
- * asynchronously from a queue.\r
+ * The Lucene executor handles indexing and searching repositories.\r
* \r
* @author James Moger\r
* \r
*/\r
public class LuceneExecutor implements Runnable {\r
+\r
+ private static final int INDEX_VERSION = 1;\r
\r
- private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);\r
-\r
- private final Queue<String> queue = new ConcurrentLinkedQueue<String>();\r
-\r
- private final IStoredSettings settings;\r
-\r
- private final boolean isLuceneEnabled;\r
+ private static final String FIELD_OBJECT_TYPE = "type";\r
+ private static final String FIELD_ISSUE = "issue";\r
+ private static final String FIELD_PATH = "path";\r
+ private static final String FIELD_COMMIT = "commit";\r
+ private static final String FIELD_BRANCH = "branch";\r
+ private static final String FIELD_REPOSITORY = "repository";\r
+ private static final String FIELD_SUMMARY = "summary";\r
+ private static final String FIELD_CONTENT = "content";\r
+ private static final String FIELD_AUTHOR = "author";\r
+ private static final String FIELD_COMMITTER = "committer";\r
+ private static final String FIELD_DATE = "date";\r
+ private static final String FIELD_TAG = "tag";\r
+ private static final String FIELD_LABEL = "label";\r
+ private static final String FIELD_ATTACHMENT = "attachment";\r
\r
- private final boolean isPollingMode;\r
-\r
- private final AtomicBoolean firstRun = new AtomicBoolean(true);\r
+ private static final String CONF_FILE = "lucene.conf";\r
+ private static final String LUCENE_DIR = "lucene";\r
+ private static final String CONF_INDEX = "index";\r
+ private static final String CONF_VERSION = "version";\r
+ private static final String CONF_ALIAS = "aliases";\r
+ private static final String CONF_BRANCH = "branches";\r
+ \r
+ private static final Version LUCENE_VERSION = Version.LUCENE_35;\r
+ \r
+ private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);\r
+ \r
+ private final IStoredSettings storedSettings;\r
+ private final File repositoriesFolder;\r
+ \r
+ private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();\r
+ private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();\r
+ \r
+ private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",\r
+ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",\r
+ "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));\r
\r
- public LuceneExecutor(IStoredSettings settings) {\r
- this.settings = settings;\r
- this.isLuceneEnabled = settings.getBoolean(Keys.lucene.enable, false);\r
- this.isPollingMode = settings.getBoolean(Keys.lucene.pollingMode, false);\r
+ private final Set<String> excludedBranches = new TreeSet<String>(\r
+ Arrays.asList("refs/heads/gb-issues"));\r
+ \r
+ public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {\r
+ this.storedSettings = settings;\r
+ this.repositoriesFolder = repositoriesFolder;\r
}\r
\r
/**\r
* @return true if the Lucene executor is ready to index repositories\r
*/\r
public boolean isReady() {\r
- return isLuceneEnabled;\r
- }\r
-\r
- /**\r
- * Returns the status of the Lucene queue.\r
- * \r
- * @return true, if the queue is empty\r
- */\r
- public boolean hasEmptyQueue() {\r
- return queue.isEmpty();\r
+ return storedSettings.getBoolean(Keys.lucene.enable, false);\r
}\r
\r
/**\r
- * Queues a repository to be asynchronously indexed.\r
- * \r
- * @param repository\r
- * @return true if the repository was queued\r
+ * Run is executed by the gitblit executor service at whatever frequency\r
+ * is specified in the settings. Because this is called by an executor\r
+ * service, calls will queue - i.e. there can never be concurrent execution\r
+ * of repository index updates.\r
*/\r
- public boolean queue(RepositoryModel repository) {\r
- if (!isReady()) {\r
- return false;\r
- }\r
- queue.add(repository.name);\r
- return true;\r
- }\r
-\r
@Override\r
public void run() {\r
- if (!isLuceneEnabled) {\r
+ if (!isReady()) {\r
return;\r
}\r
\r
- if (firstRun.get() || isPollingMode) {\r
- // update all indexes on first run or if polling mode\r
- firstRun.set(false);\r
- queue.addAll(GitBlit.self().getRepositoryList());\r
- }\r
-\r
- Set<String> processed = new HashSet<String>();\r
- if (!queue.isEmpty()) {\r
- // update the repository Lucene index\r
- String name = null;\r
- while ((name = queue.poll()) != null) {\r
- if (processed.contains(name)) {\r
- // skipping multi-queued repository\r
- continue;\r
- }\r
- try {\r
- Repository repository = GitBlit.self().getRepository(name);\r
- if (repository == null) {\r
- logger.warn(MessageFormat.format(\r
- "Lucene executor could not find repository {0}. Skipping.",\r
- name));\r
- continue;\r
- }\r
- index(name, repository);\r
- repository.close();\r
- System.gc();\r
- processed.add(name);\r
- } catch (Throwable e) {\r
- logger.error(MessageFormat.format("Failed to update {0} Lucene index",\r
- name), e);\r
- }\r
+ for (String repositoryName : GitBlit.self().getRepositoryList()) {\r
+ Repository repository = GitBlit.self().getRepository(repositoryName);\r
+ if (repository == null) {\r
+ logger.warn(MessageFormat.format(\r
+ "Lucene executor could not find repository {0}. Skipping.",\r
+ repositoryName));\r
+ continue;\r
}\r
+ // TODO allow repository to bypass Lucene indexing \r
+ index(repositoryName, repository);\r
+ repository.close();\r
+ System.gc();\r
}\r
}\r
\r
* @param repository\r
* the repository object\r
*/\r
- public void index(String name, Repository repository) {\r
+ protected void index(String name, Repository repository) {\r
try {\r
if (JGitUtils.hasCommits(repository)) {\r
- if (LuceneUtils.shouldReindex(repository)) {\r
- // (re)build the entire index\r
- long start = System.currentTimeMillis();\r
- IndexResult result = LuceneUtils.reindex(name, repository);\r
- float duration = (System.currentTimeMillis() - start)/1000f;\r
+ if (shouldReindex(repository)) {\r
+ // (re)build the entire index \r
+ IndexResult result = reindex(name, repository);\r
+ \r
if (result.success) {\r
if (result.commitCount > 0) {\r
String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";\r
logger.info(MessageFormat.format(msg, name,\r
- result.commitCount, result.blobCount, result.branchCount, duration));\r
+ result.commitCount, result.blobCount, result.branchCount, result.duration()));\r
}\r
} else {\r
String msg = "Could not build {0} Lucene index!";\r
logger.error(MessageFormat.format(msg, name));\r
}\r
} else {\r
- // update the index with latest commits\r
- long start = System.currentTimeMillis();\r
- IndexResult result = LuceneUtils.updateIndex(name, repository);\r
- float duration = (System.currentTimeMillis() - start)/1000f;\r
+ // update the index with latest commits \r
+ IndexResult result = updateIndex(name, repository);\r
if (result.success) {\r
if (result.commitCount > 0) {\r
String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";\r
logger.info(MessageFormat.format(msg, name,\r
- result.commitCount, result.blobCount, result.branchCount, duration));\r
+ result.commitCount, result.blobCount, result.branchCount, result.duration()));\r
}\r
} else {\r
String msg = "Could not update {0} Lucene index!";\r
* \r
*/\r
public void close() {\r
- LuceneUtils.close();\r
+ // close all writers\r
+ for (String writer : writers.keySet()) {\r
+ try {\r
+ writers.get(writer).close(true);\r
+ } catch (Throwable t) {\r
+ logger.error("Failed to close Lucene writer for " + writer, t);\r
+ }\r
+ }\r
+ writers.clear();\r
+\r
+ // close all searchers\r
+ for (String searcher : searchers.keySet()) {\r
+ try {\r
+ searchers.get(searcher).close();\r
+ } catch (Throwable t) {\r
+ logger.error("Failed to close Lucene searcher for " + searcher, t);\r
+ }\r
+ }\r
+ searchers.clear();\r
+ }\r
+\r
+ /**\r
+ * Deletes the Lucene index for the specified repository.\r
+ * \r
+ * @param repositoryName\r
+ * @return true, if successful\r
+ */\r
+ public boolean deleteIndex(String repositoryName) {\r
+ try {\r
+ // remove the repository index writer from the cache and close it\r
+ IndexWriter writer = writers.remove(repositoryName);\r
+ if (writer != null) {\r
+ writer.close();\r
+ writer = null;\r
+ }\r
+ // remove the repository index searcher from the cache and close it\r
+ IndexSearcher searcher = searchers.remove(repositoryName);\r
+ if (searcher != null) {\r
+ searcher.close();\r
+ searcher = null;\r
+ }\r
+ // delete the index folder\r
+ File repositoryFolder = new File(repositoriesFolder, repositoryName);\r
+ File luceneIndex = new File(repositoryFolder, LUCENE_DIR);\r
+ if (luceneIndex.exists()) {\r
+ org.eclipse.jgit.util.FileUtils.delete(luceneIndex,\r
+ org.eclipse.jgit.util.FileUtils.RECURSIVE);\r
+ }\r
+ // delete the config file\r
+ File luceneConfig = new File(repositoryFolder, CONF_FILE);\r
+ if (luceneConfig.exists()) {\r
+ luceneConfig.delete();\r
+ }\r
+ return true;\r
+ } catch (IOException e) {\r
+ throw new RuntimeException(e);\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Returns the author for the commit, if this information is available.\r
+ * \r
+ * @param commit\r
+ * @return an author or unknown\r
+ */\r
+ private String getAuthor(RevCommit commit) {\r
+ String name = "unknown";\r
+ try {\r
+ name = commit.getAuthorIdent().getName();\r
+ if (StringUtils.isEmpty(name)) {\r
+ name = commit.getAuthorIdent().getEmailAddress();\r
+ }\r
+ } catch (NullPointerException n) { \r
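+ // author ident unavailable; name remains "unknown"\r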
+ }\r
+ return name;\r
+ }\r
+ \r
+ /**\r
+ * Returns the committer for the commit, if this information is available.\r
+ * \r
+ * @param commit\r
+ * @return a committer or unknown\r
+ */\r
+ private String getCommitter(RevCommit commit) {\r
+ String name = "unknown";\r
+ try {\r
+ name = commit.getCommitterIdent().getName();\r
+ if (StringUtils.isEmpty(name)) {\r
+ name = commit.getCommitterIdent().getEmailAddress();\r
+ }\r
+ } catch (NullPointerException n) { \r
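+ // committer ident unavailable; name remains "unknown"\r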
+ }\r
+ return name;\r
+ }\r
+\r
+ /**\r
+ * Construct a keyname from the branch.\r
+ * \r
+ * @param branchName\r
+ * @return a keyname appropriate for the Git config file format\r
+ */\r
+ private String getBranchKey(String branchName) {\r
+ return StringUtils.getSHA1(branchName);\r
+ }\r
+\r
+ /**\r
+ * Returns the Lucene configuration for the specified repository.\r
+ * \r
+ * @param repository\r
+ * @return a config object\r
+ */\r
+ private FileBasedConfig getConfig(Repository repository) {\r
+ File file = new File(repository.getDirectory(), CONF_FILE);\r
+ FileBasedConfig config = new FileBasedConfig(file, FS.detect());\r
+ return config;\r
+ }\r
+\r
+ /**\r
+ * Reads the Lucene config file for the repository to check the index\r
+ * version. If the index version is different, then rebuild the repository\r
+ * index.\r
+ * \r
+ * @param repository\r
+ * @return true if the on-disk index format is different from INDEX_VERSION\r
+ */\r
+ protected boolean shouldReindex(Repository repository) {\r
+ try {\r
+ FileBasedConfig config = getConfig(repository);\r
+ config.load();\r
+ int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);\r
+ // reindex if versions do not match\r
+ return indexVersion != INDEX_VERSION;\r
+ } catch (Throwable t) {\r
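+ // missing or unreadable config; fall through and reindex\r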
+ }\r
+ return true;\r
+ }\r
+\r
+ /**\r
+ * This completely indexes the repository and will destroy any existing\r
+ * index.\r
+ * \r
+ * @param repositoryName\r
+ * @param repository\r
+ * @return IndexResult\r
+ */\r
+ public IndexResult reindex(String repositoryName, Repository repository) {\r
+ IndexResult result = new IndexResult();\r
+ if (!deleteIndex(repositoryName)) {\r
+ return result;\r
+ }\r
+ try { \r
+ FileBasedConfig config = getConfig(repository);\r
+ Set<String> indexedCommits = new TreeSet<String>();\r
+ IndexWriter writer = getIndexWriter(repositoryName);\r
+ // build a quick lookup of tags\r
+ Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
+ for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
+ if (!tag.isAnnotatedTag()) {\r
+ // skip non-annotated tags\r
+ continue;\r
+ }\r
+ if (!tags.containsKey(tag.getReferencedObjectId().getName())) {\r
+ tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());\r
+ }\r
+ tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);\r
+ }\r
+ \r
+ ObjectReader reader = repository.newObjectReader();\r
+\r
+ // get the local branches\r
+ List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
+ \r
+ // sort them by most recently updated\r
+ Collections.sort(branches, new Comparator<RefModel>() {\r
+ @Override\r
+ public int compare(RefModel ref1, RefModel ref2) {\r
+ return ref2.getDate().compareTo(ref1.getDate());\r
+ }\r
+ });\r
+ \r
+ // reorder default branch to first position\r
+ RefModel defaultBranch = null;\r
+ ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);\r
+ for (RefModel branch : branches) {\r
+ if (branch.getObjectId().equals(defaultBranchId)) {\r
+ defaultBranch = branch; \r
+ break;\r
+ }\r
+ }\r
+ if (defaultBranch != null) {\r
+ branches.remove(defaultBranch);\r
+ branches.add(0, defaultBranch);\r
+ }\r
+ \r
+ // walk through each branch\r
+ for (RefModel branch : branches) {\r
+ if (excludedBranches.contains(branch.getName())) {\r
+ continue;\r
+ }\r
+\r
+ String branchName = branch.getName();\r
+ RevWalk revWalk = new RevWalk(reader);\r
+ RevCommit tip = revWalk.parseCommit(branch.getObjectId());\r
+ String tipId = tip.getId().getName();\r
+\r
+ String keyName = getBranchKey(branchName);\r
+ config.setString(CONF_ALIAS, null, keyName, branchName);\r
+ config.setString(CONF_BRANCH, null, keyName, tipId);\r
+\r
+ // index the blob contents of the tree\r
+ TreeWalk treeWalk = new TreeWalk(repository);\r
+ treeWalk.addTree(tip.getTree());\r
+ treeWalk.setRecursive(true); \r
+ \r
+ Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();\r
+ while (treeWalk.next()) {\r
+ paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));\r
+ } \r
+\r
+ ByteArrayOutputStream os = new ByteArrayOutputStream();\r
+ byte[] tmp = new byte[32767];\r
+\r
+ RevWalk commitWalk = new RevWalk(reader);\r
+ commitWalk.markStart(tip);\r
+ \r
+ RevCommit commit;\r
+ while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {\r
+ TreeWalk diffWalk = new TreeWalk(reader);\r
+ int parentCount = commit.getParentCount();\r
+ switch (parentCount) {\r
+ case 0:\r
+ diffWalk.addTree(new EmptyTreeIterator());\r
+ break;\r
+ case 1:\r
+ diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));\r
+ break;\r
+ default:\r
+ // skip merge commits\r
+ continue;\r
+ }\r
+ diffWalk.addTree(getTree(commitWalk, commit));\r
+ diffWalk.setFilter(ANY_DIFF);\r
+ diffWalk.setRecursive(true);\r
+ while ((paths.size() > 0) && diffWalk.next()) {\r
+ String path = diffWalk.getPathString();\r
+ if (!paths.containsKey(path)) {\r
+ continue;\r
+ }\r
+ \r
+ // remove path from set\r
+ ObjectId blobId = paths.remove(path);\r
+ result.blobCount++;\r
+ \r
+ // index the blob metadata\r
+ String blobAuthor = getAuthor(commit);\r
+ String blobCommitter = getCommitter(commit);\r
+ String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,\r
+ Resolution.MINUTE);\r
+ \r
+ Document doc = new Document();\r
+ doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));\r
+ doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED)); \r
+\r
+ // determine extension to compare to the extension\r
+ // blacklist\r
+ String ext = null;\r
+ String name = path.toLowerCase();\r
+ if (name.indexOf('.') > -1) {\r
+ ext = name.substring(name.lastIndexOf('.') + 1);\r
+ }\r
+\r
+ // index the blob content\r
+ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { \r
+ ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);\r
+ InputStream in = ldr.openStream(); \r
+ int n;\r
+ while ((n = in.read(tmp)) > 0) {\r
+ os.write(tmp, 0, n);\r
+ }\r
+ in.close();\r
+ byte[] content = os.toByteArray();\r
+ String str = new String(content, Constants.CHARACTER_ENCODING);\r
+ doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
+ os.reset();\r
+ } \r
+ \r
+ // add the blob to the index\r
+ writer.addDocument(doc);\r
+ }\r
+ }\r
+\r
+ os.close();\r
+\r
+ // index the tip commit object\r
+ if (indexedCommits.add(tipId)) {\r
+ Document doc = createDocument(tip, tags.get(tipId));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
+ writer.addDocument(doc);\r
+ result.commitCount += 1;\r
+ result.branchCount += 1;\r
+ }\r
+\r
+ // traverse the log and index the previous commit objects\r
+ RevWalk historyWalk = new RevWalk(reader);\r
+ historyWalk.markStart(historyWalk.parseCommit(tip.getId()));\r
+ RevCommit rev;\r
+ while ((rev = historyWalk.next()) != null) {\r
+ String hash = rev.getId().getName();\r
+ if (indexedCommits.add(hash)) {\r
+ Document doc = createDocument(rev, tags.get(hash));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
+ writer.addDocument(doc);\r
+ result.commitCount += 1;\r
+ }\r
+ }\r
+ }\r
+\r
+ // finished\r
+ reader.release();\r
+ \r
+ // this repository has a gb-issues branch, index all issues\r
+ if (IssueUtils.getIssuesBranch(repository) != null) {\r
+ List<IssueModel> issues = IssueUtils.getIssues(repository, null);\r
+ if (issues.size() > 0) {\r
+ result.branchCount += 1;\r
+ }\r
+ for (IssueModel issue : issues) {\r
+ result.issueCount++;\r
+ Document doc = createDocument(issue);\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
+ writer.addDocument(doc);\r
+ }\r
+ }\r
+\r
+ // commit all changes and reset the searcher\r
+ config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);\r
+ config.save();\r
+ resetIndexSearcher(repositoryName);\r
+ writer.commit();\r
+ result.success();\r
+ } catch (Exception e) {\r
+ logger.error("Exception while reindexing " + repositoryName, e);\r
+ }\r
+ return result;\r
+ }\r
+ \r
+ /**\r
+ * Get the tree associated with the given commit.\r
+ *\r
+ * @param walk\r
+ * @param commit\r
+ * @return tree\r
+ * @throws IOException\r
+ */\r
+ protected RevTree getTree(final RevWalk walk, final RevCommit commit)\r
+ throws IOException {\r
+ final RevTree tree = commit.getTree();\r
+ if (tree != null) {\r
+ return tree;\r
+ }\r
+ walk.parseHeaders(commit);\r
+ return commit.getTree();\r
+ }\r
+\r
+ /**\r
+ * Incrementally update the index with the specified commit for the\r
+ * repository.\r
+ * \r
+ * @param repositoryName\r
+ * @param repository\r
+ * @param branch\r
+ * the fully qualified branch name (e.g. refs/heads/master)\r
+ * @param commit\r
+ * @return true, if successful\r
+ */\r
+ private IndexResult index(String repositoryName, Repository repository, \r
+ String branch, RevCommit commit) {\r
+ IndexResult result = new IndexResult();\r
+ try {\r
+ if (excludedBranches.contains(branch)) {\r
+ if (IssueUtils.GB_ISSUES.equals(branch)) {\r
+ // index an issue\r
+ String issueId = commit.getShortMessage().substring(2).trim();\r
+ IssueModel issue = IssueUtils.getIssue(repository, issueId);\r
+ if (issue == null) {\r
+ // issue was deleted, remove from index\r
+ deleteIssue(repositoryName, issueId);\r
+ result.success = true;\r
+ return result;\r
+ }\r
+ result.success = index(repositoryName, issue);\r
+ result.issueCount++;\r
+ return result;\r
+ \r
+ }\r
+ return result;\r
+ }\r
+ List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);\r
+ String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,\r
+ Resolution.MINUTE);\r
+ IndexWriter writer = getIndexWriter(repositoryName);\r
+ for (PathChangeModel path : changedPaths) {\r
+ // delete the indexed blob\r
+ deleteBlob(repositoryName, branch, path.path);\r
+\r
+ // re-index the blob\r
+ if (!ChangeType.DELETE.equals(path.changeType)) {\r
+ result.blobCount++;\r
+ Document doc = new Document();\r
+ doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,\r
+ Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
+ doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));\r
+\r
+ // determine extension to compare to the extension\r
+ // blacklist\r
+ String ext = null;\r
+ String name = path.name.toLowerCase();\r
+ if (name.indexOf('.') > -1) {\r
+ ext = name.substring(name.lastIndexOf('.') + 1);\r
+ }\r
+\r
+ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
+ // read the blob content\r
+ String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
+ path.path);\r
+ doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
+ writer.addDocument(doc);\r
+ }\r
+ }\r
+ }\r
+ writer.commit();\r
+\r
+ Document doc = createDocument(commit, null);\r
+ result.commitCount++;\r
+ result.success = index(repositoryName, doc);\r
+ } catch (Exception e) {\r
+ logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);\r
+ }\r
+ return result;\r
+ }\r
+\r
+ /**\r
+ * Incrementally update the index with the specified issue for the\r
+ * repository.\r
+ * \r
+ * @param repositoryName\r
+ * @param issue\r
+ * @return true, if successful\r
+ */\r
+ public boolean index(String repositoryName, IssueModel issue) {\r
+ try {\r
+ // delete the old issue from the index, if exists\r
+ deleteIssue(repositoryName, issue.id);\r
+ Document doc = createDocument(issue);\r
+ return index(repositoryName, doc);\r
+ } catch (Exception e) {\r
+ logger.error(MessageFormat.format("Error while indexing issue {0} in {1}", issue.id, repositoryName), e);\r
+ }\r
+ return false;\r
+ }\r
+ \r
+ /**\r
+ * Delete an issue from the repository index.\r
+ * \r
+ * @param repositoryName\r
+ * @param issueId\r
+ * @throws Exception\r
+ */\r
+ private void deleteIssue(String repositoryName, String issueId) throws Exception {\r
+ BooleanQuery query = new BooleanQuery();\r
+ Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());\r
+ query.add(new TermQuery(objectTerm), Occur.MUST);\r
+ Term issueidTerm = new Term(FIELD_ISSUE, issueId);\r
+ query.add(new TermQuery(issueidTerm), Occur.MUST);\r
+ \r
+ IndexWriter writer = getIndexWriter(repositoryName);\r
+ writer.deleteDocuments(query);\r
+ writer.commit();\r
+ }\r
+ \r
+ /**\r
+ * Delete a blob from the specified branch of the repository index.\r
+ * \r
+ * @param repositoryName\r
+ * @param branch\r
+ * @param path\r
+ * @throws Exception\r
+ */\r
+ private void deleteBlob(String repositoryName, String branch, String path) throws Exception {\r
+ BooleanQuery query = new BooleanQuery();\r
+ Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());\r
+ query.add(new TermQuery(objectTerm), Occur.MUST);\r
+ Term branchTerm = new Term(FIELD_BRANCH, branch);\r
+ query.add(new TermQuery(branchTerm), Occur.MUST);\r
+ Term pathTerm = new Term(FIELD_PATH, path);\r
+ query.add(new TermQuery(pathTerm), Occur.MUST);\r
+ \r
+ IndexWriter writer = getIndexWriter(repositoryName);\r
+ writer.deleteDocuments(query);\r
+ writer.commit();\r
+ }\r
+\r
+ /**\r
+ * Updates a repository index incrementally from the last indexed commits.\r
+ * \r
+ * @param repositoryName\r
+ * @param repository\r
+ * @return IndexResult\r
+ */\r
+ protected IndexResult updateIndex(String repositoryName, Repository repository) {\r
+ IndexResult result = new IndexResult();\r
+ try {\r
+ FileBasedConfig config = getConfig(repository);\r
+ config.load();\r
+\r
+ // build a quick lookup of annotated tags\r
+ Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
+ for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
+ if (!tag.isAnnotatedTag()) {\r
+ // skip non-annotated tags\r
+ continue;\r
+ }\r
+ if (!tags.containsKey(tag.getReferencedObjectId().getName())) {\r
+ tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());\r
+ }\r
+ tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);\r
+ }\r
+\r
+ // detect branch deletion\r
+ // first assume all branches are deleted and then remove each\r
+ // existing branch from deletedBranches during indexing\r
+ Set<String> deletedBranches = new TreeSet<String>();\r
+ for (String alias : config.getNames(CONF_ALIAS)) {\r
+ String branch = config.getString(CONF_ALIAS, null, alias);\r
+ deletedBranches.add(branch);\r
+ }\r
+\r
+ // walk through each branch\r
+ List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
+ for (RefModel branch : branches) {\r
+ String branchName = branch.getName();\r
+\r
+ // remove this branch from the deletedBranches set\r
+ deletedBranches.remove(branchName);\r
+\r
+ // determine last commit\r
+ String keyName = getBranchKey(branchName);\r
+ String lastCommit = config.getString(CONF_BRANCH, null, keyName);\r
+\r
+ List<RevCommit> revs;\r
+ if (StringUtils.isEmpty(lastCommit)) {\r
+ // new branch/unindexed branch, get all commits on branch\r
+ revs = JGitUtils.getRevLog(repository, branchName, 0, -1);\r
+ } else {\r
+ // pre-existing branch, get changes since last commit\r
+ revs = JGitUtils.getRevLog(repository, lastCommit, branchName);\r
+ }\r
+\r
+ if (revs.size() > 0) {\r
+ result.branchCount += 1;\r
+ }\r
+ \r
+ // reverse the list of commits so we start with the first commit \r
+ Collections.reverse(revs);\r
+ for (RevCommit commit : revs) {\r
+ result.add(index(repositoryName, repository, branchName, commit)); \r
+ }\r
+\r
+ // update the config\r
+ config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);\r
+ config.setString(CONF_ALIAS, null, keyName, branchName);\r
+ config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());\r
+ config.save();\r
+ }\r
+\r
+ // the deletedBranches set will normally be empty by this point\r
+ // unless a branch really was deleted and no longer exists\r
+ if (deletedBranches.size() > 0) {\r
+ for (String branch : deletedBranches) {\r
+ IndexWriter writer = getIndexWriter(repositoryName);\r
+ writer.deleteDocuments(new Term(FIELD_BRANCH, branch));\r
+ writer.commit();\r
+ }\r
+ }\r
+ result.success = true;\r
+ } catch (Throwable t) {\r
+ logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t);\r
+ }\r
+ return result;\r
+ }\r
+\r
+ /**\r
+ * Creates a Lucene document from an issue.\r
+ * \r
+ * @param issue\r
+ * @return a Lucene document\r
+ */\r
+ private Document createDocument(IssueModel issue) {\r
+ Document doc = new Document();\r
+ doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.issue.name(), Store.YES,\r
+ Field.Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),\r
+ Store.YES, Field.Index.NO));\r
+ doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));\r
+ List<String> attachments = new ArrayList<String>();\r
+ for (Attachment attachment : issue.getAttachments()) {\r
+ attachments.add(attachment.name.toLowerCase());\r
+ }\r
+ doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,\r
+ Index.ANALYZED));\r
+ doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,\r
+ Index.ANALYZED));\r
+ return doc;\r
+ }\r
+\r
+ /**\r
+ * Creates a Lucene document for a commit\r
+ * \r
+ * @param commit\r
+ * @param tags\r
+ * @return a Lucene document\r
+ */\r
+ private Document createDocument(RevCommit commit, List<String> tags) {\r
+ Document doc = new Document();\r
+ doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES,\r
+ Index.NOT_ANALYZED));\r
+ doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,\r
+ Resolution.MINUTE), Store.YES, Index.NO));\r
+ doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));\r
+ doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));\r
+ if (!ArrayUtils.isEmpty(tags)) {\r
+ doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));\r
+ }\r
+ return doc;\r
+ }\r
+\r
+ /**\r
+ * Incrementally index an object for the repository.\r
+ * \r
+ * @param repositoryName\r
+ * @param doc\r
+ * @return true, if successful\r
+ */\r
+ private boolean index(String repositoryName, Document doc) {\r
+ try { \r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));\r
+ IndexWriter writer = getIndexWriter(repositoryName);\r
+ writer.addDocument(doc);\r
+ resetIndexSearcher(repositoryName);\r
+ writer.commit();\r
+ return true;\r
+ } catch (Exception e) {\r
+ logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);\r
+ }\r
+ return false;\r
+ }\r
+\r
+ private SearchResult createSearchResult(Document doc, float score) throws ParseException {\r
+ SearchResult result = new SearchResult();\r
+ result.score = score;\r
+ result.date = DateTools.stringToDate(doc.get(FIELD_DATE));\r
+ result.summary = doc.get(FIELD_SUMMARY); \r
+ result.author = doc.get(FIELD_AUTHOR);\r
+ result.committer = doc.get(FIELD_COMMITTER);\r
+ result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));\r
+ result.repository = doc.get(FIELD_REPOSITORY);\r
+ result.branch = doc.get(FIELD_BRANCH);\r
+ result.commitId = doc.get(FIELD_COMMIT);\r
+ result.issueId = doc.get(FIELD_ISSUE);\r
+ result.path = doc.get(FIELD_PATH);\r
+ if (doc.get(FIELD_TAG) != null) {\r
+ result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));\r
+ }\r
+ if (doc.get(FIELD_LABEL) != null) {\r
+ result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));\r
+ }\r
+ return result;\r
+ }\r
+\r
+ private synchronized void resetIndexSearcher(String repository) throws IOException {\r
+ IndexSearcher searcher = searchers.remove(repository);\r
+ if (searcher != null) {\r
+ searcher.close();\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Gets an index searcher for the repository.\r
+ * \r
+ * @param repository\r
+ * @return\r
+ * @throws IOException\r
+ */\r
+ private IndexSearcher getIndexSearcher(String repository) throws IOException {\r
+ IndexSearcher searcher = searchers.get(repository);\r
+ if (searcher == null) {\r
+ IndexWriter writer = getIndexWriter(repository);\r
+ searcher = new IndexSearcher(IndexReader.open(writer, true));\r
+ searchers.put(repository, searcher);\r
+ }\r
+ return searcher;\r
+ }\r
+\r
+ /**\r
+ * Gets an index writer for the repository. The index will be created if it\r
+ * does not already exist or if forceCreate is specified.\r
+ * \r
+ * @param repository\r
+ * @return an IndexWriter\r
+ * @throws IOException\r
+ */\r
+ private IndexWriter getIndexWriter(String repository) throws IOException {\r
+ IndexWriter indexWriter = writers.get(repository); \r
+ File repositoryFolder = new File(repositoriesFolder, repository);\r
+ File indexFolder = new File(repositoryFolder, LUCENE_DIR);\r
+ Directory directory = FSDirectory.open(indexFolder); \r
+\r
+ if (indexWriter == null) {\r
+ if (!indexFolder.exists()) {\r
+ indexFolder.mkdirs();\r
+ }\r
+ StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);\r
+ IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);\r
+ config.setOpenMode(OpenMode.CREATE_OR_APPEND);\r
+ indexWriter = new IndexWriter(directory, config);\r
+ writers.put(repository, indexWriter);\r
+ }\r
+ return indexWriter;\r
+ }\r
+\r
+ /**\r
+ * Searches the specified repositories for the given text or query\r
+ * \r
+ * @param text\r
+ * if the text is null or empty, null is returned\r
+ * @param maximumHits\r
+ * the maximum number of hits to collect\r
+ * @param repositories\r
+ * a list of repositories to search. If no repositories are\r
+ * specified, null is returned.\r
+ * @return a list of SearchResults in order from highest to lowest score\r
+ * \r
+ */\r
+ public List<SearchResult> search(String text, int maximumHits, List<String> repositories) {\r
+ if (ArrayUtils.isEmpty(repositories)) {\r
+ return null;\r
+ }\r
+ return search(text, maximumHits, repositories.toArray(new String[0]));\r
+ }\r
+ \r
+ /**\r
+ * Searches the specified repositories for the given text or query\r
+ * \r
+ * @param text\r
+ * if the text is null or empty, null is returned\r
+ * @param maximumHits\r
+ * the maximum number of hits to collect\r
+ * @param repositories\r
+ * a list of repositories to search. If no repositories are\r
+ * specified, null is returned.\r
+ * @return a list of SearchResults in order from highest to lowest score\r
+ * \r
+ */ \r
+ public List<SearchResult> search(String text, int maximumHits, String... repositories) {\r
+ if (StringUtils.isEmpty(text)) {\r
+ return null;\r
+ }\r
+ if (ArrayUtils.isEmpty(repositories)) {\r
+ return null;\r
+ }\r
+ Set<SearchResult> results = new LinkedHashSet<SearchResult>();\r
+ StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);\r
+ try {\r
+ // default search checks summary and content\r
+ BooleanQuery query = new BooleanQuery();\r
+ QueryParser qp;\r
+ qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);\r
+ qp.setAllowLeadingWildcard(true);\r
+ query.add(qp.parse(text), Occur.SHOULD);\r
+\r
+ qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);\r
+ qp.setAllowLeadingWildcard(true);\r
+ query.add(qp.parse(text), Occur.SHOULD);\r
+\r
+ IndexSearcher searcher;\r
+ if (repositories.length == 1) {\r
+ // single repository search\r
+ searcher = getIndexSearcher(repositories[0]);\r
+ } else {\r
+ // multiple repository search\r
+ List<IndexReader> readers = new ArrayList<IndexReader>();\r
+ for (String repository : repositories) {\r
+ IndexSearcher repositoryIndex = getIndexSearcher(repository);\r
+ readers.add(repositoryIndex.getIndexReader());\r
+ }\r
+ IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);\r
+ MultiReader reader = new MultiReader(rdrs);\r
+ searcher = new IndexSearcher(reader);\r
+ }\r
+ Query rewrittenQuery = searcher.rewrite(query);\r
+ TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);\r
+ searcher.search(rewrittenQuery, collector);\r
+ ScoreDoc[] hits = collector.topDocs().scoreDocs;\r
+ for (int i = 0; i < hits.length; i++) {\r
+ int docId = hits[i].doc;\r
+ Document doc = searcher.doc(docId);\r
+ // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY\r
+ SearchResult result = createSearchResult(doc, hits[i].score);\r
+ String content = doc.get(FIELD_CONTENT); \r
+ result.fragment = getHighlightedFragment(analyzer, query, content, result);\r
+ results.add(result);\r
+ }\r
+ } catch (Exception e) {\r
+ logger.error(MessageFormat.format("Exception while searching for {0}", text), e);\r
+ }\r
+ return new ArrayList<SearchResult>(results);\r
+ }\r
+ \r
+ /**\r
+ * Returns an html-highlighted fragment of the content matching the query.\r
+ * \r
+ * @param analyzer\r
+ * @param query\r
+ * @param content\r
+ * @param result\r
+ * @return the html fragment with matched terms highlighted\r
+ * @throws IOException\r
+ * @throws InvalidTokenOffsetsException\r
+ */\r
+ private String getHighlightedFragment(Analyzer analyzer, Query query,\r
+ String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {\r
+ content = content == null ? "":StringUtils.escapeForHtml(content, false);\r
+ \r
+ QueryScorer scorer = new QueryScorer(query, "content");\r
+ Fragmenter fragmenter;\r
+ \r
+ // TODO improve the fragmenter - hopefully on line breaks\r
+ if (SearchObjectType.commit == result.type) {\r
+ fragmenter = new SimpleSpanFragmenter(scorer, 1024); \r
+ } else {\r
+ fragmenter = new SimpleSpanFragmenter(scorer, 150);\r
+ }\r
+\r
+ // use an artificial delimiter for the token\r
+ String termTag = "<!--[";\r
+ String termTagEnd = "]-->";\r
+ SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);\r
+ Highlighter highlighter = new Highlighter(formatter, scorer); \r
+ highlighter.setTextFragmenter(fragmenter);\r
+ \r
+ String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 5);\r
+ if (ArrayUtils.isEmpty(fragments)) {\r
+ if (SearchObjectType.blob == result.type) {\r
+ return "";\r
+ }\r
+ return "<pre class=\"text\">" + content + "</pre>";\r
+ }\r
+ StringBuilder sb = new StringBuilder();\r
+ for (int i = 0, len = fragments.length; i < len; i++) {\r
+ String fragment = fragments[i];\r
+ \r
+ // resurrect the raw fragment by removing the artificial delimiters\r
+ String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); \r
+ sb.append(getPreTag(result, raw, content));\r
+ \r
+ // replace the artificial delimiter with html tags\r
+ String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");\r
+ sb.append(html);\r
+ sb.append("</pre>");\r
+ if (i < len - 1) {\r
+ sb.append("<span class=\"ellipses\">...</span><br/>");\r
+ }\r
+ }\r
+ return sb.toString();\r
+ }\r
+ \r
+ /**\r
+ * Returns the appropriate tag for a fragment. Commit messages are visually\r
+ * differentiated from blob fragments.\r
+ * \r
+ * @param result\r
+ * @param fragment\r
+ * @param content\r
+ * @return an html tag appropriate for the fragment\r
+ */\r
+ private String getPreTag(SearchResult result, String fragment, String content) {\r
+ String pre = "<pre class=\"text\">";\r
+ if (SearchObjectType.blob == result.type) {\r
+ int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment))); \r
+ int lastDot = result.path.lastIndexOf('.');\r
+ if (lastDot > -1) {\r
+ String ext = result.path.substring(lastDot + 1).toLowerCase();\r
+ pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext); \r
+ } else {\r
+ pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);\r
+ }\r
+ }\r
+ return pre;\r
+ }\r
+ \r
+ /**\r
+ * Simple class to track the results of an index update. \r
+ */\r
+ private class IndexResult {\r
+ long startTime = System.currentTimeMillis();\r
+ long endTime = startTime;\r
+ boolean success;\r
+ int branchCount;\r
+ int commitCount;\r
+ int blobCount;\r
+ int issueCount;\r
+ \r
+ void add(IndexResult result) {\r
+ this.branchCount += result.branchCount;\r
+ this.commitCount += result.commitCount;\r
+ this.blobCount += result.blobCount;\r
+ this.issueCount += result.issueCount; \r
+ }\r
+ \r
+ void success() {\r
+ success = true;\r
+ endTime = System.currentTimeMillis();\r
+ }\r
+ \r
+ float duration() {\r
+ return (endTime - startTime)/1000f;\r
+ }\r
}\r
}\r
import java.util.Date;\r
import java.util.List;\r
\r
-import com.gitblit.utils.LuceneUtils.ObjectType;\r
+import com.gitblit.Constants.SearchObjectType;\r
\r
/**\r
* Model class that represents a search result.\r
\r
public List<String> labels;\r
\r
- public ObjectType type;\r
+ public SearchObjectType type;\r
\r
public SearchResult() {\r
}\r
+++ /dev/null
-/*\r
- * Copyright 2012 gitblit.com.\r
- *\r
- * Licensed under the Apache License, Version 2.0 (the "License");\r
- * you may not use this file except in compliance with the License.\r
- * You may obtain a copy of the License at\r
- *\r
- * http://www.apache.org/licenses/LICENSE-2.0\r
- *\r
- * Unless required by applicable law or agreed to in writing, software\r
- * distributed under the License is distributed on an "AS IS" BASIS,\r
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- * See the License for the specific language governing permissions and\r
- * limitations under the License.\r
- */\r
-package com.gitblit.utils;\r
-\r
-import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;\r
-\r
-import java.io.ByteArrayOutputStream;\r
-import java.io.File;\r
-import java.io.IOException;\r
-import java.io.InputStream;\r
-import java.text.MessageFormat;\r
-import java.text.ParseException;\r
-import java.util.ArrayList;\r
-import java.util.Arrays;\r
-import java.util.Collections;\r
-import java.util.Comparator;\r
-import java.util.HashMap;\r
-import java.util.LinkedHashSet;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Set;\r
-import java.util.TreeMap;\r
-import java.util.TreeSet;\r
-import java.util.concurrent.ConcurrentHashMap;\r
-\r
-import org.apache.lucene.analysis.Analyzer;\r
-import org.apache.lucene.analysis.TokenStream;\r
-import org.apache.lucene.analysis.standard.StandardAnalyzer;\r
-import org.apache.lucene.document.DateTools;\r
-import org.apache.lucene.document.DateTools.Resolution;\r
-import org.apache.lucene.document.Document;\r
-import org.apache.lucene.document.Field;\r
-import org.apache.lucene.document.Field.Index;\r
-import org.apache.lucene.document.Field.Store;\r
-import org.apache.lucene.index.IndexReader;\r
-import org.apache.lucene.index.IndexWriter;\r
-import org.apache.lucene.index.IndexWriterConfig;\r
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;\r
-import org.apache.lucene.index.MultiReader;\r
-import org.apache.lucene.index.Term;\r
-import org.apache.lucene.queryParser.QueryParser;\r
-import org.apache.lucene.search.BooleanClause.Occur;\r
-import org.apache.lucene.search.BooleanQuery;\r
-import org.apache.lucene.search.IndexSearcher;\r
-import org.apache.lucene.search.Query;\r
-import org.apache.lucene.search.ScoreDoc;\r
-import org.apache.lucene.search.TopScoreDocCollector;\r
-import org.apache.lucene.search.highlight.Fragmenter;\r
-import org.apache.lucene.search.highlight.Highlighter;\r
-import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;\r
-import org.apache.lucene.search.highlight.QueryScorer;\r
-import org.apache.lucene.search.highlight.SimpleHTMLFormatter;\r
-import org.apache.lucene.search.highlight.SimpleSpanFragmenter;\r
-import org.apache.lucene.search.highlight.TokenSources;\r
-import org.apache.lucene.store.Directory;\r
-import org.apache.lucene.store.FSDirectory;\r
-import org.apache.lucene.util.Version;\r
-import org.eclipse.jgit.diff.DiffEntry.ChangeType;\r
-import org.eclipse.jgit.lib.Constants;\r
-import org.eclipse.jgit.lib.ObjectId;\r
-import org.eclipse.jgit.lib.ObjectLoader;\r
-import org.eclipse.jgit.lib.ObjectReader;\r
-import org.eclipse.jgit.lib.Repository;\r
-import org.eclipse.jgit.revwalk.RevCommit;\r
-import org.eclipse.jgit.revwalk.RevTree;\r
-import org.eclipse.jgit.revwalk.RevWalk;\r
-import org.eclipse.jgit.storage.file.FileBasedConfig;\r
-import org.eclipse.jgit.treewalk.EmptyTreeIterator;\r
-import org.eclipse.jgit.treewalk.TreeWalk;\r
-import org.eclipse.jgit.util.FS;\r
-\r
-import com.gitblit.models.IssueModel;\r
-import com.gitblit.models.IssueModel.Attachment;\r
-import com.gitblit.models.PathModel.PathChangeModel;\r
-import com.gitblit.models.RefModel;\r
-import com.gitblit.models.SearchResult;\r
-\r
-/**\r
- * A collection of utility methods for indexing and querying a Lucene repository\r
- * index.\r
- * \r
- * @author James Moger\r
- * \r
- */\r
-public class LuceneUtils {\r
-\r
- /**\r
- * The types of objects that can be indexed and queried.\r
- */\r
- public static enum ObjectType {\r
- commit, blob, issue;\r
-\r
- static ObjectType fromName(String name) {\r
- for (ObjectType value : values()) {\r
- if (value.name().equals(name)) {\r
- return value;\r
- }\r
- }\r
- return null;\r
- }\r
- }\r
-\r
- private static final Version LUCENE_VERSION = Version.LUCENE_35;\r
- private static final int INDEX_VERSION = 1;\r
-\r
- private static final String FIELD_OBJECT_TYPE = "type";\r
- private static final String FIELD_ISSUE = "issue";\r
- private static final String FIELD_PATH = "path";\r
- private static final String FIELD_COMMIT = "commit";\r
- private static final String FIELD_BRANCH = "branch";\r
- private static final String FIELD_REPOSITORY = "repository";\r
- private static final String FIELD_SUMMARY = "summary";\r
- private static final String FIELD_CONTENT = "content";\r
- private static final String FIELD_AUTHOR = "author";\r
- private static final String FIELD_COMMITTER = "committer";\r
- private static final String FIELD_DATE = "date";\r
- private static final String FIELD_TAG = "tag";\r
- private static final String FIELD_LABEL = "label";\r
- private static final String FIELD_ATTACHMENT = "attachment";\r
-\r
- private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",\r
- "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",\r
- "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));\r
-\r
- private static Set<String> excludedBranches = new TreeSet<String>(\r
- Arrays.asList("/refs/heads/gb-issues"));\r
-\r
- private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();\r
- private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();\r
-\r
- private static final String LUCENE_DIR = "lucene";\r
- private static final String CONF_FILE = "lucene.conf";\r
- private static final String CONF_INDEX = "index";\r
- private static final String CONF_VERSION = "version";\r
- private static final String CONF_ALIAS = "aliases";\r
- private static final String CONF_BRANCH = "branches";\r
- \r
- /**\r
- * Returns the author for the commit, if this information is available.\r
- * \r
- * @param commit\r
- * @return an author or unknown\r
- */\r
- private static String getAuthor(RevCommit commit) {\r
- String name = "unknown";\r
- try {\r
- name = commit.getAuthorIdent().getName();\r
- if (StringUtils.isEmpty(name)) {\r
- name = commit.getAuthorIdent().getEmailAddress();\r
- }\r
- } catch (NullPointerException n) { \r
- }\r
- return name;\r
- }\r
- \r
- /**\r
- * Returns the committer for the commit, if this information is available.\r
- * \r
- * @param commit\r
- * @return an committer or unknown\r
- */\r
- private static String getCommitter(RevCommit commit) {\r
- String name = "unknown";\r
- try {\r
- name = commit.getCommitterIdent().getName();\r
- if (StringUtils.isEmpty(name)) {\r
- name = commit.getCommitterIdent().getEmailAddress();\r
- }\r
- } catch (NullPointerException n) { \r
- }\r
- return name;\r
- }\r
-\r
- /**\r
- * Construct a keyname from the branch.\r
- * \r
- * @param branchName\r
- * @return a keyname appropriate for the Git config file format\r
- */\r
- private static String getBranchKey(String branchName) {\r
- return StringUtils.getSHA1(branchName);\r
- }\r
-\r
- /**\r
- * Returns the Lucene configuration for the specified repository.\r
- * \r
- * @param repository\r
- * @return a config object\r
- */\r
- private static FileBasedConfig getConfig(Repository repository) {\r
- File file = new File(repository.getDirectory(), CONF_FILE);\r
- FileBasedConfig config = new FileBasedConfig(file, FS.detect());\r
- return config;\r
- }\r
-\r
- /**\r
- * Reads the Lucene config file for the repository to check the index\r
- * version. If the index version is different, then rebuild the repository\r
- * index.\r
- * \r
- * @param repository\r
- * @return true of the on-disk index format is different than INDEX_VERSION\r
- */\r
- public static boolean shouldReindex(Repository repository) {\r
- try {\r
- FileBasedConfig config = getConfig(repository);\r
- config.load();\r
- int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);\r
- // reindex if versions do not match\r
- return indexVersion != INDEX_VERSION;\r
- } catch (Throwable t) {\r
- }\r
- return true;\r
- }\r
-\r
- /**\r
- * Deletes the Lucene index for the specified repository.\r
- * \r
- * @param repository\r
- * @return true, if successful\r
- */\r
- public static boolean deleteIndex(Repository repository) {\r
- try {\r
- File luceneIndex = new File(repository.getDirectory(), LUCENE_DIR);\r
- if (luceneIndex.exists()) {\r
- org.eclipse.jgit.util.FileUtils.delete(luceneIndex,\r
- org.eclipse.jgit.util.FileUtils.RECURSIVE);\r
- }\r
- File luceneConfig = new File(repository.getDirectory(), CONF_FILE);\r
- if (luceneConfig.exists()) {\r
- luceneConfig.delete();\r
- }\r
- return true;\r
- } catch (IOException e) {\r
- throw new RuntimeException(e);\r
- }\r
- }\r
-\r
- /**\r
- * This completely indexes the repository and will destroy any existing\r
- * index.\r
- * \r
- * @param repositoryName\r
- * @param repository\r
- * @return IndexResult\r
- */\r
- public static IndexResult reindex(String repositoryName, Repository repository) {\r
- IndexResult result = new IndexResult();\r
- if (!LuceneUtils.deleteIndex(repository)) {\r
- return result;\r
- }\r
- try { \r
- FileBasedConfig config = getConfig(repository);\r
- Set<String> indexedCommits = new TreeSet<String>();\r
- IndexWriter writer = getIndexWriter(repository, true);\r
- // build a quick lookup of tags\r
- Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
- for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
- if (!tag.isAnnotatedTag()) {\r
- // skip non-annotated tags\r
- continue;\r
- }\r
- if (!tags.containsKey(tag.getObjectId())) {\r
- tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());\r
- }\r
- tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);\r
- }\r
- \r
- ObjectReader reader = repository.newObjectReader();\r
-\r
- // get the local branches\r
- List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
- \r
- // sort them by most recently updated\r
- Collections.sort(branches, new Comparator<RefModel>() {\r
- @Override\r
- public int compare(RefModel ref1, RefModel ref2) {\r
- return ref2.getDate().compareTo(ref1.getDate());\r
- }\r
- });\r
- \r
- // reorder default branch to first position\r
- RefModel defaultBranch = null;\r
- ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);\r
- for (RefModel branch : branches) {\r
- if (branch.getObjectId().equals(defaultBranchId)) {\r
- defaultBranch = branch; \r
- break;\r
- }\r
- }\r
- branches.remove(defaultBranch);\r
- branches.add(0, defaultBranch);\r
- \r
- // walk through each branch\r
- for (RefModel branch : branches) {\r
- if (excludedBranches.contains(branch.getName())) {\r
- continue;\r
- }\r
-\r
- String branchName = branch.getName();\r
- RevWalk revWalk = new RevWalk(reader);\r
- RevCommit tip = revWalk.parseCommit(branch.getObjectId());\r
- String tipId = tip.getId().getName();\r
-\r
- String keyName = getBranchKey(branchName);\r
- config.setString(CONF_ALIAS, null, keyName, branchName);\r
- config.setString(CONF_BRANCH, null, keyName, tipId);\r
-\r
- // index the blob contents of the tree\r
- TreeWalk treeWalk = new TreeWalk(repository);\r
- treeWalk.addTree(tip.getTree());\r
- treeWalk.setRecursive(true); \r
- \r
- Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();\r
- while (treeWalk.next()) {\r
- paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));\r
- } \r
-\r
- ByteArrayOutputStream os = new ByteArrayOutputStream();\r
- byte[] tmp = new byte[32767];\r
-\r
- RevWalk commitWalk = new RevWalk(reader);\r
- commitWalk.markStart(tip);\r
- \r
- RevCommit commit;\r
- while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {\r
- TreeWalk diffWalk = new TreeWalk(reader);\r
- int parentCount = commit.getParentCount();\r
- switch (parentCount) {\r
- case 0:\r
- diffWalk.addTree(new EmptyTreeIterator());\r
- break;\r
- case 1:\r
- diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));\r
- break;\r
- default:\r
- // skip merge commits\r
- continue;\r
- }\r
- diffWalk.addTree(getTree(commitWalk, commit));\r
- diffWalk.setFilter(ANY_DIFF);\r
- diffWalk.setRecursive(true);\r
- while ((paths.size() > 0) && diffWalk.next()) {\r
- String path = diffWalk.getPathString();\r
- if (!paths.containsKey(path)) {\r
- continue;\r
- }\r
- \r
- // remove path from set\r
- ObjectId blobId = paths.remove(path);\r
- result.blobCount++;\r
- \r
- // index the blob metadata\r
- String blobAuthor = getAuthor(commit);\r
- String blobCommitter = getCommitter(commit);\r
- String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,\r
- Resolution.MINUTE);\r
- \r
- Document doc = new Document();\r
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));\r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));\r
- doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED)); \r
-\r
- // determine extension to compare to the extension\r
- // blacklist\r
- String ext = null;\r
- String name = path.toLowerCase();\r
- if (name.indexOf('.') > -1) {\r
- ext = name.substring(name.lastIndexOf('.') + 1);\r
- }\r
-\r
- // index the blob content\r
- if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { \r
- ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);\r
- InputStream in = ldr.openStream(); \r
- int n;\r
- while ((n = in.read(tmp)) > 0) {\r
- os.write(tmp, 0, n);\r
- }\r
- in.close();\r
- byte[] content = os.toByteArray();\r
- String str = new String(content, Constants.CHARACTER_ENCODING);\r
- doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
- os.reset();\r
- } \r
- \r
- // add the blob to the index\r
- writer.addDocument(doc);\r
- }\r
- }\r
-\r
- os.close();\r
-\r
- // index the tip commit object\r
- if (indexedCommits.add(tipId)) {\r
- Document doc = createDocument(tip, tags.get(tipId));\r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
- writer.addDocument(doc);\r
- result.commitCount += 1;\r
- result.branchCount += 1;\r
- }\r
-\r
- // traverse the log and index the previous commit objects\r
- RevWalk historyWalk = new RevWalk(reader);\r
- historyWalk.markStart(historyWalk.parseCommit(tip.getId()));\r
- RevCommit rev;\r
- while ((rev = historyWalk.next()) != null) {\r
- String hash = rev.getId().getName();\r
- if (indexedCommits.add(hash)) {\r
- Document doc = createDocument(rev, tags.get(hash));\r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
- writer.addDocument(doc);\r
- result.commitCount += 1;\r
- }\r
- }\r
- }\r
-\r
- // finished\r
- reader.release();\r
- \r
- // this repository has a gb-issues branch, index all issues\r
- if (IssueUtils.getIssuesBranch(repository) != null) {\r
- List<IssueModel> issues = IssueUtils.getIssues(repository, null);\r
- if (issues.size() > 0) {\r
- result.branchCount += 1;\r
- }\r
- for (IssueModel issue : issues) {\r
- result.issueCount++;\r
- Document doc = createDocument(issue);\r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
- writer.addDocument(doc);\r
- }\r
- }\r
-\r
- // commit all changes and reset the searcher\r
- config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);\r
- config.save();\r
- resetIndexSearcher(repository);\r
- writer.commit();\r
- result.success = true;\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- return result;\r
- }\r
- \r
- /**\r
- * Get the tree associated with the given commit.\r
- *\r
- * @param walk\r
- * @param commit\r
- * @return tree\r
- * @throws IOException\r
- */\r
- protected static RevTree getTree(final RevWalk walk, final RevCommit commit)\r
- throws IOException {\r
- final RevTree tree = commit.getTree();\r
- if (tree != null) {\r
- return tree;\r
- }\r
- walk.parseHeaders(commit);\r
- return commit.getTree();\r
- }\r
-\r
- /**\r
- * Incrementally update the index with the specified commit for the\r
- * repository.\r
- * \r
- * @param repositoryName\r
- * @param repository\r
- * @param branch\r
- * the fully qualified branch name (e.g. refs/heads/master)\r
- * @param commit\r
- * @return true, if successful\r
- */\r
- private static IndexResult index(String repositoryName, Repository repository, \r
- String branch, RevCommit commit) {\r
- IndexResult result = new IndexResult();\r
- try {\r
- if (excludedBranches.contains(branch)) {\r
- if (IssueUtils.GB_ISSUES.equals(branch)) {\r
- // index an issue\r
- String issueId = commit.getShortMessage().substring(2).trim();\r
- IssueModel issue = IssueUtils.getIssue(repository, issueId);\r
- if (issue == null) {\r
- // issue was deleted, remove from index\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- writer.deleteDocuments(\r
- new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(\r
- FIELD_ISSUE, issueId));\r
- writer.commit();\r
- result.success = true;\r
- return result;\r
- }\r
- result.success = index(repositoryName, repository, issue);\r
- result.issueCount++;\r
- return result;\r
- \r
- }\r
- return result;\r
- }\r
- List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);\r
- String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,\r
- Resolution.MINUTE);\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- for (PathChangeModel path : changedPaths) {\r
- // delete the indexed blob\r
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),\r
- new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path));\r
-\r
- // re-index the blob\r
- if (!ChangeType.DELETE.equals(path.changeType)) {\r
- result.blobCount++;\r
- Document doc = new Document();\r
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,\r
- Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
- doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));\r
-\r
- // determine extension to compare to the extension\r
- // blacklist\r
- String ext = null;\r
- String name = path.name.toLowerCase();\r
- if (name.indexOf('.') > -1) {\r
- ext = name.substring(name.lastIndexOf('.') + 1);\r
- }\r
-\r
- if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
- // read the blob content\r
- String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
- path.path);\r
- doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
- writer.addDocument(doc);\r
- }\r
- }\r
- }\r
- writer.commit();\r
-\r
- Document doc = createDocument(commit, null);\r
- result.commitCount++;\r
- result.success = index(repositoryName, repository, doc);\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- return result;\r
- }\r
-\r
- /**\r
- * Incrementally update the index with the specified issue for the\r
- * repository.\r
- * \r
- * @param repository\r
- * @param issue\r
- * @return true, if successful\r
- */\r
- public static boolean index(String repositoryName, Repository repository, IssueModel issue) {\r
- try {\r
- // delete the old issue from the index, if exists\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(\r
- FIELD_ISSUE, String.valueOf(issue.id)));\r
- writer.commit();\r
-\r
- Document doc = createDocument(issue);\r
- return index(repositoryName, repository, doc);\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- return false;\r
- }\r
-\r
- /**\r
- * Updates a repository index incrementally from the last indexed commits.\r
- * \r
- * @param repositoryName\r
- * @param repository\r
- * @return IndexResult\r
- */\r
- public static IndexResult updateIndex(String repositoryName, Repository repository) {\r
- IndexResult result = new IndexResult();\r
- try {\r
- FileBasedConfig config = getConfig(repository);\r
- config.load();\r
-\r
- // build a quick lookup of annotated tags\r
- Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
- for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
- if (!tag.isAnnotatedTag()) {\r
- // skip non-annotated tags\r
- continue;\r
- }\r
- if (!tags.containsKey(tag.getObjectId())) {\r
- tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());\r
- }\r
- tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);\r
- }\r
-\r
- // detect branch deletion\r
- // first assume all branches are deleted and then remove each\r
- // existing branch from deletedBranches during indexing\r
- Set<String> deletedBranches = new TreeSet<String>();\r
- for (String alias : config.getNames(CONF_ALIAS)) {\r
- String branch = config.getString(CONF_ALIAS, null, alias);\r
- deletedBranches.add(branch);\r
- }\r
-\r
-		// walk through each branch\r
- List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
- for (RefModel branch : branches) {\r
- String branchName = branch.getName();\r
-\r
- // remove this branch from the deletedBranches set\r
- deletedBranches.remove(branchName);\r
-\r
- // determine last commit\r
- String keyName = getBranchKey(branchName);\r
- String lastCommit = config.getString(CONF_BRANCH, null, keyName);\r
-\r
- List<RevCommit> revs;\r
- if (StringUtils.isEmpty(lastCommit)) {\r
- // new branch/unindexed branch, get all commits on branch\r
- revs = JGitUtils.getRevLog(repository, branchName, 0, -1);\r
- } else {\r
- // pre-existing branch, get changes since last commit\r
- revs = JGitUtils.getRevLog(repository, lastCommit, branchName);\r
- }\r
-\r
- if (revs.size() > 0) {\r
- result.branchCount += 1;\r
- }\r
- \r
- // reverse the list of commits so we start with the first commit \r
- Collections.reverse(revs);\r
- for (RevCommit commit : revs) {\r
- result.add(index(repositoryName, repository, branchName, commit)); \r
- }\r
-\r
- // update the config\r
- config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);\r
- config.setString(CONF_ALIAS, null, keyName, branchName);\r
- config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());\r
- config.save();\r
- }\r
-\r
- // the deletedBranches set will normally be empty by this point\r
- // unless a branch really was deleted and no longer exists\r
- if (deletedBranches.size() > 0) {\r
- for (String branch : deletedBranches) {\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- writer.deleteDocuments(new Term(FIELD_BRANCH, branch));\r
- writer.commit();\r
- }\r
- }\r
- result.success = true;\r
- } catch (Throwable t) {\r
- t.printStackTrace();\r
- }\r
- return result;\r
- }\r
-\r
- /**\r
- * Creates a Lucene document from an issue.\r
- * \r
- * @param issue\r
- * @return a Lucene document\r
- */\r
- private static Document createDocument(IssueModel issue) {\r
- Document doc = new Document();\r
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,\r
- Field.Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),\r
- Store.YES, Field.Index.NO));\r
- doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));\r
- List<String> attachments = new ArrayList<String>();\r
- for (Attachment attachment : issue.getAttachments()) {\r
- attachments.add(attachment.name.toLowerCase());\r
- }\r
- doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,\r
- Index.ANALYZED));\r
- doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,\r
- Index.ANALYZED));\r
- return doc;\r
- }\r
-\r
- /**\r
- * Creates a Lucene document for a commit\r
- * \r
- * @param commit\r
- * @param tags\r
- * @return a Lucene document\r
- */\r
- private static Document createDocument(RevCommit commit, List<String> tags) {\r
- Document doc = new Document();\r
- doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,\r
- Index.NOT_ANALYZED));\r
- doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,\r
- Resolution.MINUTE), Store.YES, Index.NO));\r
- doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));\r
- doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));\r
- if (!ArrayUtils.isEmpty(tags)) {\r
- doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));\r
- }\r
- return doc;\r
- }\r
-\r
- /**\r
- * Incrementally index an object for the repository.\r
- * \r
- * @param repositoryName\r
- * @param repository\r
- * @param doc\r
- * @return true, if successful\r
- */\r
- private static boolean index(String repositoryName, Repository repository, Document doc) {\r
- try { \r
- doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- writer.addDocument(doc);\r
- resetIndexSearcher(repository);\r
- writer.commit();\r
- return true;\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- return false;\r
- }\r
-\r
- private static SearchResult createSearchResult(Document doc, float score) throws ParseException {\r
- SearchResult result = new SearchResult();\r
- result.score = score;\r
- result.date = DateTools.stringToDate(doc.get(FIELD_DATE));\r
- result.summary = doc.get(FIELD_SUMMARY); \r
- result.author = doc.get(FIELD_AUTHOR);\r
- result.committer = doc.get(FIELD_COMMITTER);\r
- result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));\r
- result.repository = doc.get(FIELD_REPOSITORY);\r
- result.branch = doc.get(FIELD_BRANCH);\r
- result.commitId = doc.get(FIELD_COMMIT);\r
- result.issueId = doc.get(FIELD_ISSUE);\r
- result.path = doc.get(FIELD_PATH);\r
- if (doc.get(FIELD_TAG) != null) {\r
- result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));\r
- }\r
- if (doc.get(FIELD_LABEL) != null) {\r
- result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));\r
- }\r
- return result;\r
- }\r
-\r
- private static void resetIndexSearcher(Repository repository) throws IOException {\r
- IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());\r
- if (searcher != null) {\r
- SEARCHERS.remove(repository.getDirectory());\r
- searcher.close();\r
- }\r
- }\r
-\r
- /**\r
- * Gets an index searcher for the repository.\r
- * \r
- * @param repository\r
- * @return\r
- * @throws IOException\r
- */\r
- private static IndexSearcher getIndexSearcher(Repository repository) throws IOException {\r
- IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());\r
- if (searcher == null) {\r
- IndexWriter writer = getIndexWriter(repository, false);\r
- searcher = new IndexSearcher(IndexReader.open(writer, true));\r
- SEARCHERS.put(repository.getDirectory(), searcher);\r
- }\r
- return searcher;\r
- }\r
-\r
- /**\r
- * Gets an index writer for the repository. The index will be created if it\r
- * does not already exist or if forceCreate is specified.\r
- * \r
- * @param repository\r
- * @param forceCreate\r
- * @return an IndexWriter\r
- * @throws IOException\r
- */\r
- private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate)\r
- throws IOException {\r
- IndexWriter indexWriter = WRITERS.get(repository.getDirectory());\r
- File indexFolder = new File(repository.getDirectory(), LUCENE_DIR);\r
- Directory directory = FSDirectory.open(indexFolder);\r
- if (forceCreate || !indexFolder.exists()) {\r
- // if the writer is going to blow away the existing index and create\r
- // a new one then it should not be cached. instead, close any open\r
- // writer, create a new one, and return.\r
- if (indexWriter != null) {\r
- indexWriter.close();\r
- indexWriter = null;\r
- WRITERS.remove(repository.getDirectory());\r
- }\r
- indexFolder.mkdirs();\r
- IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(\r
- LUCENE_VERSION));\r
- config.setOpenMode(OpenMode.CREATE);\r
- IndexWriter writer = new IndexWriter(directory, config);\r
- writer.close();\r
- }\r
-\r
- if (indexWriter == null) {\r
- IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(\r
- LUCENE_VERSION));\r
- config.setOpenMode(OpenMode.APPEND);\r
- indexWriter = new IndexWriter(directory, config);\r
- WRITERS.put(repository.getDirectory(), indexWriter);\r
- }\r
- return indexWriter;\r
- }\r
-\r
- /**\r
- * Searches the specified repositories for the given text or query\r
- * \r
- * @param text\r
- * if the text is null or empty, null is returned\r
- * @param maximumHits\r
- * the maximum number of hits to collect\r
- * @param repositories\r
- * a list of repositories to search. if no repositories are\r
- * specified null is returned.\r
- * @return a list of SearchResults in order from highest to the lowest score\r
- * \r
- */\r
- public static List<SearchResult> search(String text, int maximumHits,\r
- Repository... repositories) {\r
- if (StringUtils.isEmpty(text)) {\r
- return null;\r
- }\r
- if (repositories.length == 0) {\r
- return null;\r
- }\r
- Set<SearchResult> results = new LinkedHashSet<SearchResult>();\r
- StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);\r
- try {\r
- // default search checks summary and content\r
- BooleanQuery query = new BooleanQuery();\r
- QueryParser qp;\r
- qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);\r
- qp.setAllowLeadingWildcard(true);\r
- query.add(qp.parse(text), Occur.SHOULD);\r
-\r
- qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);\r
- qp.setAllowLeadingWildcard(true);\r
- query.add(qp.parse(text), Occur.SHOULD);\r
-\r
- IndexSearcher searcher;\r
- if (repositories.length == 1) {\r
- // single repository search\r
- searcher = getIndexSearcher(repositories[0]);\r
- } else {\r
- // multiple repository search\r
- List<IndexReader> readers = new ArrayList<IndexReader>();\r
- for (Repository repository : repositories) {\r
- IndexSearcher repositoryIndex = getIndexSearcher(repository);\r
- readers.add(repositoryIndex.getIndexReader());\r
- }\r
- IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);\r
- MultiReader reader = new MultiReader(rdrs);\r
- searcher = new IndexSearcher(reader);\r
- }\r
- Query rewrittenQuery = searcher.rewrite(query);\r
- TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);\r
- searcher.search(rewrittenQuery, collector);\r
- ScoreDoc[] hits = collector.topDocs().scoreDocs;\r
- for (int i = 0; i < hits.length; i++) {\r
- int docId = hits[i].doc;\r
- Document doc = searcher.doc(docId);\r
- SearchResult result = createSearchResult(doc, hits[i].score);\r
- String content = doc.get(FIELD_CONTENT);\r
- \r
- result.fragment = getHighlightedFragment(analyzer, query, content, result);\r
- results.add(result);\r
- }\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- return new ArrayList<SearchResult>(results);\r
- }\r
- \r
- private static String getHighlightedFragment(Analyzer analyzer, Query query,\r
- String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {\r
- content = content == null ? "":StringUtils.escapeForHtml(content, false);\r
- \r
- TokenStream stream = TokenSources.getTokenStream("content", content, analyzer);\r
- QueryScorer scorer = new QueryScorer(query, "content");\r
- Fragmenter fragmenter;\r
- \r
- if (ObjectType.commit == result.type) {\r
- fragmenter = new SimpleSpanFragmenter(scorer, 1024); \r
- } else {\r
- fragmenter = new SimpleSpanFragmenter(scorer, 150);\r
- }\r
-\r
- // use an artificial delimiter for the token\r
- String termTag = "<!--[";\r
- String termTagEnd = "]-->";\r
- SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);\r
- Highlighter highlighter = new Highlighter(formatter, scorer); \r
- highlighter.setTextFragmenter(fragmenter);\r
- \r
- String [] fragments = highlighter.getBestFragments(stream, content, 5);\r
- if (ArrayUtils.isEmpty(fragments)) {\r
- if (ObjectType.blob == result.type) {\r
- return "";\r
- }\r
- return "<pre class=\"text\">" + content + "</pre>";\r
- }\r
- StringBuilder sb = new StringBuilder();\r
- for (int i = 0, len = fragments.length; i < len; i++) {\r
- String fragment = fragments[i];\r
- \r
-			// resurrect the raw fragment by removing the artificial delimiters\r
- String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); \r
- sb.append(getPreTag(result, raw, content));\r
- \r
- // replace the artificial delimiter with html tags\r
- String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");\r
- sb.append(html);\r
- sb.append("</pre>");\r
- if (i < len - 1) {\r
- sb.append("<span class=\"ellipses\">...</span><br/>");\r
- }\r
- }\r
- return sb.toString();\r
- }\r
- \r
- private static String getPreTag(SearchResult result, String fragment, String content) {\r
- String pre = "<pre class=\"text\">";\r
- if (ObjectType.blob == result.type) {\r
- int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment))); \r
- int lastDot = result.path.lastIndexOf('.');\r
- if (lastDot > -1) {\r
- String ext = result.path.substring(lastDot + 1).toLowerCase();\r
- pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext); \r
- } else {\r
- pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);\r
- }\r
- }\r
- return pre;\r
- }\r
-\r
- /**\r
- * Close all the index writers and searchers\r
- */\r
- public static void close() {\r
- // close writers\r
- for (File file : WRITERS.keySet()) {\r
- try {\r
- WRITERS.get(file).close(true);\r
- } catch (Throwable t) {\r
- t.printStackTrace();\r
- }\r
- }\r
- WRITERS.clear();\r
-\r
- // close searchers\r
- for (File file : SEARCHERS.keySet()) {\r
- try {\r
- SEARCHERS.get(file).close();\r
- } catch (Throwable t) {\r
- t.printStackTrace();\r
- }\r
- }\r
- SEARCHERS.clear();\r
- }\r
-\r
- public static class IndexResult {\r
- public boolean success;\r
- public int branchCount;\r
- public int commitCount;\r
- public int blobCount;\r
- public int issueCount;\r
- \r
- public void add(IndexResult result) {\r
- this.branchCount += result.branchCount;\r
- this.commitCount += result.commitCount;\r
- this.blobCount += result.blobCount;\r
- this.issueCount += result.issueCount; \r
- }\r
- }\r
-}\r
import org.apache.wicket.markup.repeater.data.ListDataProvider;\r
import org.apache.wicket.model.Model;\r
import org.eclipse.jgit.lib.Constants;\r
-import org.eclipse.jgit.lib.Repository;\r
\r
import com.gitblit.Constants.SearchType;\r
import com.gitblit.GitBlit;\r
import com.gitblit.models.SearchResult;\r
import com.gitblit.models.UserModel;\r
import com.gitblit.utils.ArrayUtils;\r
-import com.gitblit.utils.LuceneUtils;\r
import com.gitblit.utils.StringUtils;\r
import com.gitblit.wicket.GitBlitWebSession;\r
import com.gitblit.wicket.StringChoiceRenderer;\r
\r
// execute search\r
final List<SearchResult> results = new ArrayList<SearchResult>();\r
- results.addAll(search(repositories, query));\r
+ if (!ArrayUtils.isEmpty(repositories) && !StringUtils.isEmpty(query)) {\r
+ results.addAll(GitBlit.self().search(query, 100, repositories));\r
+ }\r
\r
// search results view\r
ListDataProvider<SearchResult> resultsDp = new ListDataProvider<SearchResult>(results);\r
}\r
};\r
add(resultsView.setVisible(results.size() > 0));\r
- }\r
- \r
- private List<SearchResult> search(List<String> repositories, String query) {\r
- if (ArrayUtils.isEmpty(repositories) || StringUtils.isEmpty(query)) {\r
- return new ArrayList<SearchResult>();\r
- }\r
- List<Repository> repos = new ArrayList<Repository>();\r
- for (String r : repositories) {\r
- repos.add(GitBlit.self().getRepository(r));\r
- }\r
- List<SearchResult> srs = LuceneUtils.search(query, 100, repos.toArray(new Repository[repos.size()]));\r
- for (Repository r : repos) {\r
- r.close();\r
- }\r
- return srs;\r
- }\r
+ } \r
}\r
ObjectCacheTest.class, UserServiceTest.class, MarkdownUtilsTest.class, JGitUtilsTest.class,\r
SyndicationUtilsTest.class, DiffUtilsTest.class, MetricUtilsTest.class,\r
TicgitUtilsTest.class, GitBlitTest.class, FederationTests.class, RpcTests.class,\r
- GitServletTest.class, GroovyScriptTest.class, LuceneUtilsTest.class, IssuesTest.class })\r
+ GitServletTest.class, GroovyScriptTest.class, LuceneExecutorTest.class, IssuesTest.class })\r
public class GitBlitSuite {\r
\r
public static final File REPOSITORIES = new File("git");\r
}\r
\r
public static Repository getIssuesTestRepository() throws Exception {\r
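+		// create the issues repository on demand rather than during suite setup\r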
+ JGitUtils.createRepository(REPOSITORIES, "gb-issues.git").close();\r
return new FileRepository(new File(REPOSITORIES, "gb-issues.git"));\r
}\r
\r
cloneOrFetch("test/theoretical-physics.git", "https://github.com/certik/theoretical-physics.git");\r
cloneOrFetch("test/gitective.git", "https://github.com/kevinsawicki/gitective.git");\r
\r
- JGitUtils.createRepository(REPOSITORIES, "gb-issues.git").close();\r
-\r
enableTickets("ticgit.git");\r
enableDocs("ticgit.git");\r
showRemoteBranches("ticgit.git");\r
import org.eclipse.jgit.lib.Repository;\r
import org.junit.Test;\r
\r
+import com.gitblit.LuceneExecutor;\r
import com.gitblit.models.IssueModel;\r
import com.gitblit.models.IssueModel.Attachment;\r
import com.gitblit.models.IssueModel.Change;\r
import com.gitblit.models.SearchResult;\r
import com.gitblit.utils.IssueUtils;\r
import com.gitblit.utils.IssueUtils.IssueFilter;\r
-import com.gitblit.utils.LuceneUtils;\r
import com.gitblit.utils.StringUtils;\r
\r
/**\r
return issue.status.isClosed();\r
}\r
});\r
-\r
+ \r
assertTrue(allIssues.size() > 0);\r
assertEquals(1, openIssues.size());\r
assertEquals(1, closedIssues.size());\r
\r
// build a new Lucene index\r
- LuceneUtils.deleteIndex(repository);\r
+ LuceneExecutor lucene = new LuceneExecutor(null, GitBlitSuite.REPOSITORIES);\r
+ lucene.deleteIndex(name);\r
for (IssueModel anIssue : allIssues) {\r
- LuceneUtils.index(name, repository, anIssue);\r
+ lucene.index(name, anIssue);\r
}\r
- List<SearchResult> hits = LuceneUtils.search("working", 10, repository);\r
+ List<SearchResult> hits = lucene.search("working", 10, name);\r
assertTrue(hits.size() > 0);\r
\r
// reindex an issue\r
change.comment("this is a test of reindexing an issue");\r
IssueUtils.updateIssue(repository, issue.id, change);\r
issue = IssueUtils.getIssue(repository, issue.id);\r
- LuceneUtils.index(name, repository, issue);\r
+ lucene.index(name, issue);\r
\r
// delete all issues\r
for (IssueModel anIssue : allIssues) {\r
assertTrue(IssueUtils.deleteIssue(repository, anIssue.id, "D"));\r
}\r
\r
- LuceneUtils.close();\r
+ lucene.close();\r
repository.close();\r
}\r
\r
--- /dev/null
+/*\r
+ * Copyright 2012 gitblit.com.\r
+ *\r
+ * Licensed under the Apache License, Version 2.0 (the "License");\r
+ * you may not use this file except in compliance with the License.\r
+ * You may obtain a copy of the License at\r
+ *\r
+ * http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+package com.gitblit.tests;\r
+\r
+import static org.junit.Assert.assertEquals;\r
+\r
+import java.util.ArrayList;\r
+import java.util.List;\r
+\r
+import org.eclipse.jgit.lib.Repository;\r
+import org.junit.Test;\r
+\r
+import com.gitblit.LuceneExecutor;\r
+import com.gitblit.models.SearchResult;\r
+import com.gitblit.utils.StringUtils;\r
+\r
+/**\r
+ * Tests Lucene indexing and querying.\r
+ * \r
+ * @author James Moger\r
+ * \r
+ */\r
+public class LuceneExecutorTest {\r
+\r
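+	// constructs an executor rooted at the test repositories folder; these\r
+	// tests pass null settings and drive indexing/searching directly\r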
+ private LuceneExecutor newLuceneExecutor() {\r
+ return new LuceneExecutor(null, GitBlitSuite.REPOSITORIES);\r
+ }\r
+ \r
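+	// resolves a repository's name as its path relative to the test repositories folder\r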
+ private String getName(Repository repository) {\r
+ return StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),\r
+ repository.getDirectory().getAbsolutePath());\r
+ }\r
+ \r
+ @Test\r
+ public void testIndex() throws Exception {\r
+ LuceneExecutor lucene = newLuceneExecutor();\r
+ \r
+ // reindex helloworld\r
+ Repository repository = GitBlitSuite.getHelloworldRepository();\r
+ String name = getName(repository);\r
+ lucene.reindex(name, repository);\r
+ repository.close();\r
+ \r
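+		// verify field-qualified queries (type:, path:) against the fresh index\r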
+ SearchResult result = lucene.search("type:blob AND path:bit.bit", 1, name).get(0); \r
+ assertEquals("Mike Donaghy", result.author);\r
+ result = lucene.search("type:blob AND path:clipper.prg", 1, name).get(0); \r
+ assertEquals("tinogomes", result.author); \r
+\r
+ // reindex theoretical physics\r
+ repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
+ name = getName(repository);\r
+ lucene.reindex(name, repository);\r
+ repository.close();\r
+ \r
+ // reindex JGit\r
+ repository = GitBlitSuite.getJGitRepository();\r
+ name = getName(repository);\r
+ lucene.reindex(name, repository);\r
+ repository.close();\r
+ \r
+ lucene.close();\r
+ }\r
+\r
+ @Test\r
+ public void testQuery() throws Exception {\r
+		LuceneExecutor lucene = newLuceneExecutor();\r
+ \r
+ // 2 occurrences on the master branch\r
+ Repository repository = GitBlitSuite.getHelloworldRepository(); \r
+ String name = getName(repository);\r
+ repository.close();\r
+ \r
+ List<SearchResult> results = lucene.search("ada", 10, name);\r
+ assertEquals(2, results.size());\r
+ for (SearchResult res : results) {\r
+ assertEquals("refs/heads/master", res.branch);\r
+ }\r
+\r
+ // author test\r
+ results = lucene.search("author: tinogomes AND type:commit", 10, name);\r
+ assertEquals(2, results.size());\r
+ \r
+ // blob test\r
+ results = lucene.search("type: blob AND \"import std.stdio\"", 10, name);\r
+ assertEquals(1, results.size());\r
+ assertEquals("d.D", results.get(0).path);\r
+ \r
+ // 1 occurrence on the gh-pages branch\r
+ repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
+ name = getName(repository);\r
+ repository.close();\r
+ \r
+ results = lucene.search("\"add the .nojekyll file\"", 10, name);\r
+ assertEquals(1, results.size());\r
+ assertEquals("Ondrej Certik", results.get(0).author);\r
+ assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).commitId);\r
+ assertEquals("refs/heads/gh-pages", results.get(0).branch);\r
+ \r
+ results = lucene.search("type:blob AND \"src/intro.rst\"", 10, name);\r
+ assertEquals(4, results.size());\r
+ \r
+ // hash id tests\r
+ results = lucene.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, name);\r
+ assertEquals(1, results.size());\r
+\r
+ results = lucene.search("commit:57c4f26f157*", 10, name);\r
+ assertEquals(1, results.size()); \r
+ \r
+ // annotated tag test\r
+ repository = GitBlitSuite.getJGitRepository();\r
+ name = getName(repository);\r
+ repository.close();\r
+ \r
+ results = lucene.search("I663208919f297836a9c16bf458e4a43ffaca4c12", 10, name);\r
+ assertEquals(1, results.size());\r
+ assertEquals("[v1.3.0.201202151440-r]", results.get(0).tags.toString()); \r
+ \r
+ lucene.close();\r
+ }\r
+ \r
+ @Test\r
+ public void testMultiSearch() throws Exception {\r
+ LuceneExecutor lucene = newLuceneExecutor();\r
+ List<String> list = new ArrayList<String>();\r
+ Repository repository = GitBlitSuite.getHelloworldRepository();\r
+ list.add(getName(repository));\r
+ repository.close();\r
+\r
+ repository = GitBlitSuite.getJGitRepository();\r
+ list.add(getName(repository));\r
+ repository.close();\r
+\r
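+		// issue a single query across both repository indexes\r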
+ List<SearchResult> results = lucene.search("test", 10, list);\r
+ lucene.close();\r
+ assertEquals(10, results.size());\r
+ }\r
+}
\ No newline at end of file
+++ /dev/null
-/*\r
- * Copyright 2012 gitblit.com.\r
- *\r
- * Licensed under the Apache License, Version 2.0 (the "License");\r
- * you may not use this file except in compliance with the License.\r
- * You may obtain a copy of the License at\r
- *\r
- * http://www.apache.org/licenses/LICENSE-2.0\r
- *\r
- * Unless required by applicable law or agreed to in writing, software\r
- * distributed under the License is distributed on an "AS IS" BASIS,\r
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- * See the License for the specific language governing permissions and\r
- * limitations under the License.\r
- */\r
-package com.gitblit.tests;\r
-\r
-import static org.junit.Assert.assertEquals;\r
-\r
-import java.util.List;\r
-\r
-import org.eclipse.jgit.lib.Repository;\r
-import org.junit.Test;\r
-\r
-import com.gitblit.models.SearchResult;\r
-import com.gitblit.utils.LuceneUtils;\r
-import com.gitblit.utils.StringUtils;\r
-\r
-/**\r
- * Tests Lucene indexing and querying.\r
- * \r
- * @author James Moger\r
- * \r
- */\r
-public class LuceneUtilsTest {\r
-\r
- @Test\r
- public void testIndex() throws Exception {\r
- // reindex helloworld\r
- Repository repository = GitBlitSuite.getHelloworldRepository();\r
- String name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),\r
- repository.getDirectory().getAbsolutePath());\r
- LuceneUtils.reindex(name, repository);\r
- SearchResult result = LuceneUtils.search("type:blob AND path:bit.bit", 1, repository).get(0); \r
- assertEquals("Mike Donaghy", result.author);\r
- result = LuceneUtils.search("type:blob AND path:clipper.prg", 1, repository).get(0); \r
- assertEquals("tinogomes", result.author);\r
- repository.close();\r
-\r
- // reindex theoretical physics\r
- repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
- name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),\r
- repository.getDirectory().getAbsolutePath());\r
- LuceneUtils.reindex(name, repository);\r
- repository.close();\r
- \r
- // reindex JGit\r
- repository = GitBlitSuite.getJGitRepository();\r
- name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),\r
- repository.getDirectory().getAbsolutePath());\r
- LuceneUtils.reindex(name, repository);\r
- repository.close();\r
- \r
- LuceneUtils.close();\r
- }\r
-\r
- @Test\r
- public void testQuery() throws Exception {\r
- // 2 occurrences on the master branch\r
- Repository repository = GitBlitSuite.getHelloworldRepository();\r
- List<SearchResult> results = LuceneUtils.search("ada", 10, repository);\r
- assertEquals(2, results.size());\r
- for (SearchResult res : results) {\r
- assertEquals("refs/heads/master", res.branch);\r
- }\r
-\r
- // author test\r
- results = LuceneUtils.search("author: tinogomes", 10, repository);\r
- assertEquals(2, results.size());\r
-\r
- repository.close();\r
- // blob test\r
- results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);\r
- assertEquals(1, results.size());\r
- assertEquals("d.D", results.get(0).path);\r
- \r
- // 1 occurrence on the gh-pages branch\r
- repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
- results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);\r
- assertEquals(1, results.size());\r
- assertEquals("Ondrej Certik", results.get(0).author);\r
- assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).commitId);\r
- assertEquals("refs/heads/gh-pages", results.get(0).branch);\r
- \r
- results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);\r
- assertEquals(4, results.size());\r
- \r
- // hash id tests\r
- results = LuceneUtils.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);\r
- assertEquals(1, results.size());\r
-\r
- results = LuceneUtils.search("commit:57c4f26f157*", 10, repository);\r
- assertEquals(1, results.size());\r
-\r
- repository.close();\r
- \r
- // annotated tag test\r
- repository = GitBlitSuite.getJGitRepository();\r
- results = LuceneUtils.search("I663208919f297836a9c16bf458e4a43ffaca4c12", 10, repository);\r
- assertEquals(1, results.size());\r
- assertEquals("[v1.3.0.201202151440-r]", results.get(0).labels.toString());\r
-\r
- repository.close();\r
- \r
- LuceneUtils.close();\r
- }\r
- \r
- @Test\r
- public void testMultiSearch() throws Exception {\r
- List<SearchResult> results = LuceneUtils.search("test", 10,\r
- GitBlitSuite.getHelloworldRepository(), \r
- GitBlitSuite.getJGitRepository());\r
- LuceneUtils.close();\r
- assertEquals(10, results.size());\r
- }\r
-}
\ No newline at end of file