import java.util.ArrayList;\r
import java.util.Arrays;\r
import java.util.HashMap;\r
-import java.util.HashSet;\r
+import java.util.LinkedHashSet;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
import org.apache.lucene.index.IndexWriter;\r
import org.apache.lucene.index.IndexWriterConfig;\r
import org.apache.lucene.index.IndexWriterConfig.OpenMode;\r
+import org.apache.lucene.index.MultiReader;\r
import org.apache.lucene.index.Term;\r
import org.apache.lucene.queryParser.QueryParser;\r
import org.apache.lucene.search.BooleanClause.Occur;\r
private static final String FIELD_LABEL = "label";\r
private static final String FIELD_ATTACHMENT = "attachment";\r
\r
- private static Set<String> excludes = new TreeSet<String>(Arrays.asList("7z", "arc", "arj",\r
- "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",\r
- "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));\r
+ private static Set<String> excludedExtensions = new TreeSet<String>(\r
+ Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",\r
+ "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", \r
+ "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",\r
+ "xlsx", "zip"));\r
+\r
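+ // branches excluded from blob/commit indexing; gb-issues holds Gitblit's\r
+ // issue data, which is indexed separately as IssueModels below\r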
+ private static Set<String> excludedBranches = new TreeSet<String>(\r
+ Arrays.asList("/refs/heads/gb-issues"));\r
\r
private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();\r
private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();\r
\r
+ /**\r
+ * Returns the name of the repository: the directory name for a bare\r
+ * repository, or the name of the working directory otherwise.\r
+ * \r
+ * @param repository\r
+ * the git repository to name\r
+ * @return the repository name\r
+ */\r
+ private static String getName(Repository repository) {\r
+ if (repository.isBare()) {\r
+ return repository.getDirectory().getName();\r
+ } else {\r
+ return repository.getDirectory().getParentFile().getName();\r
+ }\r
+ }\r
+ \r
/**\r
* Deletes the Lucene index for the specified repository.\r
* \r
*/\r
public static boolean index(Repository repository) {\r
try {\r
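+ // tag every document with the repository name (FIELD_REPOSITORY) so that\r
+ // multi-repository search results can be attributed to their source\r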
+ String repositoryName = getName(repository);\r
Set<String> indexedCommits = new TreeSet<String>();\r
IndexWriter writer = getIndexWriter(repository, true);\r
// build a quick lookup of tags\r
// walk through each branch\r
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
for (RefModel branch : branches) {\r
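+ // skip branches that should not be indexed (e.g. gb-issues)\r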
+ if (excludedBranches.contains(branch.getName())) {\r
+ continue;\r
+ }\r
RevWalk revWalk = new RevWalk(repository);\r
RevCommit rev = revWalk.parseCommit(branch.getObjectId());\r
\r
Document doc = new Document();\r
doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,\r
Index.NOT_ANALYZED_NO_NORMS));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+ Index.NOT_ANALYZED));\r
doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,\r
Index.NOT_ANALYZED));\r
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
ext = name.substring(name.lastIndexOf('.') + 1);\r
}\r
\r
- if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) {\r
+ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
// read the blob content\r
ObjectId entid = treeWalk.getObjectId(0);\r
FileMode entmode = treeWalk.getFileMode(0);\r
String head = rev.getId().getName();\r
if (indexedCommits.add(head)) {\r
Document doc = createDocument(rev, tags.get(head));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+ Index.NOT_ANALYZED));\r
writer.addDocument(doc);\r
}\r
\r
String hash = rev.getId().getName();\r
if (indexedCommits.add(hash)) {\r
Document doc = createDocument(rev, tags.get(hash));\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+ Index.NOT_ANALYZED));\r
writer.addDocument(doc);\r
}\r
}\r
List<IssueModel> issues = IssueUtils.getIssues(repository, null);\r
for (IssueModel issue : issues) {\r
Document doc = createDocument(issue);\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+ Index.NOT_ANALYZED));\r
writer.addDocument(doc);\r
}\r
}\r
*/\r
private static boolean index(Repository repository, Document doc) {\r
try {\r
+ String repositoryName = getName(repository);\r
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+ Index.NOT_ANALYZED));\r
IndexWriter writer = getIndexWriter(repository, false);\r
writer.addDocument(doc);\r
resetIndexSearcher(repository);\r
result.author = doc.get(FIELD_AUTHOR);\r
result.committer = doc.get(FIELD_COMMITTER);\r
result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));\r
+ result.repository = doc.get(FIELD_REPOSITORY);\r
result.id = doc.get(FIELD_OBJECT_ID);\r
if (doc.get(FIELD_LABEL) != null) {\r
result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));\r
}\r
\r
/**\r
- * Search the repository for the given text or query\r
+ * Searches the specified repositories for the given text or query\r
* \r
- * @param repository\r
* @param text\r
- * @return a list of SearchResults\r
+ * the text or query to search for; if null or empty, null is returned\r
+ * @param maximumHits\r
+ * the maximum number of hits to collect\r
+ * @param repositories\r
+ * the repositories to search; if none are specified, null is\r
+ * returned\r
+ * @return a list of SearchResults ordered from highest to lowest score\r
+ * \r
*/\r
- public static List<SearchResult> search(Repository repository, String text) {\r
+ public static List<SearchResult> search(String text, int maximumHits,\r
+ Repository... repositories) {\r
if (StringUtils.isEmpty(text)) {\r
return null;\r
}\r
- Set<SearchResult> results = new HashSet<SearchResult>();\r
+ if (repositories.length == 0) {\r
+ return null;\r
+ }\r
+ Set<SearchResult> results = new LinkedHashSet<SearchResult>();\r
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);\r
try {\r
// default search checks summary and content\r
qp.setAllowLeadingWildcard(true);\r
query.add(qp.parse(text), Occur.SHOULD);\r
\r
- IndexSearcher searcher = getIndexSearcher(repository);\r
+ IndexSearcher searcher;\r
+ if (repositories.length == 1) {\r
+ // single repository search\r
+ searcher = getIndexSearcher(repositories[0]);\r
+ } else {\r
+ // multiple repository search\r
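+ // combine each repository's IndexReader into a single MultiReader so\r
+ // one query can be executed across all of the indexes at once\r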
+ List<IndexReader> readers = new ArrayList<IndexReader>();\r
+ for (Repository repository : repositories) {\r
+ IndexSearcher repositoryIndex = getIndexSearcher(repository);\r
+ readers.add(repositoryIndex.getIndexReader());\r
+ }\r
+ IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);\r
+ MultiReader reader = new MultiReader(rdrs);\r
+ searcher = new IndexSearcher(reader);\r
+ }\r
Query rewrittenQuery = searcher.rewrite(query);\r
-\r
- TopScoreDocCollector collector = TopScoreDocCollector.create(200, true);\r
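+ // collect at most maximumHits documents, ordered by descending score\r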
+ TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);\r
searcher.search(rewrittenQuery, collector);\r
ScoreDoc[] hits = collector.topDocs().scoreDocs;\r
for (int i = 0; i < hits.length; i++) {\r
e.printStackTrace();\r
}\r
return new ArrayList<SearchResult>(results);\r
}\r
\r
/**\r
* Close all the index writers and searchers\r
public void testQuery() throws Exception {\r
// 2 occurrences on the master branch\r
Repository repository = GitBlitSuite.getHelloworldRepository();\r
- List<SearchResult> results = LuceneUtils.search(repository, "ada");\r
+ List<SearchResult> results = LuceneUtils.search("ada", 10, repository);\r
assertEquals(2, results.size());\r
\r
// author test\r
- results = LuceneUtils.search(repository, "author: tinogomes");\r
+ results = LuceneUtils.search("author: tinogomes", 10, repository);\r
assertEquals(2, results.size());\r
\r
repository.close();\r
// blob test\r
- results = LuceneUtils.search(repository, "type: blob AND \"import std.stdio\"");\r
+ results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);\r
assertEquals(1, results.size());\r
assertEquals("d.D", results.get(0).id);\r
\r
// 1 occurrence on the gh-pages branch\r
repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
- results = LuceneUtils.search(repository, "\"add the .nojekyll file\"");\r
+ results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);\r
assertEquals(1, results.size());\r
assertEquals("Ondrej Certik", results.get(0).author);\r
assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);\r
\r
// tag test\r
- results = LuceneUtils.search(repository, "\"qft split\"");\r
+ results = LuceneUtils.search("\"qft split\"", 10, repository);\r
assertEquals(1, results.size());\r
assertEquals("Ondrej Certik", results.get(0).author);\r
assertEquals("57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", results.get(0).id);\r
assertEquals("[1st-edition]", results.get(0).labels.toString());\r
\r
- results = LuceneUtils.search(repository, "type:blob AND \"src/intro.rst\"");\r
+ results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);\r
assertEquals(4, results.size());\r
\r
// hash id tests\r
- results = LuceneUtils.search(repository, "id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5");\r
+ results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);\r
assertEquals(1, results.size());\r
\r
- results = LuceneUtils.search(repository, "id:57c4f26f157*");\r
+ results = LuceneUtils.search("id:57c4f26f157*", 10, repository);\r
assertEquals(1, results.size());\r
\r
repository.close();\r
\r
// annotated tag test\r
repository = GitBlitSuite.getBluezGnomeRepository();\r
- results = LuceneUtils.search(repository, "\"release 1.8\"");\r
+ results = LuceneUtils.search("\"release 1.8\"", 10, repository);\r
assertEquals(1, results.size());\r
assertEquals("[1.8]", results.get(0).labels.toString());\r
\r
\r
LuceneUtils.close();\r
}\r
+ \r
+ @Test\r
+ public void testMultiSearch() throws Exception {\r
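+ // a single query across two repository indexes; hits from both are\r
+ // merged and scored together by the MultiReader-backed searcher\r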
+ List<SearchResult> results = LuceneUtils.search("test", 10,\r
+ GitBlitSuite.getHelloworldRepository(), \r
+ GitBlitSuite.getBluezGnomeRepository());\r
+ LuceneUtils.close();\r
+ assertEquals(10, results.size());\r
+ }\r
}
\ No newline at end of file