diff options
author | James Moger <james.moger@gitblit.com> | 2012-02-17 18:43:40 -0500 |
---|---|---|
committer | James Moger <james.moger@gitblit.com> | 2012-02-17 18:43:40 -0500 |
commit | 36ee2307ea083cfe8994b13a3023bc779b2b23ec (patch) | |
tree | 7d40aa082abc8070b9c3d367eb2156ef9ef8eb84 | |
parent | 0c2ea45eb400b1ded16223c03e63cec0c40564b8 (diff) | |
download | gitblit-36ee2307ea083cfe8994b13a3023bc779b2b23ec.tar.gz gitblit-36ee2307ea083cfe8994b13a3023bc779b2b23ec.zip |
Implemented multi-repository Lucene search
-rw-r--r-- | src/com/gitblit/models/SearchResult.java | 4 | ||||
-rw-r--r-- | src/com/gitblit/utils/LuceneUtils.java | 87 | ||||
-rw-r--r-- | tests/com/gitblit/tests/IssuesTest.java | 4 | ||||
-rw-r--r-- | tests/com/gitblit/tests/LuceneUtilsTest.java | 27 |
4 files changed, 96 insertions, 26 deletions
diff --git a/src/com/gitblit/models/SearchResult.java b/src/com/gitblit/models/SearchResult.java index 44207f08..4a03a70b 100644 --- a/src/com/gitblit/models/SearchResult.java +++ b/src/com/gitblit/models/SearchResult.java @@ -25,6 +25,8 @@ public class SearchResult implements Serializable { public String committer;
public String summary;
+
+ public String repository;
public String id;
@@ -37,6 +39,6 @@ public class SearchResult implements Serializable { @Override
public String toString() {
- return type.name() + ": " + id;
+ return score + " : " + type.name() + " : " + repository + " : " + id;
}
}
\ No newline at end of file diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java index 4ca72f0a..483537d0 100644 --- a/src/com/gitblit/utils/LuceneUtils.java +++ b/src/com/gitblit/utils/LuceneUtils.java @@ -8,7 +8,7 @@ import java.text.ParseException; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -90,14 +91,33 @@ public class LuceneUtils { private static final String FIELD_LABEL = "label";
private static final String FIELD_ATTACHMENT = "attachment";
- private static Set<String> excludes = new TreeSet<String>(Arrays.asList("7z", "arc", "arj",
- "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",
- "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
+ private static Set<String> excludedExtensions = new TreeSet<String>(
+ Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",
+ "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",
+ "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",
+ "xlsx", "zip"));
+
+ private static Set<String> excludedBranches = new TreeSet<String>(
+ Arrays.asList("/refs/heads/gb-issues"));
private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();
/**
+ * Returns the name of the repository.
+ *
+ * @param repository
+ * @return the repository name
+ */
+ private static String getName(Repository repository) {
+ if (repository.isBare()) {
+ return repository.getDirectory().getName();
+ } else {
+ return repository.getDirectory().getParentFile().getName();
+ }
+ }
+
+ /**
* Deletes the Lucene index for the specified repository.
*
* @param repository
@@ -125,6 +145,7 @@ public class LuceneUtils { */
public static boolean index(Repository repository) {
try {
+ String repositoryName = getName(repository);
Set<String> indexedCommits = new TreeSet<String>();
IndexWriter writer = getIndexWriter(repository, true);
// build a quick lookup of tags
@@ -139,6 +160,9 @@ public class LuceneUtils { // walk through each branch
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
for (RefModel branch : branches) {
+ if (excludedBranches.contains(branch.getName())) {
+ continue;
+ }
RevWalk revWalk = new RevWalk(repository);
RevCommit rev = revWalk.parseCommit(branch.getObjectId());
@@ -154,6 +178,8 @@ public class LuceneUtils { Document doc = new Document();
doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
Index.NOT_ANALYZED));
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
@@ -171,7 +197,7 @@ public class LuceneUtils { ext = name.substring(name.lastIndexOf('.') + 1);
}
- if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) {
+ if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
// read the blob content
ObjectId entid = treeWalk.getObjectId(0);
FileMode entmode = treeWalk.getFileMode(0);
@@ -199,6 +225,8 @@ public class LuceneUtils { String head = rev.getId().getName();
if (indexedCommits.add(head)) {
Document doc = createDocument(rev, tags.get(head));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
writer.addDocument(doc);
}
@@ -208,6 +236,8 @@ public class LuceneUtils { String hash = rev.getId().getName();
if (indexedCommits.add(hash)) {
Document doc = createDocument(rev, tags.get(hash));
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
writer.addDocument(doc);
}
}
@@ -221,6 +251,8 @@ public class LuceneUtils { List<IssueModel> issues = IssueUtils.getIssues(repository, null);
for (IssueModel issue : issues) {
Document doc = createDocument(issue);
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
writer.addDocument(doc);
}
}
@@ -344,6 +376,9 @@ public class LuceneUtils { */
private static boolean index(Repository repository, Document doc) {
try {
+ String repositoryName = getName(repository);
+ doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
+ Index.NOT_ANALYZED));
IndexWriter writer = getIndexWriter(repository, false);
writer.addDocument(doc);
resetIndexSearcher(repository);
@@ -363,6 +398,7 @@ public class LuceneUtils { result.author = doc.get(FIELD_AUTHOR);
result.committer = doc.get(FIELD_COMMITTER);
result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
+ result.repository = doc.get(FIELD_REPOSITORY);
result.id = doc.get(FIELD_OBJECT_ID);
if (doc.get(FIELD_LABEL) != null) {
result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
@@ -437,17 +473,27 @@ public class LuceneUtils { }
/**
- * Search the repository for the given text or query
+ * Searches the specified repositories for the given text or query
*
- * @param repository
* @param text
- * @return a list of SearchResults
+ * if the text is null or empty, null is returned
+ * @param maximumHits
+ * the maximum number of hits to collect
+ * @param repositories
+ * a list of repositories to search. If no repositories are
+ * specified, null is returned.
+ * @return a list of SearchResults ordered from the highest to the lowest score
+ *
*/
- public static List<SearchResult> search(Repository repository, String text) {
+ public static List<SearchResult> search(String text, int maximumHits,
+ Repository... repositories) {
if (StringUtils.isEmpty(text)) {
return null;
}
- Set<SearchResult> results = new HashSet<SearchResult>();
+ if (repositories.length == 0) {
+ return null;
+ }
+ Set<SearchResult> results = new LinkedHashSet<SearchResult>();
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
try {
// default search checks summary and content
@@ -461,10 +507,23 @@ public class LuceneUtils { qp.setAllowLeadingWildcard(true);
query.add(qp.parse(text), Occur.SHOULD);
- IndexSearcher searcher = getIndexSearcher(repository);
+ IndexSearcher searcher;
+ if (repositories.length == 1) {
+ // single repository search
+ searcher = getIndexSearcher(repositories[0]);
+ } else {
+ // multiple repository search
+ List<IndexReader> readers = new ArrayList<IndexReader>();
+ for (Repository repository : repositories) {
+ IndexSearcher repositoryIndex = getIndexSearcher(repository);
+ readers.add(repositoryIndex.getIndexReader());
+ }
+ IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]);
+ MultiReader reader = new MultiReader(rdrs);
+ searcher = new IndexSearcher(reader);
+ }
Query rewrittenQuery = searcher.rewrite(query);
-
- TopScoreDocCollector collector = TopScoreDocCollector.create(200, true);
+ TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
searcher.search(rewrittenQuery, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
for (int i = 0; i < hits.length; i++) {
@@ -477,7 +536,7 @@ public class LuceneUtils { e.printStackTrace();
}
return new ArrayList<SearchResult>(results);
- }
+ }
/**
* Close all the index writers and searchers
diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java index c475c466..a5d487d8 100644 --- a/tests/com/gitblit/tests/IssuesTest.java +++ b/tests/com/gitblit/tests/IssuesTest.java @@ -146,7 +146,7 @@ public class IssuesTest { for (IssueModel issue : allIssues) {
LuceneUtils.index(repository, issue, false);
}
- List<SearchResult> hits = LuceneUtils.search(repository, "working");
+ List<SearchResult> hits = LuceneUtils.search("working", 10, repository);
assertTrue(hits.size() > 0);
// reindex an issue
@@ -164,7 +164,7 @@ public class IssuesTest { @Test
public void testLuceneQuery() throws Exception {
Repository repository = GitBlitSuite.getIssuesTestRepository();
- List<SearchResult> hits = LuceneUtils.search(repository, "working");
+ List<SearchResult> hits = LuceneUtils.search("working", 10, repository);
LuceneUtils.close();
repository.close();
assertTrue(hits.size() > 0);
diff --git a/tests/com/gitblit/tests/LuceneUtilsTest.java b/tests/com/gitblit/tests/LuceneUtilsTest.java index 648c8128..a5446218 100644 --- a/tests/com/gitblit/tests/LuceneUtilsTest.java +++ b/tests/com/gitblit/tests/LuceneUtilsTest.java @@ -57,48 +57,48 @@ public class LuceneUtilsTest { public void testQuery() throws Exception {
// 2 occurrences on the master branch
Repository repository = GitBlitSuite.getHelloworldRepository();
- List<SearchResult> results = LuceneUtils.search(repository, "ada");
+ List<SearchResult> results = LuceneUtils.search("ada", 10, repository);
assertEquals(2, results.size());
// author test
- results = LuceneUtils.search(repository, "author: tinogomes");
+ results = LuceneUtils.search("author: tinogomes", 10, repository);
assertEquals(2, results.size());
repository.close();
// blob test
- results = LuceneUtils.search(repository, "type: blob AND \"import std.stdio\"");
+ results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);
assertEquals(1, results.size());
assertEquals("d.D", results.get(0).id);
// 1 occurrence on the gh-pages branch
repository = GitBlitSuite.getTheoreticalPhysicsRepository();
- results = LuceneUtils.search(repository, "\"add the .nojekyll file\"");
+ results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);
assertEquals(1, results.size());
assertEquals("Ondrej Certik", results.get(0).author);
assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);
// tag test
- results = LuceneUtils.search(repository, "\"qft split\"");
+ results = LuceneUtils.search("\"qft split\"", 10, repository);
assertEquals(1, results.size());
assertEquals("Ondrej Certik", results.get(0).author);
assertEquals("57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", results.get(0).id);
assertEquals("[1st-edition]", results.get(0).labels.toString());
- results = LuceneUtils.search(repository, "type:blob AND \"src/intro.rst\"");
+ results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);
assertEquals(4, results.size());
// hash id tests
- results = LuceneUtils.search(repository, "id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5");
+ results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);
assertEquals(1, results.size());
- results = LuceneUtils.search(repository, "id:57c4f26f157*");
+ results = LuceneUtils.search("id:57c4f26f157*", 10, repository);
assertEquals(1, results.size());
repository.close();
// annotated tag test
repository = GitBlitSuite.getBluezGnomeRepository();
- results = LuceneUtils.search(repository, "\"release 1.8\"");
+ results = LuceneUtils.search("\"release 1.8\"", 10, repository);
assertEquals(1, results.size());
assertEquals("[1.8]", results.get(0).labels.toString());
@@ -106,4 +106,13 @@ public class LuceneUtilsTest { LuceneUtils.close();
}
+
+ @Test
+ public void testMultiSearch() throws Exception {
+ List<SearchResult> results = LuceneUtils.search("test", 10,
+ GitBlitSuite.getHelloworldRepository(),
+ GitBlitSuite.getBluezGnomeRepository());
+ LuceneUtils.close();
+ assertEquals(10, results.size());
+ }
}
\ No newline at end of file |