From: James Moger Date: Fri, 17 Feb 2012 23:43:40 +0000 (-0500) Subject: Implemented multi-repository Lucene search X-Git-Tag: v0.9.0~96^2~1 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=36ee2307ea083cfe8994b13a3023bc779b2b23ec;p=gitblit.git Implemented multi-repository Lucene search --- diff --git a/src/com/gitblit/models/SearchResult.java b/src/com/gitblit/models/SearchResult.java index 44207f08..4a03a70b 100644 --- a/src/com/gitblit/models/SearchResult.java +++ b/src/com/gitblit/models/SearchResult.java @@ -25,6 +25,8 @@ public class SearchResult implements Serializable { public String committer; public String summary; + + public String repository; public String id; @@ -37,6 +39,6 @@ public class SearchResult implements Serializable { @Override public String toString() { - return type.name() + ": " + id; + return score + " : " + type.name() + " : " + repository + " : " + id; } } \ No newline at end of file diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java index 4ca72f0a..483537d0 100644 --- a/src/com/gitblit/utils/LuceneUtils.java +++ b/src/com/gitblit/utils/LuceneUtils.java @@ -8,7 +8,7 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; @@ -90,13 +91,32 @@ public class LuceneUtils { private static final String FIELD_LABEL = "label"; private static final String FIELD_ATTACHMENT = "attachment"; - private static Set excludes = new TreeSet(Arrays.asList("7z", "arc", "arj", - "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", - "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); + private static Set excludedExtensions = new TreeSet( + Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc", + "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", + "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", + "xlsx", "zip")); + + private static Set excludedBranches = new TreeSet( + Arrays.asList("/refs/heads/gb-issues")); private static final Map SEARCHERS = new ConcurrentHashMap(); private static final Map WRITERS = new ConcurrentHashMap(); + /** + * Returns the name of the repository. + * + * @param repository + * @return the repository name + */ + private static String getName(Repository repository) { + if (repository.isBare()) { + return repository.getDirectory().getName(); + } else { + return repository.getDirectory().getParentFile().getName(); + } + } + /** * Deletes the Lucene index for the specified repository. * @@ -125,6 +145,7 @@ public class LuceneUtils { */ public static boolean index(Repository repository) { try { + String repositoryName = getName(repository); Set indexedCommits = new TreeSet(); IndexWriter writer = getIndexWriter(repository, true); // build a quick lookup of tags @@ -139,6 +160,9 @@ public class LuceneUtils { // walk through each branch List branches = JGitUtils.getLocalBranches(repository, true, -1); for (RefModel branch : branches) { + if (excludedBranches.contains(branch.getName())) { + continue; + } RevWalk revWalk = new RevWalk(repository); RevCommit rev = revWalk.parseCommit(branch.getObjectId()); @@ -154,6 +178,8 @@ public class LuceneUtils { Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); @@ -171,7 +197,7 @@ public class LuceneUtils { ext = name.substring(name.lastIndexOf('.') + 1); } - if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) { + if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { // read the blob content ObjectId entid = treeWalk.getObjectId(0); FileMode entmode = treeWalk.getFileMode(0); @@ -199,6 +225,8 @@ public class LuceneUtils { String head = rev.getId().getName(); if (indexedCommits.add(head)) { Document doc = createDocument(rev, tags.get(head)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); writer.addDocument(doc); } @@ -208,6 +236,8 @@ public class LuceneUtils { String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); writer.addDocument(doc); } } @@ -221,6 +251,8 @@ public class LuceneUtils { List issues = IssueUtils.getIssues(repository, null); for (IssueModel issue : issues) { Document doc = createDocument(issue); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); writer.addDocument(doc); } } @@ -344,6 +376,9 @@ public class LuceneUtils { */ private static boolean index(Repository repository, Document doc) { try { + String repositoryName = getName(repository); + doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, + Index.NOT_ANALYZED)); IndexWriter writer = getIndexWriter(repository, false); writer.addDocument(doc); resetIndexSearcher(repository); @@ -363,6 +398,7 @@ public class LuceneUtils { result.author = doc.get(FIELD_AUTHOR); result.committer = doc.get(FIELD_COMMITTER); result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); + result.repository = doc.get(FIELD_REPOSITORY); result.id = doc.get(FIELD_OBJECT_ID); if (doc.get(FIELD_LABEL) != null) { result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL)); @@ -437,17 +473,27 @@ public class LuceneUtils { } /** - * Search the repository for the given text or query + * Searches the specified repositories for the given text or query * - * @param repository * @param text - * @return a list of SearchResults + * if the text is null or empty, null is returned + * @param maximumHits + * the maximum number of hits to collect + * @param repositories + * a list of repositories to search. if no repositories are + * specified null is returned. + * @return a list of SearchResults in order from highest to the lowest score + * */ - public static List search(Repository repository, String text) { + public static List search(String text, int maximumHits, + Repository... repositories) { if (StringUtils.isEmpty(text)) { return null; } - Set results = new HashSet(); + if (repositories.length == 0) { + return null; + } + Set results = new LinkedHashSet(); StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); try { // default search checks summary and content @@ -461,10 +507,23 @@ public class LuceneUtils { qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); - IndexSearcher searcher = getIndexSearcher(repository); + IndexSearcher searcher; + if (repositories.length == 1) { + // single repository search + searcher = getIndexSearcher(repositories[0]); + } else { + // multiple repository search + List readers = new ArrayList(); + for (Repository repository : repositories) { + IndexSearcher repositoryIndex = getIndexSearcher(repository); + readers.add(repositoryIndex.getIndexReader()); + } + IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]); + MultiReader reader = new MultiReader(rdrs); + searcher = new IndexSearcher(reader); + } Query rewrittenQuery = searcher.rewrite(query); - - TopScoreDocCollector collector = TopScoreDocCollector.create(200, true); + TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true); searcher.search(rewrittenQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (int i = 0; i < hits.length; i++) { @@ -477,7 +536,7 @@ public class LuceneUtils { e.printStackTrace(); } return new ArrayList(results); - } + } /** * Close all the index writers and searchers diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java index c475c466..a5d487d8 100644 --- a/tests/com/gitblit/tests/IssuesTest.java +++ b/tests/com/gitblit/tests/IssuesTest.java @@ -146,7 +146,7 @@ public class IssuesTest { for (IssueModel issue : allIssues) { LuceneUtils.index(repository, issue, false); } - List hits = LuceneUtils.search(repository, "working"); + List hits = LuceneUtils.search("working", 10, repository); assertTrue(hits.size() > 0); // reindex an issue @@ -164,7 +164,7 @@ public class IssuesTest { @Test public void testLuceneQuery() throws Exception { Repository repository = GitBlitSuite.getIssuesTestRepository(); - List hits = LuceneUtils.search(repository, "working"); + List hits = LuceneUtils.search("working", 10, repository); LuceneUtils.close(); repository.close(); assertTrue(hits.size() > 0); diff --git a/tests/com/gitblit/tests/LuceneUtilsTest.java b/tests/com/gitblit/tests/LuceneUtilsTest.java index 648c8128..a5446218 100644 --- a/tests/com/gitblit/tests/LuceneUtilsTest.java +++ b/tests/com/gitblit/tests/LuceneUtilsTest.java @@ -57,48 +57,48 @@ public class LuceneUtilsTest { public void testQuery() throws Exception { // 2 occurrences on the master branch Repository repository = GitBlitSuite.getHelloworldRepository(); - List results = LuceneUtils.search(repository, "ada"); + List results = LuceneUtils.search("ada", 10, repository); assertEquals(2, results.size()); // author test - results = LuceneUtils.search(repository, "author: tinogomes"); + results = LuceneUtils.search("author: tinogomes", 10, repository); assertEquals(2, results.size()); repository.close(); // blob test - results = LuceneUtils.search(repository, "type: blob AND \"import std.stdio\""); + results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository); assertEquals(1, results.size()); assertEquals("d.D", results.get(0).id); // 1 occurrence on the gh-pages branch repository = GitBlitSuite.getTheoreticalPhysicsRepository(); - results = LuceneUtils.search(repository, "\"add the .nojekyll file\""); + results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository); assertEquals(1, results.size()); assertEquals("Ondrej Certik", results.get(0).author); assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id); // tag test - results = LuceneUtils.search(repository, "\"qft split\""); + results = LuceneUtils.search("\"qft split\"", 10, repository); assertEquals(1, results.size()); assertEquals("Ondrej Certik", results.get(0).author); assertEquals("57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", results.get(0).id); assertEquals("[1st-edition]", results.get(0).labels.toString()); - results = LuceneUtils.search(repository, "type:blob AND \"src/intro.rst\""); + results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository); assertEquals(4, results.size()); // hash id tests - results = LuceneUtils.search(repository, "id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5"); + results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository); assertEquals(1, results.size()); - results = LuceneUtils.search(repository, "id:57c4f26f157*"); + results = LuceneUtils.search("id:57c4f26f157*", 10, repository); assertEquals(1, results.size()); repository.close(); // annotated tag test repository = GitBlitSuite.getBluezGnomeRepository(); - results = LuceneUtils.search(repository, "\"release 1.8\""); + results = LuceneUtils.search("\"release 1.8\"", 10, repository); assertEquals(1, results.size()); assertEquals("[1.8]", results.get(0).labels.toString()); @@ -106,4 +106,13 @@ public class LuceneUtilsTest { LuceneUtils.close(); } + + @Test + public void testMultiSearch() throws Exception { + List results = LuceneUtils.search("test", 10, + GitBlitSuite.getHelloworldRepository(), + GitBlitSuite.getBluezGnomeRepository()); + LuceneUtils.close(); + assertEquals(10, results.size()); + } } \ No newline at end of file