source.dussan.org Git - gitblit.git/commitdiff
Implemented multi-repository Lucene search
author James Moger <james.moger@gitblit.com>
Fri, 17 Feb 2012 23:43:40 +0000 (18:43 -0500)
committer James Moger <james.moger@gitblit.com>
Fri, 17 Feb 2012 23:43:40 +0000 (18:43 -0500)
src/com/gitblit/models/SearchResult.java
src/com/gitblit/utils/LuceneUtils.java
tests/com/gitblit/tests/IssuesTest.java
tests/com/gitblit/tests/LuceneUtilsTest.java

index 44207f0855eb950320449336558063b00325e239..4a03a70b0ebdfabaa4fb7d44eca033dc87c84c03 100644 (file)
@@ -25,6 +25,8 @@ public class SearchResult implements Serializable {
        public String committer;\r
 \r
        public String summary;\r
+       \r
+       public String repository;\r
 \r
        public String id;\r
 \r
@@ -37,6 +39,6 @@ public class SearchResult implements Serializable {
 \r
        @Override\r
        public String toString() {\r
-               return type.name() + ": " + id;\r
+               return  score + " : " + type.name() + " : " + repository + " : " + id;\r
        }\r
 }
\ No newline at end of file
index 4ca72f0ab13c7409d19bdee70287591a9ff71fc1..483537d086bfdf7b25229da9be3420bc31ed4cd1 100644 (file)
@@ -8,7 +8,7 @@ import java.text.ParseException;
 import java.util.ArrayList;\r
 import java.util.Arrays;\r
 import java.util.HashMap;\r
-import java.util.HashSet;\r
+import java.util.LinkedHashSet;\r
 import java.util.List;\r
 import java.util.Map;\r
 import java.util.Set;\r
@@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;\r
 import org.apache.lucene.index.IndexWriterConfig;\r
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;\r
+import org.apache.lucene.index.MultiReader;\r
 import org.apache.lucene.index.Term;\r
 import org.apache.lucene.queryParser.QueryParser;\r
 import org.apache.lucene.search.BooleanClause.Occur;\r
@@ -90,13 +91,32 @@ public class LuceneUtils {
        private static final String FIELD_LABEL = "label";\r
        private static final String FIELD_ATTACHMENT = "attachment";\r
 \r
-       private static Set<String> excludes = new TreeSet<String>(Arrays.asList("7z", "arc", "arj",\r
-                       "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",\r
-                       "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));\r
+       private static Set<String> excludedExtensions = new TreeSet<String>(\r
+                       Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc",\r
+                                       "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", \r
+                                       "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls",\r
+                                       "xlsx", "zip"));\r
+\r
+       // Branches skipped while walking local branches during indexing.\r
+       // NOTE(review): JGit ref names have no leading slash ("refs/heads/..."), so the\r
+       // previous "/refs/heads/gb-issues" entry could never equal branch.getName() --\r
+       // confirm RefModel.getName() returns the plain JGit ref name.\r
+       private static Set<String> excludedBranches = new TreeSet<String>(\r
+                       Arrays.asList("refs/heads/gb-issues"));\r
 \r
        private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();\r
        private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();\r
 \r
+       /**\r
+        * Derives a display name for a repository from its location on disk.\r
+        * For a bare repository the name of the repository directory itself is\r
+        * used; otherwise the name of that directory's parent folder is used.\r
+        * \r
+        * @param repository\r
+        *            the repository to name\r
+        * @return the repository name\r
+        */\r
+       private static String getName(Repository repository) {\r
+               boolean bare = repository.isBare();\r
+               File gitDir = repository.getDirectory();\r
+               File namedFolder = bare ? gitDir : gitDir.getParentFile();\r
+               return namedFolder.getName();\r
+       }\r
+       \r
        /**\r
         * Deletes the Lucene index for the specified repository.\r
         * \r
@@ -125,6 +145,7 @@ public class LuceneUtils {
         */\r
        public static boolean index(Repository repository) {\r
                try {\r
+                       String repositoryName = getName(repository);\r
                        Set<String> indexedCommits = new TreeSet<String>();\r
                        IndexWriter writer = getIndexWriter(repository, true);\r
                        // build a quick lookup of tags\r
@@ -139,6 +160,9 @@ public class LuceneUtils {
                        // walk through each branch\r
                        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
                        for (RefModel branch : branches) {\r
+                               if (excludedBranches.contains(branch.getName())) {\r
+                                       continue;\r
+                               }\r
                                RevWalk revWalk = new RevWalk(repository);\r
                                RevCommit rev = revWalk.parseCommit(branch.getObjectId());\r
 \r
@@ -154,6 +178,8 @@ public class LuceneUtils {
                                        Document doc = new Document();\r
                                        doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,\r
                                                        Index.NOT_ANALYZED_NO_NORMS));\r
+                                       doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+                                                       Index.NOT_ANALYZED));\r
                                        doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,\r
                                                        Index.NOT_ANALYZED));\r
                                        doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
@@ -171,7 +197,7 @@ public class LuceneUtils {
                                                ext = name.substring(name.lastIndexOf('.') + 1);\r
                                        }\r
 \r
-                                       if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) {\r
+                                       if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
                                                // read the blob content\r
                                                ObjectId entid = treeWalk.getObjectId(0);\r
                                                FileMode entmode = treeWalk.getFileMode(0);\r
@@ -199,6 +225,8 @@ public class LuceneUtils {
                                String head = rev.getId().getName();\r
                                if (indexedCommits.add(head)) {\r
                                        Document doc = createDocument(rev, tags.get(head));\r
+                                       doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+                                                       Index.NOT_ANALYZED));\r
                                        writer.addDocument(doc);\r
                                }\r
 \r
@@ -208,6 +236,8 @@ public class LuceneUtils {
                                        String hash = rev.getId().getName();\r
                                        if (indexedCommits.add(hash)) {\r
                                                Document doc = createDocument(rev, tags.get(hash));\r
+                                               doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+                                                               Index.NOT_ANALYZED));\r
                                                writer.addDocument(doc);\r
                                        }\r
                                }\r
@@ -221,6 +251,8 @@ public class LuceneUtils {
                                List<IssueModel> issues = IssueUtils.getIssues(repository, null);\r
                                for (IssueModel issue : issues) {\r
                                        Document doc = createDocument(issue);\r
+                                       doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+                                                       Index.NOT_ANALYZED));\r
                                        writer.addDocument(doc);\r
                                }\r
                        }\r
@@ -344,6 +376,9 @@ public class LuceneUtils {
         */\r
        private static boolean index(Repository repository, Document doc) {\r
                try {\r
+                       String repositoryName = getName(repository);\r
+                       doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,\r
+                                       Index.NOT_ANALYZED));\r
                        IndexWriter writer = getIndexWriter(repository, false);\r
                        writer.addDocument(doc);\r
                        resetIndexSearcher(repository);\r
@@ -363,6 +398,7 @@ public class LuceneUtils {
                result.author = doc.get(FIELD_AUTHOR);\r
                result.committer = doc.get(FIELD_COMMITTER);\r
                result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));\r
+               result.repository = doc.get(FIELD_REPOSITORY);\r
                result.id = doc.get(FIELD_OBJECT_ID);\r
                if (doc.get(FIELD_LABEL) != null) {\r
                        result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));\r
@@ -437,17 +473,27 @@ public class LuceneUtils {
        }\r
 \r
        /**\r
-        * Search the repository for the given text or query\r
+        * Searches the specified repositories for the given text or query\r
         * \r
-        * @param repository\r
         * @param text\r
-        * @return a list of SearchResults\r
+        *            if the text is null or empty, null is returned\r
+        * @param maximumHits\r
+        *            the maximum number of hits to collect\r
+        * @param repositories\r
+        *            a list of repositories to search. if no repositories are\r
+        *            specified null is returned.\r
+        * @return a list of SearchResults in order from highest to the lowest score\r
+        * \r
         */\r
-       public static List<SearchResult> search(Repository repository, String text) {\r
+       public static List<SearchResult> search(String text, int maximumHits,\r
+                       Repository... repositories) {\r
                if (StringUtils.isEmpty(text)) {\r
                        return null;\r
                }\r
-               Set<SearchResult> results = new HashSet<SearchResult>();\r
+               if (repositories.length == 0) {\r
+                       return null;\r
+               }\r
+               Set<SearchResult> results = new LinkedHashSet<SearchResult>();\r
                StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);\r
                try {\r
                        // default search checks summary and content\r
@@ -461,10 +507,23 @@ public class LuceneUtils {
                        qp.setAllowLeadingWildcard(true);\r
                        query.add(qp.parse(text), Occur.SHOULD);\r
 \r
-                       IndexSearcher searcher = getIndexSearcher(repository);\r
+                       IndexSearcher searcher;\r
+                       if (repositories.length == 1) {\r
+                               // single repository search\r
+                               searcher = getIndexSearcher(repositories[0]);\r
+                       } else {\r
+                               // multiple repository search\r
+                               List<IndexReader> readers = new ArrayList<IndexReader>();\r
+                               for (Repository repository : repositories) {\r
+                                       IndexSearcher repositoryIndex = getIndexSearcher(repository);\r
+                                       readers.add(repositoryIndex.getIndexReader());\r
+                               }                       \r
+                               IndexReader [] rdrs = readers.toArray(new IndexReader[readers.size()]);\r
+                               MultiReader reader = new MultiReader(rdrs);                     \r
+                               searcher = new IndexSearcher(reader);\r
+                       }\r
                        Query rewrittenQuery = searcher.rewrite(query);\r
-\r
-                       TopScoreDocCollector collector = TopScoreDocCollector.create(200, true);\r
+                       TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);\r
                        searcher.search(rewrittenQuery, collector);\r
                        ScoreDoc[] hits = collector.topDocs().scoreDocs;\r
                        for (int i = 0; i < hits.length; i++) {\r
@@ -477,7 +536,7 @@ public class LuceneUtils {
                        e.printStackTrace();\r
                }\r
                return new ArrayList<SearchResult>(results);\r
-       }\r
+       }       \r
 \r
        /**\r
         * Close all the index writers and searchers\r
index c475c466188e41e91bd2f9c7a4c44280ee07acc7..a5d487d80f97fe8ea6869e8f4b33fd208b4bd4c0 100644 (file)
@@ -146,7 +146,7 @@ public class IssuesTest {
                for (IssueModel issue : allIssues) {\r
                        LuceneUtils.index(repository, issue, false);\r
                }\r
-               List<SearchResult> hits = LuceneUtils.search(repository, "working");\r
+               List<SearchResult> hits = LuceneUtils.search("working", 10, repository);\r
                assertTrue(hits.size() > 0);\r
                \r
                // reindex an issue\r
@@ -164,7 +164,7 @@ public class IssuesTest {
        @Test\r
        public void testLuceneQuery() throws Exception {\r
                Repository repository = GitBlitSuite.getIssuesTestRepository();\r
-               List<SearchResult> hits = LuceneUtils.search(repository, "working");\r
+               List<SearchResult> hits = LuceneUtils.search("working", 10, repository);\r
                LuceneUtils.close();\r
                repository.close();\r
                assertTrue(hits.size() > 0);\r
index 648c8128710f3b9231b29eddf06d7745d6c20b1e..a54462182977e75e7b8205d86a5a8680fc224b61 100644 (file)
@@ -57,48 +57,48 @@ public class LuceneUtilsTest {
        public void testQuery() throws Exception {\r
                // 2 occurrences on the master branch\r
                Repository repository = GitBlitSuite.getHelloworldRepository();\r
-               List<SearchResult> results = LuceneUtils.search(repository, "ada");\r
+               List<SearchResult> results = LuceneUtils.search("ada", 10, repository);\r
                assertEquals(2, results.size());\r
 \r
                // author test\r
-               results = LuceneUtils.search(repository, "author: tinogomes");\r
+               results = LuceneUtils.search("author: tinogomes", 10, repository);\r
                assertEquals(2, results.size());\r
 \r
                repository.close();\r
                // blob test\r
-               results = LuceneUtils.search(repository, "type: blob AND \"import std.stdio\"");\r
+               results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);\r
                assertEquals(1, results.size());\r
                assertEquals("d.D", results.get(0).id);\r
                \r
                // 1 occurrence on the gh-pages branch\r
                repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
-               results = LuceneUtils.search(repository, "\"add the .nojekyll file\"");\r
+               results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);\r
                assertEquals(1, results.size());\r
                assertEquals("Ondrej Certik", results.get(0).author);\r
                assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);\r
                \r
                // tag test\r
-               results = LuceneUtils.search(repository, "\"qft split\"");\r
+               results = LuceneUtils.search("\"qft split\"", 10, repository);\r
                assertEquals(1, results.size());\r
                assertEquals("Ondrej Certik", results.get(0).author);\r
                assertEquals("57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", results.get(0).id);\r
                assertEquals("[1st-edition]", results.get(0).labels.toString());\r
 \r
-               results = LuceneUtils.search(repository, "type:blob AND \"src/intro.rst\"");\r
+               results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);\r
                assertEquals(4, results.size());\r
                \r
                // hash id tests\r
-               results = LuceneUtils.search(repository, "id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5");\r
+               results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);\r
                assertEquals(1, results.size());\r
 \r
-               results = LuceneUtils.search(repository, "id:57c4f26f157*");\r
+               results = LuceneUtils.search("id:57c4f26f157*", 10, repository);\r
                assertEquals(1, results.size());\r
 \r
                repository.close();\r
                \r
                // annotated tag test\r
                repository = GitBlitSuite.getBluezGnomeRepository();\r
-               results = LuceneUtils.search(repository, "\"release 1.8\"");\r
+               results = LuceneUtils.search("\"release 1.8\"", 10, repository);\r
                assertEquals(1, results.size());\r
                assertEquals("[1.8]", results.get(0).labels.toString());\r
 \r
@@ -106,4 +106,13 @@ public class LuceneUtilsTest {
                \r
                LuceneUtils.close();\r
        }\r
+       \r
+       @Test\r
+       public void testMultiSearch() throws Exception {\r
+               List<SearchResult> results = LuceneUtils.search("test", 10,\r
+                               GitBlitSuite.getHelloworldRepository(), \r
+                               GitBlitSuite.getBluezGnomeRepository());\r
+               LuceneUtils.close();\r
+               assertEquals(10, results.size());\r
+       }\r
 }
\ No newline at end of file