source.dussan.org Git - gitblit.git / commitdiff
Fairly functional Lucene backend for commits, blobs, and issues
author     James Moger <james.moger@gitblit.com>
           Wed, 25 Jan 2012 03:49:53 +0000 (22:49 -0500)
committer  James Moger <james.moger@gitblit.com>
           Wed, 25 Jan 2012 03:49:53 +0000 (22:49 -0500)
src/com/gitblit/build/Build.java
src/com/gitblit/models/IssueModel.java
src/com/gitblit/models/SearchResult.java [new file with mode: 0644]
src/com/gitblit/utils/IssueUtils.java
src/com/gitblit/utils/LuceneUtils.java [new file with mode: 0644]
tests/com/gitblit/tests/GitBlitSuite.java
tests/com/gitblit/tests/IssuesTest.java
tests/com/gitblit/tests/LuceneUtilsTest.java [new file with mode: 0644]
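
This commit adds LuceneUtils.index(Repository), LuceneUtils.search(Repository, String), and LuceneUtils.close(), plus the SearchResult model returned to callers. Below is a minimal sketch of driving that API end to end; the FileRepositoryBuilder call and the git/helloworld.git path are illustrative assumptions, not part of this commit.

import java.io.File;
import java.util.List;

import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.storage.file.FileRepositoryBuilder;

import com.gitblit.models.SearchResult;
import com.gitblit.utils.LuceneUtils;

public class LuceneBackendSketch {

	public static void main(String[] args) throws Exception {
		// open a bare repository with JGit (illustrative; any Repository instance works)
		Repository repository = new FileRepositoryBuilder()
				.setGitDir(new File("git/helloworld.git")).build();
		try {
			// build (or rebuild) the index: branch commits, blobs, and gb-issues issues
			LuceneUtils.index(repository);

			// default queries hit the summary and content fields; field-qualified
			// syntax such as "type:blob AND readme" or "id:57c4f26f157*" also works
			List<SearchResult> hits = LuceneUtils.search(repository, "ada");
			for (SearchResult hit : hits) {
				System.out.println(hit.score + " " + hit.type + " " + hit.id);
			}
		} finally {
			// LuceneUtils caches writers and searchers per repository directory
			LuceneUtils.close();
			repository.close();
		}
	}
}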

diff --git a/src/com/gitblit/build/Build.java b/src/com/gitblit/build/Build.java
index 682adcad954c1d9c432c085b98b8a632ffe1f504..b539cac6e4b42123b6131a7894e04a9b28679415 100644 (file)
@@ -90,6 +90,7 @@ public class Build {
                downloadFromApache(MavenObject.GSON, BuildType.RUNTIME);\r
                downloadFromApache(MavenObject.MAIL, BuildType.RUNTIME);\r
                downloadFromApache(MavenObject.GROOVY, BuildType.RUNTIME);\r
+               downloadFromApache(MavenObject.LUCENE, BuildType.RUNTIME);\r
 \r
                downloadFromEclipse(MavenObject.JGIT, BuildType.RUNTIME);\r
                downloadFromEclipse(MavenObject.JGIT_HTTP, BuildType.RUNTIME);\r
@@ -116,6 +117,7 @@ public class Build {
                downloadFromApache(MavenObject.GSON, BuildType.COMPILETIME);\r
                downloadFromApache(MavenObject.MAIL, BuildType.COMPILETIME);\r
                downloadFromApache(MavenObject.GROOVY, BuildType.COMPILETIME);\r
+               downloadFromApache(MavenObject.LUCENE, BuildType.COMPILETIME);\r
                \r
                downloadFromEclipse(MavenObject.JGIT, BuildType.COMPILETIME);\r
                downloadFromEclipse(MavenObject.JGIT_HTTP, BuildType.COMPILETIME);\r
@@ -501,6 +503,10 @@ public class Build {
                                "1.8.5", 6143000, 2290000, 4608000, "3be3914c49ca7d8e8afb29a7772a74c30a1f1b28",\r
                                "1435cc8c90e3a91e5fee7bb53e83aad96e93aeb7", "5a214b52286523f9e2a4b5fed526506c763fa6f1");\r
 \r
+               public static final MavenObject LUCENE = new MavenObject("lucene", "org/apache/lucene", "lucene-core",\r
+                               "3.5.0", 1470000, 1347000, 3608000, "90ff0731fafb05c01fee4f2247140d56e9c30a3b",\r
+                               "0757113199f9c8c18c678c96d61c2c4160b9baa6", "19f8e80e5e7f6ec88a41d4f63495994692e31bf1");\r
+\r
                public final String name;\r
                public final String group;\r
                public final String artifact;\r
diff --git a/src/com/gitblit/models/IssueModel.java b/src/com/gitblit/models/IssueModel.java
index 19241c291815d05b56d2af3fba2e7f43477d7bc0..246b29bf05eb49d17baf0d729b17741c5ff8ace8 100644 (file)
@@ -106,6 +106,16 @@ public class IssueModel implements Serializable, Comparable<IssueModel> {
                return attachment;\r
        }\r
 \r
+       public List<Attachment> getAttachments() {\r
+               List<Attachment> list = new ArrayList<Attachment>();\r
+               for (Change change : changes) {\r
+                       if (change.hasAttachments()) {\r
+                               list.addAll(change.attachments);\r
+                       }\r
+               }\r
+               return list;\r
+       }\r
+\r
        public void applyChange(Change change) {\r
                changes.add(change);\r
 \r
diff --git a/src/com/gitblit/models/SearchResult.java b/src/com/gitblit/models/SearchResult.java
new file mode 100644 (file)
index 0000000..44207f0
--- /dev/null
@@ -0,0 +1,42 @@
+package com.gitblit.models;\r
+\r
+import java.io.Serializable;\r
+import java.util.Date;\r
+import java.util.List;\r
+\r
+import com.gitblit.utils.LuceneUtils.ObjectType;\r
+\r
+/**\r
+ * Model class that represents a search result.\r
+ * \r
+ * @author James Moger\r
+ * \r
+ */\r
+public class SearchResult implements Serializable {\r
+\r
+       private static final long serialVersionUID = 1L;\r
+\r
+       public float score;\r
+\r
+       public Date date;\r
+\r
+       public String author;\r
+\r
+       public String committer;\r
+\r
+       public String summary;\r
+\r
+       public String id;\r
+\r
+       public List<String> labels;\r
+\r
+       public ObjectType type;\r
+\r
+       public SearchResult() {\r
+       }\r
+\r
+       @Override\r
+       public String toString() {\r
+               return type.name() + ": " + id;\r
+       }\r
+}
\ No newline at end of file
diff --git a/src/com/gitblit/utils/IssueUtils.java b/src/com/gitblit/utils/IssueUtils.java
index d0a019925d5582e733469eef95ef8366ba9b3950..eb3b347b17acd3e7aaa0bba4e1cc5339ea58d1a9 100644 (file)
@@ -740,7 +740,7 @@ public class IssueUtils {
         * @param issueId\r
         * @return the root path of the issue content on the gb-issues branch\r
         */\r
-       private static String getIssuePath(String issueId) {\r
+       static String getIssuePath(String issueId) {\r
                return issueId.substring(0, 2) + "/" + issueId.substring(2);\r
        }\r
 \r
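
The widened visibility makes getIssuePath available to the rest of the com.gitblit.utils package. The mapping it performs simply shards the issue id after its first two characters, shown here with a hypothetical id:

// inside the com.gitblit.utils package (getIssuePath is package-private); the id is hypothetical
String issueId = "fee6e42c9cdc2b43cc5557f4d80a4011562e3933";
String path = IssueUtils.getIssuePath(issueId);   // -> "fe/e6e42c9cdc2b43cc5557f4d80a4011562e3933"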
diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java
new file mode 100644 (file)
index 0000000..4ca72f0
--- /dev/null
@@ -0,0 +1,506 @@
+package com.gitblit.utils;\r
+\r
+import java.io.ByteArrayOutputStream;\r
+import java.io.File;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.text.ParseException;\r
+import java.util.ArrayList;\r
+import java.util.Arrays;\r
+import java.util.HashMap;\r
+import java.util.HashSet;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.Set;\r
+import java.util.TreeSet;\r
+import java.util.concurrent.ConcurrentHashMap;\r
+\r
+import org.apache.lucene.analysis.standard.StandardAnalyzer;\r
+import org.apache.lucene.document.DateTools;\r
+import org.apache.lucene.document.DateTools.Resolution;\r
+import org.apache.lucene.document.Document;\r
+import org.apache.lucene.document.Field;\r
+import org.apache.lucene.document.Field.Index;\r
+import org.apache.lucene.document.Field.Store;\r
+import org.apache.lucene.index.IndexReader;\r
+import org.apache.lucene.index.IndexWriter;\r
+import org.apache.lucene.index.IndexWriterConfig;\r
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;\r
+import org.apache.lucene.index.Term;\r
+import org.apache.lucene.queryParser.QueryParser;\r
+import org.apache.lucene.search.BooleanClause.Occur;\r
+import org.apache.lucene.search.BooleanQuery;\r
+import org.apache.lucene.search.IndexSearcher;\r
+import org.apache.lucene.search.Query;\r
+import org.apache.lucene.search.ScoreDoc;\r
+import org.apache.lucene.search.TopScoreDocCollector;\r
+import org.apache.lucene.store.Directory;\r
+import org.apache.lucene.store.FSDirectory;\r
+import org.apache.lucene.util.Version;\r
+import org.eclipse.jgit.lib.Constants;\r
+import org.eclipse.jgit.lib.FileMode;\r
+import org.eclipse.jgit.lib.ObjectId;\r
+import org.eclipse.jgit.lib.ObjectLoader;\r
+import org.eclipse.jgit.lib.Repository;\r
+import org.eclipse.jgit.revwalk.RevCommit;\r
+import org.eclipse.jgit.revwalk.RevObject;\r
+import org.eclipse.jgit.revwalk.RevWalk;\r
+import org.eclipse.jgit.treewalk.TreeWalk;\r
+\r
+import com.gitblit.models.IssueModel;\r
+import com.gitblit.models.IssueModel.Attachment;\r
+import com.gitblit.models.RefModel;\r
+import com.gitblit.models.SearchResult;\r
+\r
+/**\r
+ * A collection of utility methods for indexing and querying a Lucene repository\r
+ * index.\r
+ * \r
+ * @author James Moger\r
+ * \r
+ */\r
+public class LuceneUtils {\r
+\r
+       /**\r
+        * The types of objects that can be indexed and queried.\r
+        */\r
+       public static enum ObjectType {\r
+               commit, blob, issue;\r
+\r
+               static ObjectType fromName(String name) {\r
+                       for (ObjectType value : values()) {\r
+                               if (value.name().equals(name)) {\r
+                                       return value;\r
+                               }\r
+                       }\r
+                       return null;\r
+               }\r
+       }\r
+\r
+       private static final Version LUCENE_VERSION = Version.LUCENE_35;\r
+\r
+       private static final String FIELD_OBJECT_TYPE = "type";\r
+       private static final String FIELD_OBJECT_ID = "id";\r
+       private static final String FIELD_REPOSITORY = "repository";\r
+       private static final String FIELD_SUMMARY = "summary";\r
+       private static final String FIELD_CONTENT = "content";\r
+       private static final String FIELD_AUTHOR = "author";\r
+       private static final String FIELD_COMMITTER = "committer";\r
+       private static final String FIELD_DATE = "date";\r
+       private static final String FIELD_LABEL = "label";\r
+       private static final String FIELD_ATTACHMENT = "attachment";\r
+\r
+       private static Set<String> excludes = new TreeSet<String>(Arrays.asList("7z", "arc", "arj",\r
+                       "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh",\r
+                       "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));\r
+\r
+       private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();\r
+       private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();\r
+\r
+       /**\r
+        * Deletes the Lucene index for the specified repository.\r
+        * \r
+        * @param repository\r
+        * @return true, if successful\r
+        */\r
+       public static boolean deleteIndex(Repository repository) {\r
+               try {\r
+                       File luceneIndex = new File(repository.getDirectory(), "lucene");\r
+                       if (luceneIndex.exists()) {\r
+                               org.eclipse.jgit.util.FileUtils.delete(luceneIndex,\r
+                                               org.eclipse.jgit.util.FileUtils.RECURSIVE);\r
+                       }\r
+                       return true;\r
+               } catch (IOException e) {\r
+                       throw new RuntimeException(e);\r
+               }\r
+       }\r
+\r
+       /**\r
+        * This completely indexes the repository and will destroy any existing\r
+        * index.\r
+        * \r
+        * @param repository\r
+        * @return true if the indexing has succeeded\r
+        */\r
+       public static boolean index(Repository repository) {\r
+               try {\r
+                       Set<String> indexedCommits = new TreeSet<String>();\r
+                       IndexWriter writer = getIndexWriter(repository, true);\r
+                       // build a quick lookup of tags\r
+                       Map<String, List<String>> tags = new HashMap<String, List<String>>();\r
+                       for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {\r
+                               if (!tags.containsKey(tag.getReferencedObjectId().getName())) {\r
+                                       tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());\r
+                               }\r
+                               tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);\r
+                       }\r
+\r
+                       // walk through each branch\r
+                       List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);\r
+                       for (RefModel branch : branches) {\r
+                               RevWalk revWalk = new RevWalk(repository);\r
+                               RevCommit rev = revWalk.parseCommit(branch.getObjectId());\r
+\r
+                               // index the blob contents of the tree\r
+                               ByteArrayOutputStream os = new ByteArrayOutputStream();\r
+                               byte[] tmp = new byte[32767];\r
+                               TreeWalk treeWalk = new TreeWalk(repository);\r
+                               treeWalk.addTree(rev.getTree());\r
+                               treeWalk.setRecursive(true);\r
+                               String revDate = DateTools.timeToString(rev.getCommitTime() * 1000L,\r
+                                               Resolution.MINUTE);\r
+                               while (treeWalk.next()) {\r
+                                       Document doc = new Document();\r
+                                       doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,\r
+                                                       Index.NOT_ANALYZED_NO_NORMS));\r
+                                       doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,\r
+                                                       Index.NOT_ANALYZED));\r
+                                       doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
+                                       doc.add(new Field(FIELD_AUTHOR, rev.getAuthorIdent().getName(), Store.YES,\r
+                                                       Index.NOT_ANALYZED_NO_NORMS));\r
+                                       doc.add(new Field(FIELD_COMMITTER, rev.getCommitterIdent().getName(),\r
+                                                       Store.YES, Index.NOT_ANALYZED_NO_NORMS));\r
+                                       doc.add(new Field(FIELD_LABEL, branch.getName(), Store.YES, Index.ANALYZED));\r
+\r
+                                       // determine extension to compare to the extension\r
+                                       // blacklist\r
+                                       String ext = null;\r
+                                       String name = treeWalk.getPathString().toLowerCase();\r
+                                       if (name.indexOf('.') > -1) {\r
+                                               ext = name.substring(name.lastIndexOf('.') + 1);\r
+                                       }\r
+\r
+                                       if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) {\r
+                                               // read the blob content\r
+                                               ObjectId entid = treeWalk.getObjectId(0);\r
+                                               FileMode entmode = treeWalk.getFileMode(0);\r
+                                               RevObject ro = revWalk.lookupAny(entid, entmode.getObjectType());\r
+                                               revWalk.parseBody(ro);\r
+                                               ObjectLoader ldr = repository.open(ro.getId(), Constants.OBJ_BLOB);\r
+                                               InputStream in = ldr.openStream();\r
+                                               os.reset();\r
+                                               int n = 0;\r
+                                               while ((n = in.read(tmp)) > 0) {\r
+                                                       os.write(tmp, 0, n);\r
+                                               }\r
+                                               in.close();\r
+                                               byte[] content = os.toByteArray();\r
+                                               String str = new String(content, "UTF-8");\r
+                                               doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));\r
+                                               writer.addDocument(doc);\r
+                                       }\r
+                               }\r
+\r
+                               os.close();\r
+                               treeWalk.release();\r
+\r
+                               // index the head commit object\r
+                               String head = rev.getId().getName();\r
+                               if (indexedCommits.add(head)) {\r
+                                       Document doc = createDocument(rev, tags.get(head));\r
+                                       writer.addDocument(doc);\r
+                               }\r
+\r
+                               // traverse the log and index the previous commit objects\r
+                               revWalk.markStart(rev);\r
+                               while ((rev = revWalk.next()) != null) {\r
+                                       String hash = rev.getId().getName();\r
+                                       if (indexedCommits.add(hash)) {\r
+                                               Document doc = createDocument(rev, tags.get(hash));\r
+                                               writer.addDocument(doc);\r
+                                       }\r
+                               }\r
+\r
+                               // finished\r
+                               revWalk.dispose();\r
+                       }\r
+\r
+                       // if this repository has a gb-issues branch, index all of its issues\r
+                       if (IssueUtils.getIssuesBranch(repository) != null) {\r
+                               List<IssueModel> issues = IssueUtils.getIssues(repository, null);\r
+                               for (IssueModel issue : issues) {\r
+                                       Document doc = createDocument(issue);\r
+                                       writer.addDocument(doc);\r
+                               }\r
+                       }\r
+\r
+                       // commit all changes and reset the searcher\r
+                       resetIndexSearcher(repository);\r
+                       writer.commit();\r
+                       return true;\r
+               } catch (Exception e) {\r
+                       e.printStackTrace();\r
+               }\r
+               return false;\r
+       }\r
+\r
+       /**\r
+        * Incrementally update the index with the specified commit for the\r
+        * repository.\r
+        * \r
+        * @param repository\r
+        * @param commit\r
+        * @return true, if successful\r
+        */\r
+       public static boolean index(Repository repository, RevCommit commit) {\r
+               try {\r
+                       Document doc = createDocument(commit, null);\r
+                       return index(repository, doc);\r
+               } catch (Exception e) {\r
+                       e.printStackTrace();\r
+               }\r
+               return false;\r
+       }\r
+\r
+       /**\r
+        * Incrementally update the index with the specified issue for the\r
+        * repository.\r
+        * \r
+        * @param repository\r
+        * @param issue\r
+        * @param reindex\r
+        *            if true, the old index entry for this issue will be deleted.\r
+        *            This is only appropriate for pre-existing/indexed issues.\r
+        * @return true, if successful\r
+        */\r
+       public static boolean index(Repository repository, IssueModel issue, boolean reindex) {\r
+               try {\r
+                       Document doc = createDocument(issue);\r
+                       if (reindex) {\r
+                               // delete the old issue from the index, if exists\r
+                               IndexWriter writer = getIndexWriter(repository, false);\r
+                               writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()),\r
+                                               new Term(FIELD_OBJECT_ID, String.valueOf(issue.id)));\r
+                               writer.commit();\r
+                       }\r
+                       return index(repository, doc);\r
+               } catch (Exception e) {\r
+                       e.printStackTrace();\r
+               }\r
+               return false;\r
+       }\r
+\r
+       /**\r
+        * Creates a Lucene document from an issue.\r
+        * \r
+        * @param issue\r
+        * @return a Lucene document\r
+        */\r
+       private static Document createDocument(IssueModel issue) {\r
+               Document doc = new Document();\r
+               doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,\r
+                               Field.Index.NOT_ANALYZED_NO_NORMS));\r
+               doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED));\r
+               doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),\r
+                               Store.YES, Field.Index.NO));\r
+               doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS));\r
+               List<String> attachments = new ArrayList<String>();\r
+               for (Attachment attachment : issue.getAttachments()) {\r
+                       attachments.add(attachment.name.toLowerCase());\r
+               }\r
+               doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,\r
+                               Index.ANALYZED));\r
+               doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));\r
+               doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.NO, Index.ANALYZED));\r
+               doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,\r
+                               Index.ANALYZED));\r
+               return doc;\r
+       }\r
+\r
+       /**\r
+        * Creates a Lucene document for a commit\r
+        * \r
+        * @param commit\r
+        * @param tags\r
+        * @return a Lucene document\r
+        */\r
+       private static Document createDocument(RevCommit commit, List<String> tags) {\r
+               Document doc = new Document();\r
+               doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,\r
+                               Index.NOT_ANALYZED_NO_NORMS));\r
+               doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.NOT_ANALYZED));\r
+               doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,\r
+                               Resolution.MINUTE), Store.YES, Index.NO));\r
+               doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,\r
+                               Index.NOT_ANALYZED_NO_NORMS));\r
+               doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));\r
+               doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.NO, Index.ANALYZED));\r
+               if (!ArrayUtils.isEmpty(tags)) {\r
+                       doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(tags), Store.YES,\r
+                                       Index.ANALYZED));\r
+               }\r
+               return doc;\r
+       }\r
+\r
+       /**\r
+        * Incrementally index an object for the repository.\r
+        * \r
+        * @param repository\r
+        * @param doc\r
+        * @return true, if successful\r
+        */\r
+       private static boolean index(Repository repository, Document doc) {\r
+               try {\r
+                       IndexWriter writer = getIndexWriter(repository, false);\r
+                       writer.addDocument(doc);\r
+                       resetIndexSearcher(repository);\r
+                       writer.commit();\r
+                       return true;\r
+               } catch (Exception e) {\r
+                       e.printStackTrace();\r
+               }\r
+               return false;\r
+       }\r
+\r
+       private static SearchResult createSearchResult(Document doc, float score) throws ParseException {\r
+               SearchResult result = new SearchResult();\r
+               result.score = score;\r
+               result.date = DateTools.stringToDate(doc.get(FIELD_DATE));\r
+               result.summary = doc.get(FIELD_SUMMARY);\r
+               result.author = doc.get(FIELD_AUTHOR);\r
+               result.committer = doc.get(FIELD_COMMITTER);\r
+               result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));\r
+               result.id = doc.get(FIELD_OBJECT_ID);\r
+               if (doc.get(FIELD_LABEL) != null) {\r
+                       result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));\r
+               }\r
+               return result;\r
+       }\r
+\r
+       private static void resetIndexSearcher(Repository repository) throws IOException {\r
+               IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());\r
+               if (searcher != null) {\r
+                       SEARCHERS.remove(repository.getDirectory());\r
+                       searcher.close();\r
+               }\r
+       }\r
+\r
+       /**\r
+        * Gets an index searcher for the repository.\r
+        * \r
+        * @param repository\r
+        * @return an IndexSearcher\r
+        * @throws IOException\r
+        */\r
+       private static IndexSearcher getIndexSearcher(Repository repository) throws IOException {\r
+               IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());\r
+               if (searcher == null) {\r
+                       IndexWriter writer = getIndexWriter(repository, false);\r
+                       searcher = new IndexSearcher(IndexReader.open(writer, true));\r
+                       SEARCHERS.put(repository.getDirectory(), searcher);\r
+               }\r
+               return searcher;\r
+       }\r
+\r
+       /**\r
+        * Gets an index writer for the repository. The index will be created if it\r
+        * does not already exist or if forceCreate is specified.\r
+        * \r
+        * @param repository\r
+        * @param forceCreate\r
+        * @return an IndexWriter\r
+        * @throws IOException\r
+        */\r
+       private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate)\r
+                       throws IOException {\r
+               IndexWriter indexWriter = WRITERS.get(repository.getDirectory());\r
+               File indexFolder = new File(repository.getDirectory(), "lucene");\r
+               Directory directory = FSDirectory.open(indexFolder);\r
+               if (forceCreate || !indexFolder.exists()) {\r
+                       // if the writer is going to blow away the existing index and create\r
+                       // a new one then it should not be cached. instead, close any open\r
+                       // writer, create a new one, and return.\r
+                       if (indexWriter != null) {\r
+                               indexWriter.close();\r
+                               indexWriter = null;\r
+                               WRITERS.remove(repository.getDirectory());\r
+                       }\r
+                       indexFolder.mkdirs();\r
+                       IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(\r
+                                       LUCENE_VERSION));\r
+                       config.setOpenMode(OpenMode.CREATE);\r
+                       IndexWriter writer = new IndexWriter(directory, config);\r
+                       writer.close();\r
+               }\r
+\r
+               if (indexWriter == null) {\r
+                       IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(\r
+                                       LUCENE_VERSION));\r
+                       config.setOpenMode(OpenMode.APPEND);\r
+                       indexWriter = new IndexWriter(directory, config);\r
+                       WRITERS.put(repository.getDirectory(), indexWriter);\r
+               }\r
+               return indexWriter;\r
+       }\r
+\r
+       /**\r
+        * Search the repository for the given text or query\r
+        * \r
+        * @param repository\r
+        * @param text\r
+        * @return a list of SearchResults\r
+        */\r
+       public static List<SearchResult> search(Repository repository, String text) {\r
+               if (StringUtils.isEmpty(text)) {\r
+                       return null;\r
+               }\r
+               Set<SearchResult> results = new HashSet<SearchResult>();\r
+               StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);\r
+               try {\r
+                       // default search checks summary and content\r
+                       BooleanQuery query = new BooleanQuery();\r
+                       QueryParser qp;\r
+                       qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);\r
+                       qp.setAllowLeadingWildcard(true);\r
+                       query.add(qp.parse(text), Occur.SHOULD);\r
+\r
+                       qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);\r
+                       qp.setAllowLeadingWildcard(true);\r
+                       query.add(qp.parse(text), Occur.SHOULD);\r
+\r
+                       IndexSearcher searcher = getIndexSearcher(repository);\r
+                       Query rewrittenQuery = searcher.rewrite(query);\r
+\r
+                       TopScoreDocCollector collector = TopScoreDocCollector.create(200, true);\r
+                       searcher.search(rewrittenQuery, collector);\r
+                       ScoreDoc[] hits = collector.topDocs().scoreDocs;\r
+                       for (int i = 0; i < hits.length; i++) {\r
+                               int docId = hits[i].doc;\r
+                               Document doc = searcher.doc(docId);\r
+                               SearchResult result = createSearchResult(doc, hits[i].score);\r
+                               results.add(result);\r
+                       }\r
+               } catch (Exception e) {\r
+                       e.printStackTrace();\r
+               }\r
+               return new ArrayList<SearchResult>(results);\r
+       }\r
+\r
+       /**\r
+        * Close all the index writers and searchers\r
+        */\r
+       public static void close() {\r
+               // close writers\r
+               for (File file : WRITERS.keySet()) {\r
+                       try {\r
+                               WRITERS.get(file).close(true);\r
+                       } catch (Throwable t) {\r
+                               t.printStackTrace();\r
+                       }\r
+               }\r
+               WRITERS.clear();\r
+\r
+               // close searchers\r
+               for (File file : SEARCHERS.keySet()) {\r
+                       try {\r
+                               SEARCHERS.get(file).close();\r
+                       } catch (Throwable t) {\r
+                               t.printStackTrace();\r
+                       }\r
+               }\r
+               SEARCHERS.clear();\r
+       }\r
+}\r
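
Beyond the full index(Repository) pass, the class supports incremental updates through index(Repository, RevCommit) and index(Repository, IssueModel, boolean). A sketch of how those entry points might be called, assuming a commit resolved through a RevWalk and an issue loaded with IssueUtils.getIssue; the hook method names are hypothetical and the wiring into receive hooks is not part of this commit.

import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;

import com.gitblit.models.IssueModel;
import com.gitblit.utils.IssueUtils;
import com.gitblit.utils.LuceneUtils;

public class IncrementalIndexSketch {

	/** Add a newly pushed commit to an existing index (hypothetical hook). */
	static void onCommitPushed(Repository repository, String hash) throws Exception {
		RevWalk walk = new RevWalk(repository);
		try {
			ObjectId id = repository.resolve(hash);
			RevCommit commit = walk.parseCommit(id);
			LuceneUtils.index(repository, commit);
		} finally {
			walk.dispose();
		}
	}

	/** Re-index an issue after a change has been applied (hypothetical hook). */
	static void onIssueUpdated(Repository repository, String issueId) throws Exception {
		IssueModel issue = IssueUtils.getIssue(repository, issueId);
		// reindex=true first deletes the issue's stale document (matched on type + id)
		LuceneUtils.index(repository, issue, true);
	}
}

Passing reindex=true is only appropriate for issues that are already in the index, since it deletes the old document before adding the fresh one.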
diff --git a/tests/com/gitblit/tests/GitBlitSuite.java b/tests/com/gitblit/tests/GitBlitSuite.java
index 9e5caf0b238874db6a2813ed4d9998e453eaf5fb..8fac212cd6a4dc853fed885fab872936b837a4d2 100644 (file)
@@ -52,7 +52,7 @@ import com.gitblit.utils.JGitUtils;
                ObjectCacheTest.class, UserServiceTest.class, MarkdownUtilsTest.class, JGitUtilsTest.class,\r
                SyndicationUtilsTest.class, DiffUtilsTest.class, MetricUtilsTest.class,\r
                TicgitUtilsTest.class, GitBlitTest.class, FederationTests.class, RpcTests.class,\r
-               GitServletTest.class, GroovyScriptTest.class, IssuesTest.class })\r
+               GitServletTest.class, GroovyScriptTest.class, LuceneUtilsTest.class, IssuesTest.class })\r
 public class GitBlitSuite {\r
 \r
        public static final File REPOSITORIES = new File("git");\r
diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java
index 26b599567994e77753d9accc98754089d61820e0..c475c466188e41e91bd2f9c7a4c44280ee07acc7 100644 (file)
@@ -32,8 +32,10 @@ import com.gitblit.models.IssueModel.Change;
 import com.gitblit.models.IssueModel.Field;\r
 import com.gitblit.models.IssueModel.Priority;\r
 import com.gitblit.models.IssueModel.Status;\r
+import com.gitblit.models.SearchResult;\r
 import com.gitblit.utils.IssueUtils;\r
 import com.gitblit.utils.IssueUtils.IssueFilter;\r
+import com.gitblit.utils.LuceneUtils;\r
 \r
 /**\r
  * Tests the mechanics of distributed issue management on the gb-issues branch.\r
@@ -135,6 +137,40 @@ public class IssuesTest {
                assertEquals(1, closedIssues.size());\r
        }\r
 \r
+       @Test\r
+       public void testLuceneIndexAndQuery() throws Exception {                \r
+               Repository repository = GitBlitSuite.getIssuesTestRepository();\r
+               LuceneUtils.deleteIndex(repository);\r
+               List<IssueModel> allIssues = IssueUtils.getIssues(repository, null);\r
+               assertTrue(allIssues.size() > 0);\r
+               for (IssueModel issue : allIssues) {\r
+                       LuceneUtils.index(repository, issue, false);\r
+               }\r
+               List<SearchResult> hits = LuceneUtils.search(repository, "working");\r
+               assertTrue(hits.size() > 0);\r
+               \r
+               // reindex an issue\r
+               IssueModel issue = allIssues.get(0);\r
+               Change change = new Change("reindex");\r
+               change.comment("this is a test of reindexing an issue");\r
+               IssueUtils.updateIssue(repository, issue.id, change);\r
+               issue = IssueUtils.getIssue(repository, issue.id);\r
+               LuceneUtils.index(repository, issue, true);\r
+               \r
+               LuceneUtils.close();\r
+               repository.close();\r
+       }\r
+       \r
+       @Test\r
+       public void testLuceneQuery() throws Exception {\r
+               Repository repository = GitBlitSuite.getIssuesTestRepository();\r
+               List<SearchResult> hits = LuceneUtils.search(repository, "working");\r
+               LuceneUtils.close();\r
+               repository.close();\r
+               assertTrue(hits.size() > 0);\r
+       }\r
+\r
+\r
        @Test\r
        public void testDelete() throws Exception {\r
                Repository repository = GitBlitSuite.getIssuesTestRepository();\r
diff --git a/tests/com/gitblit/tests/LuceneUtilsTest.java b/tests/com/gitblit/tests/LuceneUtilsTest.java
new file mode 100644 (file)
index 0000000..648c812
--- /dev/null
@@ -0,0 +1,109 @@
+/*\r
+ * Copyright 2012 gitblit.com.\r
+ *\r
+ * Licensed under the Apache License, Version 2.0 (the "License");\r
+ * you may not use this file except in compliance with the License.\r
+ * You may obtain a copy of the License at\r
+ *\r
+ *     http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+package com.gitblit.tests;\r
+\r
+import static org.junit.Assert.assertEquals;\r
+\r
+import java.util.List;\r
+\r
+import org.eclipse.jgit.lib.Repository;\r
+import org.junit.Test;\r
+\r
+import com.gitblit.models.SearchResult;\r
+import com.gitblit.utils.LuceneUtils;\r
+\r
+/**\r
+ * Tests Lucene indexing and querying.\r
+ * \r
+ * @author James Moger\r
+ * \r
+ */\r
+public class LuceneUtilsTest {\r
+\r
+       @Test\r
+       public void testFullIndex() throws Exception {\r
+               // reindex helloworld\r
+               Repository repository = GitBlitSuite.getHelloworldRepository();\r
+               LuceneUtils.index(repository);\r
+               repository.close();\r
+\r
+               // reindex theoretical physics\r
+               repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
+               LuceneUtils.index(repository);\r
+               repository.close();\r
+\r
+               // reindex bluez-gnome\r
+               repository = GitBlitSuite.getBluezGnomeRepository();\r
+               LuceneUtils.index(repository);\r
+               repository.close();\r
+\r
+               LuceneUtils.close();\r
+       }\r
+\r
+       @Test\r
+       public void testQuery() throws Exception {\r
+               // 2 occurrences on the master branch\r
+               Repository repository = GitBlitSuite.getHelloworldRepository();\r
+               List<SearchResult> results = LuceneUtils.search(repository, "ada");\r
+               assertEquals(2, results.size());\r
+\r
+               // author test\r
+               results = LuceneUtils.search(repository, "author: tinogomes");\r
+               assertEquals(2, results.size());\r
+\r
+               // blob test\r
+               results = LuceneUtils.search(repository, "type: blob AND \"import std.stdio\"");\r
+               assertEquals(1, results.size());\r
+               assertEquals("d.D", results.get(0).id);\r
+\r
+               repository.close();\r
+               \r
+               // 1 occurrence on the gh-pages branch\r
+               repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
+               results = LuceneUtils.search(repository, "\"add the .nojekyll file\"");\r
+               assertEquals(1, results.size());\r
+               assertEquals("Ondrej Certik", results.get(0).author);\r
+               assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);\r
+               \r
+               // tag test\r
+               results = LuceneUtils.search(repository, "\"qft split\"");\r
+               assertEquals(1, results.size());\r
+               assertEquals("Ondrej Certik", results.get(0).author);\r
+               assertEquals("57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", results.get(0).id);\r
+               assertEquals("[1st-edition]", results.get(0).labels.toString());\r
+\r
+               results = LuceneUtils.search(repository, "type:blob AND \"src/intro.rst\"");\r
+               assertEquals(4, results.size());\r
+               \r
+               // hash id tests\r
+               results = LuceneUtils.search(repository, "id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5");\r
+               assertEquals(1, results.size());\r
+\r
+               results = LuceneUtils.search(repository, "id:57c4f26f157*");\r
+               assertEquals(1, results.size());\r
+\r
+               repository.close();\r
+               \r
+               // annotated tag test\r
+               repository = GitBlitSuite.getBluezGnomeRepository();\r
+               results = LuceneUtils.search(repository, "\"release 1.8\"");\r
+               assertEquals(1, results.size());\r
+               assertEquals("[1.8]", results.get(0).labels.toString());\r
+\r
+               repository.close();\r
+               \r
+               LuceneUtils.close();\r
+       }\r
+}
\ No newline at end of file