From 06ff61c04db32abc7ba4bbaeee604482c343152e Mon Sep 17 00:00:00 2001 From: James Moger Date: Tue, 24 Jan 2012 22:49:53 -0500 Subject: [PATCH] Fairly functional Lucene backend for commits, blobs, and issues --- src/com/gitblit/build/Build.java | 6 + src/com/gitblit/models/IssueModel.java | 10 + src/com/gitblit/models/SearchResult.java | 42 ++ src/com/gitblit/utils/IssueUtils.java | 2 +- src/com/gitblit/utils/LuceneUtils.java | 506 +++++++++++++++++++ tests/com/gitblit/tests/GitBlitSuite.java | 2 +- tests/com/gitblit/tests/IssuesTest.java | 36 ++ tests/com/gitblit/tests/LuceneUtilsTest.java | 109 ++++ 8 files changed, 711 insertions(+), 2 deletions(-) create mode 100644 src/com/gitblit/models/SearchResult.java create mode 100644 src/com/gitblit/utils/LuceneUtils.java create mode 100644 tests/com/gitblit/tests/LuceneUtilsTest.java diff --git a/src/com/gitblit/build/Build.java b/src/com/gitblit/build/Build.java index 682adcad..b539cac6 100644 --- a/src/com/gitblit/build/Build.java +++ b/src/com/gitblit/build/Build.java @@ -90,6 +90,7 @@ public class Build { downloadFromApache(MavenObject.GSON, BuildType.RUNTIME); downloadFromApache(MavenObject.MAIL, BuildType.RUNTIME); downloadFromApache(MavenObject.GROOVY, BuildType.RUNTIME); + downloadFromApache(MavenObject.LUCENE, BuildType.RUNTIME); downloadFromEclipse(MavenObject.JGIT, BuildType.RUNTIME); downloadFromEclipse(MavenObject.JGIT_HTTP, BuildType.RUNTIME); @@ -116,6 +117,7 @@ public class Build { downloadFromApache(MavenObject.GSON, BuildType.COMPILETIME); downloadFromApache(MavenObject.MAIL, BuildType.COMPILETIME); downloadFromApache(MavenObject.GROOVY, BuildType.COMPILETIME); + downloadFromApache(MavenObject.LUCENE, BuildType.COMPILETIME); downloadFromEclipse(MavenObject.JGIT, BuildType.COMPILETIME); downloadFromEclipse(MavenObject.JGIT_HTTP, BuildType.COMPILETIME); @@ -501,6 +503,10 @@ public class Build { "1.8.5", 6143000, 2290000, 4608000, "3be3914c49ca7d8e8afb29a7772a74c30a1f1b28", "1435cc8c90e3a91e5fee7bb53e83aad96e93aeb7", "5a214b52286523f9e2a4b5fed526506c763fa6f1"); + public static final MavenObject LUCENE = new MavenObject("lucene", "org/apache/lucene", "lucene-core", + "3.5.0", 1470000, 1347000, 3608000, "90ff0731fafb05c01fee4f2247140d56e9c30a3b", + "0757113199f9c8c18c678c96d61c2c4160b9baa6", "19f8e80e5e7f6ec88a41d4f63495994692e31bf1"); + public final String name; public final String group; public final String artifact; diff --git a/src/com/gitblit/models/IssueModel.java b/src/com/gitblit/models/IssueModel.java index 19241c29..246b29bf 100644 --- a/src/com/gitblit/models/IssueModel.java +++ b/src/com/gitblit/models/IssueModel.java @@ -106,6 +106,16 @@ public class IssueModel implements Serializable, Comparable { return attachment; } + public List getAttachments() { + List list = new ArrayList(); + for (Change change : changes) { + if (change.hasAttachments()) { + list.addAll(change.attachments); + } + } + return list; + } + public void applyChange(Change change) { changes.add(change); diff --git a/src/com/gitblit/models/SearchResult.java b/src/com/gitblit/models/SearchResult.java new file mode 100644 index 00000000..44207f08 --- /dev/null +++ b/src/com/gitblit/models/SearchResult.java @@ -0,0 +1,42 @@ +package com.gitblit.models; + +import java.io.Serializable; +import java.util.Date; +import java.util.List; + +import com.gitblit.utils.LuceneUtils.ObjectType; + +/** + * Model class that represents a search result. + * + * @author James Moger + * + */ +public class SearchResult implements Serializable { + + private static final long serialVersionUID = 1L; + + public float score; + + public Date date; + + public String author; + + public String committer; + + public String summary; + + public String id; + + public List labels; + + public ObjectType type; + + public SearchResult() { + } + + @Override + public String toString() { + return type.name() + ": " + id; + } +} \ No newline at end of file diff --git a/src/com/gitblit/utils/IssueUtils.java b/src/com/gitblit/utils/IssueUtils.java index d0a01992..eb3b347b 100644 --- a/src/com/gitblit/utils/IssueUtils.java +++ b/src/com/gitblit/utils/IssueUtils.java @@ -740,7 +740,7 @@ public class IssueUtils { * @param issueId * @return the root path of the issue content on the gb-issues branch */ - private static String getIssuePath(String issueId) { + static String getIssuePath(String issueId) { return issueId.substring(0, 2) + "/" + issueId.substring(2); } diff --git a/src/com/gitblit/utils/LuceneUtils.java b/src/com/gitblit/utils/LuceneUtils.java new file mode 100644 index 00000000..4ca72f0a --- /dev/null +++ b/src/com/gitblit/utils/LuceneUtils.java @@ -0,0 +1,506 @@ +package com.gitblit.utils; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.DateTools.Resolution; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevObject; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.treewalk.TreeWalk; + +import com.gitblit.models.IssueModel; +import com.gitblit.models.IssueModel.Attachment; +import com.gitblit.models.RefModel; +import com.gitblit.models.SearchResult; + +/** + * A collection of utility methods for indexing and querying a Lucene repository + * index. + * + * @author James Moger + * + */ +public class LuceneUtils { + + /** + * The types of objects that can be indexed and queried. + */ + public static enum ObjectType { + commit, blob, issue; + + static ObjectType fromName(String name) { + for (ObjectType value : values()) { + if (value.name().equals(name)) { + return value; + } + } + return null; + } + } + + private static final Version LUCENE_VERSION = Version.LUCENE_35; + + private static final String FIELD_OBJECT_TYPE = "type"; + private static final String FIELD_OBJECT_ID = "id"; + private static final String FIELD_REPOSITORY = "repository"; + private static final String FIELD_SUMMARY = "summary"; + private static final String FIELD_CONTENT = "content"; + private static final String FIELD_AUTHOR = "author"; + private static final String FIELD_COMMITTER = "committer"; + private static final String FIELD_DATE = "date"; + private static final String FIELD_LABEL = "label"; + private static final String FIELD_ATTACHMENT = "attachment"; + + private static Set excludes = new TreeSet(Arrays.asList("7z", "arc", "arj", + "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", + "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); + + private static final Map SEARCHERS = new ConcurrentHashMap(); + private static final Map WRITERS = new ConcurrentHashMap(); + + /** + * Deletes the Lucene index for the specified repository. + * + * @param repository + * @return true, if successful + */ + public static boolean deleteIndex(Repository repository) { + try { + File luceneIndex = new File(repository.getDirectory(), "lucene"); + if (luceneIndex.exists()) { + org.eclipse.jgit.util.FileUtils.delete(luceneIndex, + org.eclipse.jgit.util.FileUtils.RECURSIVE); + } + return true; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * This completely indexes the repository and will destroy any existing + * index. + * + * @param repository + * @return true if the indexing has succeeded + */ + public static boolean index(Repository repository) { + try { + Set indexedCommits = new TreeSet(); + IndexWriter writer = getIndexWriter(repository, true); + // build a quick lookup of tags + Map> tags = new HashMap>(); + for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { + if (!tags.containsKey(tag.getObjectId())) { + tags.put(tag.getReferencedObjectId().getName(), new ArrayList()); + } + tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); + } + + // walk through each branch + List branches = JGitUtils.getLocalBranches(repository, true, -1); + for (RefModel branch : branches) { + RevWalk revWalk = new RevWalk(repository); + RevCommit rev = revWalk.parseCommit(branch.getObjectId()); + + // index the blob contents of the tree + ByteArrayOutputStream os = new ByteArrayOutputStream(); + byte[] tmp = new byte[32767]; + TreeWalk treeWalk = new TreeWalk(repository); + treeWalk.addTree(rev.getTree()); + treeWalk.setRecursive(true); + String revDate = DateTools.timeToString(rev.getCommitTime() * 1000L, + Resolution.MINUTE); + while (treeWalk.next()) { + Document doc = new Document(); + doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES, + Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); + doc.add(new Field(FIELD_AUTHOR, rev.getAuthorIdent().getName(), Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_COMMITTER, rev.getCommitterIdent().getName(), + Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_LABEL, branch.getName(), Store.YES, Index.ANALYZED)); + + // determine extension to compare to the extension + // blacklist + String ext = null; + String name = treeWalk.getPathString().toLowerCase(); + if (name.indexOf('.') > -1) { + ext = name.substring(name.lastIndexOf('.') + 1); + } + + if (StringUtils.isEmpty(ext) || !excludes.contains(ext)) { + // read the blob content + ObjectId entid = treeWalk.getObjectId(0); + FileMode entmode = treeWalk.getFileMode(0); + RevObject ro = revWalk.lookupAny(entid, entmode.getObjectType()); + revWalk.parseBody(ro); + ObjectLoader ldr = repository.open(ro.getId(), Constants.OBJ_BLOB); + InputStream in = ldr.openStream(); + os.reset(); + int n = 0; + while ((n = in.read(tmp)) > 0) { + os.write(tmp, 0, n); + } + in.close(); + byte[] content = os.toByteArray(); + String str = new String(content, "UTF-8"); + doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED)); + writer.addDocument(doc); + } + } + + os.close(); + treeWalk.release(); + + // index the head commit object + String head = rev.getId().getName(); + if (indexedCommits.add(head)) { + Document doc = createDocument(rev, tags.get(head)); + writer.addDocument(doc); + } + + // traverse the log and index the previous commit objects + revWalk.markStart(rev); + while ((rev = revWalk.next()) != null) { + String hash = rev.getId().getName(); + if (indexedCommits.add(hash)) { + Document doc = createDocument(rev, tags.get(hash)); + writer.addDocument(doc); + } + } + + // finished + revWalk.dispose(); + } + + // this repository has a gb-issues branch, index all issues + if (IssueUtils.getIssuesBranch(repository) != null) { + List issues = IssueUtils.getIssues(repository, null); + for (IssueModel issue : issues) { + Document doc = createDocument(issue); + writer.addDocument(doc); + } + } + + // commit all changes and reset the searcher + resetIndexSearcher(repository); + writer.commit(); + return true; + } catch (Exception e) { + e.printStackTrace(); + } + return false; + } + + /** + * Incrementally update the index with the specified commit for the + * repository. + * + * @param repository + * @param commit + * @return true, if successful + */ + public static boolean index(Repository repository, RevCommit commit) { + try { + Document doc = createDocument(commit, null); + return index(repository, doc); + } catch (Exception e) { + e.printStackTrace(); + } + return false; + } + + /** + * Incrementally update the index with the specified issue for the + * repository. + * + * @param repository + * @param issue + * @param reindex + * if true, the old index entry for this issue will be deleted. + * This is only appropriate for pre-existing/indexed issues. + * @return true, if successful + */ + public static boolean index(Repository repository, IssueModel issue, boolean reindex) { + try { + Document doc = createDocument(issue); + if (reindex) { + // delete the old issue from the index, if exists + IndexWriter writer = getIndexWriter(repository, false); + writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), + new Term(FIELD_OBJECT_ID, String.valueOf(issue.id))); + writer.commit(); + } + return index(repository, doc); + } catch (Exception e) { + e.printStackTrace(); + } + return false; + } + + /** + * Creates a Lucene document from an issue. + * + * @param issue + * @return a Lucene document + */ + private static Document createDocument(IssueModel issue) { + Document doc = new Document(); + doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES, + Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE), + Store.YES, Field.Index.NO)); + doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + List attachments = new ArrayList(); + for (Attachment attachment : issue.getAttachments()) { + attachments.add(attachment.name.toLowerCase()); + } + doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES, + Index.ANALYZED)); + doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.NO, Index.ANALYZED)); + doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES, + Index.ANALYZED)); + return doc; + } + + /** + * Creates a Lucene document for a commit + * + * @param commit + * @param tags + * @return a Lucene document + */ + private static Document createDocument(RevCommit commit, List tags) { + Document doc = new Document(); + doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.NOT_ANALYZED)); + doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L, + Resolution.MINUTE), Store.YES, Index.NO)); + doc.add(new Field(FIELD_AUTHOR, commit.getCommitterIdent().getName(), Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED)); + doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.NO, Index.ANALYZED)); + if (!ArrayUtils.isEmpty(tags)) { + if (!ArrayUtils.isEmpty(tags)) { + doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(tags), Store.YES, + Index.ANALYZED)); + } + } + return doc; + } + + /** + * Incrementally index an object for the repository. + * + * @param repository + * @param doc + * @return true, if successful + */ + private static boolean index(Repository repository, Document doc) { + try { + IndexWriter writer = getIndexWriter(repository, false); + writer.addDocument(doc); + resetIndexSearcher(repository); + writer.commit(); + return true; + } catch (Exception e) { + e.printStackTrace(); + } + return false; + } + + private static SearchResult createSearchResult(Document doc, float score) throws ParseException { + SearchResult result = new SearchResult(); + result.score = score; + result.date = DateTools.stringToDate(doc.get(FIELD_DATE)); + result.summary = doc.get(FIELD_SUMMARY); + result.author = doc.get(FIELD_AUTHOR); + result.committer = doc.get(FIELD_COMMITTER); + result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); + result.id = doc.get(FIELD_OBJECT_ID); + if (doc.get(FIELD_LABEL) != null) { + result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL)); + } + return result; + } + + private static void resetIndexSearcher(Repository repository) throws IOException { + IndexSearcher searcher = SEARCHERS.get(repository.getDirectory()); + if (searcher != null) { + SEARCHERS.remove(repository.getDirectory()); + searcher.close(); + } + } + + /** + * Gets an index searcher for the repository. + * + * @param repository + * @return + * @throws IOException + */ + private static IndexSearcher getIndexSearcher(Repository repository) throws IOException { + IndexSearcher searcher = SEARCHERS.get(repository.getDirectory()); + if (searcher == null) { + IndexWriter writer = getIndexWriter(repository, false); + searcher = new IndexSearcher(IndexReader.open(writer, true)); + SEARCHERS.put(repository.getDirectory(), searcher); + } + return searcher; + } + + /** + * Gets an index writer for the repository. The index will be created if it + * does not already exist or if forceCreate is specified. + * + * @param repository + * @param forceCreate + * @return an IndexWriter + * @throws IOException + */ + private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate) + throws IOException { + IndexWriter indexWriter = WRITERS.get(repository.getDirectory()); + File indexFolder = new File(repository.getDirectory(), "lucene"); + Directory directory = FSDirectory.open(indexFolder); + if (forceCreate || !indexFolder.exists()) { + // if the writer is going to blow away the existing index and create + // a new one then it should not be cached. instead, close any open + // writer, create a new one, and return. + if (indexWriter != null) { + indexWriter.close(); + indexWriter = null; + WRITERS.remove(repository.getDirectory()); + } + indexFolder.mkdirs(); + IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer( + LUCENE_VERSION)); + config.setOpenMode(OpenMode.CREATE); + IndexWriter writer = new IndexWriter(directory, config); + writer.close(); + } + + if (indexWriter == null) { + IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer( + LUCENE_VERSION)); + config.setOpenMode(OpenMode.APPEND); + indexWriter = new IndexWriter(directory, config); + WRITERS.put(repository.getDirectory(), indexWriter); + } + return indexWriter; + } + + /** + * Search the repository for the given text or query + * + * @param repository + * @param text + * @return a list of SearchResults + */ + public static List search(Repository repository, String text) { + if (StringUtils.isEmpty(text)) { + return null; + } + Set results = new HashSet(); + StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); + try { + // default search checks summary and content + BooleanQuery query = new BooleanQuery(); + QueryParser qp; + qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer); + qp.setAllowLeadingWildcard(true); + query.add(qp.parse(text), Occur.SHOULD); + + qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer); + qp.setAllowLeadingWildcard(true); + query.add(qp.parse(text), Occur.SHOULD); + + IndexSearcher searcher = getIndexSearcher(repository); + Query rewrittenQuery = searcher.rewrite(query); + + TopScoreDocCollector collector = TopScoreDocCollector.create(200, true); + searcher.search(rewrittenQuery, collector); + ScoreDoc[] hits = collector.topDocs().scoreDocs; + for (int i = 0; i < hits.length; i++) { + int docId = hits[i].doc; + Document doc = searcher.doc(docId); + SearchResult result = createSearchResult(doc, hits[i].score); + results.add(result); + } + } catch (Exception e) { + e.printStackTrace(); + } + return new ArrayList(results); + } + + /** + * Close all the index writers and searchers + */ + public static void close() { + // close writers + for (File file : WRITERS.keySet()) { + try { + WRITERS.get(file).close(true); + } catch (Throwable t) { + t.printStackTrace(); + } + } + WRITERS.clear(); + + // close searchers + for (File file : SEARCHERS.keySet()) { + try { + SEARCHERS.get(file).close(); + } catch (Throwable t) { + t.printStackTrace(); + } + } + SEARCHERS.clear(); + } +} diff --git a/tests/com/gitblit/tests/GitBlitSuite.java b/tests/com/gitblit/tests/GitBlitSuite.java index 9e5caf0b..8fac212c 100644 --- a/tests/com/gitblit/tests/GitBlitSuite.java +++ b/tests/com/gitblit/tests/GitBlitSuite.java @@ -52,7 +52,7 @@ import com.gitblit.utils.JGitUtils; ObjectCacheTest.class, UserServiceTest.class, MarkdownUtilsTest.class, JGitUtilsTest.class, SyndicationUtilsTest.class, DiffUtilsTest.class, MetricUtilsTest.class, TicgitUtilsTest.class, GitBlitTest.class, FederationTests.class, RpcTests.class, - GitServletTest.class, GroovyScriptTest.class, IssuesTest.class }) + GitServletTest.class, GroovyScriptTest.class, LuceneUtilsTest.class, IssuesTest.class }) public class GitBlitSuite { public static final File REPOSITORIES = new File("git"); diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java index 26b59956..c475c466 100644 --- a/tests/com/gitblit/tests/IssuesTest.java +++ b/tests/com/gitblit/tests/IssuesTest.java @@ -32,8 +32,10 @@ import com.gitblit.models.IssueModel.Change; import com.gitblit.models.IssueModel.Field; import com.gitblit.models.IssueModel.Priority; import com.gitblit.models.IssueModel.Status; +import com.gitblit.models.SearchResult; import com.gitblit.utils.IssueUtils; import com.gitblit.utils.IssueUtils.IssueFilter; +import com.gitblit.utils.LuceneUtils; /** * Tests the mechanics of distributed issue management on the gb-issues branch. @@ -135,6 +137,40 @@ public class IssuesTest { assertEquals(1, closedIssues.size()); } + @Test + public void testLuceneIndexAndQuery() throws Exception { + Repository repository = GitBlitSuite.getIssuesTestRepository(); + LuceneUtils.deleteIndex(repository); + List allIssues = IssueUtils.getIssues(repository, null); + assertTrue(allIssues.size() > 0); + for (IssueModel issue : allIssues) { + LuceneUtils.index(repository, issue, false); + } + List hits = LuceneUtils.search(repository, "working"); + assertTrue(hits.size() > 0); + + // reindex an issue + IssueModel issue = allIssues.get(0); + Change change = new Change("reindex"); + change.comment("this is a test of reindexing an issue"); + IssueUtils.updateIssue(repository, issue.id, change); + issue = IssueUtils.getIssue(repository, issue.id); + LuceneUtils.index(repository, issue, true); + + LuceneUtils.close(); + repository.close(); + } + + @Test + public void testLuceneQuery() throws Exception { + Repository repository = GitBlitSuite.getIssuesTestRepository(); + List hits = LuceneUtils.search(repository, "working"); + LuceneUtils.close(); + repository.close(); + assertTrue(hits.size() > 0); + } + + @Test public void testDelete() throws Exception { Repository repository = GitBlitSuite.getIssuesTestRepository(); diff --git a/tests/com/gitblit/tests/LuceneUtilsTest.java b/tests/com/gitblit/tests/LuceneUtilsTest.java new file mode 100644 index 00000000..648c8128 --- /dev/null +++ b/tests/com/gitblit/tests/LuceneUtilsTest.java @@ -0,0 +1,109 @@ +/* + * Copyright 2012 gitblit.com. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.gitblit.tests; + +import static org.junit.Assert.assertEquals; + +import java.util.List; + +import org.eclipse.jgit.lib.Repository; +import org.junit.Test; + +import com.gitblit.models.SearchResult; +import com.gitblit.utils.LuceneUtils; + +/** + * Tests Lucene indexing and querying. + * + * @author James Moger + * + */ +public class LuceneUtilsTest { + + @Test + public void testFullIndex() throws Exception { + // reindex helloworld + Repository repository = GitBlitSuite.getHelloworldRepository(); + LuceneUtils.index(repository); + repository.close(); + + // reindex theoretical physics + repository = GitBlitSuite.getTheoreticalPhysicsRepository(); + LuceneUtils.index(repository); + repository.close(); + + // reindex bluez-gnome + repository = GitBlitSuite.getBluezGnomeRepository(); + LuceneUtils.index(repository); + repository.close(); + + LuceneUtils.close(); + } + + @Test + public void testQuery() throws Exception { + // 2 occurrences on the master branch + Repository repository = GitBlitSuite.getHelloworldRepository(); + List results = LuceneUtils.search(repository, "ada"); + assertEquals(2, results.size()); + + // author test + results = LuceneUtils.search(repository, "author: tinogomes"); + assertEquals(2, results.size()); + + repository.close(); + // blob test + results = LuceneUtils.search(repository, "type: blob AND \"import std.stdio\""); + assertEquals(1, results.size()); + assertEquals("d.D", results.get(0).id); + + // 1 occurrence on the gh-pages branch + repository = GitBlitSuite.getTheoreticalPhysicsRepository(); + results = LuceneUtils.search(repository, "\"add the .nojekyll file\""); + assertEquals(1, results.size()); + assertEquals("Ondrej Certik", results.get(0).author); + assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id); + + // tag test + results = LuceneUtils.search(repository, "\"qft split\""); + assertEquals(1, results.size()); + assertEquals("Ondrej Certik", results.get(0).author); + assertEquals("57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", results.get(0).id); + assertEquals("[1st-edition]", results.get(0).labels.toString()); + + results = LuceneUtils.search(repository, "type:blob AND \"src/intro.rst\""); + assertEquals(4, results.size()); + + // hash id tests + results = LuceneUtils.search(repository, "id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5"); + assertEquals(1, results.size()); + + results = LuceneUtils.search(repository, "id:57c4f26f157*"); + assertEquals(1, results.size()); + + repository.close(); + + // annotated tag test + repository = GitBlitSuite.getBluezGnomeRepository(); + results = LuceneUtils.search(repository, "\"release 1.8\""); + assertEquals(1, results.size()); + assertEquals("[1.8]", results.get(0).labels.toString()); + + repository.close(); + + LuceneUtils.close(); + } +} \ No newline at end of file -- 2.39.5