From 87ee944282f3c7191eed900e498cca20fc34fe98 Mon Sep 17 00:00:00 2001 From: James Moger Date: Thu, 12 Jul 2012 18:21:47 -0400 Subject: [PATCH] Fixed broken Lucene blob delete --- docs/04_releases.mkd | 3 +- groovy/.gitignore | 1 + src/com/gitblit/LuceneExecutor.java | 52 ++++++++++++++----- tests/com/gitblit/tests/IssuesTest.java | 6 ++- .../com/gitblit/tests/LuceneExecutorTest.java | 17 +++++- 5 files changed, 61 insertions(+), 18 deletions(-) create mode 100644 groovy/.gitignore diff --git a/docs/04_releases.mkd b/docs/04_releases.mkd index bed23288..43a2db0d 100644 --- a/docs/04_releases.mkd +++ b/docs/04_releases.mkd @@ -6,8 +6,9 @@ #### fixes +- Fixed bug in Lucene search where old/stale blobs were never properly deleted prior to reindexing. This resulted in duplicate blob entries in the index. - Fixed intermittent bug in identifying line numbers in Lucene search (issue 105) -- Adjust repository search to handle foo.git and foo/bar.git (issue 104) +- Adjust repository identification to handle foo.git and foo/bar.git (issue 104) - Fixed bug where a repository set as authenticated push did not have anonymous clone access (issue 96) - Fixed bug in Basic authentication if passwords had a colon (Github/peterloron) - Fixed bug where the Gitblit Manager could not update a setting that was not referenced in reference.properties (issue 85) diff --git a/groovy/.gitignore b/groovy/.gitignore new file mode 100644 index 00000000..e58dc47f --- /dev/null +++ b/groovy/.gitignore @@ -0,0 +1 @@ +/grape diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java index c702dcc7..f7a73905 100644 --- a/src/com/gitblit/LuceneExecutor.java +++ b/src/com/gitblit/LuceneExecutor.java @@ -105,7 +105,7 @@ import com.gitblit.utils.StringUtils; public class LuceneExecutor implements Runnable { - private static final int INDEX_VERSION = 3; + private static final int INDEX_VERSION = 4; private static final String FIELD_OBJECT_TYPE = "type"; private static final String FIELD_ISSUE = "issue"; @@ -301,7 +301,6 @@ public class LuceneExecutor implements Runnable { throw new RuntimeException(e); } } - /** * Returns the author for the commit, if this information is available. @@ -728,8 +727,9 @@ public class LuceneExecutor implements Runnable { * @param repositoryName * @param issueId * @throws Exception + * @return true, if deleted, false if no record was deleted */ - private void deleteIssue(String repositoryName, String issueId) throws Exception { + private boolean deleteIssue(String repositoryName, String issueId) throws Exception { BooleanQuery query = new BooleanQuery(); Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name()); query.add(new TermQuery(objectTerm), Occur.MUST); @@ -737,8 +737,17 @@ public class LuceneExecutor implements Runnable { query.add(new TermQuery(issueidTerm), Occur.MUST); IndexWriter writer = getIndexWriter(repositoryName); + int numDocsBefore = writer.numDocs(); writer.deleteDocuments(query); writer.commit(); + int numDocsAfter = writer.numDocs(); + if (numDocsBefore == numDocsAfter) { + logger.debug(MessageFormat.format("no records found to delete {0}", query.toString())); + return false; + } else { + logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString())); + return true; + } } /** @@ -748,19 +757,29 @@ public class LuceneExecutor implements Runnable { * @param branch * @param path * @throws Exception + * @return true, if deleted, false if no record was deleted */ - private void deleteBlob(String repositoryName, String branch, String path) throws Exception { - BooleanQuery query = new BooleanQuery(); - Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name()); - query.add(new TermQuery(objectTerm), Occur.MUST); - Term branchTerm = new Term(FIELD_BRANCH, branch); - query.add(new TermQuery(branchTerm), Occur.MUST); - Term pathTerm = new Term(FIELD_PATH, path); - query.add(new TermQuery(pathTerm), Occur.MUST); + public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception { + String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH); + String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path); + BooleanQuery query = new BooleanQuery(); + StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); + QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer); + query.add(qp.parse(q), Occur.MUST); + IndexWriter writer = getIndexWriter(repositoryName); - writer.deleteDocuments(query); + int numDocsBefore = writer.numDocs(); + writer.deleteDocuments(query); writer.commit(); + int numDocsAfter = writer.numDocs(); + if (numDocsBefore == numDocsAfter) { + logger.debug(MessageFormat.format("no records found to delete {0}", query.toString())); + return false; + } else { + logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString())); + return true; + } } /** @@ -881,7 +900,9 @@ public class LuceneExecutor implements Runnable { IssueModel issue = IssueUtils.getIssue(repository, issueId); if (issue == null) { // issue was deleted, remove from index - deleteIssue(model.name, issueId); + if (!deleteIssue(model.name, issueId)) { + logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId)); + } } else { // issue was updated index(model.name, issue); @@ -1119,7 +1140,7 @@ public class LuceneExecutor implements Runnable { qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer); qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); - + IndexSearcher searcher; if (repositories.length == 1) { // single repository search @@ -1135,7 +1156,10 @@ public class LuceneExecutor implements Runnable { MultiSourceReader reader = new MultiSourceReader(rdrs); searcher = new IndexSearcher(reader); } + Query rewrittenQuery = searcher.rewrite(query); + logger.debug(rewrittenQuery.toString()); + TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true); searcher.search(rewrittenQuery, collector); int offset = Math.max(0, (page - 1) * pageSize); diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java index 11f95514..54cac335 100644 --- a/tests/com/gitblit/tests/IssuesTest.java +++ b/tests/com/gitblit/tests/IssuesTest.java @@ -134,7 +134,7 @@ public class IssuesTest { lucene.index(name, anIssue); } List hits = lucene.search("working", 1, 10, name); - assertTrue(hits.size() > 0); + assertTrue(hits.size() == 1); // reindex an issue issue = allIssues.get(0); @@ -144,6 +144,10 @@ public class IssuesTest { issue = IssueUtils.getIssue(repository, issue.id); lucene.index(name, issue); + hits = lucene.search("working", 1, 10, name); + assertTrue(hits.size() == 1); + + // delete all issues for (IssueModel anIssue : allIssues) { assertTrue(IssueUtils.deleteIssue(repository, anIssue.id, "D")); diff --git a/tests/com/gitblit/tests/LuceneExecutorTest.java b/tests/com/gitblit/tests/LuceneExecutorTest.java index ec81fd8e..21454fe4 100644 --- a/tests/com/gitblit/tests/LuceneExecutorTest.java +++ b/tests/com/gitblit/tests/LuceneExecutorTest.java @@ -16,6 +16,8 @@ package com.gitblit.tests; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.List; @@ -23,14 +25,12 @@ import java.util.List; import org.eclipse.jgit.lib.Repository; import org.junit.Test; -import com.gitblit.GitBlit; import com.gitblit.LuceneExecutor; import com.gitblit.models.RefModel; import com.gitblit.models.RepositoryModel; import com.gitblit.models.SearchResult; import com.gitblit.utils.FileUtils; import com.gitblit.utils.JGitUtils; -import com.gitblit.utils.StringUtils; /** * Tests Lucene indexing and querying. @@ -160,4 +160,17 @@ public class LuceneExecutorTest { lucene.close(); assertEquals(10, results.size()); } + + @Test + public void testDeleteBlobFromIndex() throws Exception { + // start with a fresh reindex of entire repository + LuceneExecutor lucene = new LuceneExecutor(null, GitBlitSuite.REPOSITORIES); + Repository repository = GitBlitSuite.getHelloworldRepository(); + RepositoryModel model = newRepositoryModel(repository); + lucene.reindex(model, repository); + + // now delete a blob + assertTrue(lucene.deleteBlob(model.name, "refs/heads/master", "java.java")); + assertFalse(lucene.deleteBlob(model.name, "refs/heads/master", "java.java")); + } } \ No newline at end of file -- 2.39.5