source.dussan.org Git - gitblit.git/commitdiff
Revise indexed fields and improve Lucene page usability
author James Moger <james.moger@gitblit.com>
Tue, 13 Mar 2012 02:57:13 +0000 (22:57 -0400)
committer James Moger <james.moger@gitblit.com>
Tue, 13 Mar 2012 02:57:13 +0000 (22:57 -0400)
resources/gitblit.css
src/com/gitblit/models/SearchResult.java
src/com/gitblit/utils/IssueUtils.java
src/com/gitblit/utils/LuceneUtils.java
src/com/gitblit/wicket/pages/LucenePage.html
src/com/gitblit/wicket/pages/LucenePage.java
tests/com/gitblit/tests/LuceneUtilsTest.java

index 8f65986540d81aa12111da9a81555959641a0c83..0e8e753204bf3bf7f1c1fbeb4ab5a027cfcdc02b 100644 (file)
@@ -286,6 +286,34 @@ div.searchResult .date {
        color:#999;\r
 }\r
 \r
+div.searchResult .body {\r
+       padding-left:20px;\r
+}\r
+\r
+div.searchResult .fragment {\r
+       padding: 7px 0;\r
+}\r
+\r
+div.searchResult .highlight {\r
+       background-color: #ffffaa;      \r
+       border: 1px solid #ffcc00;\r
+       padding: 0 2px;\r
+}\r
+\r
+div.searchResult .ellipses {\r
+       font-family: sans-serif;\r
+       font-size: 9px;\r
+       font-weight: normal;    \r
+       background-color: #eee; \r
+       border: 1px solid #ccc;\r
+       padding: 0 3px;\r
+       margin: 0px;\r
+}\r
+\r
+div.searchResult pre {\r
+       margin: 1px 0px;\r
+}\r
+\r
 div.header, div.commitHeader, table.repositories th {\r
        background-color:#e0e0e0;\r
        background-repeat:repeat-x;\r
index c74229a99488fb52dcb3b80e2f8a13a86efe33f1..181eb0e042549a6ad870a8244ceb0c8263ab32e9 100644 (file)
@@ -26,13 +26,17 @@ public class SearchResult implements Serializable {
 \r
        public String summary;\r
        \r
-       public String content;\r
+       public String fragment;\r
        \r
        public String repository;\r
        \r
        public String branch;\r
 \r
-       public String id;\r
+       public String commitId;\r
+       \r
+       public String path;\r
+       \r
+       public String issueId;\r
 \r
        public List<String> tags;\r
        \r
@@ -42,9 +46,21 @@ public class SearchResult implements Serializable {
 \r
        public SearchResult() {\r
        }\r
+       \r
+       public String getId() {\r
+               switch (type) {\r
+               case blob:\r
+                       return path;\r
+               case commit:\r
+                       return commitId;\r
+               case issue:\r
+                       return issueId;\r
+               }\r
+               return commitId;\r
+       }\r
 \r
        @Override\r
        public String toString() {\r
-               return  score + " : " + type.name() + " : " + repository + " : " + id + " (" + branch + ")";\r
+               return  score + " : " + type.name() + " : " + repository + " : " + getId() + " (" + branch + ")";\r
        }\r
 }
\ No newline at end of file
index eb3b347b17acd3e7aaa0bba4e1cc5339ea58d1a9..cfd6200df07c6ad17b5ce7ff9b57321bb83040b3 100644 (file)
@@ -425,7 +425,7 @@ public class IssueUtils {
         * Updates an issue in the gb-issues branch of the repository.\r
         * \r
         * @param repository\r
-        * @param issue\r
+        * @param issueId\r
         * @param change\r
         * @return true if successful\r
         */\r
@@ -619,7 +619,7 @@ public class IssueUtils {
         * distributed merging.\r
         * \r
         * @param repository\r
-        * @param issue\r
+        * @param issueId\r
         * @param change\r
         * @return true, if the change was committed\r
         */\r
index 3c2606bcc6c5e26aa984fb72c7183e04cd3a823a..1c24f287ae312d31edfac2db72c0806500bf603c 100644 (file)
@@ -35,6 +35,8 @@ import java.util.TreeMap;
 import java.util.TreeSet;\r
 import java.util.concurrent.ConcurrentHashMap;\r
 \r
+import org.apache.lucene.analysis.Analyzer;\r
+import org.apache.lucene.analysis.TokenStream;\r
 import org.apache.lucene.analysis.standard.StandardAnalyzer;\r
 import org.apache.lucene.document.DateTools;\r
 import org.apache.lucene.document.DateTools.Resolution;\r
@@ -55,6 +57,13 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;\r
 import org.apache.lucene.search.ScoreDoc;\r
 import org.apache.lucene.search.TopScoreDocCollector;\r
+import org.apache.lucene.search.highlight.Fragmenter;\r
+import org.apache.lucene.search.highlight.Highlighter;\r
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;\r
+import org.apache.lucene.search.highlight.QueryScorer;\r
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;\r
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;\r
+import org.apache.lucene.search.highlight.TokenSources;\r
 import org.apache.lucene.store.Directory;\r
 import org.apache.lucene.store.FSDirectory;\r
 import org.apache.lucene.util.Version;\r
@@ -107,7 +116,9 @@ public class LuceneUtils {
        private static final int INDEX_VERSION = 1;\r
 \r
        private static final String FIELD_OBJECT_TYPE = "type";\r
-       private static final String FIELD_OBJECT_ID = "id";\r
+       private static final String FIELD_ISSUE = "issue";\r
+       private static final String FIELD_PATH = "path";\r
+       private static final String FIELD_COMMIT = "commit";\r
        private static final String FIELD_BRANCH = "branch";\r
        private static final String FIELD_REPOSITORY = "repository";\r
        private static final String FIELD_SUMMARY = "summary";\r
@@ -361,7 +372,8 @@ public class LuceneUtils {
                                                doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));\r
                                                doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
                                                doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));\r
-                                               doc.add(new Field(FIELD_OBJECT_ID, path, Store.YES, Index.ANALYZED));\r
+                                               doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
+                                               doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));\r
                                                doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));\r
                                                doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));\r
                                                doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));                                  \r
@@ -385,7 +397,7 @@ public class LuceneUtils {
                                                        in.close();\r
                                                        byte[] content = os.toByteArray();\r
                                                        String str = new String(content, Constants.CHARACTER_ENCODING);\r
-                                                       doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));\r
+                                                       doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
                                                        os.reset();\r
                                                }                                                       \r
                                                \r
@@ -462,8 +474,9 @@ public class LuceneUtils {
        protected static RevTree getTree(final RevWalk walk, final RevCommit commit)\r
                        throws IOException {\r
                final RevTree tree = commit.getTree();\r
-               if (tree != null)\r
+               if (tree != null) {\r
                        return tree;\r
+               }\r
                walk.parseHeaders(commit);\r
                return commit.getTree();\r
        }\r
@@ -493,7 +506,7 @@ public class LuceneUtils {
                                                IndexWriter writer = getIndexWriter(repository, false);\r
                                                writer.deleteDocuments(\r
                                                                new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(\r
-                                                                               FIELD_OBJECT_ID, issueId));\r
+                                                                               FIELD_ISSUE, issueId));\r
                                                writer.commit();\r
                                                result.success = true;\r
                                                return result;\r
@@ -512,7 +525,7 @@ public class LuceneUtils {
                        for (PathChangeModel path : changedPaths) {\r
                                // delete the indexed blob\r
                                writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),\r
-                                               new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));\r
+                                               new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path));\r
 \r
                                // re-index the blob\r
                                if (!ChangeType.DELETE.equals(path.changeType)) {\r
@@ -522,7 +535,8 @@ public class LuceneUtils {
                                                        Index.NOT_ANALYZED));\r
                                        doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED));\r
                                        doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));\r
-                                       doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.ANALYZED));\r
+                                       doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
+                                       doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));\r
                                        doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));\r
                                        doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));\r
                                        doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));\r
@@ -539,7 +553,7 @@ public class LuceneUtils {
                                                // read the blob content\r
                                                String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
                                                                path.path);\r
-                                               doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));\r
+                                               doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));\r
                                                writer.addDocument(doc);\r
                                        }\r
                                }\r
@@ -568,7 +582,7 @@ public class LuceneUtils {
                        // delete the old issue from the index, if exists\r
                        IndexWriter writer = getIndexWriter(repository, false);\r
                        writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(\r
-                                       FIELD_OBJECT_ID, String.valueOf(issue.id)));\r
+                                       FIELD_ISSUE, String.valueOf(issue.id)));\r
                        writer.commit();\r
 \r
                        Document doc = createDocument(issue);\r
@@ -678,7 +692,7 @@ public class LuceneUtils {
                Document doc = new Document();\r
                doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,\r
                                Field.Index.NOT_ANALYZED));\r
-               doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.ANALYZED));\r
+               doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));\r
                doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));\r
                doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),\r
                                Store.YES, Field.Index.NO));\r
@@ -707,7 +721,7 @@ public class LuceneUtils {
                Document doc = new Document();\r
                doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,\r
                                Index.NOT_ANALYZED));\r
-               doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.ANALYZED));\r
+               doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));\r
                doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,\r
                                Resolution.MINUTE), Store.YES, Index.NO));\r
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));\r
@@ -746,14 +760,15 @@ public class LuceneUtils {
                SearchResult result = new SearchResult();\r
                result.score = score;\r
                result.date = DateTools.stringToDate(doc.get(FIELD_DATE));\r
-               result.summary = doc.get(FIELD_SUMMARY);\r
-               result.content = doc.get(FIELD_CONTENT);\r
+               result.summary = doc.get(FIELD_SUMMARY);                \r
                result.author = doc.get(FIELD_AUTHOR);\r
                result.committer = doc.get(FIELD_COMMITTER);\r
                result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));\r
                result.repository = doc.get(FIELD_REPOSITORY);\r
                result.branch = doc.get(FIELD_BRANCH);\r
-               result.id = doc.get(FIELD_OBJECT_ID);\r
+               result.commitId = doc.get(FIELD_COMMIT);\r
+               result.issueId = doc.get(FIELD_ISSUE);\r
+               result.path = doc.get(FIELD_PATH);\r
                if (doc.get(FIELD_TAG) != null) {\r
                        result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));\r
                }\r
@@ -887,6 +902,8 @@ public class LuceneUtils {
                                int docId = hits[i].doc;\r
                                Document doc = searcher.doc(docId);\r
                                SearchResult result = createSearchResult(doc, hits[i].score);\r
+                               String content = doc.get(FIELD_CONTENT);\r
+                               result.fragment = getHighlightedFragment(analyzer, query, content);\r
                                results.add(result);\r
                        }\r
                } catch (Exception e) {\r
@@ -894,6 +911,37 @@ public class LuceneUtils {
                }\r
                return new ArrayList<SearchResult>(results);\r
        }\r
+       \r
+       private static String getHighlightedFragment(Analyzer analyzer, Query query,\r
+                       String content) throws IOException, InvalidTokenOffsetsException {\r
+               content = content == null ? "":StringUtils.escapeForHtml(content, false);       \r
+               TokenStream stream = TokenSources.getTokenStream("content", content, analyzer);\r
+               QueryScorer scorer = new QueryScorer(query, "content");\r
+               Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 150);\r
+\r
+               SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");\r
+               Highlighter highlighter = new Highlighter(formatter, scorer);\r
+               \r
+               highlighter.setTextFragmenter(fragmenter);\r
+               String [] fragments = highlighter.getBestFragments(stream, content, 5);\r
+               if (ArrayUtils.isEmpty(fragments)) {\r
+                       return content;\r
+               }\r
+               if (fragments.length == 1) {\r
+                       return "<pre>" + fragments[0] + "</pre>";\r
+               }\r
+               StringBuilder sb = new StringBuilder();\r
+               for (int i = 0, len = fragments.length; i < len; i++) {\r
+                       String fragment = fragments[i].trim();                  \r
+                       sb.append("<pre>");\r
+                       sb.append(fragment);\r
+                       sb.append("</pre>");\r
+                       if (i < len - 1) {\r
+                               sb.append("<span class=\"ellipses\">...</span><br/>");\r
+                       }\r
+               }\r
+               return sb.toString();\r
+       }\r
 \r
        /**\r
         * Close all the index writers and searchers\r
index ff9111685d7d1469018b9563f7fb87416d79bb51..6999d783b82322ef1e5177f87fa020396f83c659 100644 (file)
@@ -24,8 +24,9 @@
                                        <div style="margin-left:0px;" class="span4">\r
                                                <div class="alert alert">\r
                                                        <b>type:</b> commit or blob<br/>\r
-                                                       <b>id:</b> commit id or file path<br/>\r
-                                                       <b>branch:</b><br/>\r
+                                                       <b>commit:</b> commit id<br/>\r
+                                                       <b>path:</b> blob path<br/>\r
+                                                       <b>branch:</b> refs/heads/master<br/>\r
                                                        <b>author:</b><br/>\r
                                                        <b>committer:</b><br/>\r
                                                        <b>tag:</b> tag<br/>\r
        <div class="row-fluid">\r
        <div class="searchResult" wicket:id="searchResults">\r
                <div><i wicket:id="type"></i><span class="summary" wicket:id="summary"></span></div>\r
-               <span class="author" wicket:id="author"></span> committed to <span class="repository" wicket:id="repository"></span>:<span class="branch" wicket:id="branch"></span><br/>\r
-               <span class="date" wicket:id="date"></span>\r
+               <div class="body">\r
+                       <div class="fragment" wicket:id="fragment"></div>\r
+                       <span class="author" wicket:id="author"></span> committed to <span class="repository" wicket:id="repository"></span>:<span class="branch" wicket:id="branch"></span><br/>\r
+                       <span class="date" wicket:id="date"></span>\r
+                       <hr/>\r
+               </div>\r
        </div>\r
        </div>\r
 </wicket:extend>\r
index c269c8e665351cc4c84d0e16aa1092c0a2ce8e16..f72aeda27652b3ffc16355845acc7711706f5de9 100644 (file)
@@ -75,24 +75,25 @@ public class LucenePage extends RootPage {
                                        Label icon = WicketUtils.newIcon("type", "icon-refresh");\r
                                        WicketUtils.setHtmlTooltip(icon, "commit");\r
                                        item.add(icon);\r
-                                       item.add(new LinkPanel("summary", null, sr.summary, CommitPage.class, WicketUtils.newObjectParameter(sr.repository, sr.id)));\r
+                                       item.add(new LinkPanel("summary", null, sr.summary, CommitPage.class, WicketUtils.newObjectParameter(sr.repository, sr.commitId)));\r
                                        break;\r
                                }\r
                                case blob: {\r
                                        Label icon = WicketUtils.newIcon("type", "icon-file");\r
                                        WicketUtils.setHtmlTooltip(icon, "blob");\r
                                        item.add(icon);\r
-                                       item.add(new LinkPanel("summary", null, sr.id, BlobPage.class, WicketUtils.newPathParameter(sr.repository, sr.branch, sr.id)));\r
+                                       item.add(new LinkPanel("summary", null, sr.path, BlobPage.class, WicketUtils.newPathParameter(sr.repository, sr.branch, sr.path)));\r
                                        break;\r
                                }\r
                                case issue: {\r
                                        Label icon = WicketUtils.newIcon("type", "icon-file");\r
                                        WicketUtils.setHtmlTooltip(icon, "issue");\r
                                        item.add(icon);\r
-                                       item.add(new Label("summary", "issue: " + sr.id));\r
+                                       item.add(new Label("summary", "issue: " + sr.issueId));\r
                                        break;\r
                                }\r
                                }\r
+                               item.add(new Label("fragment", sr.fragment).setEscapeModelStrings(false).setVisible(!StringUtils.isEmpty(sr.fragment)));\r
                                item.add(new LinkPanel("repository", null, sr.repository, SummaryPage.class, WicketUtils.newRepositoryParameter(sr.repository)));\r
                                item.add(new LinkPanel("branch", "branch", StringUtils.getRelativePath(Constants.R_HEADS, sr.branch), LogPage.class, WicketUtils.newObjectParameter(sr.repository, sr.branch)));\r
                                item.add(new Label("author", sr.author));\r
@@ -140,7 +141,7 @@ public class LucenePage extends RootPage {
                        }\r
                };\r
                ListMultipleChoice<String> selections = new ListMultipleChoice<String>("repositories", repositories, GitBlit.self().getRepositoryList());\r
-               selections.setMaxRows(11);\r
+               selections.setMaxRows(10);\r
                form.add(selections);\r
                form.add(new TextField<String>("fragment", fragment));\r
                add(form);\r
index e77545865f846c7f9557b308dae6c14de15750db..01858f50cbee2b1dc2ff7614c9713e8e11ec6b0f 100644 (file)
@@ -41,9 +41,9 @@ public class LuceneUtilsTest {
                String name = StringUtils.getRelativePath(GitBlitSuite.REPOSITORIES.getAbsolutePath(),\r
                                repository.getDirectory().getAbsolutePath());\r
                LuceneUtils.reindex(name, repository);\r
-               SearchResult result = LuceneUtils.search("type:blob AND id:bit.bit", 1, repository).get(0);             \r
+               SearchResult result = LuceneUtils.search("type:blob AND path:bit.bit", 1, repository).get(0);           \r
                assertEquals("Mike Donaghy", result.author);\r
-               result = LuceneUtils.search("type:blob AND id:clipper.prg", 1, repository).get(0);              \r
+               result = LuceneUtils.search("type:blob AND path:clipper.prg", 1, repository).get(0);            \r
                assertEquals("tinogomes", result.author);\r
                repository.close();\r
 \r
@@ -82,24 +82,24 @@ public class LuceneUtilsTest {
                // blob test\r
                results = LuceneUtils.search("type: blob AND \"import std.stdio\"", 10, repository);\r
                assertEquals(1, results.size());\r
-               assertEquals("d.D", results.get(0).id);\r
+               assertEquals("d.D", results.get(0).path);\r
                \r
                // 1 occurrence on the gh-pages branch\r
                repository = GitBlitSuite.getTheoreticalPhysicsRepository();\r
                results = LuceneUtils.search("\"add the .nojekyll file\"", 10, repository);\r
                assertEquals(1, results.size());\r
                assertEquals("Ondrej Certik", results.get(0).author);\r
-               assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).id);\r
+               assertEquals("2648c0c98f2101180715b4d432fc58d0e21a51d7", results.get(0).commitId);\r
                assertEquals("refs/heads/gh-pages", results.get(0).branch);\r
                \r
                results = LuceneUtils.search("type:blob AND \"src/intro.rst\"", 10, repository);\r
                assertEquals(4, results.size());\r
                \r
                // hash id tests\r
-               results = LuceneUtils.search("id:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);\r
+               results = LuceneUtils.search("commit:57c4f26f157ece24b02f4f10f5f68db1d2ce7ff5", 10, repository);\r
                assertEquals(1, results.size());\r
 \r
-               results = LuceneUtils.search("id:57c4f26f157*", 10, repository);\r
+               results = LuceneUtils.search("commit:57c4f26f157*", 10, repository);\r
                assertEquals(1, results.size());\r
 \r
                repository.close();\r