/* * Copyright 2012 gitblit.com. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.gitblit.utils; import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; import 
org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevTree; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.file.FileBasedConfig; import org.eclipse.jgit.treewalk.EmptyTreeIterator; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.util.FS; import com.gitblit.models.IssueModel; import com.gitblit.models.IssueModel.Attachment; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; import com.gitblit.models.SearchResult; /** * A collection of utility methods for indexing and querying a Lucene repository * index. * * @author James Moger * */ public class LuceneUtils { /** * The types of objects that can be indexed and queried. 
*/ public static enum ObjectType { commit, blob, issue; static ObjectType fromName(String name) { for (ObjectType value : values()) { if (value.name().equals(name)) { return value; } } return null; } } private static final Version LUCENE_VERSION = Version.LUCENE_35; private static final int INDEX_VERSION = 1; private static final String FIELD_OBJECT_TYPE = "type"; private static final String FIELD_ISSUE = "issue"; private static final String FIELD_PATH = "path"; private static final String FIELD_COMMIT = "commit"; private static final String FIELD_BRANCH = "branch"; private static final String FIELD_REPOSITORY = "repository"; private static final String FIELD_SUMMARY = "summary"; private static final String FIELD_CONTENT = "content"; private static final String FIELD_AUTHOR = "author"; private static final String FIELD_COMMITTER = "committer"; private static final String FIELD_DATE = "date"; private static final String FIELD_TAG = "tag"; private static final String FIELD_LABEL = "label"; private static final String FIELD_ATTACHMENT = "attachment"; private static Set excludedExtensions = new TreeSet(Arrays.asList("7z", "arc", "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); private static Set excludedBranches = new TreeSet( Arrays.asList("/refs/heads/gb-issues")); private static final Map SEARCHERS = new ConcurrentHashMap(); private static final Map WRITERS = new ConcurrentHashMap(); private static final String LUCENE_DIR = "lucene"; private static final String CONF_FILE = "lucene.conf"; private static final String CONF_INDEX = "index"; private static final String CONF_VERSION = "version"; private static final String CONF_ALIAS = "aliases"; private static final String CONF_BRANCH = "branches"; /** * Returns the author for the commit, if this information is available. 
* * @param commit * @return an author or unknown */ private static String getAuthor(RevCommit commit) { String name = "unknown"; try { name = commit.getAuthorIdent().getName(); if (StringUtils.isEmpty(name)) { name = commit.getAuthorIdent().getEmailAddress(); } } catch (NullPointerException n) { } return name; } /** * Returns the committer for the commit, if this information is available. * * @param commit * @return an committer or unknown */ private static String getCommitter(RevCommit commit) { String name = "unknown"; try { name = commit.getCommitterIdent().getName(); if (StringUtils.isEmpty(name)) { name = commit.getCommitterIdent().getEmailAddress(); } } catch (NullPointerException n) { } return name; } /** * Construct a keyname from the branch. * * @param branchName * @return a keyname appropriate for the Git config file format */ private static String getBranchKey(String branchName) { return StringUtils.getSHA1(branchName); } /** * Returns the Lucene configuration for the specified repository. * * @param repository * @return a config object */ private static FileBasedConfig getConfig(Repository repository) { File file = new File(repository.getDirectory(), CONF_FILE); FileBasedConfig config = new FileBasedConfig(file, FS.detect()); return config; } /** * Reads the Lucene config file for the repository to check the index * version. If the index version is different, then rebuild the repository * index. * * @param repository * @return true of the on-disk index format is different than INDEX_VERSION */ public static boolean shouldReindex(Repository repository) { try { FileBasedConfig config = getConfig(repository); config.load(); int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0); // reindex if versions do not match return indexVersion != INDEX_VERSION; } catch (Throwable t) { } return true; } /** * Deletes the Lucene index for the specified repository. 
* * @param repository * @return true, if successful */ public static boolean deleteIndex(Repository repository) { try { File luceneIndex = new File(repository.getDirectory(), LUCENE_DIR); if (luceneIndex.exists()) { org.eclipse.jgit.util.FileUtils.delete(luceneIndex, org.eclipse.jgit.util.FileUtils.RECURSIVE); } File luceneConfig = new File(repository.getDirectory(), CONF_FILE); if (luceneConfig.exists()) { luceneConfig.delete(); } return true; } catch (IOException e) { throw new RuntimeException(e); } } /** * This completely indexes the repository and will destroy any existing * index. * * @param repositoryName * @param repository * @return IndexResult */ public static IndexResult reindex(String repositoryName, Repository repository) { IndexResult result = new IndexResult(); if (!LuceneUtils.deleteIndex(repository)) { return result; } try { FileBasedConfig config = getConfig(repository); Set indexedCommits = new TreeSet(); IndexWriter writer = getIndexWriter(repository, true); // build a quick lookup of tags Map> tags = new HashMap>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { if (!tag.isAnnotatedTag()) { // skip non-annotated tags continue; } if (!tags.containsKey(tag.getObjectId())) { tags.put(tag.getReferencedObjectId().getName(), new ArrayList()); } tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); } ObjectReader reader = repository.newObjectReader(); // get the local branches List branches = JGitUtils.getLocalBranches(repository, true, -1); // sort them by most recently updated Collections.sort(branches, new Comparator() { @Override public int compare(RefModel ref1, RefModel ref2) { return ref2.getDate().compareTo(ref1.getDate()); } }); // reorder default branch to first position RefModel defaultBranch = null; ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); for (RefModel branch : branches) { if (branch.getObjectId().equals(defaultBranchId)) { defaultBranch = branch; break; } } 
branches.remove(defaultBranch); branches.add(0, defaultBranch); // walk through each branch for (RefModel branch : branches) { if (excludedBranches.contains(branch.getName())) { continue; } String branchName = branch.getName(); RevWalk revWalk = new RevWalk(reader); RevCommit tip = revWalk.parseCommit(branch.getObjectId()); String tipId = tip.getId().getName(); String keyName = getBranchKey(branchName); config.setString(CONF_ALIAS, null, keyName, branchName); config.setString(CONF_BRANCH, null, keyName, tipId); // index the blob contents of the tree TreeWalk treeWalk = new TreeWalk(repository); treeWalk.addTree(tip.getTree()); treeWalk.setRecursive(true); Map paths = new TreeMap(); while (treeWalk.next()) { paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0)); } ByteArrayOutputStream os = new ByteArrayOutputStream(); byte[] tmp = new byte[32767]; RevWalk commitWalk = new RevWalk(reader); commitWalk.markStart(tip); RevCommit commit; while ((paths.size() > 0) && (commit = commitWalk.next()) != null) { TreeWalk diffWalk = new TreeWalk(reader); int parentCount = commit.getParentCount(); switch (parentCount) { case 0: diffWalk.addTree(new EmptyTreeIterator()); break; case 1: diffWalk.addTree(getTree(commitWalk, commit.getParent(0))); break; default: // skip merge commits continue; } diffWalk.addTree(getTree(commitWalk, commit)); diffWalk.setFilter(ANY_DIFF); diffWalk.setRecursive(true); while ((paths.size() > 0) && diffWalk.next()) { String path = diffWalk.getPathString(); if (!paths.containsKey(path)) { continue; } // remove path from set ObjectId blobId = paths.remove(path); result.blobCount++; // index the blob metadata String blobAuthor = getAuthor(commit); String blobCommitter = getCommitter(commit); String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new 
Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO)); doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED)); // determine extension to compare to the extension // blacklist String ext = null; String name = path.toLowerCase(); if (name.indexOf('.') > -1) { ext = name.substring(name.lastIndexOf('.') + 1); } // index the blob content if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); InputStream in = ldr.openStream(); int n; while ((n = in.read(tmp)) > 0) { os.write(tmp, 0, n); } in.close(); byte[] content = os.toByteArray(); String str = new String(content, Constants.CHARACTER_ENCODING); doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); os.reset(); } // add the blob to the index writer.addDocument(doc); } } os.close(); // index the tip commit object if (indexedCommits.add(tipId)) { Document doc = createDocument(tip, tags.get(tipId)); doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; result.branchCount += 1; } // traverse the log and index the previous commit objects RevWalk historyWalk = new RevWalk(reader); historyWalk.markStart(historyWalk.parseCommit(tip.getId())); RevCommit rev; while ((rev = historyWalk.next()) != null) { String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); 
doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; } } } // finished reader.release(); // this repository has a gb-issues branch, index all issues if (IssueUtils.getIssuesBranch(repository) != null) { List issues = IssueUtils.getIssues(repository, null); if (issues.size() > 0) { result.branchCount += 1; } for (IssueModel issue : issues) { result.issueCount++; Document doc = createDocument(issue); doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } } // commit all changes and reset the searcher config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); config.save(); resetIndexSearcher(repository); writer.commit(); result.success = true; } catch (Exception e) { e.printStackTrace(); } return result; } /** * Get the tree associated with the given commit. * * @param walk * @param commit * @return tree * @throws IOException */ protected static RevTree getTree(final RevWalk walk, final RevCommit commit) throws IOException { final RevTree tree = commit.getTree(); if (tree != null) { return tree; } walk.parseHeaders(commit); return commit.getTree(); } /** * Incrementally update the index with the specified commit for the * repository. * * @param repositoryName * @param repository * @param branch * the fully qualified branch name (e.g. 
refs/heads/master) * @param commit * @return true, if successful */ private static IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) { IndexResult result = new IndexResult(); try { if (excludedBranches.contains(branch)) { if (IssueUtils.GB_ISSUES.equals(branch)) { // index an issue String issueId = commit.getShortMessage().substring(2).trim(); IssueModel issue = IssueUtils.getIssue(repository, issueId); if (issue == null) { // issue was deleted, remove from index IndexWriter writer = getIndexWriter(repository, false); writer.deleteDocuments( new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term( FIELD_ISSUE, issueId)); writer.commit(); result.success = true; return result; } result.success = index(repositoryName, repository, issue); result.issueCount++; return result; } return result; } List changedPaths = JGitUtils.getFilesInCommit(repository, commit); String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); IndexWriter writer = getIndexWriter(repository, false); for (PathChangeModel path : changedPaths) { // delete the indexed blob writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()), new Term(FIELD_BRANCH, branch), new Term(FIELD_PATH, path.path)); // re-index the blob if (!ChangeType.DELETE.equals(path.changeType)) { result.blobCount++; Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO)); doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), 
Store.YES, Index.ANALYZED)); // determine extension to compare to the extension // blacklist String ext = null; String name = path.name.toLowerCase(); if (name.indexOf('.') > -1) { ext = name.substring(name.lastIndexOf('.') + 1); } if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { // read the blob content String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path); doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); writer.addDocument(doc); } } } writer.commit(); Document doc = createDocument(commit, null); result.commitCount++; result.success = index(repositoryName, repository, doc); } catch (Exception e) { e.printStackTrace(); } return result; } /** * Incrementally update the index with the specified issue for the * repository. * * @param repository * @param issue * @return true, if successful */ public static boolean index(String repositoryName, Repository repository, IssueModel issue) { try { // delete the old issue from the index, if exists IndexWriter writer = getIndexWriter(repository, false); writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term( FIELD_ISSUE, String.valueOf(issue.id))); writer.commit(); Document doc = createDocument(issue); return index(repositoryName, repository, doc); } catch (Exception e) { e.printStackTrace(); } return false; } /** * Updates a repository index incrementally from the last indexed commits. 
*
	 * For each local branch, the commits after the tip recorded in the Lucene
	 * config file (or all commits, if no tip is recorded) are indexed oldest
	 * first and the config is updated and saved. Documents of branches that
	 * are recorded in the config but no longer exist are deleted afterwards.
	 * 
	 * @param repositoryName
	 * @param repository
	 * @return IndexResult
	 */
	public static IndexResult updateIndex(String repositoryName, Repository repository) {
		IndexResult result = new IndexResult();
		try {
			FileBasedConfig config = getConfig(repository);
			config.load();

			// build a quick lookup of annotated tags
			// NOTE(review): the generic type arguments on this line look
			// damaged. the map is also not read again within this method.
			Map> tags = new HashMap>();
			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
				if (!tag.isAnnotatedTag()) {
					// skip non-annotated tags
					continue;
				}
				// NOTE(review): the key tested here (tag.getObjectId()) is not
				// the key stored below (referenced object id name) -- confirm
				if (!tags.containsKey(tag.getObjectId())) {
					tags.put(tag.getReferencedObjectId().getName(), new ArrayList());
				}
				tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
			}

			// detect branch deletion
			// first assume all branches are deleted and then remove each
			// existing branch from deletedBranches during indexing
			Set deletedBranches = new TreeSet();
			for (String alias : config.getNames(CONF_ALIAS)) {
				String branch = config.getString(CONF_ALIAS, null, alias);
				deletedBranches.add(branch);
			}

			// walk through each branches
			List branches = JGitUtils.getLocalBranches(repository, true, -1);
			for (RefModel branch : branches) {
				String branchName = branch.getName();

				// remove this branch from the deletedBranches set
				deletedBranches.remove(branchName);

				// determine last commit
				String keyName = getBranchKey(branchName);
				String lastCommit = config.getString(CONF_BRANCH, null, keyName);

				List revs;
				if (StringUtils.isEmpty(lastCommit)) {
					// new branch/unindexed branch, get all commits on branch
					revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
				} else {
					// pre-existing branch, get changes since last commit
					revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
				}

				if (revs.size() > 0) {
					result.branchCount += 1;
				}

				// reverse the list of commits so we start with the first commit
				Collections.reverse(revs);
				for (RevCommit commit : revs) {
					result.add(index(repositoryName, repository, branchName, commit));
				}

				// update the config
				config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
				config.setString(CONF_ALIAS, null, keyName, branchName);
				config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
				config.save();
			}

			// the deletedBranches set will normally be empty by this point
			// unless a branch really was deleted and no longer exists
			if (deletedBranches.size() > 0) {
				for (String branch : deletedBranches) {
					IndexWriter writer = getIndexWriter(repository, false);
					// NOTE(review): the branch field is indexed as ANALYZED
					// elsewhere in this file; confirm that a Term holding the
					// full branch name matches any document
					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
					writer.commit();
				}
			}
			result.success = true;
		} catch (Throwable t) {
			t.printStackTrace();
		}
		return result;
	}

	/**
	 * Creates a Lucene document from an issue. The repository field is not
	 * set here; callers add it.
	 * 
	 * @param issue
	 * @return a Lucene document
	 */
	private static Document createDocument(IssueModel issue) {
		Document doc = new Document();
		doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
				Field.Index.NOT_ANALYZED));
		doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
				Store.YES, Field.Index.NO));
		doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
		// attachment names are stored lower-cased as one flattened string
		List attachments = new ArrayList();
		for (Attachment attachment : issue.getAttachments()) {
			attachments.add(attachment.name.toLowerCase());
		}
		doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
				Index.ANALYZED));
		doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
				Index.ANALYZED));
		return doc;
	}

	/**
	 * Creates a Lucene document for a commit. The repository and branch
	 * fields are not set here.
	 * 
	 * @param commit
	 * @param tags
	 *            the tag names to store with the commit; the tag field is
	 *            omitted when ArrayUtils.isEmpty(tags) is true
	 * @return a Lucene document
	 */
	private static Document createDocument(RevCommit commit, List tags) {
		Document doc = new Document();
		doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,
				Index.NOT_ANALYZED));
		doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
				Resolution.MINUTE), Store.YES, Index.NO));
		doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
		if (!ArrayUtils.isEmpty(tags)) {
			doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES,
					Index.ANALYZED));
		}
		return doc;
	}

	/**
	 * Incrementally index an object for the repository. The repository field
	 * is added to the document, the document is added to the index, the
	 * cached searcher is reset and the writer is committed.
	 * 
	 * @param repositoryName
	 * @param repository
	 * @param doc
	 * @return true, if successful
	 */
	private static boolean index(String repositoryName, Repository repository, Document doc) {
		try {
			// NOTE(review): reindex() stores this field as ANALYZED, here it
			// is NOT_ANALYZED -- confirm which one is intended
			doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
			IndexWriter writer = getIndexWriter(repository, false);
			writer.addDocument(doc);
			resetIndexSearcher(repository);
			writer.commit();
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Creates a search result from the stored fields of a document.
	 * 
	 * @param doc
	 *            the matching document
	 * @param score
	 *            the score of the hit
	 * @return a search result
	 * @throws ParseException
	 *             declared for the conversion of the stored date field
	 */
	private static SearchResult createSearchResult(Document doc, float score)
			throws ParseException {
		SearchResult result = new SearchResult();
		result.score = score;
		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
		result.summary = doc.get(FIELD_SUMMARY);
		result.author = doc.get(FIELD_AUTHOR);
		result.committer = doc.get(FIELD_COMMITTER);
		result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
		result.repository = doc.get(FIELD_REPOSITORY);
		result.branch = doc.get(FIELD_BRANCH);
		result.commitId = doc.get(FIELD_COMMIT);
		result.issueId = doc.get(FIELD_ISSUE);
		result.path = doc.get(FIELD_PATH);
		// tags and labels are optional fields
		if (doc.get(FIELD_TAG) != null) {
			result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
		}
		if (doc.get(FIELD_LABEL) != null) {
			result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
		}
		return result;
	}

	/**
	 * Removes the cached searcher of the repository, if any, and closes it,
	 * so that the next call to getIndexSearcher() opens a new one.
	 * 
	 * @param repository
	 * @throws IOException
	 */
	private static void resetIndexSearcher(Repository repository) throws IOException {
		IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());
		if (searcher != null) {
			SEARCHERS.remove(repository.getDirectory());
			// NOTE(review): the searcher was constructed from an IndexReader
			// (see getIndexSearcher); confirm whether close() also closes
			// that reader
			searcher.close();
		}
	}

	/**
	 * Gets an index searcher for the repository. The searcher is cached by
	 * repository directory and created on first use from a reader opened on
	 * the repository's index writer.
	 * 
	 * @param repository
	 * @return the cached or newly created searcher
	 * @throws IOException
	 */
	private static IndexSearcher getIndexSearcher(Repository repository) throws IOException {
		IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());
		if (searcher == null) {
			IndexWriter writer = getIndexWriter(repository, false);
			searcher = new IndexSearcher(IndexReader.open(writer, true));
			SEARCHERS.put(repository.getDirectory(), searcher);
		}
		return searcher;
	}

	/**
	 * Gets an index writer for the repository. The index will be created if it
	 * does not already exist or if forceCreate is specified.
	 * 
	 * @param repository
	 * @param forceCreate
	 * @return an IndexWriter
	 * @throws IOException
	 */
	private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate)
			throws IOException {
		IndexWriter indexWriter = WRITERS.get(repository.getDirectory());
		File indexFolder = new File(repository.getDirectory(), LUCENE_DIR);
		Directory directory = FSDirectory.open(indexFolder);
		if (forceCreate || !indexFolder.exists()) {
			// if the writer is going to blow away the existing index and create
			// a new one then it should not be cached. instead, close any open
			// writer, create a new one, and return.
			if (indexWriter != null) {
				indexWriter.close();
				indexWriter = null;
				WRITERS.remove(repository.getDirectory());
			}
			indexFolder.mkdirs();
			// open once in CREATE mode and close again; the writer that is
			// returned is opened in APPEND mode below
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(
					LUCENE_VERSION));
			config.setOpenMode(OpenMode.CREATE);
			IndexWriter writer = new IndexWriter(directory, config);
			writer.close();
		}

		if (indexWriter == null) {
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(
					LUCENE_VERSION));
			config.setOpenMode(OpenMode.APPEND);
			indexWriter = new IndexWriter(directory, config);
			WRITERS.put(repository.getDirectory(), indexWriter);
		}
		return indexWriter;
	}

	/**
	 * Searches the specified repositories for the given text or query. The
	 * text is parsed once against the summary field and once against the
	 * content field; a match in either field is a hit. Exceptions are printed
	 * and the results collected so far are returned.
	 * 
	 * @param text
	 *            if the text is null or empty, null is returned
	 * @param maximumHits
	 *            the maximum number of hits to collect
	 * @param repositories
	 *            a list of repositories to search. if no repositories are
	 *            specified null is returned.
	 * @return a list of SearchResults in order from highest to the lowest score
	 * 
	 */
	public static List search(String text, int maximumHits, Repository... repositories) {
		if (StringUtils.isEmpty(text)) {
			return null;
		}
		if (repositories.length == 0) {
			return null;
		}
		Set results = new LinkedHashSet();
		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
		try {
			// default search checks summary and content
			BooleanQuery query = new BooleanQuery();
			QueryParser qp;
			qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			IndexSearcher searcher;
			if (repositories.length == 1) {
				// single repository search
				searcher = getIndexSearcher(repositories[0]);
			} else {
				// multiple repository search
				List readers = new ArrayList();
				for (Repository repository : repositories) {
					IndexSearcher repositoryIndex = getIndexSearcher(repository);
					readers.add(repositoryIndex.getIndexReader());
				}
				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
				// NOTE(review): this MultiReader and its searcher are neither
				// cached nor closed in this method
				MultiReader reader = new MultiReader(rdrs);
				searcher = new IndexSearcher(reader);
			}
			Query rewrittenQuery = searcher.rewrite(query);
			TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
			searcher.search(rewrittenQuery, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				int docId = hits[i].doc;
				Document doc = searcher.doc(docId);
				SearchResult result = createSearchResult(doc, hits[i].score);
				String content = doc.get(FIELD_CONTENT);
				result.fragment = getHighlightedFragment(analyzer, query, content, result);
				results.add(result);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return new ArrayList(results);
	}

	/**
	 * Builds the highlighted fragment text for a search result from the
	 * stored content of the matching document.
	 * <p>
	 * NOTE(review): several string literals in this method and in
	 * getPreTag() look damaged (empty delimiters, literals spanning lines,
	 * termTagEnd is used but no declaration is visible). They are left
	 * untouched here; restore them from the project history.
	 * 
	 * @param analyzer
	 * @param query
	 * @param content
	 *            the stored content, may be null
	 * @param result
	 *            the search result the fragment belongs to
	 * @return the fragment text
	 * @throws IOException
	 * @throws InvalidTokenOffsetsException
	 */
	private static String getHighlightedFragment(Analyzer analyzer, Query query,
			String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
		content = content == null ? "":StringUtils.escapeForHtml(content, false);

		TokenStream stream = TokenSources.getTokenStream("content", content, analyzer);
		QueryScorer scorer = new QueryScorer(query, "content");
		Fragmenter fragmenter;

		// commits get larger fragments than blobs and issues
		if (ObjectType.commit == result.type) {
			fragmenter = new SimpleSpanFragmenter(scorer, 1024);
		} else {
			fragmenter = new SimpleSpanFragmenter(scorer, 150);
		}

		// use an artificial delimiter for the token
		String termTag = "";
		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);

		Highlighter highlighter = new Highlighter(formatter, scorer);
		highlighter.setTextFragmenter(fragmenter);

		String [] fragments = highlighter.getBestFragments(stream, content, 5);
		if (ArrayUtils.isEmpty(fragments)) {
			// nothing to highlight: blobs get no fragment, everything else
			// gets the whole content
			if (ObjectType.blob == result.type) {
				return "";
			}
			return "
" + content + "
";
		}
		StringBuilder sb = new StringBuilder();
		for (int i = 0, len = fragments.length; i < len; i++) {
			String fragment = fragments[i];

			// resurrect the raw fragment from removing the artificial delimiters
			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
			sb.append(getPreTag(result, raw, content));

			// replace the artificial delimiter with html tags
			String html = fragment.replace(termTag, "").replace(termTagEnd, "");
			sb.append(html);
			sb.append("");
			if (i < len - 1) {
				sb.append("...
");
			}
		}
		return sb.toString();
	}

	/**
	 * Returns the opening markup that precedes a fragment. For blobs the
	 * markup is formatted with the line count of the content before the
	 * fragment and, if the path has one, the lower-cased file extension.
	 * 
	 * @param result
	 * @param fragment
	 *            the raw fragment text without delimiters
	 * @param content
	 *            the content the fragment was taken from
	 * @return the opening markup
	 */
	private static String getPreTag(SearchResult result, String fragment, String content) {
		String pre = "
";
		if (ObjectType.blob  == result.type) {
			// NOTE(review): indexOf returns -1 when the fragment is not found
			// in content, which makes substring(0, -1) throw -- confirm this
			// cannot happen
			int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment)));			
			int lastDot = result.path.lastIndexOf('.');
			if (lastDot > -1) {
				String ext = result.path.substring(lastDot + 1).toLowerCase();
				pre = MessageFormat.format("
", line, ext);	
			} else {
				pre = MessageFormat.format("
", line);
			}
		}
		return pre;
	}

	/**
	 * Closes all cached index searchers, their index readers and all cached
	 * index writers, and empties both caches.
	 * <p>
	 * The searchers are built from readers that were opened on the index
	 * writers, so the searchers and readers are closed before the writers. A
	 * failure to close one resource is printed and does not prevent the
	 * remaining resources from being closed.
	 */
	public static void close() {
		// close searchers and the readers they were constructed with
		for (IndexSearcher searcher : SEARCHERS.values()) {
			try {
				searcher.close();
			} catch (Throwable t) {
				t.printStackTrace();
			}
			try {
				// IndexSearcher.close() leaves a reader that was passed to
				// its constructor open, so it has to be closed explicitly
				searcher.getIndexReader().close();
			} catch (Throwable t) {
				t.printStackTrace();
			}
		}
		SEARCHERS.clear();

		// close writers, waiting for running merges to finish
		for (IndexWriter writer : WRITERS.values()) {
			try {
				writer.close(true);
			} catch (Throwable t) {
				t.printStackTrace();
			}
		}
		WRITERS.clear();
	}

	/**
	 * Outcome of an indexing run: a success flag plus counters for the
	 * branches, commits, blobs and issues that were processed.
	 */
	public static class IndexResult {
		/** set by the indexing methods; never changed by {@link #add} */
		public boolean success;

		/** number of branches counted */
		public int branchCount;

		/** number of commits counted */
		public int commitCount;

		/** number of blobs counted */
		public int blobCount;

		/** number of issues counted */
		public int issueCount;

		/**
		 * Adds the four counters of another result to the counters of this
		 * result. The success flag of either result is left untouched.
		 * 
		 * @param result
		 *            the result whose counters are added
		 */
		public void add(IndexResult result) {
			branchCount += result.branchCount;
			commitCount += result.commitCount;
			blobCount += result.blobCount;
			issueCount += result.issueCount;
		}
	}
}