You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

LuceneService.java 43KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271
  1. /*
  2. * Copyright 2012 gitblit.com.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package com.gitblit.service;
  17. import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
  18. import java.io.ByteArrayOutputStream;
  19. import java.io.File;
  20. import java.io.FileInputStream;
  21. import java.io.IOException;
  22. import java.io.InputStream;
  23. import java.text.MessageFormat;
  24. import java.text.ParseException;
  25. import java.util.ArrayList;
  26. import java.util.Collections;
  27. import java.util.Comparator;
  28. import java.util.HashMap;
  29. import java.util.LinkedHashSet;
  30. import java.util.List;
  31. import java.util.Map;
  32. import java.util.Set;
  33. import java.util.TreeMap;
  34. import java.util.TreeSet;
  35. import java.util.concurrent.ConcurrentHashMap;
  36. import org.apache.lucene.analysis.Analyzer;
  37. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  38. import org.apache.lucene.document.DateTools;
  39. import org.apache.lucene.document.DateTools.Resolution;
  40. import org.apache.lucene.document.Document;
  41. import org.apache.lucene.document.Field;
  42. import org.apache.lucene.document.StringField;
  43. import org.apache.lucene.document.TextField;
  44. import org.apache.lucene.index.DirectoryReader;
  45. import org.apache.lucene.index.IndexReader;
  46. import org.apache.lucene.index.IndexWriter;
  47. import org.apache.lucene.index.IndexWriterConfig;
  48. import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  49. import org.apache.lucene.index.MultiReader;
  50. import org.apache.lucene.index.Term;
  51. import org.apache.lucene.queryparser.classic.QueryParser;
  52. import org.apache.lucene.search.BooleanClause.Occur;
  53. import org.apache.lucene.search.BooleanQuery;
  54. import org.apache.lucene.search.IndexSearcher;
  55. import org.apache.lucene.search.Query;
  56. import org.apache.lucene.search.ScoreDoc;
  57. import org.apache.lucene.search.TopScoreDocCollector;
  58. import org.apache.lucene.search.highlight.Fragmenter;
  59. import org.apache.lucene.search.highlight.Highlighter;
  60. import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
  61. import org.apache.lucene.search.highlight.QueryScorer;
  62. import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
  63. import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
  64. import org.apache.lucene.store.Directory;
  65. import org.apache.lucene.store.FSDirectory;
  66. import org.apache.lucene.util.Version;
  67. import org.apache.tika.metadata.Metadata;
  68. import org.apache.tika.parser.AutoDetectParser;
  69. import org.apache.tika.parser.ParseContext;
  70. import org.apache.tika.parser.pdf.PDFParser;
  71. import org.apache.tika.sax.BodyContentHandler;
  72. import org.eclipse.jgit.diff.DiffEntry.ChangeType;
  73. import org.eclipse.jgit.lib.Constants;
  74. import org.eclipse.jgit.lib.FileMode;
  75. import org.eclipse.jgit.lib.ObjectId;
  76. import org.eclipse.jgit.lib.ObjectLoader;
  77. import org.eclipse.jgit.lib.ObjectReader;
  78. import org.eclipse.jgit.lib.Repository;
  79. import org.eclipse.jgit.lib.RepositoryCache.FileKey;
  80. import org.eclipse.jgit.revwalk.RevCommit;
  81. import org.eclipse.jgit.revwalk.RevTree;
  82. import org.eclipse.jgit.revwalk.RevWalk;
  83. import org.eclipse.jgit.storage.file.FileBasedConfig;
  84. import org.eclipse.jgit.treewalk.EmptyTreeIterator;
  85. import org.eclipse.jgit.treewalk.TreeWalk;
  86. import org.eclipse.jgit.util.FS;
  87. import org.slf4j.Logger;
  88. import org.slf4j.LoggerFactory;
  89. import com.gitblit.Constants.SearchObjectType;
  90. import com.gitblit.GitBlit;
  91. import com.gitblit.IStoredSettings;
  92. import com.gitblit.Keys;
  93. import com.gitblit.manager.FilestoreManager;
  94. import com.gitblit.manager.IFilestoreManager;
  95. import com.gitblit.manager.IRepositoryManager;
  96. import com.gitblit.models.PathModel.PathChangeModel;
  97. import com.gitblit.models.RefModel;
  98. import com.gitblit.models.RepositoryModel;
  99. import com.gitblit.models.SearchResult;
  100. import com.gitblit.utils.ArrayUtils;
  101. import com.gitblit.utils.JGitUtils;
  102. import com.gitblit.utils.StringUtils;
  103. /**
  104. * The Lucene service handles indexing and searching repositories.
  105. *
  106. * @author James Moger
  107. *
  108. */
  109. public class LuceneService implements Runnable {
    /** Index layout version; shouldReindex compares it with the version stored in the repository's Lucene config file. */
    private static final int INDEX_VERSION = 6;

    // Names of the fields written to each Lucene document.
    private static final String FIELD_OBJECT_TYPE = "type";
    private static final String FIELD_PATH = "path";
    private static final String FIELD_COMMIT = "commit";
    private static final String FIELD_BRANCH = "branch";
    private static final String FIELD_SUMMARY = "summary";
    private static final String FIELD_CONTENT = "content";
    private static final String FIELD_AUTHOR = "author";
    private static final String FIELD_COMMITTER = "committer";
    private static final String FIELD_DATE = "date";
    private static final String FIELD_TAG = "tag";

    /** Name of the per-repository config file, resolved against the repository directory. */
    private static final String CONF_FILE = "lucene.conf";
    /** Name of the per-repository index folder removed by deleteIndex. */
    private static final String LUCENE_DIR = "lucene";
    // Config section/key under which the index version is stored.
    private static final String CONF_INDEX = "index";
    private static final String CONF_VERSION = "version";
    // Config sections keyed by branch key: branch name (aliases) and indexed tip commit id (branches).
    private static final String CONF_ALIAS = "aliases";
    private static final String CONF_BRANCH = "branches";

    /** Lucene compatibility version handed to the analyzer and query parser. */
    private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;

    private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
    private final IStoredSettings storedSettings;
    private final IRepositoryManager repositoryManager;
    private final IFilestoreManager filestoreManager;
    private final File repositoriesFolder;

    // Open searchers and writers, keyed by repository name.
    private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
    private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();

    /** Default extension blacklist, used when Keys.web.luceneIgnoreExtensions is not configured. */
    private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
    /** Extensions whose blob content is not indexed; reloaded from settings on every run(). */
    private Set<String> excludedExtensions;
  137. public LuceneService(
  138. IStoredSettings settings,
  139. IRepositoryManager repositoryManager,
  140. IFilestoreManager filestoreManager) {
  141. this.storedSettings = settings;
  142. this.repositoryManager = repositoryManager;
  143. this.filestoreManager = filestoreManager;
  144. this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
  145. String exts = luceneIgnoreExtensions;
  146. if (settings != null) {
  147. exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
  148. }
  149. excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
  150. }
  151. /**
  152. * Run is executed by the Gitblit executor service. Because this is called
  153. * by an executor service, calls will queue - i.e. there can never be
  154. * concurrent execution of repository index updates.
  155. */
  156. @Override
  157. public void run() {
  158. if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
  159. // Lucene indexing is disabled
  160. return;
  161. }
  162. // reload the excluded extensions
  163. String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
  164. excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
  165. if (repositoryManager.isCollectingGarbage()) {
  166. // busy collecting garbage, try again later
  167. return;
  168. }
  169. for (String repositoryName: repositoryManager.getRepositoryList()) {
  170. RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
  171. if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
  172. Repository repository = repositoryManager.getRepository(model.name);
  173. if (repository == null) {
  174. if (repositoryManager.isCollectingGarbage(model.name)) {
  175. logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
  176. }
  177. continue;
  178. }
  179. index(model, repository);
  180. repository.close();
  181. System.gc();
  182. }
  183. }
  184. }
  185. /**
  186. * Synchronously indexes a repository. This may build a complete index of a
  187. * repository or it may update an existing index.
  188. *
  189. * @param displayName
  190. * the name of the repository
  191. * @param repository
  192. * the repository object
  193. */
  194. private void index(RepositoryModel model, Repository repository) {
  195. try {
  196. if (shouldReindex(repository)) {
  197. // (re)build the entire index
  198. IndexResult result = reindex(model, repository);
  199. if (result.success) {
  200. if (result.commitCount > 0) {
  201. String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
  202. logger.info(MessageFormat.format(msg, model.name, result.commitCount,
  203. result.blobCount, result.branchCount, result.duration()));
  204. }
  205. } else {
  206. String msg = "Could not build {0} Lucene index!";
  207. logger.error(MessageFormat.format(msg, model.name));
  208. }
  209. } else {
  210. // update the index with latest commits
  211. IndexResult result = updateIndex(model, repository);
  212. if (result.success) {
  213. if (result.commitCount > 0) {
  214. String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
  215. logger.info(MessageFormat.format(msg, model.name, result.commitCount,
  216. result.blobCount, result.branchCount, result.duration()));
  217. }
  218. } else {
  219. String msg = "Could not update {0} Lucene index!";
  220. logger.error(MessageFormat.format(msg, model.name));
  221. }
  222. }
  223. } catch (Throwable t) {
  224. logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
  225. }
  226. }
  227. /**
  228. * Close the writer/searcher objects for a repository.
  229. *
  230. * @param repositoryName
  231. */
  232. public synchronized void close(String repositoryName) {
  233. try {
  234. IndexSearcher searcher = searchers.remove(repositoryName);
  235. if (searcher != null) {
  236. searcher.getIndexReader().close();
  237. }
  238. } catch (Exception e) {
  239. logger.error("Failed to close index searcher for " + repositoryName, e);
  240. }
  241. try {
  242. IndexWriter writer = writers.remove(repositoryName);
  243. if (writer != null) {
  244. writer.close();
  245. }
  246. } catch (Exception e) {
  247. logger.error("Failed to close index writer for " + repositoryName, e);
  248. }
  249. }
  250. /**
  251. * Close all Lucene indexers.
  252. *
  253. */
  254. public synchronized void close() {
  255. // close all writers
  256. for (String writer : writers.keySet()) {
  257. try {
  258. writers.get(writer).close(true);
  259. } catch (Throwable t) {
  260. logger.error("Failed to close Lucene writer for " + writer, t);
  261. }
  262. }
  263. writers.clear();
  264. // close all searchers
  265. for (String searcher : searchers.keySet()) {
  266. try {
  267. searchers.get(searcher).getIndexReader().close();
  268. } catch (Throwable t) {
  269. logger.error("Failed to close Lucene searcher for " + searcher, t);
  270. }
  271. }
  272. searchers.clear();
  273. }
  274. /**
  275. * Deletes the Lucene index for the specified repository.
  276. *
  277. * @param repositoryName
  278. * @return true, if successful
  279. */
  280. public boolean deleteIndex(String repositoryName) {
  281. try {
  282. // close any open writer/searcher
  283. close(repositoryName);
  284. // delete the index folder
  285. File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
  286. File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
  287. if (luceneIndex.exists()) {
  288. org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
  289. org.eclipse.jgit.util.FileUtils.RECURSIVE);
  290. }
  291. // delete the config file
  292. File luceneConfig = new File(repositoryFolder, CONF_FILE);
  293. if (luceneConfig.exists()) {
  294. luceneConfig.delete();
  295. }
  296. return true;
  297. } catch (IOException e) {
  298. throw new RuntimeException(e);
  299. }
  300. }
  301. /**
  302. * Returns the author for the commit, if this information is available.
  303. *
  304. * @param commit
  305. * @return an author or unknown
  306. */
  307. private String getAuthor(RevCommit commit) {
  308. String name = "unknown";
  309. try {
  310. name = commit.getAuthorIdent().getName();
  311. if (StringUtils.isEmpty(name)) {
  312. name = commit.getAuthorIdent().getEmailAddress();
  313. }
  314. } catch (NullPointerException n) {
  315. }
  316. return name;
  317. }
  318. /**
  319. * Returns the committer for the commit, if this information is available.
  320. *
  321. * @param commit
  322. * @return an committer or unknown
  323. */
  324. private String getCommitter(RevCommit commit) {
  325. String name = "unknown";
  326. try {
  327. name = commit.getCommitterIdent().getName();
  328. if (StringUtils.isEmpty(name)) {
  329. name = commit.getCommitterIdent().getEmailAddress();
  330. }
  331. } catch (NullPointerException n) {
  332. }
  333. return name;
  334. }
  335. /**
  336. * Get the tree associated with the given commit.
  337. *
  338. * @param walk
  339. * @param commit
  340. * @return tree
  341. * @throws IOException
  342. */
  343. private RevTree getTree(final RevWalk walk, final RevCommit commit)
  344. throws IOException {
  345. final RevTree tree = commit.getTree();
  346. if (tree != null) {
  347. return tree;
  348. }
  349. walk.parseHeaders(commit);
  350. return commit.getTree();
  351. }
  352. /**
  353. * Construct a keyname from the branch.
  354. *
  355. * @param branchName
  356. * @return a keyname appropriate for the Git config file format
  357. */
  358. private String getBranchKey(String branchName) {
  359. return StringUtils.getSHA1(branchName);
  360. }
  361. /**
  362. * Returns the Lucene configuration for the specified repository.
  363. *
  364. * @param repository
  365. * @return a config object
  366. */
  367. private FileBasedConfig getConfig(Repository repository) {
  368. File file = new File(repository.getDirectory(), CONF_FILE);
  369. FileBasedConfig config = new FileBasedConfig(file, FS.detect());
  370. return config;
  371. }
  372. /**
  373. * Reads the Lucene config file for the repository to check the index
  374. * version. If the index version is different, then rebuild the repository
  375. * index.
  376. *
  377. * @param repository
  378. * @return true of the on-disk index format is different than INDEX_VERSION
  379. */
  380. private boolean shouldReindex(Repository repository) {
  381. try {
  382. FileBasedConfig config = getConfig(repository);
  383. config.load();
  384. int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
  385. // reindex if versions do not match
  386. return indexVersion != INDEX_VERSION;
  387. } catch (Throwable t) {
  388. }
  389. return true;
  390. }
  391. /**
  392. * This completely indexes the repository and will destroy any existing
  393. * index.
  394. *
  395. * @param repositoryName
  396. * @param repository
  397. * @return IndexResult
  398. */
  399. public IndexResult reindex(RepositoryModel model, Repository repository) {
  400. IndexResult result = new IndexResult();
  401. if (!deleteIndex(model.name)) {
  402. return result;
  403. }
  404. try {
  405. String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
  406. FileBasedConfig config = getConfig(repository);
  407. Set<String> indexedCommits = new TreeSet<String>();
  408. IndexWriter writer = getIndexWriter(model.name);
  409. // build a quick lookup of tags
  410. Map<String, List<String>> tags = new HashMap<String, List<String>>();
  411. for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
  412. if (!tag.isAnnotatedTag()) {
  413. // skip non-annotated tags
  414. continue;
  415. }
  416. if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
  417. tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
  418. }
  419. tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
  420. }
  421. ObjectReader reader = repository.newObjectReader();
  422. // get the local branches
  423. List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
  424. // sort them by most recently updated
  425. Collections.sort(branches, new Comparator<RefModel>() {
  426. @Override
  427. public int compare(RefModel ref1, RefModel ref2) {
  428. return ref2.getDate().compareTo(ref1.getDate());
  429. }
  430. });
  431. // reorder default branch to first position
  432. RefModel defaultBranch = null;
  433. ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
  434. for (RefModel branch : branches) {
  435. if (branch.getObjectId().equals(defaultBranchId)) {
  436. defaultBranch = branch;
  437. break;
  438. }
  439. }
  440. branches.remove(defaultBranch);
  441. branches.add(0, defaultBranch);
  442. // walk through each branch
  443. for (RefModel branch : branches) {
  444. boolean indexBranch = false;
  445. if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
  446. && branch.equals(defaultBranch)) {
  447. // indexing "default" branch
  448. indexBranch = true;
  449. } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
  450. // skip internal meta branches
  451. indexBranch = false;
  452. } else {
  453. // normal explicit branch check
  454. indexBranch = model.indexedBranches.contains(branch.getName());
  455. }
  456. // if this branch is not specifically indexed then skip
  457. if (!indexBranch) {
  458. continue;
  459. }
  460. String branchName = branch.getName();
  461. RevWalk revWalk = new RevWalk(reader);
  462. RevCommit tip = revWalk.parseCommit(branch.getObjectId());
  463. String tipId = tip.getId().getName();
  464. String keyName = getBranchKey(branchName);
  465. config.setString(CONF_ALIAS, null, keyName, branchName);
  466. config.setString(CONF_BRANCH, null, keyName, tipId);
  467. // index the blob contents of the tree
  468. TreeWalk treeWalk = new TreeWalk(repository);
  469. treeWalk.addTree(tip.getTree());
  470. treeWalk.setRecursive(true);
  471. Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
  472. while (treeWalk.next()) {
  473. // ensure path is not in a submodule
  474. if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
  475. paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
  476. }
  477. }
  478. ByteArrayOutputStream os = new ByteArrayOutputStream();
  479. byte[] tmp = new byte[32767];
  480. RevWalk commitWalk = new RevWalk(reader);
  481. commitWalk.markStart(tip);
  482. RevCommit commit;
  483. while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
  484. TreeWalk diffWalk = new TreeWalk(reader);
  485. int parentCount = commit.getParentCount();
  486. switch (parentCount) {
  487. case 0:
  488. diffWalk.addTree(new EmptyTreeIterator());
  489. break;
  490. case 1:
  491. diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
  492. break;
  493. default:
  494. // skip merge commits
  495. continue;
  496. }
  497. diffWalk.addTree(getTree(commitWalk, commit));
  498. diffWalk.setFilter(ANY_DIFF);
  499. diffWalk.setRecursive(true);
  500. while ((paths.size() > 0) && diffWalk.next()) {
  501. String path = diffWalk.getPathString();
  502. if (!paths.containsKey(path)) {
  503. continue;
  504. }
  505. //TODO: Figure out filestore oid the path - bit more involved than updating the index
  506. // remove path from set
  507. ObjectId blobId = paths.remove(path);
  508. result.blobCount++;
  509. // index the blob metadata
  510. String blobAuthor = getAuthor(commit);
  511. String blobCommitter = getCommitter(commit);
  512. String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
  513. Resolution.MINUTE);
  514. Document doc = new Document();
  515. doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
  516. doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
  517. doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
  518. doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
  519. doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
  520. doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
  521. doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
  522. // determine extension to compare to the extension
  523. // blacklist
  524. String ext = null;
  525. String name = path.toLowerCase();
  526. if (name.indexOf('.') > -1) {
  527. ext = name.substring(name.lastIndexOf('.') + 1);
  528. }
  529. // index the blob content
  530. if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
  531. ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
  532. InputStream in = ldr.openStream();
  533. int n;
  534. while ((n = in.read(tmp)) > 0) {
  535. os.write(tmp, 0, n);
  536. }
  537. in.close();
  538. byte[] content = os.toByteArray();
  539. String str = StringUtils.decodeString(content, encodings);
  540. doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
  541. os.reset();
  542. }
  543. // add the blob to the index
  544. writer.addDocument(doc);
  545. }
  546. }
  547. os.close();
  548. // index the tip commit object
  549. if (indexedCommits.add(tipId)) {
  550. Document doc = createDocument(tip, tags.get(tipId));
  551. doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
  552. writer.addDocument(doc);
  553. result.commitCount += 1;
  554. result.branchCount += 1;
  555. }
  556. // traverse the log and index the previous commit objects
  557. RevWalk historyWalk = new RevWalk(reader);
  558. historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
  559. RevCommit rev;
  560. while ((rev = historyWalk.next()) != null) {
  561. String hash = rev.getId().getName();
  562. if (indexedCommits.add(hash)) {
  563. Document doc = createDocument(rev, tags.get(hash));
  564. doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
  565. writer.addDocument(doc);
  566. result.commitCount += 1;
  567. }
  568. }
  569. }
  570. // finished
  571. reader.close();
  572. // commit all changes and reset the searcher
  573. config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
  574. config.save();
  575. writer.commit();
  576. resetIndexSearcher(model.name);
  577. result.success();
  578. } catch (Exception e) {
  579. logger.error("Exception while reindexing " + model.name, e);
  580. }
  581. return result;
  582. }
  583. /**
  584. * Incrementally update the index with the specified commit for the
  585. * repository.
  586. *
  587. * @param repositoryName
  588. * @param repository
  589. * @param branch
  590. * the fully qualified branch name (e.g. refs/heads/master)
  591. * @param commit
  592. * @return true, if successful
  593. */
  594. private IndexResult index(String repositoryName, Repository repository,
  595. String branch, RevCommit commit) {
  596. IndexResult result = new IndexResult();
  597. try {
  598. String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
  599. List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
  600. String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
  601. Resolution.MINUTE);
  602. IndexWriter writer = getIndexWriter(repositoryName);
  603. for (PathChangeModel path : changedPaths) {
  604. if (path.isSubmodule()) {
  605. continue;
  606. }
  607. // delete the indexed blob
  608. deleteBlob(repositoryName, branch, path.name);
  609. // re-index the blob
  610. if (!ChangeType.DELETE.equals(path.changeType)) {
  611. result.blobCount++;
  612. Document doc = new Document();
  613. doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
  614. doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
  615. doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
  616. doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
  617. doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
  618. doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
  619. doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
  620. // determine extension to compare to the extension
  621. // blacklist
  622. String ext = null;
  623. String name = path.name.toLowerCase();
  624. if (name.indexOf('.') > -1) {
  625. ext = name.substring(name.lastIndexOf('.') + 1);
  626. }
  627. if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
  628. String str = "";
  629. // read the blob content
  630. if (path.isFilestoreItem()) {
  631. //Get file from filestore
  632. BodyContentHandler handler = new BodyContentHandler();
  633. Metadata metadata = new Metadata();
  634. PDFParser parser = new PDFParser();
  635. ParseContext parseContext = new ParseContext();
  636. File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
  637. FileInputStream inputstream = new FileInputStream(lfsFile);
  638. parser.parse(inputstream, handler, metadata, parseContext);
  639. str = handler.toString();
  640. } else {
  641. str = JGitUtils.getStringContent(repository, commit.getTree(),
  642. path.path, encodings);
  643. }
  644. if (str != null) {
  645. doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
  646. writer.addDocument(doc);
  647. }
  648. }
  649. }
  650. }
  651. writer.commit();
  652. // get any annotated commit tags
  653. List<String> commitTags = new ArrayList<String>();
  654. for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
  655. if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
  656. commitTags.add(ref.displayName);
  657. }
  658. }
  659. // create and write the Lucene document
  660. Document doc = createDocument(commit, commitTags);
  661. doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
  662. result.commitCount++;
  663. result.success = index(repositoryName, doc);
  664. } catch (Exception e) {
  665. logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
  666. }
  667. return result;
  668. }
  669. /**
  670. * Delete a blob from the specified branch of the repository index.
  671. *
  672. * @param repositoryName
  673. * @param branch
  674. * @param path
  675. * @throws Exception
  676. * @return true, if deleted, false if no record was deleted
  677. */
  678. public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
  679. String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
  680. String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
  681. BooleanQuery query = new BooleanQuery();
  682. StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
  683. QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
  684. query.add(qp.parse(q), Occur.MUST);
  685. IndexWriter writer = getIndexWriter(repositoryName);
  686. int numDocsBefore = writer.numDocs();
  687. writer.deleteDocuments(query);
  688. writer.commit();
  689. int numDocsAfter = writer.numDocs();
  690. if (numDocsBefore == numDocsAfter) {
  691. logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
  692. return false;
  693. } else {
  694. logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
  695. return true;
  696. }
  697. }
  698. /**
  699. * Updates a repository index incrementally from the last indexed commits.
  700. *
  701. * @param model
  702. * @param repository
  703. * @return IndexResult
  704. */
  705. private IndexResult updateIndex(RepositoryModel model, Repository repository) {
  706. IndexResult result = new IndexResult();
  707. try {
  708. FileBasedConfig config = getConfig(repository);
  709. config.load();
  710. // build a quick lookup of annotated tags
  711. Map<String, List<String>> tags = new HashMap<String, List<String>>();
  712. for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
  713. if (!tag.isAnnotatedTag()) {
  714. // skip non-annotated tags
  715. continue;
  716. }
  717. if (!tags.containsKey(tag.getObjectId().getName())) {
  718. tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
  719. }
  720. tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
  721. }
  722. // detect branch deletion
  723. // first assume all branches are deleted and then remove each
  724. // existing branch from deletedBranches during indexing
  725. Set<String> deletedBranches = new TreeSet<String>();
  726. for (String alias : config.getNames(CONF_ALIAS)) {
  727. String branch = config.getString(CONF_ALIAS, null, alias);
  728. deletedBranches.add(branch);
  729. }
  730. // get the local branches
  731. List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
  732. // sort them by most recently updated
  733. Collections.sort(branches, new Comparator<RefModel>() {
  734. @Override
  735. public int compare(RefModel ref1, RefModel ref2) {
  736. return ref2.getDate().compareTo(ref1.getDate());
  737. }
  738. });
  739. // reorder default branch to first position
  740. RefModel defaultBranch = null;
  741. ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
  742. for (RefModel branch : branches) {
  743. if (branch.getObjectId().equals(defaultBranchId)) {
  744. defaultBranch = branch;
  745. break;
  746. }
  747. }
  748. branches.remove(defaultBranch);
  749. branches.add(0, defaultBranch);
  750. // walk through each branches
  751. for (RefModel branch : branches) {
  752. String branchName = branch.getName();
  753. boolean indexBranch = false;
  754. if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
  755. && branch.equals(defaultBranch)) {
  756. // indexing "default" branch
  757. indexBranch = true;
  758. } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
  759. // ignore internal meta branches
  760. indexBranch = false;
  761. } else {
  762. // normal explicit branch check
  763. indexBranch = model.indexedBranches.contains(branch.getName());
  764. }
  765. // if this branch is not specifically indexed then skip
  766. if (!indexBranch) {
  767. continue;
  768. }
  769. // remove this branch from the deletedBranches set
  770. deletedBranches.remove(branchName);
  771. // determine last commit
  772. String keyName = getBranchKey(branchName);
  773. String lastCommit = config.getString(CONF_BRANCH, null, keyName);
  774. List<RevCommit> revs;
  775. if (StringUtils.isEmpty(lastCommit)) {
  776. // new branch/unindexed branch, get all commits on branch
  777. revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
  778. } else {
  779. // pre-existing branch, get changes since last commit
  780. revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
  781. }
  782. if (revs.size() > 0) {
  783. result.branchCount += 1;
  784. }
  785. // reverse the list of commits so we start with the first commit
  786. Collections.reverse(revs);
  787. for (RevCommit commit : revs) {
  788. // index a commit
  789. result.add(index(model.name, repository, branchName, commit));
  790. }
  791. // update the config
  792. config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
  793. config.setString(CONF_ALIAS, null, keyName, branchName);
  794. config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
  795. config.save();
  796. }
  797. // the deletedBranches set will normally be empty by this point
  798. // unless a branch really was deleted and no longer exists
  799. if (deletedBranches.size() > 0) {
  800. for (String branch : deletedBranches) {
  801. IndexWriter writer = getIndexWriter(model.name);
  802. writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
  803. writer.commit();
  804. }
  805. }
  806. result.success = true;
  807. } catch (Throwable t) {
  808. logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
  809. }
  810. return result;
  811. }
  812. /**
  813. * Creates a Lucene document for a commit
  814. *
  815. * @param commit
  816. * @param tags
  817. * @return a Lucene document
  818. */
  819. private Document createDocument(RevCommit commit, List<String> tags) {
  820. Document doc = new Document();
  821. doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
  822. doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
  823. doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
  824. Resolution.MINUTE), StringField.TYPE_STORED));
  825. doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
  826. doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
  827. doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
  828. doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
  829. if (!ArrayUtils.isEmpty(tags)) {
  830. doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
  831. }
  832. return doc;
  833. }
  834. /**
  835. * Incrementally index an object for the repository.
  836. *
  837. * @param repositoryName
  838. * @param doc
  839. * @return true, if successful
  840. */
  841. private boolean index(String repositoryName, Document doc) {
  842. try {
  843. IndexWriter writer = getIndexWriter(repositoryName);
  844. writer.addDocument(doc);
  845. writer.commit();
  846. resetIndexSearcher(repositoryName);
  847. return true;
  848. } catch (Exception e) {
  849. logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
  850. }
  851. return false;
  852. }
  853. private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
  854. SearchResult result = new SearchResult();
  855. result.hitId = hitId;
  856. result.totalHits = totalHits;
  857. result.score = score;
  858. result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
  859. result.summary = doc.get(FIELD_SUMMARY);
  860. result.author = doc.get(FIELD_AUTHOR);
  861. result.committer = doc.get(FIELD_COMMITTER);
  862. result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
  863. result.branch = doc.get(FIELD_BRANCH);
  864. result.commitId = doc.get(FIELD_COMMIT);
  865. result.path = doc.get(FIELD_PATH);
  866. if (doc.get(FIELD_TAG) != null) {
  867. result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
  868. }
  869. return result;
  870. }
  871. private synchronized void resetIndexSearcher(String repository) throws IOException {
  872. IndexSearcher searcher = searchers.remove(repository);
  873. if (searcher != null) {
  874. searcher.getIndexReader().close();
  875. }
  876. }
  877. /**
  878. * Gets an index searcher for the repository.
  879. *
  880. * @param repository
  881. * @return
  882. * @throws IOException
  883. */
  884. private IndexSearcher getIndexSearcher(String repository) throws IOException {
  885. IndexSearcher searcher = searchers.get(repository);
  886. if (searcher == null) {
  887. IndexWriter writer = getIndexWriter(repository);
  888. searcher = new IndexSearcher(DirectoryReader.open(writer, true));
  889. searchers.put(repository, searcher);
  890. }
  891. return searcher;
  892. }
  893. /**
  894. * Gets an index writer for the repository. The index will be created if it
  895. * does not already exist or if forceCreate is specified.
  896. *
  897. * @param repository
  898. * @return an IndexWriter
  899. * @throws IOException
  900. */
  901. private IndexWriter getIndexWriter(String repository) throws IOException {
  902. IndexWriter indexWriter = writers.get(repository);
  903. File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
  904. File indexFolder = new File(repositoryFolder, LUCENE_DIR);
  905. Directory directory = FSDirectory.open(indexFolder);
  906. if (indexWriter == null) {
  907. if (!indexFolder.exists()) {
  908. indexFolder.mkdirs();
  909. }
  910. StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
  911. IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
  912. config.setOpenMode(OpenMode.CREATE_OR_APPEND);
  913. indexWriter = new IndexWriter(directory, config);
  914. writers.put(repository, indexWriter);
  915. }
  916. return indexWriter;
  917. }
  918. /**
  919. * Searches the specified repositories for the given text or query
  920. *
  921. * @param text
  922. * if the text is null or empty, null is returned
  923. * @param page
  924. * the page number to retrieve. page is 1-indexed.
  925. * @param pageSize
  926. * the number of elements to return for this page
  927. * @param repositories
  928. * a list of repositories to search. if no repositories are
  929. * specified null is returned.
  930. * @return a list of SearchResults in order from highest to the lowest score
  931. *
  932. */
  933. public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
  934. if (ArrayUtils.isEmpty(repositories)) {
  935. return null;
  936. }
  937. return search(text, page, pageSize, repositories.toArray(new String[0]));
  938. }
  939. /**
  940. * Searches the specified repositories for the given text or query
  941. *
  942. * @param text
  943. * if the text is null or empty, null is returned
  944. * @param page
  945. * the page number to retrieve. page is 1-indexed.
  946. * @param pageSize
  947. * the number of elements to return for this page
  948. * @param repositories
  949. * a list of repositories to search. if no repositories are
  950. * specified null is returned.
  951. * @return a list of SearchResults in order from highest to the lowest score
  952. *
  953. */
  954. public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
  955. if (StringUtils.isEmpty(text)) {
  956. return null;
  957. }
  958. if (ArrayUtils.isEmpty(repositories)) {
  959. return null;
  960. }
  961. Set<SearchResult> results = new LinkedHashSet<SearchResult>();
  962. StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
  963. try {
  964. // default search checks summary and content
  965. BooleanQuery query = new BooleanQuery();
  966. QueryParser qp;
  967. qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
  968. qp.setAllowLeadingWildcard(true);
  969. query.add(qp.parse(text), Occur.SHOULD);
  970. qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
  971. qp.setAllowLeadingWildcard(true);
  972. query.add(qp.parse(text), Occur.SHOULD);
  973. IndexSearcher searcher;
  974. if (repositories.length == 1) {
  975. // single repository search
  976. searcher = getIndexSearcher(repositories[0]);
  977. } else {
  978. // multiple repository search
  979. List<IndexReader> readers = new ArrayList<IndexReader>();
  980. for (String repository : repositories) {
  981. IndexSearcher repositoryIndex = getIndexSearcher(repository);
  982. readers.add(repositoryIndex.getIndexReader());
  983. }
  984. IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
  985. MultiSourceReader reader = new MultiSourceReader(rdrs);
  986. searcher = new IndexSearcher(reader);
  987. }
  988. Query rewrittenQuery = searcher.rewrite(query);
  989. logger.debug(rewrittenQuery.toString());
  990. TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
  991. searcher.search(rewrittenQuery, collector);
  992. int offset = Math.max(0, (page - 1) * pageSize);
  993. ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
  994. int totalHits = collector.getTotalHits();
  995. for (int i = 0; i < hits.length; i++) {
  996. int docId = hits[i].doc;
  997. Document doc = searcher.doc(docId);
  998. SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
  999. if (repositories.length == 1) {
  1000. // single repository search
  1001. result.repository = repositories[0];
  1002. } else {
  1003. // multi-repository search
  1004. MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
  1005. int index = reader.getSourceIndex(docId);
  1006. result.repository = repositories[index];
  1007. }
  1008. String content = doc.get(FIELD_CONTENT);
  1009. result.fragment = getHighlightedFragment(analyzer, query, content, result);
  1010. results.add(result);
  1011. }
  1012. } catch (Exception e) {
  1013. logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
  1014. }
  1015. return new ArrayList<SearchResult>(results);
  1016. }
  1017. /**
  1018. *
  1019. * @param analyzer
  1020. * @param query
  1021. * @param content
  1022. * @param result
  1023. * @return
  1024. * @throws IOException
  1025. * @throws InvalidTokenOffsetsException
  1026. */
  1027. private String getHighlightedFragment(Analyzer analyzer, Query query,
  1028. String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
  1029. if (content == null) {
  1030. content = "";
  1031. }
  1032. int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
  1033. int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
  1034. QueryScorer scorer = new QueryScorer(query, "content");
  1035. Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
  1036. // use an artificial delimiter for the token
  1037. String termTag = "!!--[";
  1038. String termTagEnd = "]--!!";
  1039. SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
  1040. Highlighter highlighter = new Highlighter(formatter, scorer);
  1041. highlighter.setTextFragmenter(fragmenter);
  1042. String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
  1043. if (ArrayUtils.isEmpty(fragments)) {
  1044. if (SearchObjectType.blob == result.type) {
  1045. return "";
  1046. }
  1047. // clip commit message
  1048. String fragment = content;
  1049. if (fragment.length() > fragmentLength) {
  1050. fragment = fragment.substring(0, fragmentLength) + "...";
  1051. }
  1052. return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
  1053. }
  1054. // make sure we have unique fragments
  1055. Set<String> uniqueFragments = new LinkedHashSet<String>();
  1056. for (String fragment : fragments) {
  1057. uniqueFragments.add(fragment);
  1058. }
  1059. fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
  1060. StringBuilder sb = new StringBuilder();
  1061. for (int i = 0, len = fragments.length; i < len; i++) {
  1062. String fragment = fragments[i];
  1063. String tag = "<pre class=\"text\">";
  1064. // resurrect the raw fragment from removing the artificial delimiters
  1065. String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
  1066. // determine position of the raw fragment in the content
  1067. int pos = content.indexOf(raw);
  1068. // restore complete first line of fragment
  1069. int c = pos;
  1070. while (c > 0) {
  1071. c--;
  1072. if (content.charAt(c) == '\n') {
  1073. break;
  1074. }
  1075. }
  1076. if (c > 0) {
  1077. // inject leading chunk of first fragment line
  1078. fragment = content.substring(c + 1, pos) + fragment;
  1079. }
  1080. if (SearchObjectType.blob == result.type) {
  1081. // count lines as offset into the content for this fragment
  1082. int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
  1083. // create fragment tag with line number and language
  1084. String lang = "";
  1085. String ext = StringUtils.getFileExtension(result.path).toLowerCase();
  1086. if (!StringUtils.isEmpty(ext)) {
  1087. // maintain leading space!
  1088. lang = " lang-" + ext;
  1089. }
  1090. tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
  1091. }
  1092. sb.append(tag);
  1093. // replace the artificial delimiter with html tags
  1094. String html = StringUtils.escapeForHtml(fragment, false);
  1095. html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
  1096. sb.append(html);
  1097. sb.append("</pre>");
  1098. if (i < len - 1) {
  1099. sb.append("<span class=\"ellipses\">...</span><br/>");
  1100. }
  1101. }
  1102. return sb.toString();
  1103. }
  1104. /**
  1105. * Simple class to track the results of an index update.
  1106. */
  1107. private class IndexResult {
  1108. long startTime = System.currentTimeMillis();
  1109. long endTime = startTime;
  1110. boolean success;
  1111. int branchCount;
  1112. int commitCount;
  1113. int blobCount;
  1114. void add(IndexResult result) {
  1115. this.branchCount += result.branchCount;
  1116. this.commitCount += result.commitCount;
  1117. this.blobCount += result.blobCount;
  1118. }
  1119. void success() {
  1120. success = true;
  1121. endTime = System.currentTimeMillis();
  1122. }
  1123. float duration() {
  1124. return (endTime - startTime)/1000f;
  1125. }
  1126. }
  1127. /**
  1128. * Custom subclass of MultiReader to identify the source index for a given
  1129. * doc id. This would not be necessary of there was a public method to
  1130. * obtain this information.
  1131. *
  1132. */
  1133. private class MultiSourceReader extends MultiReader {
  1134. MultiSourceReader(IndexReader [] readers) {
  1135. super(readers, false);
  1136. }
  1137. int getSourceIndex(int docId) {
  1138. int index = -1;
  1139. try {
  1140. index = super.readerIndex(docId);
  1141. } catch (Exception e) {
  1142. logger.error("Error getting source index", e);
  1143. }
  1144. return index;
  1145. }
  1146. }
  1147. }