summaryrefslogtreecommitdiffstats
path: root/src/com/gitblit/utils/LuceneUtils.java
blob: d463cdf1fdaffd64b20600f30ae000f1a55ff2cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
package com.gitblit.utils;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.FS;

import com.gitblit.models.IssueModel;
import com.gitblit.models.IssueModel.Attachment;
import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
import com.gitblit.models.SearchResult;

/**
 * A collection of utility methods for indexing and querying a Lucene repository
 * index.
 * 
 * @author James Moger
 * 
 */
public class LuceneUtils {

	/**
	 * The types of objects that can be indexed and queried.
	 */
	public static enum ObjectType {
		commit, blob, issue;

		static ObjectType fromName(String name) {
			for (ObjectType value : values()) {
				if (value.name().equals(name)) {
					return value;
				}
			}
			return null;
		}
	}

	private static final Version LUCENE_VERSION = Version.LUCENE_35;
	private static final int INDEX_VERSION = 1;

	private static final String FIELD_OBJECT_TYPE = "type";
	private static final String FIELD_OBJECT_ID = "id";
	private static final String FIELD_BRANCH = "branch";
	private static final String FIELD_REPOSITORY = "repository";
	private static final String FIELD_SUMMARY = "summary";
	private static final String FIELD_CONTENT = "content";
	private static final String FIELD_AUTHOR = "author";
	private static final String FIELD_COMMITTER = "committer";
	private static final String FIELD_DATE = "date";
	private static final String FIELD_LABEL = "label";
	private static final String FIELD_ATTACHMENT = "attachment";

	private static Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));

	private static Set<String> excludedBranches = new TreeSet<String>(
			Arrays.asList("/refs/heads/gb-issues"));

	private static final Map<File, IndexSearcher> SEARCHERS = new ConcurrentHashMap<File, IndexSearcher>();
	private static final Map<File, IndexWriter> WRITERS = new ConcurrentHashMap<File, IndexWriter>();

	private static final String CONF_FILE = "lucene.conf";
	private static final String CONF_INDEX = "index";
	private static final String CONF_VERSION = "version";
	private static final String CONF_ALIAS = "aliases";
	private static final String CONF_BRANCH = "branches";

	/**
	 * Returns the name of the repository.
	 * 
	 * @param repository
	 * @return the repository name
	 */
	private static String getName(Repository repository) {
		if (repository.isBare()) {
			return repository.getDirectory().getName();
		} else {
			return repository.getDirectory().getParentFile().getName();
		}
	}

	/**
	 * Construct a keyname from the branch.
	 * 
	 * @param branchName
	 * @return a keyname appropriate for the Git config file format
	 */
	private static String getBranchKey(String branchName) {
		return StringUtils.getSHA1(branchName);
	}

	/**
	 * Returns the Lucene configuration for the specified repository.
	 * 
	 * @param repository
	 * @return a config object
	 */
	private static FileBasedConfig getConfig(Repository repository) {
		File file = new File(repository.getDirectory(), CONF_FILE);
		FileBasedConfig config = new FileBasedConfig(file, FS.detect());
		return config;
	}

	/**
	 * Reads the Lucene config file for the repository to check the index
	 * version. If the index version is different, then rebuild the repository
	 * index.
	 * 
	 * @param repository
	 * @return true of the on-disk index format is different than INDEX_VERSION
	 */
	public static boolean shouldReindex(Repository repository) {
		try {
			FileBasedConfig config = getConfig(repository);
			config.load();
			int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
			// reindex if versions do not match
			return indexVersion != INDEX_VERSION;
		} catch (Throwable t) {
		}
		return true;
	}

	/**
	 * Deletes the Lucene index for the specified repository.
	 * 
	 * @param repository
	 * @return true, if successful
	 */
	public static boolean deleteIndex(Repository repository) {
		try {
			File luceneIndex = new File(repository.getDirectory(), "lucene");
			if (luceneIndex.exists()) {
				org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
						org.eclipse.jgit.util.FileUtils.RECURSIVE);
			}
			File luceneConfig = new File(repository.getDirectory(), CONF_FILE);
			if (luceneConfig.exists()) {
				luceneConfig.delete();
			}
			return true;
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * This completely indexes the repository and will destroy any existing
	 * index.
	 * 
	 * @param repository
	 * @return true if the indexing has succeeded
	 */
	public static boolean reindex(Repository repository) {
		if (!LuceneUtils.deleteIndex(repository)) {
			return false;
		}
		try {
			String repositoryName = getName(repository);
			FileBasedConfig config = getConfig(repository);
			Set<String> indexedCommits = new TreeSet<String>();
			IndexWriter writer = getIndexWriter(repository, true);
			// build a quick lookup of tags
			Map<String, List<String>> tags = new HashMap<String, List<String>>();
			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
				if (!tag.isAnnotatedTag()) {
					// skip non-annotated tags
					continue;
				}
				if (!tags.containsKey(tag.getObjectId())) {
					tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
				}
				tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
			}

			// walk through each branch
			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
			for (RefModel branch : branches) {
				if (excludedBranches.contains(branch.getName())) {
					continue;
				}
				String branchName = branch.getName();
				RevWalk revWalk = new RevWalk(repository);
				RevCommit rev = revWalk.parseCommit(branch.getObjectId());

				String keyName = getBranchKey(branchName);
				config.setString(CONF_ALIAS, null, keyName, branchName);
				config.setString(CONF_BRANCH, null, keyName, rev.getName());

				// index the blob contents of the tree
				ByteArrayOutputStream os = new ByteArrayOutputStream();
				byte[] tmp = new byte[32767];
				TreeWalk treeWalk = new TreeWalk(repository);
				treeWalk.addTree(rev.getTree());
				treeWalk.setRecursive(true);
				String revDate = DateTools.timeToString(rev.getCommitTime() * 1000L,
						Resolution.MINUTE);
				while (treeWalk.next()) {
					Document doc = new Document();
					doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
							Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
							Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_OBJECT_ID, treeWalk.getPathString(), Store.YES,
							Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
					doc.add(new Field(FIELD_AUTHOR, rev.getAuthorIdent().getName(), Store.YES,
							Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_COMMITTER, rev.getCommitterIdent().getName(),
							Store.YES, Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_LABEL, branch.getName(), Store.YES, Index.ANALYZED));

					// determine extension to compare to the extension
					// blacklist
					String ext = null;
					String name = treeWalk.getPathString().toLowerCase();
					if (name.indexOf('.') > -1) {
						ext = name.substring(name.lastIndexOf('.') + 1);
					}

					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
						// read the blob content
						ObjectId entid = treeWalk.getObjectId(0);
						FileMode entmode = treeWalk.getFileMode(0);
						RevObject ro = revWalk.lookupAny(entid, entmode.getObjectType());
						revWalk.parseBody(ro);
						ObjectLoader ldr = repository.open(ro.getId(), Constants.OBJ_BLOB);
						InputStream in = ldr.openStream();
						os.reset();
						int n = 0;
						while ((n = in.read(tmp)) > 0) {
							os.write(tmp, 0, n);
						}
						in.close();
						byte[] content = os.toByteArray();
						String str = new String(content, "UTF-8");
						doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
						writer.addDocument(doc);
					}
				}

				os.close();
				treeWalk.release();

				// index the head commit object
				String head = rev.getId().getName();
				if (indexedCommits.add(head)) {
					Document doc = createDocument(rev, tags.get(head));
					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
							Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
					writer.addDocument(doc);
				}

				// traverse the log and index the previous commit objects
				revWalk.markStart(rev);
				while ((rev = revWalk.next()) != null) {
					String hash = rev.getId().getName();
					if (indexedCommits.add(hash)) {
						Document doc = createDocument(rev, tags.get(hash));
						doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
								Index.NOT_ANALYZED));
						doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.NOT_ANALYZED));
						writer.addDocument(doc);
					}
				}

				// finished
				revWalk.dispose();
			}

			// this repository has a gb-issues branch, index all issues
			if (IssueUtils.getIssuesBranch(repository) != null) {
				List<IssueModel> issues = IssueUtils.getIssues(repository, null);
				for (IssueModel issue : issues) {
					Document doc = createDocument(issue);
					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
							Index.NOT_ANALYZED));
					writer.addDocument(doc);
				}
			}

			// commit all changes and reset the searcher
			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
			config.save();
			resetIndexSearcher(repository);
			writer.commit();
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Incrementally update the index with the specified commit for the
	 * repository.
	 * 
	 * @param repository
	 * @param branch
	 *            the fully qualified branch name (e.g. refs/heads/master)
	 * @param commit
	 * @return true, if successful
	 */
	public static boolean index(Repository repository, String branch, RevCommit commit) {
		try {
			if (excludedBranches.contains(branch)) {
				if (IssueUtils.GB_ISSUES.equals(branch)) {
					// index an issue
					String issueId = commit.getShortMessage().substring(2).trim();
					IssueModel issue = IssueUtils.getIssue(repository, issueId);
					if (issue == null) {
						// delete the old issue from the index, if exists
						IndexWriter writer = getIndexWriter(repository, false);
						writer.deleteDocuments(
								new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
										FIELD_OBJECT_ID, issueId));
						writer.commit();
						return true;
					}
					return index(repository, issue);
				}
				return false;
			}
			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
			String repositoryName = getName(repository);
			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
					Resolution.MINUTE);
			IndexWriter writer = getIndexWriter(repository, false);
			for (PathChangeModel path : changedPaths) {
				// delete the indexed blob
				writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.blob.name()),
						new Term(FIELD_BRANCH, branch), new Term(FIELD_OBJECT_ID, path.path));

				// re-index the blob
				if (!ChangeType.DELETE.equals(path.changeType)) {
					Document doc = new Document();
					doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.blob.name(), Store.YES,
							Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES,
							Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_OBJECT_ID, path.path, Store.YES, Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
					doc.add(new Field(FIELD_AUTHOR, commit.getAuthorIdent().getName(), Store.YES,
							Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_COMMITTER, commit.getCommitterIdent().getName(),
							Store.YES, Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_LABEL, branch, Store.YES, Index.ANALYZED));

					// determine extension to compare to the extension
					// blacklist
					String ext = null;
					String name = path.name.toLowerCase();
					if (name.indexOf('.') > -1) {
						ext = name.substring(name.lastIndexOf('.') + 1);
					}

					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
						// read the blob content
						String str = JGitUtils.getStringContent(repository, commit.getTree(),
								path.path);
						doc.add(new Field(FIELD_CONTENT, str, Store.NO, Index.ANALYZED));
						writer.addDocument(doc);
					}
				}
			}
			writer.commit();

			Document doc = createDocument(commit, null);
			return index(repository, doc);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Incrementally update the index with the specified issue for the
	 * repository.
	 * 
	 * @param repository
	 * @param issue
	 * @return true, if successful
	 */
	public static boolean index(Repository repository, IssueModel issue) {
		try {
			// delete the old issue from the index, if exists
			IndexWriter writer = getIndexWriter(repository, false);
			writer.deleteDocuments(new Term(FIELD_OBJECT_TYPE, ObjectType.issue.name()), new Term(
					FIELD_OBJECT_ID, String.valueOf(issue.id)));
			writer.commit();

			Document doc = createDocument(issue);
			return index(repository, doc);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Updates a repository index incrementally from the last indexed commits.
	 * 
	 * @param repository
	 */
	public static boolean updateIndex(Repository repository) {
		boolean success = false;
		try {
			FileBasedConfig config = getConfig(repository);
			config.load();

			// build a quick lookup of annotated tags
			Map<String, List<String>> tags = new HashMap<String, List<String>>();
			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
				if (!tag.isAnnotatedTag()) {
					// skip non-annotated tags
					continue;
				}
				if (!tags.containsKey(tag.getObjectId())) {
					tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
				}
				tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
			}

			// detect branch deletion
			// first assume all branches are deleted and then remove each
			// existing branch from deletedBranches during indexing			
			Set<String> deletedBranches = new TreeSet<String>();
			for (String alias : config.getNames(CONF_ALIAS)) {
				String branch = config.getString(CONF_ALIAS, null, alias);
				deletedBranches.add(branch);
			}
			
			// walk through each branches
			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
			for (RefModel branch : branches) {
				String branchName = branch.getName();

				// remove this branch from the deletedBranches set
				deletedBranches.remove(branchName);

				// determine last commit				
				String keyName = getBranchKey(branchName);
				String lastCommit = config.getString(CONF_BRANCH, null, keyName);

				List<RevCommit> revs;
				if (StringUtils.isEmpty(lastCommit)) {
					// new branch/unindexed branch, get all commits on branch
					revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
				} else {
					// pre-existing branch, get changes since last commit
					revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
				}

				// reverse the list of commits so we start with the first commit
				Collections.reverse(revs);
				for (RevCommit commit : revs) {
					index(repository, branchName, commit);
				}

				// update the config
				config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
				config.setString(CONF_ALIAS, null, keyName, branchName);
				config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
				config.save();
			}
			
			// the deletedBranches set will normally be empty by this point
			// unless a branch really was deleted and no longer exists
			if (deletedBranches.size() > 0) {
				for (String branch : deletedBranches) {
					IndexWriter writer = getIndexWriter(repository, false);
					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
					writer.commit();
				}
			}
			success = true;
		} catch (Throwable t) {
			t.printStackTrace();
		}
		return success;
	}

	/**
	 * Creates a Lucene document from an issue.
	 * 
	 * @param issue
	 * @return a Lucene document
	 */
	private static Document createDocument(IssueModel issue) {
		Document doc = new Document();
		doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.issue.name(), Store.YES,
				Field.Index.NOT_ANALYZED_NO_NORMS));
		doc.add(new Field(FIELD_OBJECT_ID, issue.id, Store.YES, Index.NOT_ANALYZED));
		doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.NOT_ANALYZED));
		doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
				Store.YES, Field.Index.NO));
		doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
		List<String> attachments = new ArrayList<String>();
		for (Attachment attachment : issue.getAttachments()) {
			attachments.add(attachment.name.toLowerCase());
		}
		doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
				Index.ANALYZED));
		doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.NO, Index.ANALYZED));
		doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
				Index.ANALYZED));
		return doc;
	}

	/**
	 * Creates a Lucene document for a commit
	 * 
	 * @param commit
	 * @param tags
	 * @return a Lucene document
	 */
	private static Document createDocument(RevCommit commit, List<String> tags) {
		Document doc = new Document();
		doc.add(new Field(FIELD_OBJECT_TYPE, ObjectType.commit.name(), Store.YES,
				Index.NOT_ANALYZED_NO_NORMS));
		doc.add(new Field(FIELD_OBJECT_ID, commit.getName(), Store.YES, Index.NOT_ANALYZED));
		doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
				Resolution.MINUTE), Store.YES, Index.NO));
		doc.add(new Field(FIELD_AUTHOR, commit.getCommitterIdent().getName(), Store.YES,
				Index.NOT_ANALYZED_NO_NORMS));
		doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
		doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.NO, Index.ANALYZED));
		if (!ArrayUtils.isEmpty(tags)) {
			if (!ArrayUtils.isEmpty(tags)) {
				doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(tags), Store.YES,
						Index.ANALYZED));
			}
		}
		return doc;
	}

	/**
	 * Incrementally index an object for the repository.
	 * 
	 * @param repository
	 * @param doc
	 * @return true, if successful
	 */
	private static boolean index(Repository repository, Document doc) {
		try {
			String repositoryName = getName(repository);
			doc.add(new Field(FIELD_REPOSITORY, repositoryName, Store.YES, Index.NOT_ANALYZED));
			IndexWriter writer = getIndexWriter(repository, false);
			writer.addDocument(doc);
			resetIndexSearcher(repository);
			writer.commit();
			return true;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}

	private static SearchResult createSearchResult(Document doc, float score) throws ParseException {
		SearchResult result = new SearchResult();
		result.score = score;
		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
		result.summary = doc.get(FIELD_SUMMARY);
		result.author = doc.get(FIELD_AUTHOR);
		result.committer = doc.get(FIELD_COMMITTER);
		result.type = ObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
		result.repository = doc.get(FIELD_REPOSITORY);
		result.branch = doc.get(FIELD_BRANCH);
		result.id = doc.get(FIELD_OBJECT_ID);
		if (doc.get(FIELD_LABEL) != null) {
			result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
		}
		return result;
	}

	private static void resetIndexSearcher(Repository repository) throws IOException {
		IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());
		if (searcher != null) {
			SEARCHERS.remove(repository.getDirectory());
			searcher.close();
		}
	}

	/**
	 * Gets an index searcher for the repository.
	 * 
	 * @param repository
	 * @return
	 * @throws IOException
	 */
	private static IndexSearcher getIndexSearcher(Repository repository) throws IOException {
		IndexSearcher searcher = SEARCHERS.get(repository.getDirectory());
		if (searcher == null) {
			IndexWriter writer = getIndexWriter(repository, false);
			searcher = new IndexSearcher(IndexReader.open(writer, true));
			SEARCHERS.put(repository.getDirectory(), searcher);
		}
		return searcher;
	}

	/**
	 * Gets an index writer for the repository. The index will be created if it
	 * does not already exist or if forceCreate is specified.
	 * 
	 * @param repository
	 * @param forceCreate
	 * @return an IndexWriter
	 * @throws IOException
	 */
	private static IndexWriter getIndexWriter(Repository repository, boolean forceCreate)
			throws IOException {
		IndexWriter indexWriter = WRITERS.get(repository.getDirectory());
		File indexFolder = new File(repository.getDirectory(), "lucene");
		Directory directory = FSDirectory.open(indexFolder);
		if (forceCreate || !indexFolder.exists()) {
			// if the writer is going to blow away the existing index and create
			// a new one then it should not be cached. instead, close any open
			// writer, create a new one, and return.
			if (indexWriter != null) {
				indexWriter.close();
				indexWriter = null;
				WRITERS.remove(repository.getDirectory());
			}
			indexFolder.mkdirs();
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(
					LUCENE_VERSION));
			config.setOpenMode(OpenMode.CREATE);
			IndexWriter writer = new IndexWriter(directory, config);
			writer.close();
		}

		if (indexWriter == null) {
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(
					LUCENE_VERSION));
			config.setOpenMode(OpenMode.APPEND);
			indexWriter = new IndexWriter(directory, config);
			WRITERS.put(repository.getDirectory(), indexWriter);
		}
		return indexWriter;
	}

	/**
	 * Searches the specified repositories for the given text or query
	 * 
	 * @param text
	 *            if the text is null or empty, null is returned
	 * @param maximumHits
	 *            the maximum number of hits to collect
	 * @param repositories
	 *            a list of repositories to search. if no repositories are
	 *            specified null is returned.
	 * @return a list of SearchResults in order from highest to the lowest score
	 * 
	 */
	public static List<SearchResult> search(String text, int maximumHits,
			Repository... repositories) {
		if (StringUtils.isEmpty(text)) {
			return null;
		}
		if (repositories.length == 0) {
			return null;
		}
		Set<SearchResult> results = new LinkedHashSet<SearchResult>();
		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
		try {
			// default search checks summary and content
			BooleanQuery query = new BooleanQuery();
			QueryParser qp;
			qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			IndexSearcher searcher;
			if (repositories.length == 1) {
				// single repository search
				searcher = getIndexSearcher(repositories[0]);
			} else {
				// multiple repository search
				List<IndexReader> readers = new ArrayList<IndexReader>();
				for (Repository repository : repositories) {
					IndexSearcher repositoryIndex = getIndexSearcher(repository);
					readers.add(repositoryIndex.getIndexReader());
				}
				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
				MultiReader reader = new MultiReader(rdrs);
				searcher = new IndexSearcher(reader);
			}
			Query rewrittenQuery = searcher.rewrite(query);
			TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true);
			searcher.search(rewrittenQuery, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				int docId = hits[i].doc;
				Document doc = searcher.doc(docId);
				SearchResult result = createSearchResult(doc, hits[i].score);
				results.add(result);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return new ArrayList<SearchResult>(results);
	}

	/**
	 * Close all the index writers and searchers
	 */
	public static void close() {
		// close writers
		for (File file : WRITERS.keySet()) {
			try {
				WRITERS.get(file).close(true);
			} catch (Throwable t) {
				t.printStackTrace();
			}
		}
		WRITERS.clear();

		// close searchers
		for (File file : SEARCHERS.keySet()) {
			try {
				SEARCHERS.get(file).close();
			} catch (Throwable t) {
				t.printStackTrace();
			}
		}
		SEARCHERS.clear();
	}
}