mirrors
/
gitblit
espejo de https://github.com/gitblit/gitblit.git


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271
							/*
 * Copyright 2012 gitblit.com.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gitblit.service;

import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.RepositoryCache.FileKey;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.FS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.gitblit.Constants.SearchObjectType;
import com.gitblit.GitBlit;
import com.gitblit.IStoredSettings;
import com.gitblit.Keys;
import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.IRepositoryManager;
import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
import com.gitblit.models.RepositoryModel;
import com.gitblit.models.SearchResult;
import com.gitblit.utils.ArrayUtils;
import com.gitblit.utils.JGitUtils;
import com.gitblit.utils.StringUtils;

/**
 * The Lucene service handles indexing and searching repositories.
 *
 * @author James Moger
 *
 */
public class LuceneService implements Runnable {


	private static final int INDEX_VERSION = 6;

	private static final String FIELD_OBJECT_TYPE = "type";
	private static final String FIELD_PATH = "path";
	private static final String FIELD_COMMIT = "commit";
	private static final String FIELD_BRANCH = "branch";
	private static final String FIELD_SUMMARY = "summary";
	private static final String FIELD_CONTENT = "content";
	private static final String FIELD_AUTHOR = "author";
	private static final String FIELD_COMMITTER = "committer";
	private static final String FIELD_DATE = "date";
	private static final String FIELD_TAG = "tag";

	private static final String CONF_FILE = "lucene.conf";
	private static final String LUCENE_DIR = "lucene";
	private static final String CONF_INDEX = "index";
	private static final String CONF_VERSION = "version";
	private static final String CONF_ALIAS = "aliases";
	private static final String CONF_BRANCH = "branches";

	private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;

	private final Logger logger = LoggerFactory.getLogger(LuceneService.class);

	private final IStoredSettings storedSettings;
	private final IRepositoryManager repositoryManager;
	private final IFilestoreManager filestoreManager;
	
	private final File repositoriesFolder;

	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();

	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
	private Set<String> excludedExtensions;

	public LuceneService(
			IStoredSettings settings,
			IRepositoryManager repositoryManager, 
			IFilestoreManager filestoreManager) {

		this.storedSettings = settings;
		this.repositoryManager = repositoryManager;
		this.filestoreManager = filestoreManager;
		this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
		String exts = luceneIgnoreExtensions;
		if (settings != null) {
			exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
		}
		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
	}

	/**
	 * Run is executed by the Gitblit executor service.  Because this is called
	 * by an executor service, calls will queue - i.e. there can never be
	 * concurrent execution of repository index updates.
	 */
	@Override
	public void run() {
		if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
			// Lucene indexing is disabled
			return;
		}
		// reload the excluded extensions
		String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));

		if (repositoryManager.isCollectingGarbage()) {
			// busy collecting garbage, try again later
			return;
		}

		for (String repositoryName: repositoryManager.getRepositoryList()) {
			RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
			if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
				Repository repository = repositoryManager.getRepository(model.name);
				if (repository == null) {
					if (repositoryManager.isCollectingGarbage(model.name)) {
						logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
					}
					continue;
				}
				index(model, repository);
				repository.close();
				System.gc();
			}
		}
	}

	/**
	 * Synchronously indexes a repository. This may build a complete index of a
	 * repository or it may update an existing index.
	 *
	 * @param displayName
	 *            the name of the repository
	 * @param repository
	 *            the repository object
	 */
	private void index(RepositoryModel model, Repository repository) {
		try {
			if (shouldReindex(repository)) {
				// (re)build the entire index
				IndexResult result = reindex(model, repository);

				if (result.success) {
					if (result.commitCount > 0) {
						String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
								result.blobCount, result.branchCount, result.duration()));
					}
				} else {
					String msg = "Could not build {0} Lucene index!";
					logger.error(MessageFormat.format(msg, model.name));
				}
			} else {
				// update the index with latest commits
				IndexResult result = updateIndex(model, repository);
				if (result.success) {
					if (result.commitCount > 0) {
						String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
						logger.info(MessageFormat.format(msg, model.name, result.commitCount,
								result.blobCount, result.branchCount, result.duration()));
					}
				} else {
					String msg = "Could not update {0} Lucene index!";
					logger.error(MessageFormat.format(msg, model.name));
				}
			}
		} catch (Throwable t) {
			logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
		}
	}

	/**
	 * Close the writer/searcher objects for a repository.
	 *
	 * @param repositoryName
	 */
	public synchronized void close(String repositoryName) {
		try {
			IndexSearcher searcher = searchers.remove(repositoryName);
			if (searcher != null) {
				searcher.getIndexReader().close();
			}
		} catch (Exception e) {
			logger.error("Failed to close index searcher for " + repositoryName, e);
		}

		try {
			IndexWriter writer = writers.remove(repositoryName);
			if (writer != null) {
				writer.close();
			}
		} catch (Exception e) {
			logger.error("Failed to close index writer for " + repositoryName, e);
		}
	}

	/**
	 * Close all Lucene indexers.
	 *
	 */
	public synchronized void close() {
		// close all writers
		for (String writer : writers.keySet()) {
			try {
				writers.get(writer).close(true);
			} catch (Throwable t) {
				logger.error("Failed to close Lucene writer for " + writer, t);
			}
		}
		writers.clear();

		// close all searchers
		for (String searcher : searchers.keySet()) {
			try {
				searchers.get(searcher).getIndexReader().close();
			} catch (Throwable t) {
				logger.error("Failed to close Lucene searcher for " + searcher, t);
			}
		}
		searchers.clear();
	}


	/**
	 * Deletes the Lucene index for the specified repository.
	 *
	 * @param repositoryName
	 * @return true, if successful
	 */
	public boolean deleteIndex(String repositoryName) {
		try {
			// close any open writer/searcher
			close(repositoryName);

			// delete the index folder
			File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
			File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
			if (luceneIndex.exists()) {
				org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
						org.eclipse.jgit.util.FileUtils.RECURSIVE);
			}
			// delete the config file
			File luceneConfig = new File(repositoryFolder, CONF_FILE);
			if (luceneConfig.exists()) {
				luceneConfig.delete();
			}
			return true;
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Returns the author for the commit, if this information is available.
	 *
	 * @param commit
	 * @return an author or unknown
	 */
	private String getAuthor(RevCommit commit) {
		String name = "unknown";
		try {
			name = commit.getAuthorIdent().getName();
			if (StringUtils.isEmpty(name)) {
				name = commit.getAuthorIdent().getEmailAddress();
			}
		} catch (NullPointerException n) {
		}
		return name;
	}

	/**
	 * Returns the committer for the commit, if this information is available.
	 *
	 * @param commit
	 * @return an committer or unknown
	 */
	private String getCommitter(RevCommit commit) {
		String name = "unknown";
		try {
			name = commit.getCommitterIdent().getName();
			if (StringUtils.isEmpty(name)) {
				name = commit.getCommitterIdent().getEmailAddress();
			}
		} catch (NullPointerException n) {
		}
		return name;
	}

	/**
	 * Get the tree associated with the given commit.
	 *
	 * @param walk
	 * @param commit
	 * @return tree
	 * @throws IOException
	 */
	private RevTree getTree(final RevWalk walk, final RevCommit commit)
			throws IOException {
		final RevTree tree = commit.getTree();
		if (tree != null) {
			return tree;
		}
		walk.parseHeaders(commit);
		return commit.getTree();
	}

	/**
	 * Construct a keyname from the branch.
	 *
	 * @param branchName
	 * @return a keyname appropriate for the Git config file format
	 */
	private String getBranchKey(String branchName) {
		return StringUtils.getSHA1(branchName);
	}

	/**
	 * Returns the Lucene configuration for the specified repository.
	 *
	 * @param repository
	 * @return a config object
	 */
	private FileBasedConfig getConfig(Repository repository) {
		File file = new File(repository.getDirectory(), CONF_FILE);
		FileBasedConfig config = new FileBasedConfig(file, FS.detect());
		return config;
	}

	/**
	 * Reads the Lucene config file for the repository to check the index
	 * version. If the index version is different, then rebuild the repository
	 * index.
	 *
	 * @param repository
	 * @return true of the on-disk index format is different than INDEX_VERSION
	 */
	private boolean shouldReindex(Repository repository) {
		try {
			FileBasedConfig config = getConfig(repository);
			config.load();
			int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
			// reindex if versions do not match
			return indexVersion != INDEX_VERSION;
		} catch (Throwable t) {
		}
		return true;
	}


	/**
	 * This completely indexes the repository and will destroy any existing
	 * index.
	 *
	 * @param repositoryName
	 * @param repository
	 * @return IndexResult
	 */
	public IndexResult reindex(RepositoryModel model, Repository repository) {
		IndexResult result = new IndexResult();
		if (!deleteIndex(model.name)) {
			return result;
		}
		try {
			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
			FileBasedConfig config = getConfig(repository);
			Set<String> indexedCommits = new TreeSet<String>();
			IndexWriter writer = getIndexWriter(model.name);
			// build a quick lookup of tags
			Map<String, List<String>> tags = new HashMap<String, List<String>>();
			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
				if (!tag.isAnnotatedTag()) {
					// skip non-annotated tags
					continue;
				}
				if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
					tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
				}
				tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
			}

			ObjectReader reader = repository.newObjectReader();

			// get the local branches
			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

			// sort them by most recently updated
			Collections.sort(branches, new Comparator<RefModel>() {
				@Override
				public int compare(RefModel ref1, RefModel ref2) {
					return ref2.getDate().compareTo(ref1.getDate());
				}
			});

			// reorder default branch to first position
			RefModel defaultBranch = null;
			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
			for (RefModel branch :  branches) {
				if (branch.getObjectId().equals(defaultBranchId)) {
					defaultBranch = branch;
					break;
				}
			}
			branches.remove(defaultBranch);
			branches.add(0, defaultBranch);

			// walk through each branch
			for (RefModel branch : branches) {

				boolean indexBranch = false;
				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
						&& branch.equals(defaultBranch)) {
					// indexing "default" branch
					indexBranch = true;
				} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
					// skip internal meta branches
					indexBranch = false;
				} else {
					// normal explicit branch check
					indexBranch = model.indexedBranches.contains(branch.getName());
				}

				// if this branch is not specifically indexed then skip
				if (!indexBranch) {
					continue;
				}

				String branchName = branch.getName();
				RevWalk revWalk = new RevWalk(reader);
				RevCommit tip = revWalk.parseCommit(branch.getObjectId());
				String tipId = tip.getId().getName();

				String keyName = getBranchKey(branchName);
				config.setString(CONF_ALIAS, null, keyName, branchName);
				config.setString(CONF_BRANCH, null, keyName, tipId);

				// index the blob contents of the tree
				TreeWalk treeWalk = new TreeWalk(repository);
				treeWalk.addTree(tip.getTree());
				treeWalk.setRecursive(true);

				Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
				while (treeWalk.next()) {
					// ensure path is not in a submodule
					if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
						paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
					}
				}

				ByteArrayOutputStream os = new ByteArrayOutputStream();
				byte[] tmp = new byte[32767];

				RevWalk commitWalk = new RevWalk(reader);
				commitWalk.markStart(tip);

				RevCommit commit;
				while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
					TreeWalk diffWalk = new TreeWalk(reader);
					int parentCount = commit.getParentCount();
					switch (parentCount) {
					case 0:
						diffWalk.addTree(new EmptyTreeIterator());
						break;
					case 1:
						diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
						break;
					default:
						// skip merge commits
						continue;
					}
					diffWalk.addTree(getTree(commitWalk, commit));
					diffWalk.setFilter(ANY_DIFF);
					diffWalk.setRecursive(true);
					while ((paths.size() > 0) && diffWalk.next()) {
						String path = diffWalk.getPathString();
						if (!paths.containsKey(path)) {
							continue;
						}
//TODO: Figure out filestore oid the path - bit more involved than updating the index
						
						// remove path from set
						ObjectId blobId = paths.remove(path);
						result.blobCount++;

						// index the blob metadata
						String blobAuthor = getAuthor(commit);
						String blobCommitter = getCommitter(commit);
						String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
								Resolution.MINUTE);

						Document doc = new Document();
						doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
						doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
						doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
						doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
						doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
						doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
						doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));

						// determine extension to compare to the extension
						// blacklist
						String ext = null;
						String name = path.toLowerCase();
						if (name.indexOf('.') > -1) {
							ext = name.substring(name.lastIndexOf('.') + 1);
						}

						// index the blob content
						if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
							ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
							InputStream in = ldr.openStream();
							int n;
							while ((n = in.read(tmp)) > 0) {
								os.write(tmp, 0, n);
							}
							in.close();
							byte[] content = os.toByteArray();
							String str = StringUtils.decodeString(content, encodings);
							doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
							os.reset();
						}

						// add the blob to the index
						writer.addDocument(doc);
					}
				}

				os.close();

				// index the tip commit object
				if (indexedCommits.add(tipId)) {
					Document doc = createDocument(tip, tags.get(tipId));
					doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
					writer.addDocument(doc);
					result.commitCount += 1;
					result.branchCount += 1;
				}

				// traverse the log and index the previous commit objects
				RevWalk historyWalk = new RevWalk(reader);
				historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
				RevCommit rev;
				while ((rev = historyWalk.next()) != null) {
					String hash = rev.getId().getName();
					if (indexedCommits.add(hash)) {
						Document doc = createDocument(rev, tags.get(hash));
						doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
						writer.addDocument(doc);
						result.commitCount += 1;
					}
				}
			}

			// finished
			reader.close();

			// commit all changes and reset the searcher
			config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
			config.save();
			writer.commit();
			resetIndexSearcher(model.name);
			result.success();
		} catch (Exception e) {
			logger.error("Exception while reindexing " + model.name, e);
		}
		return result;
	}

	/**
	 * Incrementally update the index with the specified commit for the
	 * repository.
	 *
	 * @param repositoryName
	 * @param repository
	 * @param branch
	 *            the fully qualified branch name (e.g. refs/heads/master)
	 * @param commit
	 * @return true, if successful
	 */
	private IndexResult index(String repositoryName, Repository repository,
			String branch, RevCommit commit) {
		IndexResult result = new IndexResult();
		try {
			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
					Resolution.MINUTE);
			IndexWriter writer = getIndexWriter(repositoryName);
			for (PathChangeModel path : changedPaths) {
				if (path.isSubmodule()) {
					continue;
				}
				// delete the indexed blob
				deleteBlob(repositoryName, branch, path.name);

				// re-index the blob
				if (!ChangeType.DELETE.equals(path.changeType)) {
					result.blobCount++;
					Document doc = new Document();
					doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
					doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
					doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
					doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
					doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
					doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
					doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));

					// determine extension to compare to the extension
					// blacklist
					String ext = null;
					String name = path.name.toLowerCase();
					if (name.indexOf('.') > -1) {
						ext = name.substring(name.lastIndexOf('.') + 1);
					}

					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
						String str = "";
						// read the blob content
						if (path.isFilestoreItem()) {
							//Get file from filestore
							BodyContentHandler handler = new BodyContentHandler();
	                        Metadata metadata = new Metadata();
	                        PDFParser parser = new PDFParser();
	                        
	                        ParseContext parseContext = new ParseContext();
	                        File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
	                        FileInputStream inputstream = new FileInputStream(lfsFile);
	                        parser.parse(inputstream, handler, metadata, parseContext);
							str = handler.toString();
						} else {
							str = JGitUtils.getStringContent(repository, commit.getTree(),
								path.path, encodings);
						}
						
						if (str != null) {
							doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
							writer.addDocument(doc);
						}
					}
				}
			}
			writer.commit();

			// get any annotated commit tags
			List<String> commitTags = new ArrayList<String>();
			for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
				if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
					commitTags.add(ref.displayName);
				}
			}

			// create and write the Lucene document
			Document doc = createDocument(commit, commitTags);
			doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
			result.commitCount++;
			result.success = index(repositoryName, doc);
		} catch (Exception e) {
			logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
		}
		return result;
	}

	/**
	 * Delete a blob from the specified branch of the repository index.
	 *
	 * @param repositoryName
	 * @param branch
	 * @param path
	 * @throws Exception
	 * @return true, if deleted, false if no record was deleted
	 */
	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
		String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
		String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);

		BooleanQuery query = new BooleanQuery();
		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
		QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
		query.add(qp.parse(q), Occur.MUST);

		IndexWriter writer = getIndexWriter(repositoryName);
		int numDocsBefore = writer.numDocs();
		writer.deleteDocuments(query);
		writer.commit();
		int numDocsAfter = writer.numDocs();
		if (numDocsBefore == numDocsAfter) {
			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
			return false;
		} else {
			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
			return true;
		}
	}

	/**
	 * Updates a repository index incrementally from the last indexed commits.
	 *
	 * @param model
	 * @param repository
	 * @return IndexResult
	 */
	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
		IndexResult result = new IndexResult();
		try {
			FileBasedConfig config = getConfig(repository);
			config.load();

			// build a quick lookup of annotated tags
			Map<String, List<String>> tags = new HashMap<String, List<String>>();
			for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
				if (!tag.isAnnotatedTag()) {
					// skip non-annotated tags
					continue;
				}
				if (!tags.containsKey(tag.getObjectId().getName())) {
					tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
				}
				tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
			}

			// detect branch deletion
			// first assume all branches are deleted and then remove each
			// existing branch from deletedBranches during indexing
			Set<String> deletedBranches = new TreeSet<String>();
			for (String alias : config.getNames(CONF_ALIAS)) {
				String branch = config.getString(CONF_ALIAS, null, alias);
				deletedBranches.add(branch);
			}

			// get the local branches
			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

			// sort them by most recently updated
			Collections.sort(branches, new Comparator<RefModel>() {
				@Override
				public int compare(RefModel ref1, RefModel ref2) {
					return ref2.getDate().compareTo(ref1.getDate());
				}
			});

			// reorder default branch to first position
			RefModel defaultBranch = null;
			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
			for (RefModel branch :  branches) {
				if (branch.getObjectId().equals(defaultBranchId)) {
					defaultBranch = branch;
					break;
				}
			}
			branches.remove(defaultBranch);
			branches.add(0, defaultBranch);

			// walk through each branches
			for (RefModel branch : branches) {
				String branchName = branch.getName();

				boolean indexBranch = false;
				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
						&& branch.equals(defaultBranch)) {
					// indexing "default" branch
					indexBranch = true;
				} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
					// ignore internal meta branches
					indexBranch = false;
				} else {
					// normal explicit branch check
					indexBranch = model.indexedBranches.contains(branch.getName());
				}

				// if this branch is not specifically indexed then skip
				if (!indexBranch) {
					continue;
				}

				// remove this branch from the deletedBranches set
				deletedBranches.remove(branchName);

				// determine last commit
				String keyName = getBranchKey(branchName);
				String lastCommit = config.getString(CONF_BRANCH, null, keyName);

				List<RevCommit> revs;
				if (StringUtils.isEmpty(lastCommit)) {
					// new branch/unindexed branch, get all commits on branch
					revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
				} else {
					// pre-existing branch, get changes since last commit
					revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
				}

				if (revs.size() > 0) {
					result.branchCount += 1;
				}

				// reverse the list of commits so we start with the first commit
				Collections.reverse(revs);
				for (RevCommit commit : revs) {
					// index a commit
					result.add(index(model.name, repository, branchName, commit));
				}

				// update the config
				config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
				config.setString(CONF_ALIAS, null, keyName, branchName);
				config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
				config.save();
			}

			// the deletedBranches set will normally be empty by this point
			// unless a branch really was deleted and no longer exists
			if (deletedBranches.size() > 0) {
				for (String branch : deletedBranches) {
					IndexWriter writer = getIndexWriter(model.name);
					writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
					writer.commit();
				}
			}
			result.success = true;
		} catch (Throwable t) {
			logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
		}
		return result;
	}

	/**
	 * Creates a Lucene document for a commit
	 *
	 * @param commit
	 * @param tags
	 * @return a Lucene document
	 */
	private Document createDocument(RevCommit commit, List<String> tags) {
		Document doc = new Document();
		doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
		doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
		doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
				Resolution.MINUTE), StringField.TYPE_STORED));
		doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
		doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
		doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
		doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
		if (!ArrayUtils.isEmpty(tags)) {
			doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
		}
		return doc;
	}

	/**
	 * Incrementally index an object for the repository.
	 *
	 * @param repositoryName
	 * @param doc
	 * @return true, if successful
	 */
	private boolean index(String repositoryName, Document doc) {
		try {
			IndexWriter writer = getIndexWriter(repositoryName);
			writer.addDocument(doc);
			writer.commit();
			resetIndexSearcher(repositoryName);
			return true;
		} catch (Exception e) {
			logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
		}
		return false;
	}

	private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
		SearchResult result = new SearchResult();
		result.hitId = hitId;
		result.totalHits = totalHits;
		result.score = score;
		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
		result.summary = doc.get(FIELD_SUMMARY);
		result.author = doc.get(FIELD_AUTHOR);
		result.committer = doc.get(FIELD_COMMITTER);
		result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
		result.branch = doc.get(FIELD_BRANCH);
		result.commitId = doc.get(FIELD_COMMIT);
		result.path = doc.get(FIELD_PATH);
		if (doc.get(FIELD_TAG) != null) {
			result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
		}
		return result;
	}

	private synchronized void resetIndexSearcher(String repository) throws IOException {
		IndexSearcher searcher = searchers.remove(repository);
		if (searcher != null) {
			searcher.getIndexReader().close();
		}
	}

	/**
	 * Gets an index searcher for the repository.
	 *
	 * @param repository
	 * @return
	 * @throws IOException
	 */
	private IndexSearcher getIndexSearcher(String repository) throws IOException {
		IndexSearcher searcher = searchers.get(repository);
		if (searcher == null) {
			IndexWriter writer = getIndexWriter(repository);
			searcher = new IndexSearcher(DirectoryReader.open(writer, true));
			searchers.put(repository, searcher);
		}
		return searcher;
	}

	/**
	 * Gets an index writer for the repository. The index will be created if it
	 * does not already exist or if forceCreate is specified.
	 *
	 * @param repository
	 * @return an IndexWriter
	 * @throws IOException
	 */
	private IndexWriter getIndexWriter(String repository) throws IOException {
		IndexWriter indexWriter = writers.get(repository);
		File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
		File indexFolder = new File(repositoryFolder, LUCENE_DIR);
		Directory directory = FSDirectory.open(indexFolder);

		if (indexWriter == null) {
			if (!indexFolder.exists()) {
				indexFolder.mkdirs();
			}
			StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
			config.setOpenMode(OpenMode.CREATE_OR_APPEND);
			indexWriter = new IndexWriter(directory, config);
			writers.put(repository, indexWriter);
		}
		return indexWriter;
	}

	/**
	 * Searches the specified repositories for the given text or query
	 *
	 * @param text
	 *            if the text is null or empty, null is returned
	 * @param page
	 *            the page number to retrieve. page is 1-indexed.
	 * @param pageSize
	 *            the number of elements to return for this page
	 * @param repositories
	 *            a list of repositories to search. if no repositories are
	 *            specified null is returned.
	 * @return a list of SearchResults in order from highest to the lowest score
	 *
	 */
	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
		if (ArrayUtils.isEmpty(repositories)) {
			return null;
		}
		return search(text, page, pageSize, repositories.toArray(new String[0]));
	}

	/**
	 * Searches the specified repositories for the given text or query
	 *
	 * @param text
	 *            if the text is null or empty, null is returned
	 * @param page
	 *            the page number to retrieve. page is 1-indexed.
	 * @param pageSize
	 *            the number of elements to return for this page
	 * @param repositories
	 *            a list of repositories to search. if no repositories are
	 *            specified null is returned.
	 * @return a list of SearchResults in order from highest to the lowest score
	 *
	 */
	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
		if (StringUtils.isEmpty(text)) {
			return null;
		}
		if (ArrayUtils.isEmpty(repositories)) {
			return null;
		}
		Set<SearchResult> results = new LinkedHashSet<SearchResult>();
		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
		try {
			// default search checks summary and content
			BooleanQuery query = new BooleanQuery();
			QueryParser qp;
			qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			IndexSearcher searcher;
			if (repositories.length == 1) {
				// single repository search
				searcher = getIndexSearcher(repositories[0]);
			} else {
				// multiple repository search
				List<IndexReader> readers = new ArrayList<IndexReader>();
				for (String repository : repositories) {
					IndexSearcher repositoryIndex = getIndexSearcher(repository);
					readers.add(repositoryIndex.getIndexReader());
				}
				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
				MultiSourceReader reader = new MultiSourceReader(rdrs);
				searcher = new IndexSearcher(reader);
			}

			Query rewrittenQuery = searcher.rewrite(query);
			logger.debug(rewrittenQuery.toString());

			TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
			searcher.search(rewrittenQuery, collector);
			int offset = Math.max(0, (page - 1) * pageSize);
			ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
			int totalHits = collector.getTotalHits();
			for (int i = 0; i < hits.length; i++) {
				int docId = hits[i].doc;
				Document doc = searcher.doc(docId);
				SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
				if (repositories.length == 1) {
					// single repository search
					result.repository = repositories[0];
				} else {
					// multi-repository search
					MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
					int index = reader.getSourceIndex(docId);
					result.repository = repositories[index];
				}
				String content = doc.get(FIELD_CONTENT);
				result.fragment = getHighlightedFragment(analyzer, query, content, result);
				results.add(result);
			}
		} catch (Exception e) {
			logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
		}
		return new ArrayList<SearchResult>(results);
	}

	/**
	 *
	 * @param analyzer
	 * @param query
	 * @param content
	 * @param result
	 * @return
	 * @throws IOException
	 * @throws InvalidTokenOffsetsException
	 */
	private String getHighlightedFragment(Analyzer analyzer, Query query,
			String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
		if (content == null) {
			content = "";
		}

		int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
		int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;

		QueryScorer scorer = new QueryScorer(query, "content");
		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);

		// use an artificial delimiter for the token
		String termTag = "!!--[";
		String termTagEnd = "]--!!";
		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
		Highlighter highlighter = new Highlighter(formatter, scorer);
		highlighter.setTextFragmenter(fragmenter);

		String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
		if (ArrayUtils.isEmpty(fragments)) {
			if (SearchObjectType.blob  == result.type) {
				return "";
			}
			// clip commit message
			String fragment = content;
			if (fragment.length() > fragmentLength) {
				fragment = fragment.substring(0, fragmentLength) + "...";
			}
			return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
		}

		// make sure we have unique fragments
		Set<String> uniqueFragments = new LinkedHashSet<String>();
		for (String fragment : fragments) {
			uniqueFragments.add(fragment);
		}
		fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);

		StringBuilder sb = new StringBuilder();
		for (int i = 0, len = fragments.length; i < len; i++) {
			String fragment = fragments[i];
			String tag = "<pre class=\"text\">";

			// resurrect the raw fragment from removing the artificial delimiters
			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");

			// determine position of the raw fragment in the content
			int pos = content.indexOf(raw);

			// restore complete first line of fragment
			int c = pos;
			while (c > 0) {
				c--;
				if (content.charAt(c) == '\n') {
					break;
				}
			}
			if (c > 0) {
				// inject leading chunk of first fragment line
				fragment = content.substring(c + 1, pos) + fragment;
			}

			if (SearchObjectType.blob  == result.type) {
				// count lines as offset into the content for this fragment
				int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));

				// create fragment tag with line number and language
				String lang = "";
				String ext = StringUtils.getFileExtension(result.path).toLowerCase();
				if (!StringUtils.isEmpty(ext)) {
					// maintain leading space!
					lang = " lang-" + ext;
				}
				tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);

			}

			sb.append(tag);

			// replace the artificial delimiter with html tags
			String html = StringUtils.escapeForHtml(fragment, false);
			html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
			sb.append(html);
			sb.append("</pre>");
			if (i < len - 1) {
				sb.append("<span class=\"ellipses\">...</span><br/>");
			}
		}
		return sb.toString();
	}

	/**
	 * Simple class to track the results of an index update.
	 */
	private class IndexResult {
		long startTime = System.currentTimeMillis();
		long endTime = startTime;
		boolean success;
		int branchCount;
		int commitCount;
		int blobCount;

		void add(IndexResult result) {
			this.branchCount += result.branchCount;
			this.commitCount += result.commitCount;
			this.blobCount += result.blobCount;
		}

		void success() {
			success = true;
			endTime = System.currentTimeMillis();
		}

		float duration() {
			return (endTime - startTime)/1000f;
		}
	}

	/**
	 * Custom subclass of MultiReader to identify the source index for a given
	 * doc id.  This would not be necessary of there was a public method to
	 * obtain this information.
	 *
	 */
	private class MultiSourceReader extends MultiReader {

		MultiSourceReader(IndexReader [] readers) {
			super(readers, false);
		}

		int getSourceIndex(int docId) {
			int index = -1;
			try {
				index = super.readerIndex(docId);
			} catch (Exception e) {
				logger.error("Error getting source index", e);
			}
			return index;
		}
	}
}