@@ -169,14 +169,14 @@ dependencies: | |||
- compile 'com.unboundid:unboundid-ldapsdk:2.3.8' :war | |||
- compile 'org.apache.ivy:ivy:2.2.0' :war | |||
- compile 'com.toedter:jcalendar:1.3.2' :authority | |||
- compile 'org.apache.commons:commons-compress:1.4.1' :war | |||
- compile 'commons-io:commons-io:2.2' :war | |||
- compile 'org.apache.commons:commons-compress:1.16' :war | |||
- compile 'commons-io:commons-io:2.6' :war | |||
- compile 'com.force.api:force-partner-api:24.0.0' :war | |||
- compile 'org.freemarker:freemarker:2.3.22' :war | |||
- compile 'com.github.dblock.waffle:waffle-jna:1.7.3' :war | |||
- compile 'org.kohsuke:libpam4j:1.8' :war | |||
- compile 'args4j:args4j:2.0.29' :war :fedclient | |||
- compile 'commons-codec:commons-codec:1.7' :war | |||
- compile 'commons-codec:commons-codec:1.11' :war | |||
- compile 'redis.clients:jedis:2.6.2' :war | |||
- compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war | |||
- compile 'org.apache.tika:tika-core:1.17' :war |
@@ -93,8 +93,13 @@ import com.gitblit.models.RepositoryModel; | |||
import com.gitblit.models.SearchResult; | |||
import com.gitblit.utils.ArrayUtils; | |||
import com.gitblit.utils.JGitUtils; | |||
import static com.gitblit.utils.JGitUtils.getDefaultBranch; | |||
import com.gitblit.utils.StringUtils; | |||
import java.io.ByteArrayInputStream; | |||
import java.util.logging.Level; | |||
import org.eclipse.jgit.lib.ObjectStream; | |||
import org.eclipse.jgit.revwalk.RevBlob; | |||
import org.eclipse.jgit.treewalk.filter.PathFilterGroup; | |||
/** | |||
* The Lucene service handles indexing and searching repositories. | |||
@@ -552,41 +557,43 @@ public class LuceneService implements Runnable { | |||
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { | |||
boolean useTika = useTika(ext); | |||
ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); | |||
InputStream in = ldr.openStream(); | |||
int n; | |||
while ((n = in.read(tmp)) > 0) { | |||
os.write(tmp, 0, n); | |||
} | |||
in.close(); | |||
byte[] content = os.toByteArray(); | |||
String str; | |||
if (useTika) { | |||
str = TikaUtils.extractText(ext, name, content, this, path, new Indexer() { | |||
@Override | |||
public boolean index(String name, String content) { | |||
try { | |||
Document doc = new Document(); | |||
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_COMMIT, commitName, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_PATH, name, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_ARCHIVE, path, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_CONTENT, content, TextField.TYPE_STORED)); | |||
writer.addDocument(doc); | |||
return true; | |||
} catch (IOException ex) { | |||
java.util.logging.Logger.getLogger(LuceneService.class.getName()).log(Level.SEVERE, null, ex); | |||
return false; | |||
try (InputStream is = ldr.openStream()) { | |||
str = TikaUtils.extractText(ext, name, is, this, path, new Indexer() { | |||
@Override | |||
public boolean index(String name, String content) { | |||
try { | |||
Document doc = new Document(); | |||
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_COMMIT, commitName, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_PATH, name, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_ARCHIVE, path, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED)); | |||
doc.add(new Field(FIELD_CONTENT, content, TextField.TYPE_STORED)); | |||
writer.addDocument(doc); | |||
return true; | |||
} catch (IOException ex) { | |||
java.util.logging.Logger.getLogger(LuceneService.class.getName()).log(Level.SEVERE, null, ex); | |||
return false; | |||
} | |||
} | |||
} | |||
}); | |||
}); | |||
} | |||
} else { | |||
InputStream in = ldr.openStream(); | |||
int n; | |||
while ((n = in.read(tmp)) > 0) { | |||
os.write(tmp, 0, n); | |||
} | |||
in.close(); | |||
byte[] content = os.toByteArray(); | |||
str = StringUtils.decodeString(content, encodings); | |||
} | |||
if (str!=null) { | |||
if (str != null) { | |||
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); | |||
} | |||
os.reset(); | |||
@@ -644,7 +651,7 @@ public class LuceneService implements Runnable { | |||
String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); | |||
return StringUtils.decodeString(content, encodings); | |||
} | |||
/** | |||
* Incrementally update the index with the specified commit for the | |||
* repository. | |||
@@ -697,9 +704,11 @@ public class LuceneService implements Runnable { | |||
// read the blob content | |||
String str; | |||
if (useTika) { | |||
byte[] content = JGitUtils.getByteContent(repository, commit.getTree(), | |||
path.path, true); | |||
str = TikaUtils.extractText(ext, name, content, this, spath, new Indexer() { | |||
RevWalk rw = new RevWalk(repository); | |||
RevBlob blob = rw.lookupBlob(ObjectId.fromString(path.objectId)); | |||
ObjectLoader ldr = repository.open(blob.getId(), Constants.OBJ_BLOB); | |||
try (ObjectStream is = ldr.openStream()) { | |||
str = TikaUtils.extractText(ext, name,is , this, spath, new Indexer() { | |||
@Override | |||
public boolean index(String name, String content) { | |||
try { | |||
@@ -721,7 +730,8 @@ public class LuceneService implements Runnable { | |||
} | |||
} | |||
}); | |||
} | |||
rw.dispose(); | |||
} else { | |||
str = JGitUtils.getStringContent(repository, commit.getTree(), | |||
path.path, encodings); | |||
@@ -755,7 +765,7 @@ public class LuceneService implements Runnable { | |||
} | |||
protected boolean useTika(String ext) { | |||
return tikaExtensions != null && tikaExtensions.contains(ext); | |||
return tikaExtensions != null && ext != null && tikaExtensions.contains(ext); | |||
} | |||
/** |
@@ -33,58 +33,55 @@ import org.apache.tika.exception.TikaException; | |||
public class TikaUtils { | |||
public static String extractText(String ext, String filename, byte[] data, LuceneService service, String path, LuceneService.Indexer indexer) { | |||
public static String extractText(String ext, String filename, InputStream is, LuceneService service, String path, LuceneService.Indexer indexer) { | |||
Tika tika = new Tika(); | |||
String fileType = tika.detect(filename); | |||
try (InputStream is = new ByteArrayInputStream(data)) { | |||
try { | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika parsing " + filename); | |||
if (isArchive(filename, ext)) { | |||
return extractTextFromArchive(ext, filename, data, service,path, indexer); | |||
return extractTextFromArchive(ext, filename, is, service, path, indexer); | |||
} | |||
return tika.parseToString(is); | |||
} catch (IOException ex) { | |||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||
return ""; | |||
} catch (Throwable tex) { | |||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, tex); | |||
return ""; | |||
} | |||
} | |||
private static String extractTextFromArchive(String ext, String filename, byte[] data, LuceneService service, String path, LuceneService.Indexer indexer) { | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + filename + " " + data.length); | |||
try (InputStream is = new ByteArrayInputStream(data)) { | |||
try (ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is)) { | |||
ArchiveEntry nextEntry; | |||
while ((nextEntry = in.getNextEntry()) != null) { | |||
String archiveExt = null; | |||
String name = nextEntry.getName().toLowerCase(); | |||
if (name.indexOf('.') > -1) { | |||
archiveExt = name.substring(name.lastIndexOf('.') + 1); | |||
} | |||
name = filename + "/" + name; | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + name); | |||
if (!nextEntry.isDirectory()) { | |||
try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { | |||
IOUtils.copy(in, bos); | |||
bos.flush(); | |||
String result = service.getEncodedString(bos.toByteArray(), archiveExt); | |||
if (result == null && service.useTika(ext)) { | |||
result = extractText(archiveExt, path+"/"+nextEntry.getName(), bos.toByteArray(), service, path+"/"+nextEntry.getName(), indexer); | |||
} | |||
if (result!=null) { | |||
indexer.index(path+"/"+nextEntry.getName(), result); | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip extract " + name + " " + result.length()); | |||
} | |||
private static String extractTextFromArchive(String ext, String filename, InputStream is, LuceneService service, String path, LuceneService.Indexer indexer) { | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + filename + " "); | |||
try (ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is)) { | |||
ArchiveEntry nextEntry; | |||
while ((nextEntry = in.getNextEntry()) != null) { | |||
String archiveExt = null; | |||
String name = nextEntry.getName().toLowerCase(); | |||
if (name.indexOf('.') > -1) { | |||
archiveExt = name.substring(name.lastIndexOf('.') + 1); | |||
} | |||
name = filename + "/" + name; | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + name); | |||
if (!nextEntry.isDirectory()) { | |||
try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { | |||
IOUtils.copy(in, bos); | |||
bos.flush(); | |||
String result = service.getEncodedString(bos.toByteArray(), archiveExt); | |||
if (result == null && service.useTika(ext)) { | |||
result = extractText(archiveExt, path + "/" + nextEntry.getName(), new ByteArrayInputStream(bos.toByteArray()), service, path + "/" + nextEntry.getName(), indexer); | |||
} | |||
if (result != null) { | |||
indexer.index(path + "/" + nextEntry.getName(), result); | |||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip extract " + name + " " + result.length()); | |||
} | |||
} catch (IOException ex) { | |||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||
} | |||
} | |||
} catch (ArchiveException ex) { | |||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||
} | |||
} catch (IOException ex) { | |||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||
} catch (ArchiveException ex) { | |||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||
} | |||
return null; | |||
} |