- compile 'com.unboundid:unboundid-ldapsdk:2.3.8' :war | - compile 'com.unboundid:unboundid-ldapsdk:2.3.8' :war | ||||
- compile 'org.apache.ivy:ivy:2.2.0' :war | - compile 'org.apache.ivy:ivy:2.2.0' :war | ||||
- compile 'com.toedter:jcalendar:1.3.2' :authority | - compile 'com.toedter:jcalendar:1.3.2' :authority | ||||
- compile 'org.apache.commons:commons-compress:1.4.1' :war | |||||
- compile 'commons-io:commons-io:2.2' :war | |||||
- compile 'org.apache.commons:commons-compress:1.16' :war | |||||
- compile 'commons-io:commons-io:2.6' :war | |||||
- compile 'com.force.api:force-partner-api:24.0.0' :war | - compile 'com.force.api:force-partner-api:24.0.0' :war | ||||
- compile 'org.freemarker:freemarker:2.3.22' :war | - compile 'org.freemarker:freemarker:2.3.22' :war | ||||
- compile 'com.github.dblock.waffle:waffle-jna:1.7.3' :war | - compile 'com.github.dblock.waffle:waffle-jna:1.7.3' :war | ||||
- compile 'org.kohsuke:libpam4j:1.8' :war | - compile 'org.kohsuke:libpam4j:1.8' :war | ||||
- compile 'args4j:args4j:2.0.29' :war :fedclient | - compile 'args4j:args4j:2.0.29' :war :fedclient | ||||
- compile 'commons-codec:commons-codec:1.7' :war | |||||
- compile 'commons-codec:commons-codec:1.11' :war | |||||
- compile 'redis.clients:jedis:2.6.2' :war | - compile 'redis.clients:jedis:2.6.2' :war | ||||
- compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war | - compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war | ||||
- compile 'org.apache.tika:tika-core:1.17' :war | - compile 'org.apache.tika:tika-core:1.17' :war |
import com.gitblit.models.SearchResult; | import com.gitblit.models.SearchResult; | ||||
import com.gitblit.utils.ArrayUtils; | import com.gitblit.utils.ArrayUtils; | ||||
import com.gitblit.utils.JGitUtils; | import com.gitblit.utils.JGitUtils; | ||||
import static com.gitblit.utils.JGitUtils.getDefaultBranch; | |||||
import com.gitblit.utils.StringUtils; | import com.gitblit.utils.StringUtils; | ||||
import java.io.ByteArrayInputStream; | |||||
import java.util.logging.Level; | import java.util.logging.Level; | ||||
import org.eclipse.jgit.lib.ObjectStream; | |||||
import org.eclipse.jgit.revwalk.RevBlob; | |||||
import org.eclipse.jgit.treewalk.filter.PathFilterGroup; | |||||
/** | /** | ||||
* The Lucene service handles indexing and searching repositories. | * The Lucene service handles indexing and searching repositories. | ||||
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { | if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { | ||||
boolean useTika = useTika(ext); | boolean useTika = useTika(ext); | ||||
ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); | ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); | ||||
InputStream in = ldr.openStream(); | |||||
int n; | |||||
while ((n = in.read(tmp)) > 0) { | |||||
os.write(tmp, 0, n); | |||||
} | |||||
in.close(); | |||||
byte[] content = os.toByteArray(); | |||||
String str; | String str; | ||||
if (useTika) { | if (useTika) { | ||||
str = TikaUtils.extractText(ext, name, content, this, path, new Indexer() { | |||||
@Override | |||||
public boolean index(String name, String content) { | |||||
try { | |||||
Document doc = new Document(); | |||||
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_COMMIT, commitName, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_PATH, name, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_ARCHIVE, path, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_CONTENT, content, TextField.TYPE_STORED)); | |||||
writer.addDocument(doc); | |||||
return true; | |||||
} catch (IOException ex) { | |||||
java.util.logging.Logger.getLogger(LuceneService.class.getName()).log(Level.SEVERE, null, ex); | |||||
return false; | |||||
try (InputStream is = ldr.openStream()) { | |||||
str = TikaUtils.extractText(ext, name, is, this, path, new Indexer() { | |||||
@Override | |||||
public boolean index(String name, String content) { | |||||
try { | |||||
Document doc = new Document(); | |||||
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_COMMIT, commitName, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_PATH, name, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_ARCHIVE, path, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED)); | |||||
doc.add(new Field(FIELD_CONTENT, content, TextField.TYPE_STORED)); | |||||
writer.addDocument(doc); | |||||
return true; | |||||
} catch (IOException ex) { | |||||
java.util.logging.Logger.getLogger(LuceneService.class.getName()).log(Level.SEVERE, null, ex); | |||||
return false; | |||||
} | |||||
} | } | ||||
} | |||||
}); | |||||
}); | |||||
} | |||||
} else { | } else { | ||||
InputStream in = ldr.openStream(); | |||||
int n; | |||||
while ((n = in.read(tmp)) > 0) { | |||||
os.write(tmp, 0, n); | |||||
} | |||||
in.close(); | |||||
byte[] content = os.toByteArray(); | |||||
str = StringUtils.decodeString(content, encodings); | str = StringUtils.decodeString(content, encodings); | ||||
} | } | ||||
if (str!=null) { | |||||
if (str != null) { | |||||
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); | doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); | ||||
} | } | ||||
os.reset(); | os.reset(); | ||||
String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); | String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); | ||||
return StringUtils.decodeString(content, encodings); | return StringUtils.decodeString(content, encodings); | ||||
} | } | ||||
/** | /** | ||||
* Incrementally update the index with the specified commit for the | * Incrementally update the index with the specified commit for the | ||||
* repository. | * repository. | ||||
// read the blob content | // read the blob content | ||||
String str; | String str; | ||||
if (useTika) { | if (useTika) { | ||||
byte[] content = JGitUtils.getByteContent(repository, commit.getTree(), | |||||
path.path, true); | |||||
str = TikaUtils.extractText(ext, name, content, this, spath, new Indexer() { | |||||
RevWalk rw = new RevWalk(repository); | |||||
RevBlob blob = rw.lookupBlob(ObjectId.fromString(path.objectId)); | |||||
ObjectLoader ldr = repository.open(blob.getId(), Constants.OBJ_BLOB); | |||||
try (ObjectStream is = ldr.openStream()) { | |||||
str = TikaUtils.extractText(ext, name,is , this, spath, new Indexer() { | |||||
@Override | @Override | ||||
public boolean index(String name, String content) { | public boolean index(String name, String content) { | ||||
try { | try { | ||||
} | } | ||||
} | } | ||||
}); | }); | ||||
} | |||||
rw.dispose(); | |||||
} else { | } else { | ||||
str = JGitUtils.getStringContent(repository, commit.getTree(), | str = JGitUtils.getStringContent(repository, commit.getTree(), | ||||
path.path, encodings); | path.path, encodings); | ||||
} | } | ||||
protected boolean useTika(String ext) { | protected boolean useTika(String ext) { | ||||
return tikaExtensions != null && tikaExtensions.contains(ext); | |||||
return tikaExtensions != null && ext != null && tikaExtensions.contains(ext); | |||||
} | } | ||||
/** | /** |
public class TikaUtils { | public class TikaUtils { | ||||
public static String extractText(String ext, String filename, byte[] data, LuceneService service, String path, LuceneService.Indexer indexer) { | |||||
public static String extractText(String ext, String filename, InputStream is, LuceneService service, String path, LuceneService.Indexer indexer) { | |||||
Tika tika = new Tika(); | Tika tika = new Tika(); | ||||
String fileType = tika.detect(filename); | String fileType = tika.detect(filename); | ||||
try (InputStream is = new ByteArrayInputStream(data)) { | |||||
try { | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika parsing " + filename); | Logger.getLogger(TikaUtils.class.getName()).info("Tika parsing " + filename); | ||||
if (isArchive(filename, ext)) { | if (isArchive(filename, ext)) { | ||||
return extractTextFromArchive(ext, filename, data, service,path, indexer); | |||||
return extractTextFromArchive(ext, filename, is, service, path, indexer); | |||||
} | } | ||||
return tika.parseToString(is); | return tika.parseToString(is); | ||||
} catch (IOException ex) { | |||||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||||
return ""; | |||||
} catch (Throwable tex) { | } catch (Throwable tex) { | ||||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, tex); | Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, tex); | ||||
return ""; | return ""; | ||||
} | } | ||||
} | } | ||||
private static String extractTextFromArchive(String ext, String filename, byte[] data, LuceneService service, String path, LuceneService.Indexer indexer) { | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + filename + " " + data.length); | |||||
try (InputStream is = new ByteArrayInputStream(data)) { | |||||
try (ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is)) { | |||||
ArchiveEntry nextEntry; | |||||
while ((nextEntry = in.getNextEntry()) != null) { | |||||
String archiveExt = null; | |||||
String name = nextEntry.getName().toLowerCase(); | |||||
if (name.indexOf('.') > -1) { | |||||
archiveExt = name.substring(name.lastIndexOf('.') + 1); | |||||
} | |||||
name = filename + "/" + name; | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + name); | |||||
if (!nextEntry.isDirectory()) { | |||||
try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { | |||||
IOUtils.copy(in, bos); | |||||
bos.flush(); | |||||
String result = service.getEncodedString(bos.toByteArray(), archiveExt); | |||||
if (result == null && service.useTika(ext)) { | |||||
result = extractText(archiveExt, path+"/"+nextEntry.getName(), bos.toByteArray(), service, path+"/"+nextEntry.getName(), indexer); | |||||
} | |||||
if (result!=null) { | |||||
indexer.index(path+"/"+nextEntry.getName(), result); | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip extract " + name + " " + result.length()); | |||||
} | |||||
private static String extractTextFromArchive(String ext, String filename, InputStream is, LuceneService service, String path, LuceneService.Indexer indexer) { | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + filename + " "); | |||||
try (ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is)) { | |||||
ArchiveEntry nextEntry; | |||||
while ((nextEntry = in.getNextEntry()) != null) { | |||||
String archiveExt = null; | |||||
String name = nextEntry.getName().toLowerCase(); | |||||
if (name.indexOf('.') > -1) { | |||||
archiveExt = name.substring(name.lastIndexOf('.') + 1); | |||||
} | |||||
name = filename + "/" + name; | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip parsing " + name); | |||||
if (!nextEntry.isDirectory()) { | |||||
try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { | |||||
IOUtils.copy(in, bos); | |||||
bos.flush(); | |||||
String result = service.getEncodedString(bos.toByteArray(), archiveExt); | |||||
if (result == null && service.useTika(ext)) { | |||||
result = extractText(archiveExt, path + "/" + nextEntry.getName(), new ByteArrayInputStream(bos.toByteArray()), service, path + "/" + nextEntry.getName(), indexer); | |||||
} | |||||
if (result != null) { | |||||
indexer.index(path + "/" + nextEntry.getName(), result); | |||||
Logger.getLogger(TikaUtils.class.getName()).info("Tika zip extract " + name + " " + result.length()); | |||||
} | } | ||||
} catch (IOException ex) { | |||||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||||
} | } | ||||
} | } | ||||
} catch (ArchiveException ex) { | |||||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||||
} | } | ||||
} catch (IOException ex) { | } catch (IOException ex) { | ||||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | ||||
} catch (ArchiveException ex) { | |||||
Logger.getLogger(TikaUtils.class.getName()).log(Level.SEVERE, null, ex); | |||||
} | } | ||||
return null; | return null; | ||||
} | } |