summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Martin <paul@paulsputer.com> 2016-04-16 22:35:32 +0100
committer Paul Martin <paul@paulsputer.com> 2016-04-16 22:35:32 +0100
commit eecaad8b8e2c447429c31a01d49260ddd6b4ee03 (patch)
tree f3a9090d85aadd4788e31c897b4a70c73721b2d1
parent 56619e42f0a1e555c004374e7f2b13f744ab23f5 (diff)
download gitblit-1026-Lucene-Index-PDF.tar.gz
gitblit-1026-Lucene-Index-PDF.zip
Proof of concept #1026 (1026-Lucene-Index-PDF)
+ Update Tika + Pull in Tika parsers + Remove chicken-and-egg scenario for FilestoreManager vs RepositoryManager
-rw-r--r--build.moxie3
-rw-r--r--src/main/java/com/gitblit/FederationClient.java2
-rw-r--r--src/main/java/com/gitblit/MigrateTickets.java2
-rw-r--r--src/main/java/com/gitblit/ReindexTickets.java2
-rw-r--r--src/main/java/com/gitblit/manager/FilestoreManager.java10
-rw-r--r--src/main/java/com/gitblit/manager/GitblitManager.java4
-rw-r--r--src/main/java/com/gitblit/manager/IFilestoreManager.java2
-rw-r--r--src/main/java/com/gitblit/manager/RepositoryManager.java8
-rw-r--r--src/main/java/com/gitblit/service/LuceneService.java35
-rw-r--r--src/main/java/com/gitblit/utils/SyndicationUtils.java26
-rw-r--r--src/main/java/com/gitblit/wicket/pages/FilestorePage.java2
-rw-r--r--src/test/java/com/gitblit/tests/BranchTicketServiceTest.java5
-rw-r--r--src/test/java/com/gitblit/tests/FileTicketServiceTest.java3
-rw-r--r--src/test/java/com/gitblit/tests/LuceneExecutorTest.java6
-rw-r--r--src/test/java/com/gitblit/tests/RedisTicketServiceTest.java2
-rw-r--r--src/test/java/com/gitblit/tests/UITicketTest.java2
16 files changed, 74 insertions, 40 deletions
diff --git a/build.moxie b/build.moxie
index 8eb2bbfd..022d574d 100644
--- a/build.moxie
+++ b/build.moxie
@@ -178,7 +178,8 @@ dependencies:
- compile 'commons-codec:commons-codec:1.7' :war
- compile 'redis.clients:jedis:2.6.2' :war
- compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war
-- compile 'org.apache.tika:tika-core:1.5' :war
+- compile 'org.apache.tika:tika-core:1.12' :war
+- compile 'org.apache.tika:tika-parsers:1.12' :war
- compile 'org.jsoup:jsoup:1.7.3' :war
- test 'junit'
# Dependencies for Selenium web page testing
diff --git a/src/main/java/com/gitblit/FederationClient.java b/src/main/java/com/gitblit/FederationClient.java
index 64ff0172..9b714dc5 100644
--- a/src/main/java/com/gitblit/FederationClient.java
+++ b/src/main/java/com/gitblit/FederationClient.java
@@ -98,7 +98,7 @@ public class FederationClient {
RuntimeManager runtime = new RuntimeManager(settings, xssFilter, baseFolder).start();
NoopNotificationManager notifications = new NoopNotificationManager().start();
UserManager users = new UserManager(runtime, null).start();
- RepositoryManager repositories = new RepositoryManager(runtime, null, users).start();
+ RepositoryManager repositories = new RepositoryManager(runtime, null, users, null).start();
FederationManager federation = new FederationManager(runtime, notifications, repositories).start();
IGitblit gitblit = new GitblitManager(null, null, runtime, null, notifications, users, null, repositories, null, federation, null);
diff --git a/src/main/java/com/gitblit/MigrateTickets.java b/src/main/java/com/gitblit/MigrateTickets.java
index b08228ef..035b010e 100644
--- a/src/main/java/com/gitblit/MigrateTickets.java
+++ b/src/main/java/com/gitblit/MigrateTickets.java
@@ -138,7 +138,7 @@ public class MigrateTickets {
XssFilter xssFilter = new AllowXssFilter();
IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start();
- IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start();
+ IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start();
String inputServiceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName());
if (StringUtils.isEmpty(inputServiceName)) {
diff --git a/src/main/java/com/gitblit/ReindexTickets.java b/src/main/java/com/gitblit/ReindexTickets.java
index 858436af..10b53e54 100644
--- a/src/main/java/com/gitblit/ReindexTickets.java
+++ b/src/main/java/com/gitblit/ReindexTickets.java
@@ -130,7 +130,7 @@ public class ReindexTickets {
XssFilter xssFilter = new AllowXssFilter();
IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start();
- IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start();
+ IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start();
String serviceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName());
if (StringUtils.isEmpty(serviceName)) {
diff --git a/src/main/java/com/gitblit/manager/FilestoreManager.java b/src/main/java/com/gitblit/manager/FilestoreManager.java
index 11108557..6ed7c929 100644
--- a/src/main/java/com/gitblit/manager/FilestoreManager.java
+++ b/src/main/java/com/gitblit/manager/FilestoreManager.java
@@ -78,8 +78,6 @@ public class FilestoreManager implements IFilestoreManager {
private final IRuntimeManager runtimeManager;
- private final IRepositoryManager repositoryManager;
-
private final IStoredSettings settings;
public static final int UNDEFINED_SIZE = -1;
@@ -94,11 +92,8 @@ public class FilestoreManager implements IFilestoreManager {
@Inject
- FilestoreManager(
- IRuntimeManager runtimeManager,
- IRepositoryManager repositoryManager) {
+ public FilestoreManager(IRuntimeManager runtimeManager) {
this.runtimeManager = runtimeManager;
- this.repositoryManager = repositoryManager;
this.settings = runtimeManager.getSettings();
}
@@ -328,9 +323,8 @@ public class FilestoreManager implements IFilestoreManager {
}
@Override
- public List<FilestoreModel> getAllObjects(UserModel user) {
+ public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) {
- final List<RepositoryModel> viewableRepositories = repositoryManager.getRepositoryModels(user);
List<String> viewableRepositoryNames = new ArrayList<String>(viewableRepositories.size());
for (RepositoryModel repository : viewableRepositories) {
diff --git a/src/main/java/com/gitblit/manager/GitblitManager.java b/src/main/java/com/gitblit/manager/GitblitManager.java
index 85d5c19f..bda7a1cb 100644
--- a/src/main/java/com/gitblit/manager/GitblitManager.java
+++ b/src/main/java/com/gitblit/manager/GitblitManager.java
@@ -1274,8 +1274,8 @@ public class GitblitManager implements IGitblit {
}
@Override
- public List<FilestoreModel> getAllObjects(UserModel user) {
- return filestoreManager.getAllObjects(user);
+ public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) {
+ return filestoreManager.getAllObjects(viewableRepositories);
}
@Override
diff --git a/src/main/java/com/gitblit/manager/IFilestoreManager.java b/src/main/java/com/gitblit/manager/IFilestoreManager.java
index 454331a3..7c3f3c6a 100644
--- a/src/main/java/com/gitblit/manager/IFilestoreManager.java
+++ b/src/main/java/com/gitblit/manager/IFilestoreManager.java
@@ -37,7 +37,7 @@ public interface IFilestoreManager extends IManager {
FilestoreModel.Status downloadBlob(String oid, UserModel user, RepositoryModel repo, OutputStream streamOut );
- List<FilestoreModel> getAllObjects(UserModel user);
+ List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories);
File getStorageFolder();
diff --git a/src/main/java/com/gitblit/manager/RepositoryManager.java b/src/main/java/com/gitblit/manager/RepositoryManager.java
index e2e4de68..8d1a6a71 100644
--- a/src/main/java/com/gitblit/manager/RepositoryManager.java
+++ b/src/main/java/com/gitblit/manager/RepositoryManager.java
@@ -123,6 +123,8 @@ public class RepositoryManager implements IRepositoryManager {
private final IPluginManager pluginManager;
private final IUserManager userManager;
+
+ private final IFilestoreManager filestoreManager;
private File repositoriesFolder;
@@ -136,12 +138,14 @@ public class RepositoryManager implements IRepositoryManager {
public RepositoryManager(
IRuntimeManager runtimeManager,
IPluginManager pluginManager,
- IUserManager userManager) {
+ IUserManager userManager,
+ IFilestoreManager filestoreManager) {
this.settings = runtimeManager.getSettings();
this.runtimeManager = runtimeManager;
this.pluginManager = pluginManager;
this.userManager = userManager;
+ this.filestoreManager = filestoreManager;
}
@Override
@@ -1866,7 +1870,7 @@ public class RepositoryManager implements IRepositoryManager {
}
protected void configureLuceneIndexing() {
- luceneExecutor = new LuceneService(settings, this);
+ luceneExecutor = new LuceneService(settings, this, filestoreManager);
String frequency = settings.getString(Keys.web.luceneFrequency, "2 mins");
int mins = TimeUtils.convertFrequencyToMinutes(frequency, 2);
scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins, TimeUnit.MINUTES);
diff --git a/src/main/java/com/gitblit/service/LuceneService.java b/src/main/java/com/gitblit/service/LuceneService.java
index 097a39b2..62f7df79 100644
--- a/src/main/java/com/gitblit/service/LuceneService.java
+++ b/src/main/java/com/gitblit/service/LuceneService.java
@@ -19,6 +19,7 @@ import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
@@ -66,6 +67,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.sax.BodyContentHandler;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@@ -85,8 +91,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gitblit.Constants.SearchObjectType;
+import com.gitblit.GitBlit;
import com.gitblit.IStoredSettings;
import com.gitblit.Keys;
+import com.gitblit.manager.FilestoreManager;
+import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.IRepositoryManager;
import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
@@ -131,6 +140,8 @@ public class LuceneService implements Runnable {
private final IStoredSettings storedSettings;
private final IRepositoryManager repositoryManager;
+ private final IFilestoreManager filestoreManager;
+
private final File repositoriesFolder;
private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
@@ -141,10 +152,12 @@ public class LuceneService implements Runnable {
public LuceneService(
IStoredSettings settings,
- IRepositoryManager repositoryManager) {
+ IRepositoryManager repositoryManager,
+ IFilestoreManager filestoreManager) {
this.storedSettings = settings;
this.repositoryManager = repositoryManager;
+ this.filestoreManager = filestoreManager;
this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
String exts = luceneIgnoreExtensions;
if (settings != null) {
@@ -540,7 +553,8 @@ public class LuceneService implements Runnable {
if (!paths.containsKey(path)) {
continue;
}
-
+//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index
+
// remove path from set
ObjectId blobId = paths.remove(path);
result.blobCount++;
@@ -677,9 +691,24 @@ public class LuceneService implements Runnable {
}
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
+ String str = "";
// read the blob content
- String str = JGitUtils.getStringContent(repository, commit.getTree(),
+ if (path.isFilestoreItem()) {
+ //Get file from filestore
+ BodyContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+ PDFParser parser = new PDFParser();
+
+ ParseContext parseContext = new ParseContext();
+ File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
+ FileInputStream inputstream = new FileInputStream(lfsFile);
+ parser.parse(inputstream, handler, metadata, parseContext);
+ str = handler.toString();
+ } else {
+ str = JGitUtils.getStringContent(repository, commit.getTree(),
path.path, encodings);
+ }
+
if (str != null) {
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
writer.addDocument(doc);
diff --git a/src/main/java/com/gitblit/utils/SyndicationUtils.java b/src/main/java/com/gitblit/utils/SyndicationUtils.java
index 7afd0383..dfbca352 100644
--- a/src/main/java/com/gitblit/utils/SyndicationUtils.java
+++ b/src/main/java/com/gitblit/utils/SyndicationUtils.java
@@ -28,19 +28,19 @@ import com.gitblit.Constants;
import com.gitblit.Constants.FeedObjectType;
import com.gitblit.GitBlitException;
import com.gitblit.models.FeedEntryModel;
-import com.sun.syndication.feed.synd.SyndCategory;
-import com.sun.syndication.feed.synd.SyndCategoryImpl;
-import com.sun.syndication.feed.synd.SyndContent;
-import com.sun.syndication.feed.synd.SyndContentImpl;
-import com.sun.syndication.feed.synd.SyndEntry;
-import com.sun.syndication.feed.synd.SyndEntryImpl;
-import com.sun.syndication.feed.synd.SyndFeed;
-import com.sun.syndication.feed.synd.SyndFeedImpl;
-import com.sun.syndication.feed.synd.SyndImageImpl;
-import com.sun.syndication.io.FeedException;
-import com.sun.syndication.io.SyndFeedInput;
-import com.sun.syndication.io.SyndFeedOutput;
-import com.sun.syndication.io.XmlReader;
+import com.rometools.rome.feed.synd.SyndCategory;
+import com.rometools.rome.feed.synd.SyndCategoryImpl;
+import com.rometools.rome.feed.synd.SyndContent;
+import com.rometools.rome.feed.synd.SyndContentImpl;
+import com.rometools.rome.feed.synd.SyndEntry;
+import com.rometools.rome.feed.synd.SyndEntryImpl;
+import com.rometools.rome.feed.synd.SyndFeed;
+import com.rometools.rome.feed.synd.SyndFeedImpl;
+import com.rometools.rome.feed.synd.SyndImageImpl;
+import com.rometools.rome.io.FeedException;
+import com.rometools.rome.io.SyndFeedInput;
+import com.rometools.rome.io.SyndFeedOutput;
+import com.rometools.rome.io.XmlReader;
/**
* Utility class for RSS feeds.
diff --git a/src/main/java/com/gitblit/wicket/pages/FilestorePage.java b/src/main/java/com/gitblit/wicket/pages/FilestorePage.java
index 7130f6c2..cf9ee251 100644
--- a/src/main/java/com/gitblit/wicket/pages/FilestorePage.java
+++ b/src/main/java/com/gitblit/wicket/pages/FilestorePage.java
@@ -67,7 +67,7 @@ public class FilestorePage extends RootPage {
final UserModel user = (GitBlitWebSession.get().getUser() == null) ? UserModel.ANONYMOUS : GitBlitWebSession.get().getUser();
final long nBytesUsed = app().filestore().getFilestoreUsedByteCount();
final long nBytesAvailable = app().filestore().getFilestoreAvailableByteCount();
- List<FilestoreModel> files = app().filestore().getAllObjects(user);
+ List<FilestoreModel> files = app().filestore().getAllObjects(app().repositories().getRepositoryModels(user));
if (files == null) {
files = new ArrayList<FilestoreModel>();
diff --git a/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java b/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java
index 0a5de196..248ee16e 100644
--- a/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java
+++ b/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java
@@ -16,6 +16,8 @@
package com.gitblit.tests;
import com.gitblit.IStoredSettings;
+import com.gitblit.manager.FilestoreManager;
+import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.INotificationManager;
import com.gitblit.manager.IPluginManager;
import com.gitblit.manager.IRepositoryManager;
@@ -57,7 +59,8 @@ public class BranchTicketServiceTest extends TicketServiceTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
- IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+ IRepositoryManager repositoryManager = new RepositoryManager(
+ runtimeManager, pluginManager, userManager, null).start();
BranchTicketService service = new BranchTicketService(
runtimeManager,
diff --git a/src/test/java/com/gitblit/tests/FileTicketServiceTest.java b/src/test/java/com/gitblit/tests/FileTicketServiceTest.java
index 1fb2eed9..38523843 100644
--- a/src/test/java/com/gitblit/tests/FileTicketServiceTest.java
+++ b/src/test/java/com/gitblit/tests/FileTicketServiceTest.java
@@ -56,7 +56,8 @@ public class FileTicketServiceTest extends TicketServiceTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
- IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+ IRepositoryManager repositoryManager = new RepositoryManager(
+ runtimeManager, pluginManager, userManager, null).start();
FileTicketService service = new FileTicketService(
runtimeManager,
diff --git a/src/test/java/com/gitblit/tests/LuceneExecutorTest.java b/src/test/java/com/gitblit/tests/LuceneExecutorTest.java
index a8358b99..b9ecbbbd 100644
--- a/src/test/java/com/gitblit/tests/LuceneExecutorTest.java
+++ b/src/test/java/com/gitblit/tests/LuceneExecutorTest.java
@@ -24,6 +24,7 @@ import org.junit.Before;
import org.junit.Test;
import com.gitblit.Keys;
+import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.RepositoryManager;
import com.gitblit.manager.RuntimeManager;
import com.gitblit.manager.UserManager;
@@ -53,8 +54,9 @@ public class LuceneExecutorTest extends GitblitUnitTest {
XssFilter xssFilter = new AllowXssFilter();
RuntimeManager runtime = new RuntimeManager(settings, xssFilter, GitBlitSuite.BASEFOLDER).start();
UserManager users = new UserManager(runtime, null).start();
- RepositoryManager repos = new RepositoryManager(runtime, null, users);
- return new LuceneService(settings, repos);
+ RepositoryManager repos = new RepositoryManager(runtime, null, users, null);
+ //TODO: May need filestore
+ return new LuceneService(settings, repos, null);
}
private RepositoryModel newRepositoryModel(Repository repository) {
diff --git a/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java b/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java
index 48011ade..6bfab43d 100644
--- a/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java
+++ b/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java
@@ -64,7 +64,7 @@ public class RedisTicketServiceTest extends TicketServiceTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
- IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+ IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start();
RedisTicketService service = new RedisTicketService(
runtimeManager,
diff --git a/src/test/java/com/gitblit/tests/UITicketTest.java b/src/test/java/com/gitblit/tests/UITicketTest.java
index 54aa1e1e..cb61b3e8 100644
--- a/src/test/java/com/gitblit/tests/UITicketTest.java
+++ b/src/test/java/com/gitblit/tests/UITicketTest.java
@@ -81,7 +81,7 @@ public class UITicketTest extends GitblitUnitTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
- IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+ IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start();
BranchTicketService service = new BranchTicketService(
runtimeManager,