diff options
author | Paul Martin <paul@paulsputer.com> | 2016-04-16 22:35:32 +0100 |
---|---|---|
committer | Paul Martin <paul@paulsputer.com> | 2016-04-16 22:35:32 +0100 |
commit | eecaad8b8e2c447429c31a01d49260ddd6b4ee03 (patch) | |
tree | f3a9090d85aadd4788e31c897b4a70c73721b2d1 | |
parent | 56619e42f0a1e555c004374e7f2b13f744ab23f5 (diff) | |
download | gitblit-1026-Lucene-Index-PDF.tar.gz gitblit-1026-Lucene-Index-PDF.zip |
Proof of concept #1026 (branch: 1026-Lucene-Index-PDF)
+ Update tika
+ Pull in Tika parsers
+ Remove the chicken-and-egg dependency between FilestoreManager and
RepositoryManager
16 files changed, 74 insertions, 40 deletions
diff --git a/build.moxie b/build.moxie index 8eb2bbfd..022d574d 100644 --- a/build.moxie +++ b/build.moxie @@ -178,7 +178,8 @@ dependencies: - compile 'commons-codec:commons-codec:1.7' :war - compile 'redis.clients:jedis:2.6.2' :war - compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war -- compile 'org.apache.tika:tika-core:1.5' :war +- compile 'org.apache.tika:tika-core:1.12' :war +- compile 'org.apache.tika:tika-parsers:1.12' :war - compile 'org.jsoup:jsoup:1.7.3' :war - test 'junit' # Dependencies for Selenium web page testing diff --git a/src/main/java/com/gitblit/FederationClient.java b/src/main/java/com/gitblit/FederationClient.java index 64ff0172..9b714dc5 100644 --- a/src/main/java/com/gitblit/FederationClient.java +++ b/src/main/java/com/gitblit/FederationClient.java @@ -98,7 +98,7 @@ public class FederationClient { RuntimeManager runtime = new RuntimeManager(settings, xssFilter, baseFolder).start(); NoopNotificationManager notifications = new NoopNotificationManager().start(); UserManager users = new UserManager(runtime, null).start(); - RepositoryManager repositories = new RepositoryManager(runtime, null, users).start(); + RepositoryManager repositories = new RepositoryManager(runtime, null, users, null).start(); FederationManager federation = new FederationManager(runtime, notifications, repositories).start(); IGitblit gitblit = new GitblitManager(null, null, runtime, null, notifications, users, null, repositories, null, federation, null); diff --git a/src/main/java/com/gitblit/MigrateTickets.java b/src/main/java/com/gitblit/MigrateTickets.java index b08228ef..035b010e 100644 --- a/src/main/java/com/gitblit/MigrateTickets.java +++ b/src/main/java/com/gitblit/MigrateTickets.java @@ -138,7 +138,7 @@ public class MigrateTickets { XssFilter xssFilter = new AllowXssFilter(); IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start(); + 
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start(); String inputServiceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName()); if (StringUtils.isEmpty(inputServiceName)) { diff --git a/src/main/java/com/gitblit/ReindexTickets.java b/src/main/java/com/gitblit/ReindexTickets.java index 858436af..10b53e54 100644 --- a/src/main/java/com/gitblit/ReindexTickets.java +++ b/src/main/java/com/gitblit/ReindexTickets.java @@ -130,7 +130,7 @@ public class ReindexTickets { XssFilter xssFilter = new AllowXssFilter(); IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start(); String serviceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName()); if (StringUtils.isEmpty(serviceName)) { diff --git a/src/main/java/com/gitblit/manager/FilestoreManager.java b/src/main/java/com/gitblit/manager/FilestoreManager.java index 11108557..6ed7c929 100644 --- a/src/main/java/com/gitblit/manager/FilestoreManager.java +++ b/src/main/java/com/gitblit/manager/FilestoreManager.java @@ -78,8 +78,6 @@ public class FilestoreManager implements IFilestoreManager { private final IRuntimeManager runtimeManager; - private final IRepositoryManager repositoryManager; - private final IStoredSettings settings; public static final int UNDEFINED_SIZE = -1; @@ -94,11 +92,8 @@ public class FilestoreManager implements IFilestoreManager { @Inject - FilestoreManager( - IRuntimeManager runtimeManager, - IRepositoryManager repositoryManager) { + public FilestoreManager(IRuntimeManager runtimeManager) { this.runtimeManager = runtimeManager; - this.repositoryManager = repositoryManager; this.settings = runtimeManager.getSettings(); } @@ -328,9 +323,8 @@ public 
class FilestoreManager implements IFilestoreManager { } @Override - public List<FilestoreModel> getAllObjects(UserModel user) { + public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) { - final List<RepositoryModel> viewableRepositories = repositoryManager.getRepositoryModels(user); List<String> viewableRepositoryNames = new ArrayList<String>(viewableRepositories.size()); for (RepositoryModel repository : viewableRepositories) { diff --git a/src/main/java/com/gitblit/manager/GitblitManager.java b/src/main/java/com/gitblit/manager/GitblitManager.java index 85d5c19f..bda7a1cb 100644 --- a/src/main/java/com/gitblit/manager/GitblitManager.java +++ b/src/main/java/com/gitblit/manager/GitblitManager.java @@ -1274,8 +1274,8 @@ public class GitblitManager implements IGitblit { } @Override - public List<FilestoreModel> getAllObjects(UserModel user) { - return filestoreManager.getAllObjects(user); + public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) { + return filestoreManager.getAllObjects(viewableRepositories); } @Override diff --git a/src/main/java/com/gitblit/manager/IFilestoreManager.java b/src/main/java/com/gitblit/manager/IFilestoreManager.java index 454331a3..7c3f3c6a 100644 --- a/src/main/java/com/gitblit/manager/IFilestoreManager.java +++ b/src/main/java/com/gitblit/manager/IFilestoreManager.java @@ -37,7 +37,7 @@ public interface IFilestoreManager extends IManager { FilestoreModel.Status downloadBlob(String oid, UserModel user, RepositoryModel repo, OutputStream streamOut ); - List<FilestoreModel> getAllObjects(UserModel user); + List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories); File getStorageFolder(); diff --git a/src/main/java/com/gitblit/manager/RepositoryManager.java b/src/main/java/com/gitblit/manager/RepositoryManager.java index e2e4de68..8d1a6a71 100644 --- a/src/main/java/com/gitblit/manager/RepositoryManager.java +++ 
b/src/main/java/com/gitblit/manager/RepositoryManager.java @@ -123,6 +123,8 @@ public class RepositoryManager implements IRepositoryManager { private final IPluginManager pluginManager; private final IUserManager userManager; + + private final IFilestoreManager filestoreManager; private File repositoriesFolder; @@ -136,12 +138,14 @@ public class RepositoryManager implements IRepositoryManager { public RepositoryManager( IRuntimeManager runtimeManager, IPluginManager pluginManager, - IUserManager userManager) { + IUserManager userManager, + IFilestoreManager filestoreManager) { this.settings = runtimeManager.getSettings(); this.runtimeManager = runtimeManager; this.pluginManager = pluginManager; this.userManager = userManager; + this.filestoreManager = filestoreManager; } @Override @@ -1866,7 +1870,7 @@ public class RepositoryManager implements IRepositoryManager { } protected void configureLuceneIndexing() { - luceneExecutor = new LuceneService(settings, this); + luceneExecutor = new LuceneService(settings, this, filestoreManager); String frequency = settings.getString(Keys.web.luceneFrequency, "2 mins"); int mins = TimeUtils.convertFrequencyToMinutes(frequency, 2); scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins, TimeUnit.MINUTES); diff --git a/src/main/java/com/gitblit/service/LuceneService.java b/src/main/java/com/gitblit/service/LuceneService.java index 097a39b2..62f7df79 100644 --- a/src/main/java/com/gitblit/service/LuceneService.java +++ b/src/main/java/com/gitblit/service/LuceneService.java @@ -19,6 +19,7 @@ import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF; import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
@@ -66,6 +67,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.sax.BodyContentHandler;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@@ -85,8 +91,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory;
import com.gitblit.Constants.SearchObjectType;
+import com.gitblit.GitBlit;
import com.gitblit.IStoredSettings;
import com.gitblit.Keys;
+import com.gitblit.manager.FilestoreManager;
+import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.IRepositoryManager;
import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
@@ -131,6 +140,8 @@ public class LuceneService implements Runnable { private final IStoredSettings storedSettings;
private final IRepositoryManager repositoryManager;
+ private final IFilestoreManager filestoreManager;
+
private final File repositoriesFolder;
private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
@@ -141,10 +152,12 @@ public class LuceneService implements Runnable { public LuceneService(
IStoredSettings settings,
- IRepositoryManager repositoryManager) {
+ IRepositoryManager repositoryManager,
+ IFilestoreManager filestoreManager) {
this.storedSettings = settings;
this.repositoryManager = repositoryManager;
+ this.filestoreManager = filestoreManager;
this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
String exts = luceneIgnoreExtensions;
if (settings != null) {
@@ -540,7 +553,8 @@ public class LuceneService implements Runnable { if (!paths.containsKey(path)) {
continue;
}
-
+//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index
+
// remove path from set
ObjectId blobId = paths.remove(path);
result.blobCount++;
@@ -677,9 +691,24 @@ public class LuceneService implements Runnable { }
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
+ String str = "";
// read the blob content
- String str = JGitUtils.getStringContent(repository, commit.getTree(),
+ if (path.isFilestoreItem()) {
+ //Get file from filestore
+ BodyContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+ PDFParser parser = new PDFParser();
+
+ ParseContext parseContext = new ParseContext();
+ File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
+ FileInputStream inputstream = new FileInputStream(lfsFile);
+ parser.parse(inputstream, handler, metadata, parseContext);
+ str = handler.toString();
+ } else {
+ str = JGitUtils.getStringContent(repository, commit.getTree(),
path.path, encodings);
+ }
+
if (str != null) {
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
writer.addDocument(doc);
diff --git a/src/main/java/com/gitblit/utils/SyndicationUtils.java b/src/main/java/com/gitblit/utils/SyndicationUtils.java index 7afd0383..dfbca352 100644 --- a/src/main/java/com/gitblit/utils/SyndicationUtils.java +++ b/src/main/java/com/gitblit/utils/SyndicationUtils.java @@ -28,19 +28,19 @@ import com.gitblit.Constants; import com.gitblit.Constants.FeedObjectType;
import com.gitblit.GitBlitException;
import com.gitblit.models.FeedEntryModel;
-import com.sun.syndication.feed.synd.SyndCategory;
-import com.sun.syndication.feed.synd.SyndCategoryImpl;
-import com.sun.syndication.feed.synd.SyndContent;
-import com.sun.syndication.feed.synd.SyndContentImpl;
-import com.sun.syndication.feed.synd.SyndEntry;
-import com.sun.syndication.feed.synd.SyndEntryImpl;
-import com.sun.syndication.feed.synd.SyndFeed;
-import com.sun.syndication.feed.synd.SyndFeedImpl;
-import com.sun.syndication.feed.synd.SyndImageImpl;
-import com.sun.syndication.io.FeedException;
-import com.sun.syndication.io.SyndFeedInput;
-import com.sun.syndication.io.SyndFeedOutput;
-import com.sun.syndication.io.XmlReader;
+import com.rometools.rome.feed.synd.SyndCategory;
+import com.rometools.rome.feed.synd.SyndCategoryImpl;
+import com.rometools.rome.feed.synd.SyndContent;
+import com.rometools.rome.feed.synd.SyndContentImpl;
+import com.rometools.rome.feed.synd.SyndEntry;
+import com.rometools.rome.feed.synd.SyndEntryImpl;
+import com.rometools.rome.feed.synd.SyndFeed;
+import com.rometools.rome.feed.synd.SyndFeedImpl;
+import com.rometools.rome.feed.synd.SyndImageImpl;
+import com.rometools.rome.io.FeedException;
+import com.rometools.rome.io.SyndFeedInput;
+import com.rometools.rome.io.SyndFeedOutput;
+import com.rometools.rome.io.XmlReader;
/**
* Utility class for RSS feeds.
diff --git a/src/main/java/com/gitblit/wicket/pages/FilestorePage.java b/src/main/java/com/gitblit/wicket/pages/FilestorePage.java index 7130f6c2..cf9ee251 100644 --- a/src/main/java/com/gitblit/wicket/pages/FilestorePage.java +++ b/src/main/java/com/gitblit/wicket/pages/FilestorePage.java @@ -67,7 +67,7 @@ public class FilestorePage extends RootPage { final UserModel user = (GitBlitWebSession.get().getUser() == null) ? UserModel.ANONYMOUS : GitBlitWebSession.get().getUser(); final long nBytesUsed = app().filestore().getFilestoreUsedByteCount(); final long nBytesAvailable = app().filestore().getFilestoreAvailableByteCount(); - List<FilestoreModel> files = app().filestore().getAllObjects(user); + List<FilestoreModel> files = app().filestore().getAllObjects(app().repositories().getRepositoryModels(user)); if (files == null) { files = new ArrayList<FilestoreModel>(); diff --git a/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java b/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java index 0a5de196..248ee16e 100644 --- a/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java +++ b/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java @@ -16,6 +16,8 @@ package com.gitblit.tests; import com.gitblit.IStoredSettings; +import com.gitblit.manager.FilestoreManager; +import com.gitblit.manager.IFilestoreManager; import com.gitblit.manager.INotificationManager; import com.gitblit.manager.IPluginManager; import com.gitblit.manager.IRepositoryManager; @@ -57,7 +59,8 @@ public class BranchTicketServiceTest extends TicketServiceTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager( 
+ runtimeManager, pluginManager, userManager, null).start(); BranchTicketService service = new BranchTicketService( runtimeManager, diff --git a/src/test/java/com/gitblit/tests/FileTicketServiceTest.java b/src/test/java/com/gitblit/tests/FileTicketServiceTest.java index 1fb2eed9..38523843 100644 --- a/src/test/java/com/gitblit/tests/FileTicketServiceTest.java +++ b/src/test/java/com/gitblit/tests/FileTicketServiceTest.java @@ -56,7 +56,8 @@ public class FileTicketServiceTest extends TicketServiceTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager( + runtimeManager, pluginManager, userManager, null).start(); FileTicketService service = new FileTicketService( runtimeManager, diff --git a/src/test/java/com/gitblit/tests/LuceneExecutorTest.java b/src/test/java/com/gitblit/tests/LuceneExecutorTest.java index a8358b99..b9ecbbbd 100644 --- a/src/test/java/com/gitblit/tests/LuceneExecutorTest.java +++ b/src/test/java/com/gitblit/tests/LuceneExecutorTest.java @@ -24,6 +24,7 @@ import org.junit.Before; import org.junit.Test;
import com.gitblit.Keys;
+import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.RepositoryManager;
import com.gitblit.manager.RuntimeManager;
import com.gitblit.manager.UserManager;
@@ -53,8 +54,9 @@ public class LuceneExecutorTest extends GitblitUnitTest { XssFilter xssFilter = new AllowXssFilter();
RuntimeManager runtime = new RuntimeManager(settings, xssFilter, GitBlitSuite.BASEFOLDER).start();
UserManager users = new UserManager(runtime, null).start();
- RepositoryManager repos = new RepositoryManager(runtime, null, users);
- return new LuceneService(settings, repos);
+ RepositoryManager repos = new RepositoryManager(runtime, null, users, null);
+ //TODO: May need filestore
+ return new LuceneService(settings, repos, null);
}
private RepositoryModel newRepositoryModel(Repository repository) {
diff --git a/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java b/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java index 48011ade..6bfab43d 100644 --- a/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java +++ b/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java @@ -64,7 +64,7 @@ public class RedisTicketServiceTest extends TicketServiceTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start(); RedisTicketService service = new RedisTicketService( runtimeManager, diff --git a/src/test/java/com/gitblit/tests/UITicketTest.java b/src/test/java/com/gitblit/tests/UITicketTest.java index 54aa1e1e..cb61b3e8 100644 --- a/src/test/java/com/gitblit/tests/UITicketTest.java +++ b/src/test/java/com/gitblit/tests/UITicketTest.java @@ -81,7 +81,7 @@ public class UITicketTest extends GitblitUnitTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start(); BranchTicketService service = new BranchTicketService( runtimeManager, |