From eecaad8b8e2c447429c31a01d49260ddd6b4ee03 Mon Sep 17 00:00:00 2001 From: Paul Martin Date: Sat, 16 Apr 2016 22:35:32 +0100 Subject: [PATCH] Proof of concept #1026 + Update tika + Pull in Tika parsers + Remove chicken and egg scenario for FilestoreManager vs RepositoryManager --- build.moxie | 3 +- .../java/com/gitblit/FederationClient.java | 2 +- src/main/java/com/gitblit/MigrateTickets.java | 2 +- src/main/java/com/gitblit/ReindexTickets.java | 2 +- .../com/gitblit/manager/FilestoreManager.java | 10 ++---- .../com/gitblit/manager/GitblitManager.java | 4 +-- .../gitblit/manager/IFilestoreManager.java | 2 +- .../gitblit/manager/RepositoryManager.java | 8 +++-- .../com/gitblit/service/LuceneService.java | 35 +++++++++++++++++-- .../com/gitblit/utils/SyndicationUtils.java | 26 +++++++------- .../gitblit/wicket/pages/FilestorePage.java | 2 +- .../tests/BranchTicketServiceTest.java | 5 ++- .../gitblit/tests/FileTicketServiceTest.java | 3 +- .../com/gitblit/tests/LuceneExecutorTest.java | 6 ++-- .../gitblit/tests/RedisTicketServiceTest.java | 2 +- .../java/com/gitblit/tests/UITicketTest.java | 2 +- 16 files changed, 74 insertions(+), 40 deletions(-) diff --git a/build.moxie b/build.moxie index 8eb2bbfd..022d574d 100644 --- a/build.moxie +++ b/build.moxie @@ -178,7 +178,8 @@ dependencies: - compile 'commons-codec:commons-codec:1.7' :war - compile 'redis.clients:jedis:2.6.2' :war - compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war -- compile 'org.apache.tika:tika-core:1.5' :war +- compile 'org.apache.tika:tika-core:1.12' :war +- compile 'org.apache.tika:tika-parsers:1.12' :war - compile 'org.jsoup:jsoup:1.7.3' :war - test 'junit' # Dependencies for Selenium web page testing diff --git a/src/main/java/com/gitblit/FederationClient.java b/src/main/java/com/gitblit/FederationClient.java index 64ff0172..9b714dc5 100644 --- a/src/main/java/com/gitblit/FederationClient.java +++ b/src/main/java/com/gitblit/FederationClient.java @@ -98,7 +98,7 @@ public class FederationClient { RuntimeManager runtime = new RuntimeManager(settings, xssFilter, baseFolder).start(); NoopNotificationManager notifications = new NoopNotificationManager().start(); UserManager users = new UserManager(runtime, null).start(); - RepositoryManager repositories = new RepositoryManager(runtime, null, users).start(); + RepositoryManager repositories = new RepositoryManager(runtime, null, users, null).start(); FederationManager federation = new FederationManager(runtime, notifications, repositories).start(); IGitblit gitblit = new GitblitManager(null, null, runtime, null, notifications, users, null, repositories, null, federation, null); diff --git a/src/main/java/com/gitblit/MigrateTickets.java b/src/main/java/com/gitblit/MigrateTickets.java index b08228ef..035b010e 100644 --- a/src/main/java/com/gitblit/MigrateTickets.java +++ b/src/main/java/com/gitblit/MigrateTickets.java @@ -138,7 +138,7 @@ public class MigrateTickets { XssFilter xssFilter = new AllowXssFilter(); IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start(); String inputServiceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName()); if (StringUtils.isEmpty(inputServiceName)) { diff --git a/src/main/java/com/gitblit/ReindexTickets.java b/src/main/java/com/gitblit/ReindexTickets.java index 858436af..10b53e54 100644 --- a/src/main/java/com/gitblit/ReindexTickets.java +++ b/src/main/java/com/gitblit/ReindexTickets.java @@ -130,7 +130,7 @@ public class ReindexTickets { XssFilter xssFilter = new AllowXssFilter(); IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start(); String serviceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName()); if (StringUtils.isEmpty(serviceName)) { diff --git a/src/main/java/com/gitblit/manager/FilestoreManager.java b/src/main/java/com/gitblit/manager/FilestoreManager.java index 11108557..6ed7c929 100644 --- a/src/main/java/com/gitblit/manager/FilestoreManager.java +++ b/src/main/java/com/gitblit/manager/FilestoreManager.java @@ -78,8 +78,6 @@ public class FilestoreManager implements IFilestoreManager { private final IRuntimeManager runtimeManager; - private final IRepositoryManager repositoryManager; - private final IStoredSettings settings; public static final int UNDEFINED_SIZE = -1; @@ -94,11 +92,8 @@ public class FilestoreManager implements IFilestoreManager { @Inject - FilestoreManager( - IRuntimeManager runtimeManager, - IRepositoryManager repositoryManager) { + public FilestoreManager(IRuntimeManager runtimeManager) { this.runtimeManager = runtimeManager; - this.repositoryManager = repositoryManager; this.settings = runtimeManager.getSettings(); } @@ -328,9 +323,8 @@ public class FilestoreManager implements IFilestoreManager { } @Override - public List getAllObjects(UserModel user) { + public List getAllObjects(List viewableRepositories) { - final List viewableRepositories = repositoryManager.getRepositoryModels(user); List viewableRepositoryNames = new ArrayList(viewableRepositories.size()); for (RepositoryModel repository : viewableRepositories) { diff --git a/src/main/java/com/gitblit/manager/GitblitManager.java b/src/main/java/com/gitblit/manager/GitblitManager.java index 85d5c19f..bda7a1cb 100644 --- a/src/main/java/com/gitblit/manager/GitblitManager.java +++ b/src/main/java/com/gitblit/manager/GitblitManager.java @@ -1274,8 +1274,8 @@ public class GitblitManager implements IGitblit { } @Override - public List getAllObjects(UserModel user) { - return filestoreManager.getAllObjects(user); + public List getAllObjects(List viewableRepositories) { + return filestoreManager.getAllObjects(viewableRepositories); } @Override diff --git a/src/main/java/com/gitblit/manager/IFilestoreManager.java b/src/main/java/com/gitblit/manager/IFilestoreManager.java index 454331a3..7c3f3c6a 100644 --- a/src/main/java/com/gitblit/manager/IFilestoreManager.java +++ b/src/main/java/com/gitblit/manager/IFilestoreManager.java @@ -37,7 +37,7 @@ public interface IFilestoreManager extends IManager { FilestoreModel.Status downloadBlob(String oid, UserModel user, RepositoryModel repo, OutputStream streamOut ); - List getAllObjects(UserModel user); + List getAllObjects(List viewableRepositories); File getStorageFolder(); diff --git a/src/main/java/com/gitblit/manager/RepositoryManager.java b/src/main/java/com/gitblit/manager/RepositoryManager.java index e2e4de68..8d1a6a71 100644 --- a/src/main/java/com/gitblit/manager/RepositoryManager.java +++ b/src/main/java/com/gitblit/manager/RepositoryManager.java @@ -123,6 +123,8 @@ public class RepositoryManager implements IRepositoryManager { private final IPluginManager pluginManager; private final IUserManager userManager; + + private final IFilestoreManager filestoreManager; private File repositoriesFolder; @@ -136,12 +138,14 @@ public class RepositoryManager implements IRepositoryManager { public RepositoryManager( IRuntimeManager runtimeManager, IPluginManager pluginManager, - IUserManager userManager) { + IUserManager userManager, + IFilestoreManager filestoreManager) { this.settings = runtimeManager.getSettings(); this.runtimeManager = runtimeManager; this.pluginManager = pluginManager; this.userManager = userManager; + this.filestoreManager = filestoreManager; } @Override @@ -1866,7 +1870,7 @@ public class RepositoryManager implements IRepositoryManager { } protected void configureLuceneIndexing() { - luceneExecutor = new LuceneService(settings, this); + luceneExecutor = new LuceneService(settings, this, filestoreManager); String frequency = settings.getString(Keys.web.luceneFrequency, "2 mins"); int mins = TimeUtils.convertFrequencyToMinutes(frequency, 2); scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins, TimeUnit.MINUTES); diff --git a/src/main/java/com/gitblit/service/LuceneService.java b/src/main/java/com/gitblit/service/LuceneService.java index 097a39b2..62f7df79 100644 --- a/src/main/java/com/gitblit/service/LuceneService.java +++ b/src/main/java/com/gitblit/service/LuceneService.java @@ -19,6 +19,7 @@ import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; @@ -66,6 +67,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.sax.BodyContentHandler; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; @@ -85,8 +91,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.gitblit.Constants.SearchObjectType; +import com.gitblit.GitBlit; import com.gitblit.IStoredSettings; import com.gitblit.Keys; +import com.gitblit.manager.FilestoreManager; +import com.gitblit.manager.IFilestoreManager; import com.gitblit.manager.IRepositoryManager; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; @@ -131,6 +140,8 @@ public class LuceneService implements Runnable { private final IStoredSettings storedSettings; private final IRepositoryManager repositoryManager; + private final IFilestoreManager filestoreManager; + private final File repositoriesFolder; private final Map searchers = new ConcurrentHashMap(); @@ -141,10 +152,12 @@ public class LuceneService implements Runnable { public LuceneService( IStoredSettings settings, - IRepositoryManager repositoryManager) { + IRepositoryManager repositoryManager, + IFilestoreManager filestoreManager) { this.storedSettings = settings; this.repositoryManager = repositoryManager; + this.filestoreManager = filestoreManager; this.repositoriesFolder = repositoryManager.getRepositoriesFolder(); String exts = luceneIgnoreExtensions; if (settings != null) { @@ -540,7 +553,8 @@ public class LuceneService implements Runnable { if (!paths.containsKey(path)) { continue; } - +//TODO: Figure out filestore oid the path - bit more involved than updating the index + // remove path from set ObjectId blobId = paths.remove(path); result.blobCount++; @@ -677,9 +691,24 @@ public class LuceneService implements Runnable { } if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { + String str = ""; // read the blob content - String str = JGitUtils.getStringContent(repository, commit.getTree(), + if (path.isFilestoreItem()) { + //Get file from filestore + BodyContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + PDFParser parser = new PDFParser(); + + ParseContext parseContext = new ParseContext(); + File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid()); + FileInputStream inputstream = new FileInputStream(lfsFile); + parser.parse(inputstream, handler, metadata, parseContext); + str = handler.toString(); + } else { + str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings); + } + if (str != null) { doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); writer.addDocument(doc); diff --git a/src/main/java/com/gitblit/utils/SyndicationUtils.java b/src/main/java/com/gitblit/utils/SyndicationUtils.java index 7afd0383..dfbca352 100644 --- a/src/main/java/com/gitblit/utils/SyndicationUtils.java +++ b/src/main/java/com/gitblit/utils/SyndicationUtils.java @@ -28,19 +28,19 @@ import com.gitblit.Constants; import com.gitblit.Constants.FeedObjectType; import com.gitblit.GitBlitException; import com.gitblit.models.FeedEntryModel; -import com.sun.syndication.feed.synd.SyndCategory; -import com.sun.syndication.feed.synd.SyndCategoryImpl; -import com.sun.syndication.feed.synd.SyndContent; -import com.sun.syndication.feed.synd.SyndContentImpl; -import com.sun.syndication.feed.synd.SyndEntry; -import com.sun.syndication.feed.synd.SyndEntryImpl; -import com.sun.syndication.feed.synd.SyndFeed; -import com.sun.syndication.feed.synd.SyndFeedImpl; -import com.sun.syndication.feed.synd.SyndImageImpl; -import com.sun.syndication.io.FeedException; -import com.sun.syndication.io.SyndFeedInput; -import com.sun.syndication.io.SyndFeedOutput; -import com.sun.syndication.io.XmlReader; +import com.rometools.rome.feed.synd.SyndCategory; +import com.rometools.rome.feed.synd.SyndCategoryImpl; +import com.rometools.rome.feed.synd.SyndContent; +import com.rometools.rome.feed.synd.SyndContentImpl; +import com.rometools.rome.feed.synd.SyndEntry; +import com.rometools.rome.feed.synd.SyndEntryImpl; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.feed.synd.SyndFeedImpl; +import com.rometools.rome.feed.synd.SyndImageImpl; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.SyndFeedOutput; +import com.rometools.rome.io.XmlReader; /** * Utility class for RSS feeds. diff --git a/src/main/java/com/gitblit/wicket/pages/FilestorePage.java b/src/main/java/com/gitblit/wicket/pages/FilestorePage.java index 7130f6c2..cf9ee251 100644 --- a/src/main/java/com/gitblit/wicket/pages/FilestorePage.java +++ b/src/main/java/com/gitblit/wicket/pages/FilestorePage.java @@ -67,7 +67,7 @@ public class FilestorePage extends RootPage { final UserModel user = (GitBlitWebSession.get().getUser() == null) ? UserModel.ANONYMOUS : GitBlitWebSession.get().getUser(); final long nBytesUsed = app().filestore().getFilestoreUsedByteCount(); final long nBytesAvailable = app().filestore().getFilestoreAvailableByteCount(); - List files = app().filestore().getAllObjects(user); + List files = app().filestore().getAllObjects(app().repositories().getRepositoryModels(user)); if (files == null) { files = new ArrayList(); diff --git a/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java b/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java index 0a5de196..248ee16e 100644 --- a/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java +++ b/src/test/java/com/gitblit/tests/BranchTicketServiceTest.java @@ -16,6 +16,8 @@ package com.gitblit.tests; import com.gitblit.IStoredSettings; +import com.gitblit.manager.FilestoreManager; +import com.gitblit.manager.IFilestoreManager; import com.gitblit.manager.INotificationManager; import com.gitblit.manager.IPluginManager; import com.gitblit.manager.IRepositoryManager; @@ -57,7 +59,8 @@ public class BranchTicketServiceTest extends TicketServiceTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager( + runtimeManager, pluginManager, userManager, null).start(); BranchTicketService service = new BranchTicketService( runtimeManager, diff --git a/src/test/java/com/gitblit/tests/FileTicketServiceTest.java b/src/test/java/com/gitblit/tests/FileTicketServiceTest.java index 1fb2eed9..38523843 100644 --- a/src/test/java/com/gitblit/tests/FileTicketServiceTest.java +++ b/src/test/java/com/gitblit/tests/FileTicketServiceTest.java @@ -56,7 +56,8 @@ public class FileTicketServiceTest extends TicketServiceTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager( + runtimeManager, pluginManager, userManager, null).start(); FileTicketService service = new FileTicketService( runtimeManager, diff --git a/src/test/java/com/gitblit/tests/LuceneExecutorTest.java b/src/test/java/com/gitblit/tests/LuceneExecutorTest.java index a8358b99..b9ecbbbd 100644 --- a/src/test/java/com/gitblit/tests/LuceneExecutorTest.java +++ b/src/test/java/com/gitblit/tests/LuceneExecutorTest.java @@ -24,6 +24,7 @@ import org.junit.Before; import org.junit.Test; import com.gitblit.Keys; +import com.gitblit.manager.FilestoreManager; import com.gitblit.manager.RepositoryManager; import com.gitblit.manager.RuntimeManager; import com.gitblit.manager.UserManager; @@ -53,8 +54,9 @@ public class LuceneExecutorTest extends GitblitUnitTest { XssFilter xssFilter = new AllowXssFilter(); RuntimeManager runtime = new RuntimeManager(settings, xssFilter, GitBlitSuite.BASEFOLDER).start(); UserManager users = new UserManager(runtime, null).start(); - RepositoryManager repos = new RepositoryManager(runtime, null, users); - return new LuceneService(settings, repos); + RepositoryManager repos = new RepositoryManager(runtime, null, users, null); + //TODO: May need filestore + return new LuceneService(settings, repos, null); } private RepositoryModel newRepositoryModel(Repository repository) { diff --git a/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java b/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java index 48011ade..6bfab43d 100644 --- a/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java +++ b/src/test/java/com/gitblit/tests/RedisTicketServiceTest.java @@ -64,7 +64,7 @@ public class RedisTicketServiceTest extends TicketServiceTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start(); RedisTicketService service = new RedisTicketService( runtimeManager, diff --git a/src/test/java/com/gitblit/tests/UITicketTest.java b/src/test/java/com/gitblit/tests/UITicketTest.java index 54aa1e1e..cb61b3e8 100644 --- a/src/test/java/com/gitblit/tests/UITicketTest.java +++ b/src/test/java/com/gitblit/tests/UITicketTest.java @@ -81,7 +81,7 @@ public class UITicketTest extends GitblitUnitTest { IPluginManager pluginManager = new PluginManager(runtimeManager).start(); INotificationManager notificationManager = new NotificationManager(settings).start(); IUserManager userManager = new UserManager(runtimeManager, pluginManager).start(); - IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start(); + IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start(); BranchTicketService service = new BranchTicketService( runtimeManager, -- 2.39.5