source.dussan.org Git - gitblit.git/commitdiff
Proof of concept #1026 1026-Lucene-Index-PDF
author Paul Martin <paul@paulsputer.com>
Sat, 16 Apr 2016 21:35:32 +0000 (22:35 +0100)
committer Paul Martin <paul@paulsputer.com>
Sat, 16 Apr 2016 21:35:32 +0000 (22:35 +0100)
+ Update tika
+ Pull in Tika parsers
+ Remove the chicken-and-egg dependency between FilestoreManager and
RepositoryManager

16 files changed:
build.moxie
src/main/java/com/gitblit/FederationClient.java
src/main/java/com/gitblit/MigrateTickets.java
src/main/java/com/gitblit/ReindexTickets.java
src/main/java/com/gitblit/manager/FilestoreManager.java
src/main/java/com/gitblit/manager/GitblitManager.java
src/main/java/com/gitblit/manager/IFilestoreManager.java
src/main/java/com/gitblit/manager/RepositoryManager.java
src/main/java/com/gitblit/service/LuceneService.java
src/main/java/com/gitblit/utils/SyndicationUtils.java
src/main/java/com/gitblit/wicket/pages/FilestorePage.java
src/test/java/com/gitblit/tests/BranchTicketServiceTest.java
src/test/java/com/gitblit/tests/FileTicketServiceTest.java
src/test/java/com/gitblit/tests/LuceneExecutorTest.java
src/test/java/com/gitblit/tests/RedisTicketServiceTest.java
src/test/java/com/gitblit/tests/UITicketTest.java

index 8eb2bbfd24ea05e1dcfad46b85bb683079cf0c5e..022d574d7cfcba3047d4f650549ba6c2655537b3 100644 (file)
@@ -178,7 +178,8 @@ dependencies:
 - compile 'commons-codec:commons-codec:1.7' :war
 - compile 'redis.clients:jedis:2.6.2' :war
 - compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war
-- compile 'org.apache.tika:tika-core:1.5' :war
+- compile 'org.apache.tika:tika-core:1.12' :war
+- compile 'org.apache.tika:tika-parsers:1.12' :war
 - compile 'org.jsoup:jsoup:1.7.3' :war
 - test 'junit'
 # Dependencies for Selenium web page testing
index 64ff017264c65c066836c4d09d1e9db6db1c659a..9b714dc56de72c5826cab1eb06aa1dba3dbacd83 100644 (file)
@@ -98,7 +98,7 @@ public class FederationClient {
                RuntimeManager runtime = new RuntimeManager(settings, xssFilter, baseFolder).start();
                NoopNotificationManager notifications = new NoopNotificationManager().start();
                UserManager users = new UserManager(runtime, null).start();
-               RepositoryManager repositories = new RepositoryManager(runtime, null, users).start();
+               RepositoryManager repositories = new RepositoryManager(runtime, null, users, null).start();
                FederationManager federation = new FederationManager(runtime, notifications, repositories).start();
                IGitblit gitblit = new GitblitManager(null, null, runtime, null, notifications, users, null, repositories, null, federation, null);
 
index b08228efac1b9ed588f56aa1a146761514f3dcd0..035b010e65673028ceb489268fb6a5389cd40b63 100644 (file)
@@ -138,7 +138,7 @@ public class MigrateTickets {
 
                XssFilter xssFilter = new AllowXssFilter();
                IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start();
-               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start();
+               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start();
 
                String inputServiceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName());
                if (StringUtils.isEmpty(inputServiceName)) {
index 858436afb850ffff586c871c3dc5ba6c70516718..10b53e54d8fee67e28237dced22abb7acee18494 100644 (file)
@@ -130,7 +130,7 @@ public class ReindexTickets {
 
                XssFilter xssFilter = new AllowXssFilter();
                IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start();
-               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start();
+               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start();
 
                String serviceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName());
                if (StringUtils.isEmpty(serviceName)) {
index 111085573869b89b43bad40108a8cbfb62653415..6ed7c92998b5616deac319a1852dd50fbf853acd 100644 (file)
@@ -78,8 +78,6 @@ public class FilestoreManager implements IFilestoreManager {
 
        private final IRuntimeManager runtimeManager;
        
-       private final IRepositoryManager repositoryManager;
-
        private final IStoredSettings settings;
 
        public static final int UNDEFINED_SIZE = -1;
@@ -94,11 +92,8 @@ public class FilestoreManager implements IFilestoreManager {
 
 
        @Inject
-       FilestoreManager(
-                       IRuntimeManager runtimeManager,
-                       IRepositoryManager repositoryManager) {
+       public FilestoreManager(IRuntimeManager runtimeManager) {
                this.runtimeManager = runtimeManager;
-               this.repositoryManager = repositoryManager;
                this.settings = runtimeManager.getSettings();
        }
 
@@ -328,9 +323,8 @@ public class FilestoreManager implements IFilestoreManager {
        }
 
        @Override
-       public List<FilestoreModel> getAllObjects(UserModel user) {
+       public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) {
                
-               final List<RepositoryModel> viewableRepositories = repositoryManager.getRepositoryModels(user);
                List<String> viewableRepositoryNames = new ArrayList<String>(viewableRepositories.size());
                
                for (RepositoryModel repository : viewableRepositories) {
index 85d5c19faad3e4a99910635aa0d7b029a165533c..bda7a1cb360fdef63b36b37df9dc3fec0285fb32 100644 (file)
@@ -1274,8 +1274,8 @@ public class GitblitManager implements IGitblit {
        }
        
        @Override
-       public List<FilestoreModel> getAllObjects(UserModel user) {
-               return filestoreManager.getAllObjects(user);
+       public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) {
+               return filestoreManager.getAllObjects(viewableRepositories);
        }
        
        @Override
index 454331a3b20803105ebfeaa07567e704ec81a326..7c3f3c6aad9a90144e9a0e6d457cfec8e37d346c 100644 (file)
@@ -37,7 +37,7 @@ public interface IFilestoreManager extends IManager {
        
        FilestoreModel.Status downloadBlob(String oid, UserModel user, RepositoryModel repo, OutputStream streamOut );
        
-       List<FilestoreModel> getAllObjects(UserModel user);
+       List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories);
        
        File getStorageFolder();
        
index e2e4de686d99fd2d3fffb322c0b7468fcac18df1..8d1a6a71e6db3bc83d46d7f0ddf89e234fea31d5 100644 (file)
@@ -123,6 +123,8 @@ public class RepositoryManager implements IRepositoryManager {
        private final IPluginManager pluginManager;
 
        private final IUserManager userManager;
+       
+       private final IFilestoreManager filestoreManager;
 
        private File repositoriesFolder;
 
@@ -136,12 +138,14 @@ public class RepositoryManager implements IRepositoryManager {
        public RepositoryManager(
                        IRuntimeManager runtimeManager,
                        IPluginManager pluginManager,
-                       IUserManager userManager) {
+                       IUserManager userManager,
+                       IFilestoreManager filestoreManager) {
 
                this.settings = runtimeManager.getSettings();
                this.runtimeManager = runtimeManager;
                this.pluginManager = pluginManager;
                this.userManager = userManager;
+               this.filestoreManager = filestoreManager;
        }
 
        @Override
@@ -1866,7 +1870,7 @@ public class RepositoryManager implements IRepositoryManager {
        }
 
        protected void configureLuceneIndexing() {
-               luceneExecutor = new LuceneService(settings, this);
+               luceneExecutor = new LuceneService(settings, this, filestoreManager);
                String frequency = settings.getString(Keys.web.luceneFrequency, "2 mins");
                int mins = TimeUtils.convertFrequencyToMinutes(frequency, 2);
                scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins,  TimeUnit.MINUTES);
index 097a39b2f3e11750a180154f6cfe2385c19a3889..62f7df79d32eb973af6a036762d25c9527fce5f9 100644 (file)
@@ -19,6 +19,7 @@ import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
 \r
 import java.io.ByteArrayOutputStream;\r
 import java.io.File;\r
+import java.io.FileInputStream;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.text.MessageFormat;\r
@@ -66,6 +67,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
 import org.apache.lucene.store.Directory;\r
 import org.apache.lucene.store.FSDirectory;\r
 import org.apache.lucene.util.Version;\r
+import org.apache.tika.metadata.Metadata;\r
+import org.apache.tika.parser.AutoDetectParser;\r
+import org.apache.tika.parser.ParseContext;\r
+import org.apache.tika.parser.pdf.PDFParser;\r
+import org.apache.tika.sax.BodyContentHandler;\r
 import org.eclipse.jgit.diff.DiffEntry.ChangeType;\r
 import org.eclipse.jgit.lib.Constants;\r
 import org.eclipse.jgit.lib.FileMode;\r
@@ -85,8 +91,11 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;\r
 \r
 import com.gitblit.Constants.SearchObjectType;\r
+import com.gitblit.GitBlit;\r
 import com.gitblit.IStoredSettings;\r
 import com.gitblit.Keys;\r
+import com.gitblit.manager.FilestoreManager;\r
+import com.gitblit.manager.IFilestoreManager;\r
 import com.gitblit.manager.IRepositoryManager;\r
 import com.gitblit.models.PathModel.PathChangeModel;\r
 import com.gitblit.models.RefModel;\r
@@ -131,6 +140,8 @@ public class LuceneService implements Runnable {
 \r
        private final IStoredSettings storedSettings;\r
        private final IRepositoryManager repositoryManager;\r
+       private final IFilestoreManager filestoreManager;\r
+       \r
        private final File repositoriesFolder;\r
 \r
        private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();\r
@@ -141,10 +152,12 @@ public class LuceneService implements Runnable {
 \r
        public LuceneService(\r
                        IStoredSettings settings,\r
-                       IRepositoryManager repositoryManager) {\r
+                       IRepositoryManager repositoryManager, \r
+                       IFilestoreManager filestoreManager) {\r
 \r
                this.storedSettings = settings;\r
                this.repositoryManager = repositoryManager;\r
+               this.filestoreManager = filestoreManager;\r
                this.repositoriesFolder = repositoryManager.getRepositoriesFolder();\r
                String exts = luceneIgnoreExtensions;\r
                if (settings != null) {\r
@@ -540,7 +553,8 @@ public class LuceneService implements Runnable {
                                                if (!paths.containsKey(path)) {\r
                                                        continue;\r
                                                }\r
-\r
+//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index\r
+                                               \r
                                                // remove path from set\r
                                                ObjectId blobId = paths.remove(path);\r
                                                result.blobCount++;\r
@@ -677,9 +691,24 @@ public class LuceneService implements Runnable {
                                        }\r
 \r
                                        if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {\r
+                                               String str = "";\r
                                                // read the blob content\r
-                                               String str = JGitUtils.getStringContent(repository, commit.getTree(),\r
+                                               if (path.isFilestoreItem()) {\r
+                                                       //Get file from filestore\r
+                                                       BodyContentHandler handler = new BodyContentHandler();\r
+                               Metadata metadata = new Metadata();\r
+                               PDFParser parser = new PDFParser();\r
+                               \r
+                               ParseContext parseContext = new ParseContext();\r
+                               File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());\r
+                               FileInputStream inputstream = new FileInputStream(lfsFile);\r
+                               parser.parse(inputstream, handler, metadata, parseContext);\r
+                                                       str = handler.toString();\r
+                                               } else {\r
+                                                       str = JGitUtils.getStringContent(repository, commit.getTree(),\r
                                                                path.path, encodings);\r
+                                               }\r
+                                               \r
                                                if (str != null) {\r
                                                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));\r
                                                        writer.addDocument(doc);\r
index 7afd038392daf407d179604c193b41058a56c8a7..dfbca3522be2d6bff6e350a7f3c9f968de34b9b2 100644 (file)
@@ -28,19 +28,19 @@ import com.gitblit.Constants;
 import com.gitblit.Constants.FeedObjectType;\r
 import com.gitblit.GitBlitException;\r
 import com.gitblit.models.FeedEntryModel;\r
-import com.sun.syndication.feed.synd.SyndCategory;\r
-import com.sun.syndication.feed.synd.SyndCategoryImpl;\r
-import com.sun.syndication.feed.synd.SyndContent;\r
-import com.sun.syndication.feed.synd.SyndContentImpl;\r
-import com.sun.syndication.feed.synd.SyndEntry;\r
-import com.sun.syndication.feed.synd.SyndEntryImpl;\r
-import com.sun.syndication.feed.synd.SyndFeed;\r
-import com.sun.syndication.feed.synd.SyndFeedImpl;\r
-import com.sun.syndication.feed.synd.SyndImageImpl;\r
-import com.sun.syndication.io.FeedException;\r
-import com.sun.syndication.io.SyndFeedInput;\r
-import com.sun.syndication.io.SyndFeedOutput;\r
-import com.sun.syndication.io.XmlReader;\r
+import com.rometools.rome.feed.synd.SyndCategory;\r
+import com.rometools.rome.feed.synd.SyndCategoryImpl;\r
+import com.rometools.rome.feed.synd.SyndContent;\r
+import com.rometools.rome.feed.synd.SyndContentImpl;\r
+import com.rometools.rome.feed.synd.SyndEntry;\r
+import com.rometools.rome.feed.synd.SyndEntryImpl;\r
+import com.rometools.rome.feed.synd.SyndFeed;\r
+import com.rometools.rome.feed.synd.SyndFeedImpl;\r
+import com.rometools.rome.feed.synd.SyndImageImpl;\r
+import com.rometools.rome.io.FeedException;\r
+import com.rometools.rome.io.SyndFeedInput;\r
+import com.rometools.rome.io.SyndFeedOutput;\r
+import com.rometools.rome.io.XmlReader;\r
 \r
 /**\r
  * Utility class for RSS feeds.\r
index 7130f6c228688ea6d5685fa120c1c306a5c6bb76..cf9ee25148cc5630d17195876406d5def07834fe 100644 (file)
@@ -67,7 +67,7 @@ public class FilestorePage extends RootPage {
                final UserModel user = (GitBlitWebSession.get().getUser() == null) ? UserModel.ANONYMOUS : GitBlitWebSession.get().getUser();
                final long nBytesUsed = app().filestore().getFilestoreUsedByteCount();
                final long nBytesAvailable = app().filestore().getFilestoreAvailableByteCount();
-               List<FilestoreModel> files = app().filestore().getAllObjects(user);
+               List<FilestoreModel> files = app().filestore().getAllObjects(app().repositories().getRepositoryModels(user));
 
                if (files == null) {
                        files = new ArrayList<FilestoreModel>();
index 0a5de1961d79fa3cd71c726f66899b2699d6264d..248ee16e1fdd6c365505ae866dcbdb7351ce6706 100644 (file)
@@ -16,6 +16,8 @@
 package com.gitblit.tests;
 
 import com.gitblit.IStoredSettings;
+import com.gitblit.manager.FilestoreManager;
+import com.gitblit.manager.IFilestoreManager;
 import com.gitblit.manager.INotificationManager;
 import com.gitblit.manager.IPluginManager;
 import com.gitblit.manager.IRepositoryManager;
@@ -57,7 +59,8 @@ public class BranchTicketServiceTest extends TicketServiceTest {
                IPluginManager pluginManager = new PluginManager(runtimeManager).start();
                INotificationManager notificationManager = new NotificationManager(settings).start();
                IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
-               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+               IRepositoryManager repositoryManager = new RepositoryManager(
+                               runtimeManager, pluginManager, userManager, null).start();
 
                BranchTicketService service = new BranchTicketService(
                                runtimeManager,
index 1fb2eed96f705580cbe418701a7ed6ce0d88ec30..38523843bc467e036ccd6444e290805172d013a1 100644 (file)
@@ -56,7 +56,8 @@ public class FileTicketServiceTest extends TicketServiceTest {
                IPluginManager pluginManager = new PluginManager(runtimeManager).start();
                INotificationManager notificationManager = new NotificationManager(settings).start();
                IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
-               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+               IRepositoryManager repositoryManager = new RepositoryManager(
+                               runtimeManager, pluginManager, userManager, null).start();
 
                FileTicketService service = new FileTicketService(
                                runtimeManager,
index a8358b99f50a72afd5ff62f1c0f7638a3435d8e8..b9ecbbbddf5d6976b988061d4af7230f3fa459e7 100644 (file)
@@ -24,6 +24,7 @@ import org.junit.Before;
 import org.junit.Test;\r
 \r
 import com.gitblit.Keys;\r
+import com.gitblit.manager.FilestoreManager;\r
 import com.gitblit.manager.RepositoryManager;\r
 import com.gitblit.manager.RuntimeManager;\r
 import com.gitblit.manager.UserManager;\r
@@ -53,8 +54,9 @@ public class LuceneExecutorTest extends GitblitUnitTest {
                XssFilter xssFilter = new AllowXssFilter();\r
                RuntimeManager runtime = new RuntimeManager(settings, xssFilter, GitBlitSuite.BASEFOLDER).start();\r
                UserManager users = new UserManager(runtime, null).start();\r
-               RepositoryManager repos = new RepositoryManager(runtime, null, users);\r
-               return new LuceneService(settings, repos);\r
+               RepositoryManager repos = new RepositoryManager(runtime, null, users, null);\r
+               //TODO: May need filestore\r
+               return new LuceneService(settings, repos, null);\r
        }\r
 \r
        private RepositoryModel newRepositoryModel(Repository repository) {\r
index 48011ade259ad86c35c4b25ee865ebe8aa5f3a56..6bfab43d0f9b69bc187546f275b68c2cc4a8c849 100644 (file)
@@ -64,7 +64,7 @@ public class RedisTicketServiceTest extends TicketServiceTest {
                IPluginManager pluginManager = new PluginManager(runtimeManager).start();
                INotificationManager notificationManager = new NotificationManager(settings).start();
                IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
-               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start();
 
                RedisTicketService service = new RedisTicketService(
                                runtimeManager,
index 54aa1e1e7d92cd62df56d84413243104fc63f20a..cb61b3e8a2a329f72fe985c4c46665e92296ab12 100644 (file)
@@ -81,7 +81,7 @@ public class UITicketTest extends GitblitUnitTest {
                IPluginManager pluginManager = new PluginManager(runtimeManager).start();
                INotificationManager notificationManager = new NotificationManager(settings).start();
                IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
-               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
+               IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start();
 
                BranchTicketService service = new BranchTicketService(
                                runtimeManager,