Browse Source

Proof of concept #1026

+ Update Tika
+ Pull in Tika parsers
+ Remove chicken-and-egg dependency between FilestoreManager and
RepositoryManager
1026-Lucene-Index-PDF
Paul Martin 8 years ago
parent
commit
eecaad8b8e

+ 2
- 1
build.moxie View File

@@ -178,7 +178,8 @@ dependencies:
- compile 'commons-codec:commons-codec:1.7' :war
- compile 'redis.clients:jedis:2.6.2' :war
- compile 'ro.fortsoft.pf4j:pf4j:0.9.0' :war
- compile 'org.apache.tika:tika-core:1.5' :war
- compile 'org.apache.tika:tika-core:1.12' :war
- compile 'org.apache.tika:tika-parsers:1.12' :war
- compile 'org.jsoup:jsoup:1.7.3' :war
- test 'junit'
# Dependencies for Selenium web page testing

+ 1
- 1
src/main/java/com/gitblit/FederationClient.java View File

@@ -98,7 +98,7 @@ public class FederationClient {
RuntimeManager runtime = new RuntimeManager(settings, xssFilter, baseFolder).start();
NoopNotificationManager notifications = new NoopNotificationManager().start();
UserManager users = new UserManager(runtime, null).start();
RepositoryManager repositories = new RepositoryManager(runtime, null, users).start();
RepositoryManager repositories = new RepositoryManager(runtime, null, users, null).start();
FederationManager federation = new FederationManager(runtime, notifications, repositories).start();
IGitblit gitblit = new GitblitManager(null, null, runtime, null, notifications, users, null, repositories, null, federation, null);


+ 1
- 1
src/main/java/com/gitblit/MigrateTickets.java View File

@@ -138,7 +138,7 @@ public class MigrateTickets {

XssFilter xssFilter = new AllowXssFilter();
IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start();

String inputServiceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName());
if (StringUtils.isEmpty(inputServiceName)) {

+ 1
- 1
src/main/java/com/gitblit/ReindexTickets.java View File

@@ -130,7 +130,7 @@ public class ReindexTickets {

XssFilter xssFilter = new AllowXssFilter();
IRuntimeManager runtimeManager = new RuntimeManager(settings, xssFilter, baseFolder).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, null, null, null).start();

String serviceName = settings.getString(Keys.tickets.service, BranchTicketService.class.getSimpleName());
if (StringUtils.isEmpty(serviceName)) {

+ 2
- 8
src/main/java/com/gitblit/manager/FilestoreManager.java View File

@@ -78,8 +78,6 @@ public class FilestoreManager implements IFilestoreManager {

private final IRuntimeManager runtimeManager;
private final IRepositoryManager repositoryManager;

private final IStoredSettings settings;

public static final int UNDEFINED_SIZE = -1;
@@ -94,11 +92,8 @@ public class FilestoreManager implements IFilestoreManager {


@Inject
FilestoreManager(
IRuntimeManager runtimeManager,
IRepositoryManager repositoryManager) {
public FilestoreManager(IRuntimeManager runtimeManager) {
this.runtimeManager = runtimeManager;
this.repositoryManager = repositoryManager;
this.settings = runtimeManager.getSettings();
}

@@ -328,9 +323,8 @@ public class FilestoreManager implements IFilestoreManager {
}

@Override
public List<FilestoreModel> getAllObjects(UserModel user) {
public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) {
final List<RepositoryModel> viewableRepositories = repositoryManager.getRepositoryModels(user);
List<String> viewableRepositoryNames = new ArrayList<String>(viewableRepositories.size());
for (RepositoryModel repository : viewableRepositories) {

+ 2
- 2
src/main/java/com/gitblit/manager/GitblitManager.java View File

@@ -1274,8 +1274,8 @@ public class GitblitManager implements IGitblit {
}
@Override
public List<FilestoreModel> getAllObjects(UserModel user) {
return filestoreManager.getAllObjects(user);
public List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories) {
return filestoreManager.getAllObjects(viewableRepositories);
}
@Override

+ 1
- 1
src/main/java/com/gitblit/manager/IFilestoreManager.java View File

@@ -37,7 +37,7 @@ public interface IFilestoreManager extends IManager {
FilestoreModel.Status downloadBlob(String oid, UserModel user, RepositoryModel repo, OutputStream streamOut );
List<FilestoreModel> getAllObjects(UserModel user);
List<FilestoreModel> getAllObjects(List<RepositoryModel> viewableRepositories);
File getStorageFolder();

+ 6
- 2
src/main/java/com/gitblit/manager/RepositoryManager.java View File

@@ -123,6 +123,8 @@ public class RepositoryManager implements IRepositoryManager {
private final IPluginManager pluginManager;

private final IUserManager userManager;
private final IFilestoreManager filestoreManager;

private File repositoriesFolder;

@@ -136,12 +138,14 @@ public class RepositoryManager implements IRepositoryManager {
public RepositoryManager(
IRuntimeManager runtimeManager,
IPluginManager pluginManager,
IUserManager userManager) {
IUserManager userManager,
IFilestoreManager filestoreManager) {

this.settings = runtimeManager.getSettings();
this.runtimeManager = runtimeManager;
this.pluginManager = pluginManager;
this.userManager = userManager;
this.filestoreManager = filestoreManager;
}

@Override
@@ -1866,7 +1870,7 @@ public class RepositoryManager implements IRepositoryManager {
}

protected void configureLuceneIndexing() {
luceneExecutor = new LuceneService(settings, this);
luceneExecutor = new LuceneService(settings, this, filestoreManager);
String frequency = settings.getString(Keys.web.luceneFrequency, "2 mins");
int mins = TimeUtils.convertFrequencyToMinutes(frequency, 2);
scheduledExecutor.scheduleAtFixedRate(luceneExecutor, 1, mins, TimeUnit.MINUTES);

+ 32
- 3
src/main/java/com/gitblit/service/LuceneService.java View File

@@ -19,6 +19,7 @@ import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
@@ -66,6 +67,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@@ -85,8 +91,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gitblit.Constants.SearchObjectType;
import com.gitblit.GitBlit;
import com.gitblit.IStoredSettings;
import com.gitblit.Keys;
import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.IRepositoryManager;
import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
@@ -131,6 +140,8 @@ public class LuceneService implements Runnable {
private final IStoredSettings storedSettings;
private final IRepositoryManager repositoryManager;
private final IFilestoreManager filestoreManager;
private final File repositoriesFolder;
private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
@@ -141,10 +152,12 @@ public class LuceneService implements Runnable {
public LuceneService(
IStoredSettings settings,
IRepositoryManager repositoryManager) {
IRepositoryManager repositoryManager,
IFilestoreManager filestoreManager) {
this.storedSettings = settings;
this.repositoryManager = repositoryManager;
this.filestoreManager = filestoreManager;
this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
String exts = luceneIgnoreExtensions;
if (settings != null) {
@@ -540,7 +553,8 @@ public class LuceneService implements Runnable {
if (!paths.containsKey(path)) {
continue;
}
//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index
// remove path from set
ObjectId blobId = paths.remove(path);
result.blobCount++;
@@ -677,9 +691,24 @@ public class LuceneService implements Runnable {
}
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
String str = "";
// read the blob content
String str = JGitUtils.getStringContent(repository, commit.getTree(),
if (path.isFilestoreItem()) {
//Get file from filestore
BodyContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
PDFParser parser = new PDFParser();
ParseContext parseContext = new ParseContext();
File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
FileInputStream inputstream = new FileInputStream(lfsFile);
parser.parse(inputstream, handler, metadata, parseContext);
str = handler.toString();
} else {
str = JGitUtils.getStringContent(repository, commit.getTree(),
path.path, encodings);
}
if (str != null) {
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
writer.addDocument(doc);

+ 13
- 13
src/main/java/com/gitblit/utils/SyndicationUtils.java View File

@@ -28,19 +28,19 @@ import com.gitblit.Constants;
import com.gitblit.Constants.FeedObjectType;
import com.gitblit.GitBlitException;
import com.gitblit.models.FeedEntryModel;
import com.sun.syndication.feed.synd.SyndCategory;
import com.sun.syndication.feed.synd.SyndCategoryImpl;
import com.sun.syndication.feed.synd.SyndContent;
import com.sun.syndication.feed.synd.SyndContentImpl;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndEntryImpl;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.feed.synd.SyndFeedImpl;
import com.sun.syndication.feed.synd.SyndImageImpl;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.SyndFeedOutput;
import com.sun.syndication.io.XmlReader;
import com.rometools.rome.feed.synd.SyndCategory;
import com.rometools.rome.feed.synd.SyndCategoryImpl;
import com.rometools.rome.feed.synd.SyndContent;
import com.rometools.rome.feed.synd.SyndContentImpl;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndEntryImpl;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.feed.synd.SyndFeedImpl;
import com.rometools.rome.feed.synd.SyndImageImpl;
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.SyndFeedOutput;
import com.rometools.rome.io.XmlReader;
/**
* Utility class for RSS feeds.

+ 1
- 1
src/main/java/com/gitblit/wicket/pages/FilestorePage.java View File

@@ -67,7 +67,7 @@ public class FilestorePage extends RootPage {
final UserModel user = (GitBlitWebSession.get().getUser() == null) ? UserModel.ANONYMOUS : GitBlitWebSession.get().getUser();
final long nBytesUsed = app().filestore().getFilestoreUsedByteCount();
final long nBytesAvailable = app().filestore().getFilestoreAvailableByteCount();
List<FilestoreModel> files = app().filestore().getAllObjects(user);
List<FilestoreModel> files = app().filestore().getAllObjects(app().repositories().getRepositoryModels(user));

if (files == null) {
files = new ArrayList<FilestoreModel>();

+ 4
- 1
src/test/java/com/gitblit/tests/BranchTicketServiceTest.java View File

@@ -16,6 +16,8 @@
package com.gitblit.tests;

import com.gitblit.IStoredSettings;
import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.INotificationManager;
import com.gitblit.manager.IPluginManager;
import com.gitblit.manager.IRepositoryManager;
@@ -57,7 +59,8 @@ public class BranchTicketServiceTest extends TicketServiceTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(
runtimeManager, pluginManager, userManager, null).start();

BranchTicketService service = new BranchTicketService(
runtimeManager,

+ 2
- 1
src/test/java/com/gitblit/tests/FileTicketServiceTest.java View File

@@ -56,7 +56,8 @@ public class FileTicketServiceTest extends TicketServiceTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(
runtimeManager, pluginManager, userManager, null).start();

FileTicketService service = new FileTicketService(
runtimeManager,

+ 4
- 2
src/test/java/com/gitblit/tests/LuceneExecutorTest.java View File

@@ -24,6 +24,7 @@ import org.junit.Before;
import org.junit.Test;
import com.gitblit.Keys;
import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.RepositoryManager;
import com.gitblit.manager.RuntimeManager;
import com.gitblit.manager.UserManager;
@@ -53,8 +54,9 @@ public class LuceneExecutorTest extends GitblitUnitTest {
XssFilter xssFilter = new AllowXssFilter();
RuntimeManager runtime = new RuntimeManager(settings, xssFilter, GitBlitSuite.BASEFOLDER).start();
UserManager users = new UserManager(runtime, null).start();
RepositoryManager repos = new RepositoryManager(runtime, null, users);
return new LuceneService(settings, repos);
RepositoryManager repos = new RepositoryManager(runtime, null, users, null);
//TODO: May need filestore
return new LuceneService(settings, repos, null);
}
private RepositoryModel newRepositoryModel(Repository repository) {

+ 1
- 1
src/test/java/com/gitblit/tests/RedisTicketServiceTest.java View File

@@ -64,7 +64,7 @@ public class RedisTicketServiceTest extends TicketServiceTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start();

RedisTicketService service = new RedisTicketService(
runtimeManager,

+ 1
- 1
src/test/java/com/gitblit/tests/UITicketTest.java View File

@@ -81,7 +81,7 @@ public class UITicketTest extends GitblitUnitTest {
IPluginManager pluginManager = new PluginManager(runtimeManager).start();
INotificationManager notificationManager = new NotificationManager(settings).start();
IUserManager userManager = new UserManager(runtimeManager, pluginManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager).start();
IRepositoryManager repositoryManager = new RepositoryManager(runtimeManager, pluginManager, userManager, null).start();

BranchTicketService service = new BranchTicketService(
runtimeManager,

Loading…
Cancel
Save