From: Brett Porter Date: Mon, 4 Sep 2006 07:31:52 +0000 (+0000) Subject: [MRM-136] make the browse interface perform acceptably on large repositories X-Git-Tag: archiva-0.9-alpha-1~617 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1fa628623b26bf63e2427f3bb048b23aea4e73dd;p=archiva.git [MRM-136] make the browse interface perform acceptably on large repositories git-svn-id: https://svn.apache.org/repos/asf/maven/archiva/trunk@439966 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java b/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java index 4ddf3a9b0..269bc3bf7 100644 --- a/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java +++ b/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java @@ -45,7 +45,7 @@ import java.util.Map; * Task for discovering changes in the repository. * * @author Brett Porter - * @plexus.component role=org.apache.maven.archiva.scheduler.task.RepositoryTaskk" role-hint="indexer" + * @plexus.component role="org.apache.maven.archiva.scheduler.task.RepositoryTask" role-hint="indexer" */ public class IndexerTask extends AbstractLogEnabled diff --git a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java index b70f3ecb0..05a3ab0a7 100644 --- a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java +++ b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java @@ -81,7 +81,7 @@ public interface RepositoryArtifactIndex * Retrieve all primary keys of records in the index. * * @return the keys - * @throws RepositoryIndexSearchException if there was an error searching the index + * @throws RepositoryIndexException if there was an error searching the index */ Collection getAllRecordKeys() throws RepositoryIndexException; @@ -97,4 +97,42 @@ public interface RepositoryArtifactIndex */ void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory ) throws RepositoryIndexException; + + /** + * Get all the group IDs in the index. + * + * @return list of groups as strings + * @throws RepositoryIndexException if there is a problem searching for the group ID + */ + List getAllGroupIds() + throws RepositoryIndexException; + + /** + * Get the list of artifact IDs in a group in the index. + * + * @param groupId the group ID to search + * @return the list of artifact ID strings + * @throws RepositoryIndexSearchException if there is a problem searching for the group ID + */ + List getArtifactIds( String groupId ) + throws RepositoryIndexSearchException; + + /** + * Get the list of available versions for a given artifact. + * + * @param groupId the group ID to search for + * @param artifactId the artifact ID to search for + * @return the list of version strings + * @throws RepositoryIndexSearchException if there is a problem searching for the artifact + */ + List getVersions( String groupId, String artifactId ) + throws RepositoryIndexSearchException; + + /** + * Get the time when the index was last updated. Note that this does not monitor external processes or multiple + * instances of the index. + * + * @return the last updated time, or 0 if it has not been updated since the class was instantiated. + */ + long getLastUpdatedTime(); } diff --git a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java index dd6e81764..e456ac5fa 100644 --- a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java +++ b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java @@ -27,9 +27,12 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.TermQuery; import org.apache.maven.archiva.indexer.RepositoryArtifactIndex; import org.apache.maven.archiva.indexer.RepositoryIndexException; import org.apache.maven.archiva.indexer.RepositoryIndexSearchException; @@ -47,8 +50,8 @@ import java.io.Reader; import java.text.ParseException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -77,6 +80,8 @@ public class LuceneRepositoryArtifactIndex private MavenProjectBuilder projectBuilder; + private long lastUpdatedTime = 0; + public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter ) { this.indexLocation = indexPath; @@ -137,6 +142,7 @@ public class LuceneRepositoryArtifactIndex finally { closeQuietly( indexWriter ); + lastUpdatedTime = System.currentTimeMillis(); } } @@ -276,7 +282,13 @@ public class LuceneRepositoryArtifactIndex public Collection getAllRecordKeys() throws RepositoryIndexException { - Set keys = new HashSet(); + return getAllFieldValues( FLD_PK ); + } + + private List getAllFieldValues( String fieldName ) + throws RepositoryIndexException + { + List keys = new ArrayList(); if ( exists() ) { @@ -286,8 +298,8 @@ public class LuceneRepositoryArtifactIndex { indexReader = IndexReader.open( indexLocation ); - terms = indexReader.terms( new Term( FLD_PK, "" ) ); - while ( FLD_PK.equals( terms.term().field() ) ) + terms = indexReader.terms( new Term( fieldName, "" ) ); + while ( fieldName.equals( terms.term().field() ) ) { keys.add( terms.term().text() ); @@ -353,7 +365,74 @@ public class LuceneRepositoryArtifactIndex finally { closeQuietly( indexModifier ); + lastUpdatedTime = System.currentTimeMillis(); + } + } + + public List getAllGroupIds() + throws RepositoryIndexException + { + return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT ); + } + + public List getArtifactIds( String groupId ) + throws RepositoryIndexSearchException + { + return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ), + StandardIndexRecordFields.ARTIFACTID ); + } + + public List getVersions( String groupId, String artifactId ) + throws RepositoryIndexSearchException + { + BooleanQuery query = new BooleanQuery(); + query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ), + BooleanClause.Occur.MUST ); + query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ), + BooleanClause.Occur.MUST ); + + return searchField( query, StandardIndexRecordFields.VERSION ); + } + + public long getLastUpdatedTime() + { + return lastUpdatedTime; + } + + private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName ) + throws RepositoryIndexSearchException + { + Set results = new LinkedHashSet(); + + IndexSearcher searcher; + try + { + searcher = new IndexSearcher( indexLocation.getAbsolutePath() ); + } + catch ( IOException e ) + { + throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e ); + } + + try + { + Hits hits = searcher.search( luceneQuery ); + for ( int i = 0; i < hits.length(); i++ ) + { + Document doc = hits.doc( i ); + + results.add( doc.get( fieldName ) ); + } + } + catch ( IOException e ) + { + throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e ); + } + finally + { + closeQuietly( searcher ); } + return new ArrayList( results ); } private void flushProjectBuilderCacheHack() diff --git a/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java b/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java index b02ada5fe..189407c6d 100644 --- a/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java +++ b/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java @@ -16,12 +16,6 @@ package org.apache.maven.archiva.web.action; * limitations under the License. */ -import com.opensymphony.xwork.ActionSupport; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.TermQuery; import org.apache.maven.archiva.configuration.Configuration; import org.apache.maven.archiva.configuration.ConfigurationStore; import org.apache.maven.archiva.configuration.ConfigurationStoreException; @@ -30,32 +24,27 @@ import org.apache.maven.archiva.indexer.RepositoryArtifactIndex; import org.apache.maven.archiva.indexer.RepositoryArtifactIndexFactory; import org.apache.maven.archiva.indexer.RepositoryIndexException; import org.apache.maven.archiva.indexer.RepositoryIndexSearchException; -import org.apache.maven.archiva.indexer.lucene.LuceneQuery; -import org.apache.maven.archiva.indexer.record.StandardArtifactIndexRecord; -import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields; import org.codehaus.plexus.util.StringUtils; +import org.codehaus.plexus.xwork.action.PlexusActionSupport; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.StringTokenizer; import java.util.TreeMap; -import java.util.TreeSet; /** * Browse the repository. * - * @todo the tree part probably belongs in a browsing component, and the indexer could optimize how it retrieves the terms rather than querying everything! + * @todo cache should be a proper cache class that is a singleton requirement rather than static variables * @plexus.component role="com.opensymphony.xwork.Action" role-hint="browseAction" */ public class BrowseAction - extends ActionSupport + extends PlexusActionSupport { /** * @plexus.requirement @@ -84,8 +73,12 @@ public class BrowseAction private List versions; + private static GroupTreeNode rootNode; + + private static long groupCacheTime; + public String browse() - throws ConfigurationStoreException, RepositoryIndexException, IOException, RepositoryIndexSearchException + throws ConfigurationStoreException, RepositoryIndexException, IOException { RepositoryArtifactIndex index = getIndex(); @@ -130,6 +123,8 @@ public class BrowseAction if ( !rootNode.getChildren().containsKey( part ) ) { // TODO: i18n + getLogger().debug( + "Can't find part: " + part + " for groupId " + groupId + " in children " + rootNode.getChildren() ); addActionError( "The group specified was not found" ); return ERROR; } @@ -141,16 +136,7 @@ public class BrowseAction this.groups = collateGroups( rootNode ); - List records = index.search( - new LuceneQuery( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ) ) ); - - Set artifactIds = new HashSet(); - for ( Iterator i = records.iterator(); i.hasNext(); ) - { - StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next(); - artifactIds.add( record.getArtifactId() ); - } - this.artifactIds = new ArrayList( artifactIds ); + this.artifactIds = index.getArtifactIds( groupId ); Collections.sort( this.artifactIds ); return SUCCESS; @@ -175,77 +161,66 @@ public class BrowseAction return ERROR; } - BooleanQuery query = new BooleanQuery(); - query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ), - BooleanClause.Occur.MUST ); - query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ), - BooleanClause.Occur.MUST ); - - List records = index.search( new LuceneQuery( query ) ); + this.versions = index.getVersions( groupId, artifactId ); + Collections.sort( this.versions ); - if ( records.isEmpty() ) + if ( versions.isEmpty() ) { // TODO: i18n addActionError( "Could not find any artifacts with the given group and artifact ID" ); return ERROR; } - Set versions = new HashSet(); - for ( Iterator i = records.iterator(); i.hasNext(); ) - { - StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next(); - versions.add( record.getVersion() ); - } - - this.versions = new ArrayList( versions ); - Collections.sort( this.versions ); - return SUCCESS; } private GroupTreeNode buildGroupTree( RepositoryArtifactIndex index ) - throws IOException, RepositoryIndexSearchException + throws IOException, RepositoryIndexException { // TODO: give action message if indexing is in progress - // TODO: this will be inefficient over a very large number of artifacts, should be cached! - - List records = index.search( new LuceneQuery( new MatchAllDocsQuery() ) ); + long lastUpdate = index.getLastUpdatedTime(); - Set groups = new TreeSet(); - for ( Iterator i = records.iterator(); i.hasNext(); ) + if ( rootNode == null || lastUpdate > groupCacheTime ) { - StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next(); - groups.add( record.getGroupId() ); - } + List groups = index.getAllGroupIds(); - GroupTreeNode rootNode = new GroupTreeNode(); + getLogger().info( "Loaded " + groups.size() + " groups from index" ); - // build a tree structure - for ( Iterator i = groups.iterator(); i.hasNext(); ) - { - String groupId = (String) i.next(); + rootNode = new GroupTreeNode(); - StringTokenizer tok = new StringTokenizer( groupId, GROUP_SEPARATOR ); + // build a tree structure + for ( Iterator i = groups.iterator(); i.hasNext(); ) + { + String groupId = (String) i.next(); - GroupTreeNode node = rootNode; + StringTokenizer tok = new StringTokenizer( groupId, GROUP_SEPARATOR ); - while ( tok.hasMoreTokens() ) - { - String part = tok.nextToken(); + GroupTreeNode node = rootNode; - if ( !node.getChildren().containsKey( part ) ) + while ( tok.hasMoreTokens() ) { - GroupTreeNode newNode = new GroupTreeNode( part, node ); - node.addChild( newNode ); - node = newNode; - } - else - { - node = (GroupTreeNode) node.getChildren().get( part ); + String part = tok.nextToken(); + + if ( !node.getChildren().containsKey( part ) ) + { + GroupTreeNode newNode = new GroupTreeNode( part, node ); + node.addChild( newNode ); + node = newNode; + } + else + { + node = (GroupTreeNode) node.getChildren().get( part ); + } } } + groupCacheTime = lastUpdate; } + else + { + getLogger().debug( "Loaded groups from cache" ); + } + return rootNode; } diff --git a/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp b/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp index c195fbf13..a7e77eba2 100644 --- a/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp +++ b/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp @@ -58,7 +58,7 @@ -

Group / Artifact

+

Groups