source.dussan.org Git - archiva.git/commitdiff
[MRM-136] make the browse interface perform acceptably on large repositories
author Brett Porter <brett@apache.org>
Mon, 4 Sep 2006 07:31:52 +0000 (07:31 +0000)
committer Brett Porter <brett@apache.org>
Mon, 4 Sep 2006 07:31:52 +0000 (07:31 +0000)
git-svn-id: https://svn.apache.org/repos/asf/maven/archiva/trunk@439966 13f79535-47bb-0310-9956-ffa450edef68

archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java
archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java
archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java
archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java
archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp

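diff --git a/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java b/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java
index 4ddf3a9b093515cd78acb0a7032f08db280f65bd..269bc3bf773b0017d34443cce342303a44cb11ef 100644
--- a/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java
+++ b/archiva-core/src/main/java/org/apache/maven/archiva/scheduler/task/IndexerTask.java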
@@ -45,7 +45,7 @@ import java.util.Map;
  * Task for discovering changes in the repository.
  *
  * @author <a href="mailto:brett@apache.org">Brett Porter</a>
- * @plexus.component role=org.apache.maven.archiva.scheduler.task.RepositoryTaskk" role-hint="indexer"
+ * @plexus.component role="org.apache.maven.archiva.scheduler.task.RepositoryTask" role-hint="indexer"
  */
 public class IndexerTask
     extends AbstractLogEnabled
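diff --git a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java
index b70f3ecb0c3769d875cefb2ba92518f844f71d57..05a3ab0a7e8b4860b5e17218e1522c37012698a0 100644
--- a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java
+++ b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/RepositoryArtifactIndex.java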
@@ -81,7 +81,7 @@ public interface RepositoryArtifactIndex
      * Retrieve all primary keys of records in the index.
      *
      * @return the keys
-     * @throws RepositoryIndexSearchException if there was an error searching the index
+     * @throws RepositoryIndexException if there was an error searching the index
      */
     Collection getAllRecordKeys()
         throws RepositoryIndexException;
@@ -97,4 +97,42 @@ public interface RepositoryArtifactIndex
      */
     void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
         throws RepositoryIndexException;
+
+    /**
+     * Get all the group IDs in the index.
+     *
+     * @return list of groups as strings
+     * @throws RepositoryIndexException if there is a problem searching for the group ID
+     */
+    List getAllGroupIds()
+        throws RepositoryIndexException;
+
+    /**
+     * Get the list of artifact IDs in a group in the index.
+     *
+     * @param groupId the group ID to search
+     * @return the list of artifact ID strings
+     * @throws RepositoryIndexSearchException if there is a problem searching for the group ID
+     */
+    List getArtifactIds( String groupId )
+        throws RepositoryIndexSearchException;
+
+    /**
+     * Get the list of available versions for a given artifact.
+     *
+     * @param groupId    the group ID to search for
+     * @param artifactId the artifact ID to search for
+     * @return the list of version strings
+     * @throws RepositoryIndexSearchException if there is a problem searching for the artifact
+     */
+    List getVersions( String groupId, String artifactId )
+        throws RepositoryIndexSearchException;
+
+    /**
+     * Get the time when the index was last updated. Note that this does not monitor external processes or multiple
+     * instances of the index.
+     *
+     * @return the last updated time, or 0 if it has not been updated since the class was instantiated.
+     */
+    long getLastUpdatedTime();
 }
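diff --git a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java
index dd6e81764b34de0e2d72822298c0fde56e6fcedc..e456ac5fa88e6fc949da313c7ae084d5ee802d05 100644
--- a/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java
+++ b/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/LuceneRepositoryArtifactIndex.java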
@@ -27,9 +27,12 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.TermQuery;
 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
 import org.apache.maven.archiva.indexer.RepositoryIndexException;
 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
@@ -47,8 +50,8 @@ import java.io.Reader;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -77,6 +80,8 @@ public class LuceneRepositoryArtifactIndex
 
     private MavenProjectBuilder projectBuilder;
 
+    private long lastUpdatedTime = 0;
+
     public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
     {
         this.indexLocation = indexPath;
@@ -137,6 +142,7 @@ public class LuceneRepositoryArtifactIndex
         finally
         {
             closeQuietly( indexWriter );
+            lastUpdatedTime = System.currentTimeMillis();
         }
     }
 
@@ -276,7 +282,13 @@ public class LuceneRepositoryArtifactIndex
     public Collection getAllRecordKeys()
         throws RepositoryIndexException
     {
-        Set keys = new HashSet();
+        return getAllFieldValues( FLD_PK );
+    }
+
+    private List getAllFieldValues( String fieldName )
+        throws RepositoryIndexException
+    {
+        List keys = new ArrayList();
 
         if ( exists() )
         {
@@ -286,8 +298,8 @@ public class LuceneRepositoryArtifactIndex
             {
                 indexReader = IndexReader.open( indexLocation );
 
-                terms = indexReader.terms( new Term( FLD_PK, "" ) );
-                while ( FLD_PK.equals( terms.term().field() ) )
+                terms = indexReader.terms( new Term( fieldName, "" ) );
+                while ( fieldName.equals( terms.term().field() ) )
                 {
                     keys.add( terms.term().text() );
 
@@ -353,7 +365,74 @@ public class LuceneRepositoryArtifactIndex
         finally
         {
             closeQuietly( indexModifier );
+            lastUpdatedTime = System.currentTimeMillis();
+        }
+    }
+
+    public List getAllGroupIds()
+        throws RepositoryIndexException
+    {
+        return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
+    }
+
+    public List getArtifactIds( String groupId )
+        throws RepositoryIndexSearchException
+    {
+        return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
+                            StandardIndexRecordFields.ARTIFACTID );
+    }
+
+    public List getVersions( String groupId, String artifactId )
+        throws RepositoryIndexSearchException
+    {
+        BooleanQuery query = new BooleanQuery();
+        query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
+                   BooleanClause.Occur.MUST );
+        query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
+                   BooleanClause.Occur.MUST );
+
+        return searchField( query, StandardIndexRecordFields.VERSION );
+    }
+
+    public long getLastUpdatedTime()
+    {
+        return lastUpdatedTime;
+    }
+
+    private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
+        throws RepositoryIndexSearchException
+    {
+        Set results = new LinkedHashSet();
+
+        IndexSearcher searcher;
+        try
+        {
+            searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
+        }
+        catch ( IOException e )
+        {
+            throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
+        }
+
+        try
+        {
+            Hits hits = searcher.search( luceneQuery );
+            for ( int i = 0; i < hits.length(); i++ )
+            {
+                Document doc = hits.doc( i );
+
+                results.add( doc.get( fieldName ) );
+            }
+        }
+        catch ( IOException e )
+        {
+            throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
+        }
+        finally
+        {
+            closeQuietly( searcher );
         }
+        return new ArrayList( results );
     }
 
     private void flushProjectBuilderCacheHack()
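diff --git a/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java b/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java
index b02ada5fe57138a4e6d28ee6772c5c3837d9f806..189407c6d4aefdbf31e89d66c85bd46779849b6d 100644
--- a/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java
+++ b/archiva-webapp/src/main/java/org/apache/maven/archiva/web/action/BrowseAction.java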
@@ -16,12 +16,6 @@ package org.apache.maven.archiva.web.action;
  * limitations under the License.
  */
 
-import com.opensymphony.xwork.ActionSupport;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.TermQuery;
 import org.apache.maven.archiva.configuration.Configuration;
 import org.apache.maven.archiva.configuration.ConfigurationStore;
 import org.apache.maven.archiva.configuration.ConfigurationStoreException;
@@ -30,32 +24,27 @@ import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
 import org.apache.maven.archiva.indexer.RepositoryArtifactIndexFactory;
 import org.apache.maven.archiva.indexer.RepositoryIndexException;
 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
-import org.apache.maven.archiva.indexer.lucene.LuceneQuery;
-import org.apache.maven.archiva.indexer.record.StandardArtifactIndexRecord;
-import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
 import org.codehaus.plexus.util.StringUtils;
+import org.codehaus.plexus.xwork.action.PlexusActionSupport;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.TreeMap;
-import java.util.TreeSet;
 
 /**
  * Browse the repository.
  *
- * @todo the tree part probably belongs in a browsing component, and the indexer could optimize how it retrieves the terms rather than querying everything!
+ * @todo cache should be a proper cache class that is a singleton requirement rather than static variables
  * @plexus.component role="com.opensymphony.xwork.Action" role-hint="browseAction"
  */
 public class BrowseAction
-    extends ActionSupport
+    extends PlexusActionSupport
 {
     /**
      * @plexus.requirement
@@ -84,8 +73,12 @@ public class BrowseAction
 
     private List versions;
 
+    private static GroupTreeNode rootNode;
+
+    private static long groupCacheTime;
+
     public String browse()
-        throws ConfigurationStoreException, RepositoryIndexException, IOException, RepositoryIndexSearchException
+        throws ConfigurationStoreException, RepositoryIndexException, IOException
     {
         RepositoryArtifactIndex index = getIndex();
 
@@ -130,6 +123,8 @@ public class BrowseAction
             if ( !rootNode.getChildren().containsKey( part ) )
             {
                 // TODO: i18n
+                getLogger().debug(
+                    "Can't find part: " + part + " for groupId " + groupId + " in children " + rootNode.getChildren() );
                 addActionError( "The group specified was not found" );
                 return ERROR;
             }
@@ -141,16 +136,7 @@ public class BrowseAction
 
         this.groups = collateGroups( rootNode );
 
-        List records = index.search(
-            new LuceneQuery( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ) ) );
-
-        Set artifactIds = new HashSet();
-        for ( Iterator i = records.iterator(); i.hasNext(); )
-        {
-            StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next();
-            artifactIds.add( record.getArtifactId() );
-        }
-        this.artifactIds = new ArrayList( artifactIds );
+        this.artifactIds = index.getArtifactIds( groupId );
         Collections.sort( this.artifactIds );
 
         return SUCCESS;
@@ -175,77 +161,66 @@ public class BrowseAction
             return ERROR;
         }
 
-        BooleanQuery query = new BooleanQuery();
-        query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
-                   BooleanClause.Occur.MUST );
-        query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
-                   BooleanClause.Occur.MUST );
-
-        List records = index.search( new LuceneQuery( query ) );
+        this.versions = index.getVersions( groupId, artifactId );
+        Collections.sort( this.versions );
 
-        if ( records.isEmpty() )
+        if ( versions.isEmpty() )
         {
             // TODO: i18n
             addActionError( "Could not find any artifacts with the given group and artifact ID" );
             return ERROR;
         }
 
-        Set versions = new HashSet();
-        for ( Iterator i = records.iterator(); i.hasNext(); )
-        {
-            StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next();
-            versions.add( record.getVersion() );
-        }
-
-        this.versions = new ArrayList( versions );
-        Collections.sort( this.versions );
-
         return SUCCESS;
     }
 
     private GroupTreeNode buildGroupTree( RepositoryArtifactIndex index )
-        throws IOException, RepositoryIndexSearchException
+        throws IOException, RepositoryIndexException
     {
         // TODO: give action message if indexing is in progress
 
-        // TODO: this will be inefficient over a very large number of artifacts, should be cached!
-
-        List records = index.search( new LuceneQuery( new MatchAllDocsQuery() ) );
+        long lastUpdate = index.getLastUpdatedTime();
 
-        Set groups = new TreeSet();
-        for ( Iterator i = records.iterator(); i.hasNext(); )
+        if ( rootNode == null || lastUpdate > groupCacheTime )
         {
-            StandardArtifactIndexRecord record = (StandardArtifactIndexRecord) i.next();
-            groups.add( record.getGroupId() );
-        }
+            List groups = index.getAllGroupIds();
 
-        GroupTreeNode rootNode = new GroupTreeNode();
+            getLogger().info( "Loaded " + groups.size() + " groups from index" );
 
-        // build a tree structure
-        for ( Iterator i = groups.iterator(); i.hasNext(); )
-        {
-            String groupId = (String) i.next();
+            rootNode = new GroupTreeNode();
 
-            StringTokenizer tok = new StringTokenizer( groupId, GROUP_SEPARATOR );
+            // build a tree structure
+            for ( Iterator i = groups.iterator(); i.hasNext(); )
+            {
+                String groupId = (String) i.next();
 
-            GroupTreeNode node = rootNode;
+                StringTokenizer tok = new StringTokenizer( groupId, GROUP_SEPARATOR );
 
-            while ( tok.hasMoreTokens() )
-            {
-                String part = tok.nextToken();
+                GroupTreeNode node = rootNode;
 
-                if ( !node.getChildren().containsKey( part ) )
+                while ( tok.hasMoreTokens() )
                 {
-                    GroupTreeNode newNode = new GroupTreeNode( part, node );
-                    node.addChild( newNode );
-                    node = newNode;
-                }
-                else
-                {
-                    node = (GroupTreeNode) node.getChildren().get( part );
+                    String part = tok.nextToken();
+
+                    if ( !node.getChildren().containsKey( part ) )
+                    {
+                        GroupTreeNode newNode = new GroupTreeNode( part, node );
+                        node.addChild( newNode );
+                        node = newNode;
+                    }
+                    else
+                    {
+                        node = (GroupTreeNode) node.getChildren().get( part );
+                    }
                 }
             }
+            groupCacheTime = lastUpdate;
         }
+        else
+        {
+            getLogger().debug( "Loaded groups from cache" );
+        }
+
         return rootNode;
     }
 
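diff --git a/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp b/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp
index c195fbf133456a4b8de71a62a7d41a2872b0e111..a7e77eba2e8bb5738e7a37200884d53467282737 100644
--- a/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp
+++ b/archiva-webapp/src/main/webapp/WEB-INF/jsp/browseGroup.jsp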
@@ -58,7 +58,7 @@
 
     <ww:set name="groups" value="groups"/>
     <c:if test="${!empty(groups)}">
-      <h2>Group / Artifact</h2>
+      <h2>Groups</h2>
       <ul>
         <c:forEach items="${groups}" var="groupId">
           <c:set var="url">