diff options
author | Brett Porter <brett@apache.org> | 2010-12-30 08:44:10 +0000 |
---|---|---|
committer | Brett Porter <brett@apache.org> | 2010-12-30 08:44:10 +0000 |
commit | 479f10cb7fc099f0faa0c197e5426bfd3777b5e9 (patch) | |
tree | 5a13dd66315e0a6d273c2219e6ec525cb8af3772 /archiva-modules/plugins/repository-statistics | |
parent | ed717a4b5bf19c91539191115d5e81918ca62995 (diff) | |
download | archiva-479f10cb7fc099f0faa0c197e5426bfd3777b5e9.tar.gz archiva-479f10cb7fc099f0faa0c197e5426bfd3777b5e9.zip |
[MRM-1443, MRM-1327] allow repository-statistics module to optimise statistics generation if JCR store is available
git-svn-id: https://svn.apache.org/repos/asf/archiva/trunk@1053820 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'archiva-modules/plugins/repository-statistics')
6 files changed, 325 insertions, 16 deletions
diff --git a/archiva-modules/plugins/repository-statistics/pom.xml b/archiva-modules/plugins/repository-statistics/pom.xml index 58945a5f9..67ae48d31 100644 --- a/archiva-modules/plugins/repository-statistics/pom.xml +++ b/archiva-modules/plugins/repository-statistics/pom.xml @@ -47,6 +47,14 @@ <artifactId>slf4j-api</artifactId> </dependency> <dependency> + <groupId>javax.jcr</groupId> + <artifactId>jcr</artifactId> + </dependency> + <dependency> + <groupId>org.apache.jackrabbit</groupId> + <artifactId>jackrabbit-jcr-commons</artifactId> + </dependency> + <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-simple</artifactId> <scope>test</scope> @@ -56,5 +64,15 @@ <artifactId>jcl-over-slf4j</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.jackrabbit</groupId> + <artifactId>jackrabbit-core</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/archiva-modules/plugins/repository-statistics/src/main/java/org/apache/archiva/metadata/repository/stats/DefaultRepositoryStatisticsManager.java b/archiva-modules/plugins/repository-statistics/src/main/java/org/apache/archiva/metadata/repository/stats/DefaultRepositoryStatisticsManager.java index 4189a9a2f..2f5f72175 100644 --- a/archiva-modules/plugins/repository-statistics/src/main/java/org/apache/archiva/metadata/repository/stats/DefaultRepositoryStatisticsManager.java +++ b/archiva-modules/plugins/repository-statistics/src/main/java/org/apache/archiva/metadata/repository/stats/DefaultRepositoryStatisticsManager.java @@ -24,6 +24,7 @@ import org.apache.archiva.metadata.repository.MetadataRepository; import org.apache.archiva.metadata.repository.MetadataRepositoryException; import org.apache.archiva.metadata.repository.MetadataResolutionException; import 
org.apache.archiva.metadata.repository.storage.maven2.MavenArtifactFacet; +import org.apache.jackrabbit.commons.JcrUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,8 +34,17 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.TimeZone; +import javax.jcr.Node; +import javax.jcr.RepositoryException; +import javax.jcr.Session; +import javax.jcr.query.Query; +import javax.jcr.query.QueryManager; +import javax.jcr.query.QueryResult; +import javax.jcr.query.Row; /** * @plexus.component role="org.apache.archiva.metadata.repository.stats.RepositoryStatisticsManager" role-hint="default" @@ -112,16 +122,100 @@ public class DefaultRepositoryStatisticsManager repositoryStatistics.setTotalFileCount( totalFiles ); repositoryStatistics.setNewFileCount( newFiles ); + // TODO // In the future, instead of being tied to a scan we might want to record information in the fly based on // events that are occurring. Even without these totals we could query much of the information on demand based // on information from the metadata content repository. In the mean time, we lock information in at scan time. // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not // be updated and the repository will need to be rescanned. - long startWalk = System.currentTimeMillis(); - // TODO: we can probably get a more efficient implementation directly from the metadata repository, but for now - // we just walk it. Alternatively, we could build an index, or store the aggregate information and update - // it on the fly + long startGather = System.currentTimeMillis(); + + if ( metadataRepository.canObtainAccess( Session.class ) ) + { + // TODO: this is currently very raw and susceptible to changes in content structure. 
Should we instead + // depend directly on the plugin and interrogate the JCR repository's knowledge of the structure? + populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId, + repositoryStatistics ); + } + else + { + // TODO: + // if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could + // build an index, or store the aggregate information and update it on the fly. We can perhaps even walk + // but retrieve less information to speed it up. In the mean time, we walk the repository using the + // standard APIs + populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics ); + } + + log.info( "Gathering statistics executed in " + ( System.currentTimeMillis() - startGather ) + "ms" ); + + metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics ); + } + + private void populateStatisticsFromJcr( Session session, String repositoryId, + RepositoryStatistics repositoryStatistics ) + throws MetadataRepositoryException + { + // TODO: these may be best as running totals, maintained by observations on the properties in JCR + + try + { + QueryManager queryManager = session.getWorkspace().getQueryManager(); + Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact]", Query.JCR_SQL2 ); + + QueryResult queryResult = query.execute(); + + Map<String, Integer> totalByType = new HashMap<String, Integer>(); + long totalSize = 0, totalArtifacts = 0; + for ( Row row : JcrUtils.getRows( queryResult ) ) + { + Node n = row.getNode(); + if ( n.getPath().startsWith( "/repositories/" + repositoryId + "/content/" ) ) + { + totalSize += row.getValue( "size" ).getLong(); + + String type; + if ( n.hasNode( MavenArtifactFacet.FACET_ID ) ) + { + Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID ); + type = facetNode.getProperty( "type" ).getString(); + } + else + { + type = "Other"; + } + Integer prev = totalByType.get( type ); + 
totalByType.put( type, prev != null ? prev + 1 : 1 ); + + totalArtifacts++; + } + } + + repositoryStatistics.setTotalArtifactCount( totalArtifacts ); + repositoryStatistics.setTotalArtifactFileSize( totalSize ); + for ( Map.Entry<String, Integer> entry : totalByType.entrySet() ) + { + repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() ); + } + + query = queryManager.createQuery( "SELECT * FROM [archiva:project]", Query.JCR_SQL2 ); + repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() ); + + query = queryManager.createQuery( "SELECT * FROM [archiva:namespace] WHERE namespace IS NOT NULL", + Query.JCR_SQL2 ); + repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() ); + } + catch ( RepositoryException e ) + { + throw new MetadataRepositoryException( e.getMessage(), e ); + } + } + + private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId, + RepositoryStatistics repositoryStatistics ) + throws MetadataRepositoryException + { try { for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) ) @@ -133,9 +227,6 @@ public class DefaultRepositoryStatisticsManager { throw new MetadataRepositoryException( e.getMessage(), e ); } - log.info( "Repository walk for statistics executed in " + ( System.currentTimeMillis() - startWalk ) + "ms" ); - - metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics ); } public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId ) diff --git a/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/JcrRepositoryStatisticsGatheringTest.java b/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/JcrRepositoryStatisticsGatheringTest.java new file mode 100644 index 000000000..ece16db48 --- /dev/null +++ 
b/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/JcrRepositoryStatisticsGatheringTest.java @@ -0,0 +1,150 @@ +package org.apache.archiva.metadata.repository.stats; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import org.apache.archiva.metadata.repository.MetadataRepository; +import org.apache.commons.io.FileUtils; +import org.apache.jackrabbit.commons.JcrUtils; +import org.apache.jackrabbit.core.TransientRepository; +import org.codehaus.plexus.spring.PlexusInSpringTestCase; + +import java.io.File; +import java.util.Calendar; +import java.util.Date; +import java.util.zip.GZIPInputStream; +import javax.jcr.ImportUUIDBehavior; +import javax.jcr.NamespaceRegistry; +import javax.jcr.Node; +import javax.jcr.RepositoryException; +import javax.jcr.Session; +import javax.jcr.SimpleCredentials; +import javax.jcr.Workspace; +import javax.jcr.nodetype.NodeTypeManager; +import javax.jcr.nodetype.NodeTypeTemplate; + +import static org.mockito.Mockito.*; + +public class JcrRepositoryStatisticsGatheringTest + extends PlexusInSpringTestCase +{ + private static final int TOTAL_FILE_COUNT = 1000; + + private static final int NEW_FILE_COUNT = 500; + + private static final String TEST_REPO = "test-repo"; + + private RepositoryStatisticsManager repositoryStatisticsManager; + + private MetadataRepository metadataRepository; + + private Session session; + + @Override + protected void setUp() + throws Exception + { + super.setUp(); + + File confFile = getTestFile( "src/test/repository.xml" ); + File dir = getTestFile( "target/jcr" ); + FileUtils.deleteDirectory( dir ); + + TransientRepository repository = new TransientRepository( confFile, dir ); + session = repository.login( new SimpleCredentials( "username", "password".toCharArray() ) ); + + // TODO: perhaps have an archiva-jcr-utils module shared by these plugins that does this and can contain + // structure information + Workspace workspace = session.getWorkspace(); + NamespaceRegistry registry = workspace.getNamespaceRegistry(); + registry.registerNamespace( "archiva", "http://archiva.apache.org/jcr/" ); + + NodeTypeManager nodeTypeManager = workspace.getNodeTypeManager(); + registerMixinNodeType( nodeTypeManager, 
"archiva:namespace" ); + registerMixinNodeType( nodeTypeManager, "archiva:project" ); + registerMixinNodeType( nodeTypeManager, "archiva:projectVersion" ); + registerMixinNodeType( nodeTypeManager, "archiva:artifact" ); + registerMixinNodeType( nodeTypeManager, "archiva:facet" ); + + metadataRepository = mock( MetadataRepository.class ); + when( metadataRepository.canObtainAccess( Session.class ) ).thenReturn( true ); + when( metadataRepository.obtainAccess( Session.class ) ).thenReturn( session ); + + repositoryStatisticsManager = new DefaultRepositoryStatisticsManager(); + } + + private static void registerMixinNodeType( NodeTypeManager nodeTypeManager, String type ) + throws RepositoryException + { + NodeTypeTemplate nodeType = nodeTypeManager.createNodeTypeTemplate(); + nodeType.setMixin( true ); + nodeType.setName( type ); + nodeTypeManager.registerNodeType( nodeType, false ); + } + + @Override + protected void tearDown() + throws Exception + { + session.logout(); + + super.tearDown(); + } + + public void testJcrStatisticsQuery() + throws Exception + { + Calendar cal = Calendar.getInstance(); + Date endTime = cal.getTime(); + cal.add( Calendar.HOUR, -1 ); + Date startTime = cal.getTime(); + + Node n = JcrUtils.getOrAddNode( session.getRootNode(), "repositories" ); + n = JcrUtils.getOrAddNode( n, TEST_REPO ); + n = JcrUtils.getOrAddNode( n, "content" ); + n = JcrUtils.getOrAddNode( n, "org" ); + n = JcrUtils.getOrAddNode( n, "apache" ); + + GZIPInputStream inputStream = new GZIPInputStream( getClass().getResourceAsStream( "/artifacts.xml.gz" ) ); + session.importXML( n.getPath(), inputStream, ImportUUIDBehavior.IMPORT_UUID_CREATE_NEW ); + session.save(); + + repositoryStatisticsManager.addStatisticsAfterScan( metadataRepository, TEST_REPO, startTime, endTime, + TOTAL_FILE_COUNT, NEW_FILE_COUNT ); + + RepositoryStatistics expectedStatistics = new RepositoryStatistics(); + expectedStatistics.setNewFileCount( NEW_FILE_COUNT ); + 
expectedStatistics.setTotalFileCount( TOTAL_FILE_COUNT ); + expectedStatistics.setScanEndTime( endTime ); + expectedStatistics.setScanStartTime( startTime ); + expectedStatistics.setTotalArtifactFileSize( 95954585 ); + expectedStatistics.setTotalArtifactCount( 269 ); + expectedStatistics.setTotalGroupCount( 1 ); + expectedStatistics.setTotalProjectCount( 43 ); + expectedStatistics.setTotalCountForType( "zip", 1 ); + expectedStatistics.setTotalCountForType( "gz", 1 ); // FIXME: should be tar.gz + expectedStatistics.setTotalCountForType( "java-source", 10 ); + expectedStatistics.setTotalCountForType( "jar", 108 ); + expectedStatistics.setTotalCountForType( "xml", 3 ); + expectedStatistics.setTotalCountForType( "war", 2 ); + expectedStatistics.setTotalCountForType( "pom", 144 ); + + verify( metadataRepository ).addMetadataFacet( TEST_REPO, expectedStatistics ); + } +} diff --git a/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/RepositoryStatisticsManagerTest.java b/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/RepositoryStatisticsManagerTest.java index 70ef1703b..b7e4fae0d 100644 --- a/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/RepositoryStatisticsManagerTest.java +++ b/archiva-modules/plugins/repository-statistics/src/test/java/org/apache/archiva/metadata/repository/stats/RepositoryStatisticsManagerTest.java @@ -34,6 +34,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.TimeZone; +import javax.jcr.Session; public class RepositoryStatisticsManagerTest extends TestCase @@ -128,7 +129,6 @@ public class RepositoryStatisticsManagerTest public void testAddNewStats() throws Exception - { Date current = new Date(); Date startTime = new Date( current.getTime() - 12345 ); @@ -144,6 +144,7 @@ public class RepositoryStatisticsManagerTest 
metadataRepositoryControl.expectAndReturn( metadataRepository.getMetadataFacet( TEST_REPO_ID, RepositoryStatistics.FACET_ID, stats.getName() ), stats ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false ); metadataRepositoryControl.replay(); @@ -166,7 +167,6 @@ public class RepositoryStatisticsManagerTest public void testDeleteStats() throws Exception - { walkRepository( 2 ); @@ -192,6 +192,7 @@ public class RepositoryStatisticsManagerTest metadataRepositoryControl.expectAndReturn( metadataRepository.getMetadataFacets( TEST_REPO_ID, RepositoryStatistics.FACET_ID ), Collections.emptyList() ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false, 2 ); metadataRepositoryControl.replay(); @@ -211,7 +212,6 @@ public class RepositoryStatisticsManagerTest public void testDeleteStatsWhenEmpty() throws Exception - { metadataRepositoryControl.expectAndReturn( metadataRepository.getMetadataFacets( TEST_REPO_ID, RepositoryStatistics.FACET_ID ), @@ -231,7 +231,6 @@ public class RepositoryStatisticsManagerTest public void testGetStatsRangeInside() throws Exception - { walkRepository( 3 ); @@ -252,6 +251,7 @@ public class RepositoryStatisticsManagerTest RepositoryStatistics.FACET_ID, key ), statsCreated.get( key ) ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false, 3 ); metadataRepositoryControl.replay(); @@ -274,7 +274,6 @@ public class RepositoryStatisticsManagerTest public void testGetStatsRangeUpperOutside() throws Exception - { walkRepository( 3 ); @@ -299,6 +298,7 @@ public class RepositoryStatisticsManagerTest RepositoryStatistics.FACET_ID, key ), statsCreated.get( key ) ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false, 3 ); metadataRepositoryControl.replay(); @@ -322,7 +322,6 @@ public class RepositoryStatisticsManagerTest public void 
testGetStatsRangeLowerOutside() throws Exception - { walkRepository( 3 ); @@ -347,6 +346,7 @@ public class RepositoryStatisticsManagerTest RepositoryStatistics.FACET_ID, key ), statsCreated.get( key ) ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false, 3 ); metadataRepositoryControl.replay(); @@ -370,7 +370,6 @@ public class RepositoryStatisticsManagerTest public void testGetStatsRangeLowerAndUpperOutside() throws Exception - { walkRepository( 3 ); @@ -400,6 +399,7 @@ public class RepositoryStatisticsManagerTest RepositoryStatistics.FACET_ID, key ), statsCreated.get( key ) ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false, 3 ); metadataRepositoryControl.replay(); @@ -424,7 +424,6 @@ public class RepositoryStatisticsManagerTest public void testGetStatsRangeNotInside() throws Exception - { walkRepository( 3 ); @@ -438,6 +437,7 @@ public class RepositoryStatisticsManagerTest metadataRepositoryControl.expectAndReturn( metadataRepository.getMetadataFacets( TEST_REPO_ID, RepositoryStatistics.FACET_ID ), keys ); + metadataRepositoryControl.expectAndReturn( metadataRepository.canObtainAccess( Session.class ), false, 3 ); metadataRepositoryControl.replay(); @@ -459,7 +459,6 @@ public class RepositoryStatisticsManagerTest private void addStats( Date startTime, Date endTime ) throws Exception - { RepositoryStatistics stats = createTestStats( startTime, endTime ); metadataRepository.addMetadataFacet( TEST_REPO_ID, stats ); diff --git a/archiva-modules/plugins/repository-statistics/src/test/repository.xml b/archiva-modules/plugins/repository-statistics/src/test/repository.xml new file mode 100644 index 000000000..625570e92 --- /dev/null +++ b/archiva-modules/plugins/repository-statistics/src/test/repository.xml @@ -0,0 +1,51 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. 
See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> + +<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.6//EN" + "http://jackrabbit.apache.org/dtd/repository-1.6.dtd"> + +<Repository> + <FileSystem class="org.apache.jackrabbit.core.fs.mem.MemoryFileSystem"/> + <!--<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem"> + <param name="path" value="${rep.home}/repository"/> +</FileSystem> --> + <Security appName="Jackrabbit"> + <SecurityManager class="org.apache.jackrabbit.core.security.simple.SimpleSecurityManager" workspaceName="security"/> + <AccessManager class="org.apache.jackrabbit.core.security.simple.SimpleAccessManager"/> + <LoginModule class="org.apache.jackrabbit.core.security.simple.SimpleLoginModule"/> + </Security> + <Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/> + <Workspace name="${wsp.name}"> + <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem"> + <param name="path" value="${wsp.home}"/> + </FileSystem> + <PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.BundleFsPersistenceManager"/> + <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex"> + <param name="path" value="${wsp.home}/index"/> + </SearchIndex> + </Workspace> + <Versioning rootPath="${rep.home}/version"> + 
<FileSystem class="org.apache.jackrabbit.core.fs.mem.MemoryFileSystem"/> + <PersistenceManager class="org.apache.jackrabbit.core.persistence.mem.InMemPersistenceManager"/> + <!--<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem"> + <param name="path" value="${rep.home}/version"/> + </FileSystem> + <PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.BundleFsPersistenceManager"/>--> + </Versioning> +</Repository> diff --git a/archiva-modules/plugins/repository-statistics/src/test/resources/artifacts.xml.gz b/archiva-modules/plugins/repository-statistics/src/test/resources/artifacts.xml.gz Binary files differ new file mode 100644 index 000000000..0d8a41e3b --- /dev/null +++ b/archiva-modules/plugins/repository-statistics/src/test/resources/artifacts.xml.gz |