1 package org.apache.archiva.metadata.repository.stats;
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
22 import org.apache.archiva.metadata.model.ArtifactMetadata;
23 import org.apache.archiva.metadata.model.maven2.MavenArtifactFacet;
24 import org.apache.archiva.metadata.repository.MetadataRepository;
25 import org.apache.archiva.metadata.repository.MetadataRepositoryException;
26 import org.apache.archiva.metadata.repository.MetadataResolutionException;
27 import org.apache.commons.lang.time.StopWatch;
28 import org.apache.jackrabbit.commons.JcrUtils;
29 import org.slf4j.Logger;
30 import org.slf4j.LoggerFactory;
31 import org.springframework.stereotype.Service;
33 import java.text.ParseException;
34 import java.text.SimpleDateFormat;
35 import java.util.ArrayList;
36 import java.util.Collection;
37 import java.util.Collections;
38 import java.util.Date;
39 import java.util.HashMap;
40 import java.util.List;
42 import java.util.TimeZone;
43 import javax.jcr.Node;
44 import javax.jcr.RepositoryException;
45 import javax.jcr.Session;
46 import javax.jcr.query.Query;
47 import javax.jcr.query.QueryManager;
48 import javax.jcr.query.QueryResult;
49 import javax.jcr.query.Row;
54 @Service("repositoryStatisticsManager#default")
55 public class DefaultRepositoryStatisticsManager
56 implements RepositoryStatisticsManager
58 private static final Logger log = LoggerFactory.getLogger( DefaultRepositoryStatisticsManager.class );
60 private static final TimeZone UTC_TIME_ZONE = TimeZone.getTimeZone( "UTC" );
63 public boolean hasStatistics( MetadataRepository metadataRepository, String repositoryId )
64 throws MetadataRepositoryException
66 return metadataRepository.hasMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID );
70 public RepositoryStatistics getLastStatistics( MetadataRepository metadataRepository, String repositoryId )
71 throws MetadataRepositoryException
73 StopWatch stopWatch = new StopWatch();
75 // TODO: consider a more efficient implementation that directly gets the last one from the content repository
76 List<String> scans = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
81 Collections.sort( scans );
82 if ( !scans.isEmpty() )
84 String name = scans.get( scans.size() - 1 );
85 RepositoryStatistics repositoryStatistics =
86 RepositoryStatistics.class.cast( metadataRepository.getMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID,
89 log.debug( "time to find last RepositoryStatistics: {} ms", stopWatch.getTime() );
90 return repositoryStatistics;
98 private void walkRepository( MetadataRepository metadataRepository, RepositoryStatistics stats, String repositoryId,
100 throws MetadataResolutionException
102 for ( String namespace : metadataRepository.getNamespaces( repositoryId, ns ) )
104 walkRepository( metadataRepository, stats, repositoryId, ns + "." + namespace );
107 Collection<String> projects = metadataRepository.getProjects( repositoryId, ns );
108 if ( !projects.isEmpty() )
110 stats.setTotalGroupCount( stats.getTotalGroupCount() + 1 );
111 stats.setTotalProjectCount( stats.getTotalProjectCount() + projects.size() );
113 for ( String project : projects )
115 for ( String version : metadataRepository.getProjectVersions( repositoryId, ns, project ) )
117 for ( ArtifactMetadata artifact : metadataRepository.getArtifacts( repositoryId, ns, project,
120 stats.setTotalArtifactCount( stats.getTotalArtifactCount() + 1 );
121 stats.setTotalArtifactFileSize( stats.getTotalArtifactFileSize() + artifact.getSize() );
123 MavenArtifactFacet facet =
124 (MavenArtifactFacet) artifact.getFacet( MavenArtifactFacet.FACET_ID );
127 String type = facet.getType();
128 stats.setTotalCountForType( type, stats.getTotalCountForType( type ) + 1 );
137 public void addStatisticsAfterScan( MetadataRepository metadataRepository, String repositoryId, Date startTime,
138 Date endTime, long totalFiles, long newFiles )
139 throws MetadataRepositoryException
141 RepositoryStatistics repositoryStatistics = new RepositoryStatistics();
142 repositoryStatistics.setRepositoryId( repositoryId );
143 repositoryStatistics.setScanStartTime( startTime );
144 repositoryStatistics.setScanEndTime( endTime );
145 repositoryStatistics.setTotalFileCount( totalFiles );
146 repositoryStatistics.setNewFileCount( newFiles );
149 // In the future, instead of being tied to a scan we might want to record information in the fly based on
150 // events that are occurring. Even without these totals we could query much of the information on demand based
151 // on information from the metadata content repository. In the mean time, we lock information in at scan time.
152 // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not
153 // be updated and the repository will need to be rescanned.
155 long startGather = System.currentTimeMillis();
157 // FIXME what about other implementations ?
159 if ( metadataRepository.canObtainAccess( Session.class ) )
161 // TODO: this is currently very raw and susceptible to changes in content structure. Should we instead
162 // depend directly on the plugin and interrogate the JCR repository's knowledge of the structure?
163 populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId,
164 repositoryStatistics );
169 // if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could
170 // build an index, or store the aggregate information and update it on the fly. We can perhaps even walk
171 // but retrieve less information to speed it up. In the mean time, we walk the repository using the
173 populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics );
176 log.info( "Gathering statistics executed in {} ms", ( System.currentTimeMillis() - startGather ) );
178 metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics );
/**
 * Fills {@code repositoryStatistics} with artifact, per-type, project and namespace totals by running
 * queries against the JCR session.
 * <p>
 * All queries are restricted to nodes whose path lies under {@code /repositories/<repositoryId>/content/}.
 * Any {@link RepositoryException} is rethrown as a {@link MetadataRepositoryException} carrying the
 * original exception as its cause.
 * <p>
 * NOTE(review): this listing omits several short lines of the method (braces, the declaration of
 * {@code type}, and others flagged below); confirm the flagged points against the complete source.
 *
 * @param session              the JCR session used to obtain the query manager
 * @param repositoryId         identifier of the repository; concatenated into the query path as-is
 * @param repositoryStatistics the statistics object that receives the totals
 * @throws MetadataRepositoryException when a JCR operation fails
 */
181 private void populateStatisticsFromJcr( Session session, String repositoryId,
182 RepositoryStatistics repositoryStatistics )
183 throws MetadataRepositoryException
185 // TODO: these may be best as running totals, maintained by observations on the properties in JCR
189 QueryManager queryManager = session.getWorkspace().getQueryManager();
191 // TODO: JCR-SQL2 query will not complete on a large repo in Jackrabbit 2.2.0 - see JCR-2835
192 // Using the JCR-SQL2 variants gives
193 // "org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to 1024"
194 // String whereClause = "WHERE ISDESCENDANTNODE([/repositories/" + repositoryId + "/content])";
195 // Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact] " + whereClause,
// Shared path filter: every query below appends this clause to stay inside this repository's content.
197 String whereClause = "WHERE jcr:path LIKE '/repositories/" + repositoryId + "/content/%'";
198 Query query = queryManager.createQuery( "SELECT size FROM archiva:artifact " + whereClause, Query.SQL );
200 QueryResult queryResult = query.execute();
// Per-type artifact counters plus running totals, filled while iterating the artifact rows.
202 Map<String, Integer> totalByType = new HashMap<>();
203 long totalSize = 0, totalArtifacts = 0;
204 for ( Row row : JcrUtils.getRows( queryResult ) )
206 Node n = row.getNode();
207 totalSize += row.getValue( "size" ).getLong();
// The type is read from the "type" property of the child node named MavenArtifactFacet.FACET_ID.
// NOTE(review): the declaration of 'type' and the value used when that child node is absent are
// not visible in this listing - confirm against the complete source.
210 if ( n.hasNode( MavenArtifactFacet.FACET_ID ) )
212 Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID );
213 type = facetNode.getProperty( "type" ).getString();
// Bump the counter for this type, starting at 1 the first time the type is seen.
219 Integer prev = totalByType.get( type );
220 totalByType.put( type, prev != null ? prev + 1 : 1 );
// NOTE(review): no increment of totalArtifacts is visible in this listing; presumably it is
// incremented once per row - confirm.
225 repositoryStatistics.setTotalArtifactCount( totalArtifacts );
226 repositoryStatistics.setTotalArtifactFileSize( totalSize );
227 for ( Map.Entry<String, Integer> entry : totalByType.entrySet() )
229 repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() );
232 // The query ordering is a trick to ensure that the size is correct, otherwise due to lazy init it will be -1
233 // query = queryManager.createQuery( "SELECT * FROM [archiva:project] " + whereClause, Query.JCR_SQL2 );
// Project total is the row count of this query.
// NOTE(review): the query-language argument of this createQuery call is not visible in this listing.
234 query = queryManager.createQuery( "SELECT * FROM archiva:project " + whereClause + " ORDER BY jcr:score",
236 repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() );
238 // query = queryManager.createQuery(
239 // "SELECT * FROM [archiva:namespace] " + whereClause + " AND namespace IS NOT NULL", Query.JCR_SQL2 );
// Group total is the row count of namespace nodes whose 'namespace' property is set.
// NOTE(review): the query-language argument of this createQuery call is not visible in this listing.
240 query = queryManager.createQuery(
241 "SELECT * FROM archiva:namespace " + whereClause + " AND namespace IS NOT NULL ORDER BY jcr:score",
243 repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() );
// Surface any JCR failure as the metadata-layer exception declared by this method, keeping the cause.
245 catch ( RepositoryException e )
247 throw new MetadataRepositoryException( e.getMessage(), e );
251 private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId,
252 RepositoryStatistics repositoryStatistics )
253 throws MetadataRepositoryException
257 for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) )
259 walkRepository( metadataRepository, repositoryStatistics, repositoryId, ns );
262 catch ( MetadataResolutionException e )
264 throw new MetadataRepositoryException( e.getMessage(), e );
269 public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId )
270 throws MetadataRepositoryException
272 metadataRepository.removeMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
276 public List<RepositoryStatistics> getStatisticsInRange( MetadataRepository metadataRepository, String repositoryId,
277 Date startTime, Date endTime )
278 throws MetadataRepositoryException
280 List<RepositoryStatistics> results = new ArrayList<>();
281 List<String> list = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
282 Collections.sort( list, Collections.reverseOrder() );
283 for ( String name : list )
287 Date date = createNameFormat().parse( name );
288 if ( ( startTime == null || !date.before( startTime ) ) && ( endTime == null || !date.after(
291 RepositoryStatistics stats =
292 (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId,
293 RepositoryStatistics.FACET_ID,
295 results.add( stats );
298 catch ( ParseException e )
300 log.error( "Invalid scan result found in the metadata repository: " + e.getMessage() );
301 // continue and ignore this one
307 private static SimpleDateFormat createNameFormat()
309 SimpleDateFormat fmt = new SimpleDateFormat( RepositoryStatistics.SCAN_TIMESTAMP_FORMAT );
310 fmt.setTimeZone( UTC_TIME_ZONE );