1 package org.apache.archiva.reports.consumers;
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
22 import org.apache.archiva.checksum.ChecksumAlgorithm;
23 import org.apache.archiva.checksum.ChecksummedFile;
24 import org.apache.archiva.metadata.model.ArtifactMetadata;
25 import org.apache.archiva.metadata.repository.MetadataRepository;
26 import org.apache.archiva.metadata.repository.MetadataRepositoryException;
27 import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
28 import org.apache.archiva.reports.RepositoryProblemFacet;
29 import org.apache.commons.collections.CollectionUtils;
30 import org.apache.maven.archiva.configuration.ArchivaConfiguration;
31 import org.apache.maven.archiva.configuration.ConfigurationNames;
32 import org.apache.maven.archiva.configuration.FileTypes;
33 import org.apache.maven.archiva.configuration.ManagedRepositoryConfiguration;
34 import org.apache.maven.archiva.consumers.AbstractMonitoredConsumer;
35 import org.apache.maven.archiva.consumers.ConsumerException;
36 import org.apache.maven.archiva.consumers.KnownRepositoryContentConsumer;
37 import org.codehaus.plexus.personality.plexus.lifecycle.phase.Initializable;
38 import org.codehaus.plexus.personality.plexus.lifecycle.phase.InitializationException;
39 import org.codehaus.plexus.registry.Registry;
40 import org.codehaus.plexus.registry.RegistryListener;
41 import org.slf4j.Logger;
42 import org.slf4j.LoggerFactory;
45 import java.io.IOException;
46 import java.util.ArrayList;
47 import java.util.Collections;
48 import java.util.Date;
49 import java.util.List;
52 * Search the database of known SHA1 Checksums for potential duplicate artifacts.
54 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
57 * @plexus.component role="org.apache.maven.archiva.consumers.KnownRepositoryContentConsumer"
58 * role-hint="duplicate-artifacts"
59 * instantiation-strategy="per-lookup"
61 public class DuplicateArtifactsConsumer
62 extends AbstractMonitoredConsumer
63 implements KnownRepositoryContentConsumer, RegistryListener, Initializable
// Logger used for the warning and debug messages emitted while checking files.
65 private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );
68 * @plexus.configuration default-value="duplicate-artifacts"
73 * @plexus.configuration default-value="Check for Duplicate Artifacts via SHA1 Checksums"
// Description text for this consumer.
// NOTE(review): presumably populated from the plexus configuration default above - confirm.
75 private String description;
// Archiva configuration; initialize() registers this consumer on it as a change listener.
80 private ArchivaConfiguration configuration;
// Source of the file patterns for the ARTIFACTS file type (read by initIncludes()).
85 private FileTypes filetypes;
// Include patterns; initIncludes() adds the ARTIFACTS file type patterns to this list.
87 private List<String> includes = new ArrayList<String>();
// Directory of the repository being scanned; set in beginScan() and used to resolve paths in processFile().
89 private File repositoryDir;
// Id of the repository being scanned; set in beginScan() and used for metadata queries and problem facets.
91 private String repoId;
// Queried for artifacts by checksum and used to store the duplicate-artifact problem facets.
96 private MetadataRepository metadataRepository;
99 * FIXME: needs to be selected based on the repository in question
101 * @plexus.requirement role-hint="maven2"
// Translates between repository paths and artifact coordinates (both directions are used in processFile()).
103 private RepositoryPathTranslator pathTranslator;
/**
 * Returns the identifier of this consumer.
 * NOTE(review): presumably the plexus-configured id (default "duplicate-artifacts") - confirm against the field.
 *
 * @return the consumer id
 */
105 public String getId()
/**
 * Returns the description of this consumer.
 * NOTE(review): presumably the value of the {@code description} field - confirm.
 *
 * @return the consumer description
 */
110 public String getDescription()
/**
 * Indicates whether this consumer is permanent.
 * NOTE(review): the value returned, and what "permanent" implies for the scanner, should be confirmed
 * against the consumer API.
 *
 * @return the permanence flag of this consumer
 */
115 public boolean isPermanent()
/**
 * Returns the include patterns of this consumer.
 * NOTE(review): presumably the {@code includes} list filled by initIncludes() - confirm.
 *
 * @return the include patterns
 */
120 public List<String> getIncludes()
/**
 * Returns the exclude patterns of this consumer, which is always an empty list.
 *
 * @return an empty list
 */
125 public List<String> getExcludes()
127 return Collections.emptyList();
/**
 * Prepares this consumer for scanning the given repository by recording the repository id and
 * its location as a directory. Both values are read later by processFile(String).
 *
 * @param repo         configuration of the repository about to be scanned
 * @param whenGathered not read by the statements in this method
 * @throws ConsumerException declared by the signature
 */
130 public void beginScan( ManagedRepositoryConfiguration repo, Date whenGathered )
131 throws ConsumerException
133 repoId = repo.getId();
134 this.repositoryDir = new File( repo.getLocation() );
/**
 * Overload that delegates to beginScan(repo, whenGathered); the executeOnEntireRepo flag is not
 * passed on.
 *
 * @param repo                configuration of the repository about to be scanned
 * @param whenGathered        forwarded unchanged to the two-argument overload
 * @param executeOnEntireRepo not used by the delegation
 * @throws ConsumerException if the two-argument overload throws it
 */
137 public void beginScan( ManagedRepositoryConfiguration repo, Date whenGathered, boolean executeOnEntireRepo )
138 throws ConsumerException
140 beginScan( repo, whenGathered );
/**
 * Checks a single repository file for duplicates, keyed by its SHA-1 checksum.
 * <p>
 * The SHA-1 of the file is calculated and the metadata repository is asked for artifacts recorded
 * for this repository with the same checksum. For each match that is not the file itself, a
 * RepositoryProblemFacet with problem type "duplicate-artifact" is added to the metadata repository.
 *
 * @param path path of the file to check, resolved against the repository directory recorded by beginScan
 * @throws ConsumerException if the checksum cannot be calculated, or if the metadata repository
 *                           lookup or the facet update fails
 */
143 public void processFile( String path )
144 throws ConsumerException
// Resolve the scanned path against the repository directory recorded by beginScan.
146 File artifactFile = new File( this.repositoryDir, path );
148 // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
149 // perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
150 // alternatively this could come straight from the storage resolver, which could populate the artifact metadata
151 // in the later parse call with the desired checksum and use that
153 ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
156 checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
// An I/O failure while checksumming is rethrown as ConsumerException, keeping the original as cause.
158 catch ( IOException e )
160 throw new ConsumerException( e.getMessage(), e );
// Look up the artifacts already recorded for this repository with the same SHA-1.
163 List<ArtifactMetadata> results;
166 results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
168 catch ( MetadataRepositoryException e )
170 throw new ConsumerException( e.getMessage(), e );
// Nothing to report when no artifact shares the checksum.
173 if ( CollectionUtils.isNotEmpty( results ) )
// Translate the path into artifact coordinates; they identify the scanned file in the problem report.
175 ArtifactMetadata originalArtifact;
178 originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
// Any failure to interpret the path is logged as a warning with the exception message only.
// NOTE(review): presumably no problem is reported for this file after the warning - confirm the control flow.
180 catch ( Exception e )
182 log.warn( "Not reporting problem for invalid artifact in checksum check: " + e.getMessage() );
186 for ( ArtifactMetadata dupArtifact : results )
// The file name is the last path segment; it is compared with the id of each recorded artifact.
188 String id = path.substring( path.lastIndexOf( "/" ) + 1 );
// Same id, namespace, project and version means the match is the scanned file itself.
189 if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
190 originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
191 originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
192 originalArtifact.getVersion() ) )
194 // Skip reference to itself.
195 if ( log.isDebugEnabled() )
197 log.debug( "Not counting duplicate for artifact " + dupArtifact + " for path " + path );
// Describe the duplicate as a problem recorded against the coordinates of the scanned file.
202 RepositoryProblemFacet problem = new RepositoryProblemFacet();
203 problem.setRepositoryId( repoId );
204 problem.setNamespace( originalArtifact.getNamespace() );
205 problem.setProject( originalArtifact.getProject() );
206 problem.setVersion( originalArtifact.getVersion() );
208 // TODO: need to get the right storage resolver for the repository the dupe artifact is in, it might be
210 // TODO: we need the project version here, not the artifact version
// The message pairs the scanned path with the path derived from the duplicate's coordinates.
211 problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
212 dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
213 dupArtifact.getId() ) );
214 problem.setProblem( "duplicate-artifact" );
// Store the problem facet; a metadata repository failure is rethrown as ConsumerException.
218 metadataRepository.addMetadataFacet( repoId, problem );
220 catch ( MetadataRepositoryException e )
222 throw new ConsumerException( e.getMessage(), e );
/**
 * Overload of processFile that additionally receives the executeOnEntireRepo flag.
 * NOTE(review): presumably delegates to processFile(path) and ignores the flag, as the beginScan
 * overload does - confirm.
 *
 * @param path                path of the file to check
 * @param executeOnEntireRepo scan-mode flag supplied by the caller
 * @throws ConsumerException declared by the signature
 */
228 public void processFile( String path, boolean executeOnEntireRepo )
229 throws ConsumerException
/**
 * Scan-completion callback.
 * NOTE(review): presumably there is nothing to clean up for this consumer - confirm.
 */
234 public void completeScan()
/**
 * Scan-completion callback that additionally receives the executeOnEntireRepo flag.
 * NOTE(review): presumably delegates to completeScan() - confirm.
 *
 * @param executeOnEntireRepo scan-mode flag supplied by the caller
 */
239 public void completeScan( boolean executeOnEntireRepo )
/**
 * Registry listener callback for a configuration change. Only property names accepted by
 * ConfigurationNames.isRepositoryScanning are acted on; registry and propertyValue are not read
 * by the check.
 * NOTE(review): presumably the include patterns are rebuilt when the check passes - confirm.
 *
 * @param registry      the registry reporting the change
 * @param propertyName  name of the changed property, tested against the repository-scanning names
 * @param propertyValue value reported for the property
 */
244 public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
246 if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
/**
 * Registry listener callback, the counterpart of afterConfigurationChange.
 * NOTE(review): appears to take no action for this consumer - confirm.
 *
 * @param registry      the registry reporting the change
 * @param propertyName  name of the property
 * @param propertyValue value reported for the property
 */
252 public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
/**
 * Adds the file patterns registered for the FileTypes.ARTIFACTS file type to the include list.
 */
257 private void initIncludes()
261 includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
/**
 * Initialization hook: registers this consumer as a change listener on the Archiva configuration,
 * so that it receives the before/after configuration change callbacks.
 *
 * @throws InitializationException declared by the signature
 */
264 public void initialize()
265 throws InitializationException
268 configuration.addChangeListener( this );