1 package org.apache.archiva.reports.consumers;
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
22 import org.apache.archiva.admin.model.beans.ManagedRepository;
23 import org.apache.archiva.checksum.ChecksumAlgorithm;
24 import org.apache.archiva.checksum.ChecksummedFile;
25 import org.apache.archiva.configuration.ArchivaConfiguration;
26 import org.apache.archiva.configuration.ConfigurationNames;
27 import org.apache.archiva.configuration.FileTypes;
28 import org.apache.archiva.consumers.AbstractMonitoredConsumer;
29 import org.apache.archiva.consumers.ConsumerException;
30 import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
31 import org.apache.archiva.metadata.model.ArtifactMetadata;
32 import org.apache.archiva.metadata.repository.MetadataRepository;
33 import org.apache.archiva.metadata.repository.MetadataRepositoryException;
34 import org.apache.archiva.metadata.repository.RepositorySession;
35 import org.apache.archiva.metadata.repository.RepositorySessionFactory;
36 import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
37 import org.apache.archiva.redback.components.registry.Registry;
38 import org.apache.archiva.redback.components.registry.RegistryListener;
39 import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
40 import org.apache.commons.collections.CollectionUtils;
41 import org.slf4j.Logger;
42 import org.slf4j.LoggerFactory;
43 import org.springframework.context.annotation.Scope;
44 import org.springframework.stereotype.Service;
46 import javax.annotation.PostConstruct;
47 import javax.inject.Inject;
48 import javax.inject.Named;
50 import java.io.IOException;
51 import java.util.ArrayList;
52 import java.util.Collection;
53 import java.util.Collections;
54 import java.util.Date;
55 import java.util.List;
58 * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
60 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
// Repository content consumer registered as the Spring bean
// "knownRepositoryContentConsumer#duplicate-artifacts" with prototype scope.
// During a scan, processFile computes each file's SHA-1 checksum, looks up artifacts by that checksum in
// the metadata repository, and records a "duplicate-artifact" problem facet for each match other than
// the file itself.
62 @Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
63 @Scope ( "prototype" )
64 public class DuplicateArtifactsConsumer
65 extends AbstractMonitoredConsumer
66 implements KnownRepositoryContentConsumer, RegistryListener
// Logger for this class (instance field, not static).
68 private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );
// Consumer identifier. NOTE(review): processFile declares a local variable with the same name that shadows it.
70 private String id = "duplicate-artifacts";
// Human-readable description of this consumer.
72 private String description = "Check for Duplicate Artifacts via SHA1 Checksums";
// Archiva configuration; initialize() registers this consumer on it as a change listener.
75 private ArchivaConfiguration configuration;
// Source of the file-type patterns that initIncludes() copies into `includes`.
78 private FileTypes filetypes;
81 * FIXME: this could be multiple implementations and needs to be configured.
// Factory used by beginScan to open the metadata repository session.
84 private RepositorySessionFactory repositorySessionFactory;
// Include patterns; initIncludes() adds the FileTypes.ARTIFACTS patterns to this list.
86 private List<String> includes = new ArrayList<>();
// Root directory of the repository being scanned; set in beginScan from repo.getLocation().
88 private File repositoryDir;
// Id of the repository being scanned; set in beginScan.
90 private String repoId;
93 * FIXME: needs to be selected based on the repository in question
// Path translator bound by name to the maven2 implementation; processFile uses it to map a path to
// artifact metadata and artifact coordinates back to a path.
96 @Named ( value = "repositoryPathTranslator#maven2" )
97 private RepositoryPathTranslator pathTranslator;
// Session opened in beginScan and closed in completeScan (and in one error path of processFile).
99 private RepositorySession repositorySession;
// Accessor for the consumer id. Body not visible in this extract; presumably returns the `id` field — confirm.
102 public String getId()
// Accessor for the consumer description. Body not visible in this extract; presumably returns
// the `description` field — confirm.
108 public String getDescription()
// Reports whether this consumer is permanent. Body not visible in this extract — value unknown from here.
114 public boolean isPermanent()
// Accessor for the include patterns. Body not visible in this extract; presumably returns `includes` — confirm.
120 public List<String> getIncludes()
// Exclude patterns: this consumer defines none, so an empty immutable list is returned.
126 public List<String> getExcludes()
128 return Collections.emptyList();
// Prepares this consumer for a scan of the given managed repository.
// repo         - repository about to be scanned; its id and location are captured here
// whenGathered - not used by any statement visible in this extract
// Declares ConsumerException, although none of the visible statements throws it explicitly.
132 public void beginScan( ManagedRepository repo, Date whenGathered )
133 throws ConsumerException
// Remember which repository is being scanned; processFile uses this id for lookups and problem facets.
135 repoId = repo.getId();
// Paths handed to processFile are resolved against this directory.
136 this.repositoryDir = new File( repo.getLocation() );
// Open the metadata repository session; completeScan() closes it.
137 repositorySession = repositorySessionFactory.createSession();
// Overload taking an executeOnEntireRepo flag. In the lines visible here the flag is not consulted;
// the call is forwarded to the two-argument beginScan.
141 public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
142 throws ConsumerException
144 beginScan( repo, whenGathered );
// Checks one repository file for duplicates.
// path - file path relative to the repository directory captured in beginScan
// Computes the file's SHA-1 checksum, looks up artifacts by that checksum in the metadata repository and,
// for each result that is not this file itself, stores a RepositoryProblemFacet with problem
// "duplicate-artifact".
// Throws ConsumerException (wrapping the cause) when the checksum cannot be calculated or when the
// metadata repository lookup or facet write fails.
// NOTE(review): several short lines (braces, try, and likely some statements) are missing from this
// extract; comments below describe only what the visible lines show.
148 public void processFile( String path )
149 throws ConsumerException
// Resolve the relative path against the repository root.
151 File artifactFile = new File( this.repositoryDir, path );
153 // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
154 // perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
155 // alternatively this could come straight from the storage resolver, which could populate the artifact metadata
156 // in the later parse call with the desired checksum and use that
158 ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
// The declaration of checksumSha1 is not visible in this extract.
161 checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
163 catch ( IOException e )
// Surface I/O failures to the scanner, keeping the original exception as the cause.
165 throw new ConsumerException( e.getMessage(), e );
168 MetadataRepository metadataRepository = repositorySession.getRepository();
170 Collection<ArtifactMetadata> results;
// Look up artifacts by checksum, scoped to the repository being scanned.
173 results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
175 catch ( MetadataRepositoryException e )
// NOTE(review): the session is closed on this failure path only; the IOException path above and the
// addMetadataFacet failure below do not close it, and completeScan() closes it again later.
// Confirm that a double close is safe and that no further processFile calls follow this failure.
177 repositorySession.close();
178 throw new ConsumerException( e.getMessage(), e );
// Nothing to report when the lookup returned null or an empty collection.
181 if ( CollectionUtils.isNotEmpty( results ) )
183 ArtifactMetadata originalArtifact;
// Derive the coordinates (namespace / project / version) of the file being processed from its path.
186 originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
188 catch ( Exception e )
// Best effort: an unparseable path is logged, not raised. The statement following the warning is not
// visible in this extract; presumably the method returns here, since originalArtifact is unassigned — confirm.
190 log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
194 for ( ArtifactMetadata dupArtifact : results )
// File name portion of the path (text after the last '/'; the whole path when there is no '/').
// NOTE(review): this local shadows the field `id`, and its value does not change between iterations.
196 String id = path.substring( path.lastIndexOf( '/' ) + 1 );
// A result with the same file name, namespace, project and version is the file being processed.
197 if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
198 originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
199 originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
200 originalArtifact.getVersion() ) )
202 // Skip reference to itself.
204 log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );
// The statement that skips to the next result is not visible in this extract (presumably `continue`) — confirm.
// The problem is recorded against the coordinates of the file being processed, not the duplicate's.
209 RepositoryProblemFacet problem = new RepositoryProblemFacet();
210 problem.setRepositoryId( repoId );
211 problem.setNamespace( originalArtifact.getNamespace() );
212 problem.setProject( originalArtifact.getProject() );
213 problem.setVersion( originalArtifact.getVersion() );
215 // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
217 // FIXME: we need the project version here, not the artifact version
// The message pairs the processed path with the path computed from the duplicate's coordinates.
218 problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
219 dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
220 dupArtifact.getId() ) );
221 problem.setProblem( "duplicate-artifact" );
// Store the problem facet through the metadata repository obtained from the session.
225 metadataRepository.addMetadataFacet( repoId, problem );
227 catch ( MetadataRepositoryException e )
229 throw new ConsumerException( e.getMessage(), e );
// Overload taking an executeOnEntireRepo flag. Body not visible in this extract; presumably forwards to
// processFile( path ) the way the three-argument beginScan forwards to the two-argument one — confirm.
236 public void processFile( String path, boolean executeOnEntireRepo )
237 throws ConsumerException
// Ends the scan by closing the metadata repository session opened in beginScan.
// NOTE(review): processFile also closes the session when its checksum lookup fails, so this may be a
// second close — confirm RepositorySession.close() tolerates that.
243 public void completeScan()
245 repositorySession.close();
// Overload taking an executeOnEntireRepo flag. Body not visible in this extract; presumably forwards to
// completeScan() — confirm.
249 public void completeScan( boolean executeOnEntireRepo )
// RegistryListener callback invoked after a configuration property changed.
// Only properties for which ConfigurationNames.isRepositoryScanning( propertyName ) is true are acted on.
// The guarded action is not visible in this extract; presumably it refreshes the include patterns via
// initIncludes() — confirm.
255 public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
257 if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
// RegistryListener callback invoked before a configuration property changes.
// Body not visible in this extract; presumably a no-op — confirm.
264 public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
// Populates `includes` with the patterns configured for the FileTypes.ARTIFACTS file type.
// NOTE(review): only the addAll call is visible here; if the list is not cleared first (a preceding
// statement may be missing from this extract), repeated calls would accumulate duplicate patterns — confirm.
269 private void initIncludes()
273 includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
// Initialization hook (javax.annotation.PostConstruct is imported by this file; the annotation line itself
// is not visible in this extract).
// Registers this consumer as a change listener on the Archiva configuration so that
// afterConfigurationChange / beforeConfigurationChange are invoked.
// Other statements of the body may be missing from this extract.
277 public void initialize()
280 configuration.addChangeListener( this );