From 9148a900ee09096aa24a674687912574e6cc16f3 Mon Sep 17 00:00:00 2001 From: Martin Stockhammer Date: Fri, 8 Sep 2017 15:24:01 +0200 Subject: [PATCH] Migrating repository scanner to java.nio --- .../scanner/DefaultRepositoryScanner.java | 56 +++--- .../scanner/RepositoryContentConsumers.java | 12 +- .../scanner/RepositoryScannerInstance.java | 172 ++++++++++++------ .../RepositoryContentConsumersTest.java | 15 +- .../scanner/RepositoryScannerTest.java | 32 ++-- ...ArchivaRepositoryScanningTaskExecutor.java | 2 +- 6 files changed, 175 insertions(+), 114 deletions(-) diff --git a/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/DefaultRepositoryScanner.java b/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/DefaultRepositoryScanner.java index 6257880dd..5daae0745 100644 --- a/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/DefaultRepositoryScanner.java +++ b/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/DefaultRepositoryScanner.java @@ -21,22 +21,22 @@ package org.apache.archiva.repository.scanner; import org.apache.archiva.admin.model.RepositoryAdminException; import org.apache.archiva.admin.model.beans.ManagedRepository; -import org.apache.archiva.configuration.ArchivaConfiguration; import org.apache.archiva.configuration.FileTypes; import org.apache.archiva.consumers.InvalidRepositoryContentConsumer; import org.apache.archiva.consumers.KnownRepositoryContentConsumer; import org.apache.archiva.consumers.RepositoryContentConsumer; import org.apache.commons.collections.CollectionUtils; -import org.codehaus.plexus.util.DirectoryWalker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import javax.inject.Inject; -import java.io.File; -import java.util.ArrayList; -import 
java.util.Collections; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; +import java.io.IOException; +import java.nio.file.FileVisitOption; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; /** * DefaultRepositoryScanner @@ -47,6 +47,9 @@ import java.util.Set; public class DefaultRepositoryScanner implements RepositoryScanner { + + private static final Logger log = LoggerFactory.getLogger(DefaultRepositoryScanner.class); + @Inject private FileTypes filetypes; @@ -89,20 +92,23 @@ public class DefaultRepositoryScanner throw new IllegalArgumentException( "Unable to operate on a null repository." ); } - File repositoryBase = new File( repository.getLocation() ); + Path repositoryBase = Paths.get( repository.getLocation() ); //MRM-1342 Repository statistics report doesn't appear to be working correctly //create the repo if not existing to have an empty stats - if ( !repositoryBase.exists() && !repositoryBase.mkdirs() ) + if ( !Files.exists(repositoryBase)) { - throw new UnsupportedOperationException( - "Unable to scan a repository, directory " + repositoryBase.getPath() + " does not exist." ); + try { + Files.createDirectories(repositoryBase); + } catch (IOException e) { + throw new UnsupportedOperationException("Unable to scan a repository, directory " + repositoryBase + " does not exist.", e ); + } } - if ( !repositoryBase.isDirectory() ) + if ( !Files.isDirectory(repositoryBase) ) { throw new UnsupportedOperationException( - "Unable to scan a repository, path " + repositoryBase.getPath() + " is not a directory." ); + "Unable to scan a repository, path " + repositoryBase+ " is not a directory." ); } // Setup Includes / Excludes. @@ -118,34 +124,26 @@ public class DefaultRepositoryScanner // Scan All Content.
(intentional) allIncludes.add( "**/*" ); - // Setup Directory Walker - DirectoryWalker dirWalker = new DirectoryWalker(); - - dirWalker.setBaseDir( repositoryBase ); - - dirWalker.setIncludes( allIncludes ); - dirWalker.setExcludes( allExcludes ); - // Setup the Scan Instance RepositoryScannerInstance scannerInstance = new RepositoryScannerInstance( repository, knownContentConsumers, invalidContentConsumers, changesSince ); + scannerInstance.setFileNameIncludePattern(allIncludes); + scannerInstance.setFileNameExcludePattern(allExcludes); inProgressScans.add( scannerInstance ); - RepositoryScanStatistics stats; + RepositoryScanStatistics stats = null; try { - dirWalker.addDirectoryWalkListener( scannerInstance ); - - // Execute scan. - dirWalker.scan(); + Files.walkFileTree(repositoryBase, EnumSet.of(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, scannerInstance); stats = scannerInstance.getStatistics(); stats.setKnownConsumers( gatherIds( knownContentConsumers ) ); stats.setInvalidConsumers( gatherIds( invalidContentConsumers ) ); - } - finally + } catch (IOException e) { + log.error("Could not scan directory {}: {}", repositoryBase, e.getMessage(), e); + } finally { inProgressScans.remove( scannerInstance ); } diff --git a/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryContentConsumers.java b/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryContentConsumers.java index 507400eba..5ccdb1ef9 100644 --- a/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryContentConsumers.java +++ b/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryContentConsumers.java @@ -40,12 +40,8 @@ import org.springframework.context.ApplicationContextAware; import org.springframework.stereotype.Service; import javax.inject.Inject; -import
java.io.File; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.nio.file.Path; +import java.util.*; /** * RepositoryContentConsumerUtil @@ -266,7 +262,7 @@ public class RepositoryContentConsumers * @param localFile the local file to execute the consumers against. * @param updateRelatedArtifacts TODO */ - public void executeConsumers( ManagedRepository repository, File localFile, boolean updateRelatedArtifacts ) + public void executeConsumers(ManagedRepository repository, Path localFile, boolean updateRelatedArtifacts ) throws RepositoryAdminException { List selectedKnownConsumers = null; @@ -300,7 +296,7 @@ public class RepositoryContentConsumers // yuck. In case you can't read this, it says // "process the file if the consumer has it in the includes list, and not in the excludes list" - BaseFile baseFile = new BaseFile( repository.getLocation(), localFile ); + BaseFile baseFile = new BaseFile( repository.getLocation(), localFile.toFile() ); ConsumerWantsFilePredicate predicate = new ConsumerWantsFilePredicate( repository ); predicate.setBasefile( baseFile ); predicate.setCaseSensitive( false ); diff --git a/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryScannerInstance.java b/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryScannerInstance.java index 65cb6603c..55e0e5248 100644 --- a/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryScannerInstance.java +++ b/archiva-modules/archiva-base/archiva-repository-scanner/src/main/java/org/apache/archiva/repository/scanner/RepositoryScannerInstance.java @@ -31,21 +31,20 @@ import org.apache.commons.collections.Closure; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.functors.IfClosure; 
import org.apache.commons.lang.SystemUtils; -import org.codehaus.plexus.util.DirectoryWalkListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.io.IOException; +import java.nio.file.*; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.*; +import java.util.stream.Collectors; /** * RepositoryScannerInstance */ public class RepositoryScannerInstance - implements DirectoryWalkListener + implements FileVisitor { private Logger log = LoggerFactory.getLogger( RepositoryScannerInstance.class ); @@ -73,6 +72,17 @@ public class RepositoryScannerInstance private Map consumerCounts; + + private List fileNameIncludePattern = new ArrayList<>(); + private List fileNameExcludePattern = new ArrayList<>(); + + private List includeMatcher = new ArrayList<>(); + private List excludeMatcher = new ArrayList<>(); + + private boolean isRunning = false; + + Path basePath = null; + public RepositoryScannerInstance( ManagedRepository repository, List knownConsumerList, List invalidConsumerList ) @@ -81,6 +91,8 @@ public class RepositoryScannerInstance this.knownConsumers = knownConsumerList; this.invalidConsumers = invalidConsumerList; + addFileNameIncludePattern("**/*"); + consumerTimings = new HashMap<>(); consumerCounts = new HashMap<>(); @@ -133,77 +145,129 @@ public class RepositoryScannerInstance return consumerCounts; } - @Override - public void directoryWalkStarting( File basedir ) + public ManagedRepository getRepository() { - log.info( "Walk Started: [{}] {}", this.repository.getId(), this.repository.getLocation() ); - stats.triggerStart(); + return repository; } - @Override - public void directoryWalkStep( int percentage, File file ) + public RepositoryScanStatistics getStats() + { + return stats; + } + + public long getChangesSince() { - log.debug( "Walk Step: {}, {}", percentage, file ); + return changesSince; + } 
- stats.increaseFileCount(); + public List getFileNameIncludePattern() { + return fileNameIncludePattern; + } - // consume files regardless - the predicate will check the timestamp - BaseFile basefile = new BaseFile( repository.getLocation(), file ); + public void setFileNameIncludePattern(List fileNamePattern) { + this.fileNameIncludePattern = fileNamePattern; + FileSystem sys = FileSystems.getDefault(); + this.includeMatcher = fileNamePattern.stream().map(ts ->sys + .getPathMatcher("glob:" + ts)).collect(Collectors.toList()); + } - // Timestamp finished points to the last successful scan, not this current one. - if ( file.lastModified() >= changesSince ) - { - stats.increaseNewFileCount(); + public void addFileNameIncludePattern(String fileNamePattern) { + if (! this.fileNameIncludePattern.contains(fileNamePattern)) { + this.fileNameIncludePattern.add(fileNamePattern); + this.includeMatcher.add(FileSystems.getDefault().getPathMatcher("glob:" + fileNamePattern)); } + } - consumerProcessFile.setBasefile( basefile ); - consumerWantsFile.setBasefile( basefile ); + public List getFileNameExcludePattern() { + return fileNameExcludePattern; + } - Closure processIfWanted = IfClosure.getInstance( consumerWantsFile, consumerProcessFile ); - CollectionUtils.forAllDo( this.knownConsumers, processIfWanted ); + public void setFileNameExcludePattern(List fileNamePattern) { + this.fileNameExcludePattern = fileNamePattern; + FileSystem sys = FileSystems.getDefault(); + this.excludeMatcher = fileNamePattern.stream().map(ts ->sys + .getPathMatcher("glob:" + ts)).collect(Collectors.toList()); + } - if ( consumerWantsFile.getWantedFileCount() <= 0 ) - { - // Nothing known processed this file. It is invalid! - CollectionUtils.forAllDo( this.invalidConsumers, consumerProcessFile ); + public void addFileNameExcludePattern(String fileNamePattern) { + if (! 
this.fileNameExcludePattern.contains(fileNamePattern)) { + this.fileNameExcludePattern.add(fileNamePattern); + this.excludeMatcher.add(FileSystems.getDefault().getPathMatcher("glob:" + fileNamePattern)); } } + @Override - public void directoryWalkFinished() - { - TriggerScanCompletedClosure scanCompletedClosure = new TriggerScanCompletedClosure( repository, true ); - CollectionUtils.forAllDo( knownConsumers, scanCompletedClosure ); - CollectionUtils.forAllDo( invalidConsumers, scanCompletedClosure ); + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + if (!isRunning) { + isRunning = true; + this.basePath = dir; + log.info( "Walk Started: [{}] {}", this.repository.getId(), this.repository.getLocation() ); + stats.triggerStart(); + } + return FileVisitResult.CONTINUE; + } - stats.setConsumerTimings( consumerTimings ); - stats.setConsumerCounts( consumerCounts ); + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (excludeMatcher.stream().noneMatch(m -> m.matches(file)) && (includeMatcher.isEmpty() || includeMatcher.stream().anyMatch(m -> m.matches(file)))) { + log.debug( "Walk Step: {}", file ); - log.info( "Walk Finished: [{}] {}", this.repository.getId(), this.repository.getLocation() ); - stats.triggerFinished(); + stats.increaseFileCount(); + + // consume files regardless - the predicate will check the timestamp + BaseFile basefile = new BaseFile( repository.getLocation(), file.toFile() ); + + // Timestamp finished points to the last successful scan, not this current one.
+ if ( Files.getLastModifiedTime(file).toMillis() >= changesSince ) + { + stats.increaseNewFileCount(); + } + + consumerProcessFile.setBasefile( basefile ); + consumerWantsFile.setBasefile( basefile ); + + Closure processIfWanted = IfClosure.getInstance( consumerWantsFile, consumerProcessFile ); + CollectionUtils.forAllDo( this.knownConsumers, processIfWanted ); + + if ( consumerWantsFile.getWantedFileCount() <= 0 ) + { + // Nothing known processed this file. It is invalid! + CollectionUtils.forAllDo( this.invalidConsumers, consumerProcessFile ); + } + + } + return FileVisitResult.CONTINUE; } - /** - * Debug method from DirectoryWalker. - */ @Override - public void debug( String message ) - { - log.debug( "Repository Scanner: {}", message ); + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + log.error("Error occurred at {}: {}", file, exc.getMessage(), exc); + if (basePath!=null && file.equals(basePath)) { + finishWalk(); + } + return FileVisitResult.CONTINUE; } - public ManagedRepository getRepository() - { - return repository; + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (basePath!=null && Files.isSameFile(dir, basePath)) { + finishWalk(); + } + return FileVisitResult.CONTINUE; } - public RepositoryScanStatistics getStats() - { - return stats; - } + private void finishWalk() { + this.isRunning = false; + TriggerScanCompletedClosure scanCompletedClosure = new TriggerScanCompletedClosure( repository, true ); + CollectionUtils.forAllDo( knownConsumers, scanCompletedClosure ); + CollectionUtils.forAllDo( invalidConsumers, scanCompletedClosure ); - public long getChangesSince() - { - return changesSince; + stats.setConsumerTimings( consumerTimings ); + stats.setConsumerCounts( consumerCounts ); + + log.info( "Walk Finished: [{}] {}", this.repository.getId(), this.repository.getLocation() ); + stats.triggerFinished(); + this.basePath = null; } } diff --git
a/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryContentConsumersTest.java b/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryContentConsumersTest.java index db981234a..988e3ef8b 100644 --- a/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryContentConsumersTest.java +++ b/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryContentConsumersTest.java @@ -44,9 +44,10 @@ import org.springframework.core.io.Resource; import org.springframework.test.context.ContextConfiguration; import javax.inject.Inject; -import java.io.File; import java.io.IOException; import java.lang.annotation.Annotation; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -69,12 +70,12 @@ public class RepositoryContentConsumersTest @Inject ApplicationContext applicationContext; - protected ManagedRepository createRepository( String id, String name, File location ) + protected ManagedRepository createRepository( String id, String name, Path location ) { ManagedRepository repo = new ManagedRepository(); repo.setId( id ); repo.setName( name ); - repo.setLocation( location.getAbsolutePath() ); + repo.setLocation( location.toAbsolutePath().toString() ); return repo; } @@ -271,8 +272,8 @@ public class RepositoryContentConsumersTest consumers.setSelectedInvalidConsumers( Collections.singletonList( selectedInvalidConsumer ) ); - ManagedRepository repo = createRepository( "id", "name", new File( "target/test-repo" ) ); - File testFile = new File( "target/test-repo/path/to/test-file.txt" ); + ManagedRepository repo = createRepository( "id", "name", Paths.get( "target/test-repo" ) ); + Path testFile = Paths.get( "target/test-repo/path/to/test-file.txt" ); 
Date startTime = new Date( System.currentTimeMillis() ); startTime.setTime( 12345678 ); @@ -294,7 +295,7 @@ public class RepositoryContentConsumersTest knownControl.reset(); invalidControl.reset(); - File notIncludedTestFile = new File( "target/test-repo/path/to/test-file.xml" ); + Path notIncludedTestFile = Paths.get( "target/test-repo/path/to/test-file.xml" ); selectedKnownConsumer.beginScan( repo, startTime, false ); expect( selectedKnownConsumer.getExcludes() ).andReturn( Collections.emptyList() ); @@ -316,7 +317,7 @@ public class RepositoryContentConsumersTest knownControl.reset(); invalidControl.reset(); - File excludedTestFile = new File( "target/test-repo/path/to/test-file.txt" ); + Path excludedTestFile = Paths.get( "target/test-repo/path/to/test-file.txt" ); selectedKnownConsumer.beginScan( repo, startTime, false ); expect( selectedKnownConsumer.getExcludes() ).andReturn( Collections.singletonList( "**/test-file.txt" ) ); diff --git a/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryScannerTest.java b/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryScannerTest.java index 721d97c6b..e028b94eb 100644 --- a/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryScannerTest.java +++ b/archiva-modules/archiva-base/archiva-repository-scanner/src/test/java/org/apache/archiva/repository/scanner/RepositoryScannerTest.java @@ -32,9 +32,11 @@ import org.springframework.context.ApplicationContext; import org.springframework.test.context.ContextConfiguration; import javax.inject.Inject; -import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.attribute.FileTime; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -55,12 
+57,12 @@ public class RepositoryScannerTest @Inject ApplicationContext applicationContext; - protected ManagedRepository createRepository( String id, String name, File location ) + protected ManagedRepository createRepository( String id, String name, Path location ) { ManagedRepository repo = new ManagedRepository(); repo.setId( id ); repo.setName( name ); - repo.setLocation( location.getAbsolutePath() ); + repo.setLocation( location.toAbsolutePath().toString()); return repo; } @@ -78,10 +80,10 @@ public class RepositoryScannerTest private ManagedRepository createDefaultRepository() { - File repoDir = - Paths.get( System.getProperty( "basedir" ), "src/test/repositories/default-repository" ).toFile(); + Path repoDir = + Paths.get( System.getProperty( "basedir" ), "src/test/repositories/default-repository" ); - assertTrue( "Default Test Repository should exist.", repoDir.exists() && repoDir.isDirectory() ); + assertTrue( "Default Test Repository should exist.", Files.exists(repoDir) && Files.isDirectory(repoDir) ); return createRepository( "testDefaultRepo", "Test Default Repository", repoDir ); } @@ -89,19 +91,19 @@ public class RepositoryScannerTest private ManagedRepository createSimpleRepository() throws IOException, ParseException { - File srcDir = Paths.get( System.getProperty( "basedir" ), "src/test/repositories/simple-repository" ).toFile(); + Path srcDir = Paths.get( System.getProperty( "basedir" ), "src/test/repositories/simple-repository" ); - File repoDir = Paths.get( System.getProperty( "basedir" ), "target/test-repos/simple-repository" ).toFile(); + Path repoDir = Paths.get( System.getProperty( "basedir" ), "target/test-repos/simple-repository" ); - FileUtils.deleteDirectory( repoDir ); + org.apache.archiva.common.utils.FileUtils.deleteDirectory( repoDir ); - FileUtils.copyDirectory( srcDir, repoDir ); + FileUtils.copyDirectory( srcDir.toFile(), repoDir.toFile() ); - File repoFile = new File( repoDir, + Path repoFile = repoDir.resolve( 
"groupId/snapshot-artifact/1.0-alpha-1-SNAPSHOT/snapshot-artifact-1.0-alpha-1-20050611.202024-1.pom" ); - repoFile.setLastModified( getTimestampAsMillis( "20050611.202024" ) ); + Files.setLastModifiedTime(repoFile, FileTime.fromMillis(getTimestampAsMillis( "20050611.202024" ) )); - assertTrue( "Simple Test Repository should exist.", repoDir.exists() && repoDir.isDirectory() ); + assertTrue( "Simple Test Repository should exist.", Files.exists(repoDir) && Files.isDirectory(repoDir) ); return createRepository( "testSimpleRepo", "Test Simple Repository", repoDir ); } @@ -116,9 +118,9 @@ public class RepositoryScannerTest private ManagedRepository createLegacyRepository() { - File repoDir = Paths.get( System.getProperty( "basedir" ), "src/test/repositories/legacy-repository" ).toFile(); + Path repoDir = Paths.get( System.getProperty( "basedir" ), "src/test/repositories/legacy-repository" ); - assertTrue( "Legacy Test Repository should exist.", repoDir.exists() && repoDir.isDirectory() ); + assertTrue( "Legacy Test Repository should exist.", Files.exists(repoDir) && Files.isDirectory(repoDir) ); ManagedRepository repo = createRepository( "testLegacyRepo", "Test Legacy Repository", repoDir ); repo.setLayout( "legacy" ); diff --git a/archiva-modules/archiva-scheduler/archiva-scheduler-repository/src/main/java/org/apache/archiva/scheduler/repository/ArchivaRepositoryScanningTaskExecutor.java b/archiva-modules/archiva-scheduler/archiva-scheduler-repository/src/main/java/org/apache/archiva/scheduler/repository/ArchivaRepositoryScanningTaskExecutor.java index 1ac363cd9..87aaceb3e 100644 --- a/archiva-modules/archiva-scheduler/archiva-scheduler-repository/src/main/java/org/apache/archiva/scheduler/repository/ArchivaRepositoryScanningTaskExecutor.java +++ b/archiva-modules/archiva-scheduler/archiva-scheduler-repository/src/main/java/org/apache/archiva/scheduler/repository/ArchivaRepositoryScanningTaskExecutor.java @@ -113,7 +113,7 @@ public class 
ArchivaRepositoryScanningTaskExecutor if ( task.getResourceFile() != null ) { log.debug( "Executing task from queue with job name: {}", task ); - consumers.executeConsumers( arepo, task.getResourceFile(), task.isUpdateRelatedArtifacts() ); + consumers.executeConsumers( arepo, task.getResourceFile().toPath(), task.isUpdateRelatedArtifacts() ); } else { -- 2.39.5