From 4ab1e3733c2b98bf7d0f1c314911ed5e88ff9bf2 Mon Sep 17 00:00:00 2001 From: Martin Stockhammer Date: Thu, 27 Feb 2020 22:33:37 +0100 Subject: [PATCH] Adding spliterator for assets --- .../repository/storage/AssetSpliterator.java | 206 ++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 archiva-modules/archiva-base/archiva-storage-api/src/main/java/org/apache/archiva/repository/storage/AssetSpliterator.java diff --git a/archiva-modules/archiva-base/archiva-storage-api/src/main/java/org/apache/archiva/repository/storage/AssetSpliterator.java b/archiva-modules/archiva-base/archiva-storage-api/src/main/java/org/apache/archiva/repository/storage/AssetSpliterator.java new file mode 100644 index 000000000..312615b7f --- /dev/null +++ b/archiva-modules/archiva-base/archiva-storage-api/src/main/java/org/apache/archiva/repository/storage/AssetSpliterator.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.archiva.repository.storage; + +import java.io.Closeable; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.Spliterator; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +/** + * + * Base Spliterator implementation for Storage Assets. The spliterator visits the tree by depth-first. + * For the non-concurrent usage it is guaranteed that children are visited before their + * parents. If the spliterator is used in a parallel stream, there is no guarantee for + * the order of returned assets. + * + * The estimated size is not accurate, because the tree paths are scanned on demand. + * + * @since 3.0 + * @author Martin Stockhammer + */ +public class AssetSpliterator implements Spliterator, Closeable +{ + public static final int DEFAULT_SPLIT_THRESHOLD = 2; + + // the linked list is used as stack + private LinkedList workList = new LinkedList<>( ); + private LinkedHashSet visitedContainers = new LinkedHashSet<>( ); + private long visited = 0; + private final int splitThreshold; + private static final int CHARACTERISTICS = Spliterator.DISTINCT|Spliterator.NONNULL; + + + AssetSpliterator( int splitThreshold, StorageAsset... assets) { + this.splitThreshold = splitThreshold; + Collections.addAll( this.workList, assets ); + } + + AssetSpliterator( StorageAsset... assets) { + this.splitThreshold = DEFAULT_SPLIT_THRESHOLD; + Collections.addAll( this.workList, assets ); + } + + AssetSpliterator() { + this.splitThreshold = DEFAULT_SPLIT_THRESHOLD; + } + + AssetSpliterator( int splitThreshold) { + this.splitThreshold = splitThreshold; + } + + + AssetSpliterator( int splitThreshold, Set visitedContainers) { + this.visitedContainers.addAll( visitedContainers ); + this.splitThreshold = splitThreshold; + } + + AssetSpliterator( List baseList, Set visitedContainers) { + this.workList.addAll(baseList); + this.visitedContainers.addAll( visitedContainers ); + this.splitThreshold = DEFAULT_SPLIT_THRESHOLD; + } + + private void add( StorageAsset asset) { + workList.addLast( asset ); + } + + + @Override + public void close( ) + { + this.workList.clear(); + this.workList=null; + this.visitedContainers.clear(); + this.visitedContainers=null; + } + + @Override + public boolean tryAdvance( Consumer action ) + { + try + { + StorageAsset asset = workList.getLast( ); + consumeAsset( action, asset ); + return true; + } catch (NoSuchElementException e) { + return false; + } + } + + private void consumeAsset( Consumer action, StorageAsset asset ) + { + // Traverse the path to the deepest descent (depth-first) + while(retrieveNextPath( asset )) { + asset = workList.getLast( ); + } + action.accept( workList.removeLast() ); + visited++; + } + + private boolean retrieveNextPath(StorageAsset parent) { + if (parent.isContainer() && !visitedContainers.contains( parent )) { + // Containers after files in stack guarantee the depth-first behaviour + workList.addAll( getChildFiles( parent ) ); + workList.addAll( getChildContainers( parent ) ); + visitedContainers.add( parent ); + return true; + } else { + return false; + } + } + + @Override + public void forEachRemaining( Consumer action ) + { + try + { + //noinspection InfiniteLoopStatement + while ( true ) + { + consumeAsset( action, workList.getLast( ) ); + } + } catch (NoSuchElementException e) { + // Should happen at the end. + } + } + + List getChildContainers( StorageAsset parent) { + return parent.list( ).stream( ).filter( StorageAsset::isContainer ).collect( Collectors.toList( ) ); + } + + List getChildFiles(StorageAsset parent) { + return parent.list( ).stream( ).filter( StorageAsset::isLeaf ).collect( Collectors.toList( ) ); + } + + + /** + * Splits by moving every second asset to the new spliterator. This allows to start both at similar + * tree depths. But it is not guaranteed that they start on the same depth. + * The split happens only, if the number of elements in the worklist is greater than 2. + * + * @return the new spliterator if the work list size is greater than 2 + */ + @Override + public Spliterator trySplit( ) + { + if (workList.size()>splitThreshold) { + // We use the elements alternately for the current and the new spliterator + // For the parallel scenario we cannot guarantee that children are visited + // before their parents + final LinkedList newWorkList = new LinkedList<>( ); + final AssetSpliterator newSpliterator = new AssetSpliterator( this.splitThreshold, visitedContainers ); + try { + //noinspection InfiniteLoopStatement + while (true) + { + newWorkList.add( workList.getFirst( ) ); + newSpliterator.add( workList.getFirst( ) ); + } + } catch (NoSuchElementException e) { + // + } + // Swap the worklist + this.workList = newWorkList; + return newSpliterator; + } else { + return null; + } + } + + @Override + public long estimateSize( ) + { + return workList.size()+visited; + } + + @Override + public int characteristics( ) + { + return CHARACTERISTICS; + } + + +} -- 2.39.5