source.dussan.org Git - archiva.git/blob

   1 package org.apache.maven.repository.indexing;
   2
   3 /*
   4  * Copyright 2005-2006 The Apache Software Foundation.
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *      http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import org.apache.lucene.analysis.Analyzer;
  20 import org.apache.lucene.analysis.CharTokenizer;
  21 import org.apache.lucene.analysis.SimpleAnalyzer;
  22 import org.apache.lucene.analysis.TokenStream;
  23 import org.apache.lucene.index.IndexReader;
  24 import org.apache.lucene.index.IndexWriter;
  25 import org.apache.lucene.index.Term;
  26 import org.apache.maven.artifact.repository.ArtifactRepository;
  27
  28 import java.io.File;
  29 import java.io.IOException;
  30 import java.io.Reader;
  31 import java.util.Collection;
  32 import java.util.zip.ZipEntry;
  33
  34 /**
  35  * Abstract class for RepositoryIndexers.
  36  *
  37  * @author Edwin Punzalan
  38  * @todo [BP] overall am not happy with the design of this class and subclasses, but will refactor over time based on how it is used and by assessing how this affects Lucene's performance
  39  */
  40 public abstract class AbstractRepositoryIndex
  41     implements RepositoryIndex
  42 {
  43     // TODO: can this be derived from the repository? -- probably a sensible default, but still should be configurable, but this could just be on the call to open()
  44     private File indexPath;
  45
  46     private boolean indexOpen;
  47
  48     // TODO: why is the writer open for the life, but not the reader? why keep them open that length of time anyway? investigate best practices in Lucene
  49     private IndexWriter indexWriter;
  50
  51     protected ArtifactRepository repository;
  52
  53     private Analyzer analyzer;
  54
  55     /**
  56      * Class constructor
  57      *
  58      * @param indexPath
  59      * @param repository
  60      */
  61     protected AbstractRepositoryIndex( File indexPath, ArtifactRepository repository )
  62     {
  63         this.repository = repository;
  64         this.indexPath = indexPath;
  65     }
  66
  67     /**
  68      * Method to open the IndexWriter
  69      *
  70      * @throws RepositoryIndexException
  71      */
  72     public void open()
  73         throws RepositoryIndexException
  74     {
  75         try
  76         {
  77             if ( indexExists() )
  78             {
  79                 indexWriter = new IndexWriter( indexPath, getAnalyzer(), false );
  80             }
  81             else
  82             {
  83                 indexWriter = new IndexWriter( indexPath, getAnalyzer(), true );
  84             }
  85         }
  86         catch ( IOException ie )
  87         {
  88             throw new RepositoryIndexException( ie );
  89         }
  90         indexOpen = true;
  91     }
  92
  93     /**
  94      * @see org.apache.maven.repository.indexing.RepositoryIndex#optimize()
  95      */
  96     public void optimize()
  97         throws RepositoryIndexException
  98     {
  99         if ( !indexOpen )
 100         {
 101             throw new RepositoryIndexException( "Unable to optimize index on a closed index" );
 102         }
 103
 104         try
 105         {
 106             indexWriter.optimize();
 107         }
 108         catch ( IOException ioe )
 109         {
 110             throw new RepositoryIndexException( "Failed to optimize index", ioe );
 111         }
 112     }
 113
 114     /**
 115      * @see org.apache.maven.repository.indexing.RepositoryIndex#isOpen()
 116      */
 117     public boolean isOpen()
 118     {
 119         return indexOpen;
 120     }
 121
 122     /**
 123      * @see org.apache.maven.repository.indexing.RepositoryIndex#close()
 124      */
 125     public void close()
 126         throws RepositoryIndexException
 127     {
 128         try
 129         {
 130             if ( indexWriter != null )
 131             {
 132                 indexWriter.close();
 133                 indexWriter = null;
 134             }
 135
 136             indexOpen = false;
 137         }
 138         catch ( IOException e )
 139         {
 140             throw new RepositoryIndexException( e.getMessage(), e );
 141         }
 142     }
 143
 144     /**
 145      * @see org.apache.maven.repository.indexing.RepositoryIndex#getIndexPath()
 146      */
 147     public File getIndexPath()
 148     {
 149         return indexPath;
 150     }
 151
 152     /**
 153      * Method to retrieve the lucene IndexWriter used in creating/updating the index
 154      *
 155      * @return the lucene IndexWriter object used to update the index
 156      * @throws IOException
 157      */
 158     protected IndexWriter getIndexWriter()
 159         throws IOException
 160     {
 161         // TODO: why is this allowed to be called before open()?
 162         if ( indexWriter == null )
 163         {
 164             indexWriter = new IndexWriter( indexPath, getAnalyzer(), false );
 165         }
 166         return indexWriter;
 167     }
 168
 169     /**
 170      * method for validating an index directory
 171      *
 172      * @param indexFields
 173      * @throws RepositoryIndexException if the given indexPath is not valid for this type of RepositoryIndex
 174      */
 175     protected void validateIndex( String[] indexFields )
 176         throws RepositoryIndexException, IOException
 177     {
 178         IndexReader indexReader = IndexReader.open( indexPath );
 179         try
 180         {
 181             if ( indexReader.numDocs() > 0 )
 182             {
 183                 Collection fields = indexReader.getFieldNames();
 184                 for ( int idx = 0; idx < indexFields.length; idx++ )
 185                 {
 186                     if ( !fields.contains( indexFields[idx] ) )
 187                     {
 188                         throw new RepositoryIndexException(
 189                             "The Field " + indexFields[idx] + " does not exist in index " + indexPath + "." );
 190                     }
 191                 }
 192             }
 193         }
 194         finally
 195         {
 196             indexReader.close();
 197         }
 198     }
 199
 200     /**
 201      * @see org.apache.maven.repository.indexing.RepositoryIndex#getRepository()
 202      */
 203     public ArtifactRepository getRepository()
 204     {
 205         return repository;
 206     }
 207
 208     /**
 209      * Delete the document(s) that contains the specified value on the specified field.
 210      *
 211      * @param field
 212      * @param value
 213      * @throws RepositoryIndexException
 214      * @throws IOException
 215      */
 216     protected void deleteDocument( String field, String value )
 217         throws RepositoryIndexException, IOException
 218     {
 219         IndexReader indexReader = null;
 220         try
 221         {
 222             indexReader = IndexReader.open( indexPath );
 223             indexReader.delete( new Term( field, value ) );
 224         }
 225         catch ( IOException ie )
 226         {
 227             throw new RepositoryIndexException( indexPath + " is not a valid directory." );
 228         }
 229         finally
 230         {
 231             if ( indexReader != null )
 232             {
 233                 indexReader.close();
 234             }
 235         }
 236     }
 237
 238     /**
 239      * Check if the index already exists.
 240      *
 241      * @return true if the index already exists
 242      * @throws RepositoryIndexException
 243      */
 244     protected boolean indexExists()
 245         throws RepositoryIndexException
 246     {
 247         if ( IndexReader.indexExists( indexPath ) )
 248         {
 249             return true;
 250         }
 251         else if ( !indexPath.exists() )
 252         {
 253             return false;
 254         }
 255         else if ( indexPath.isDirectory() )
 256         {
 257             if ( indexPath.listFiles().length > 1 )
 258             {
 259                 throw new RepositoryIndexException( indexPath + " is not a valid index directory." );
 260             }
 261             else
 262             {
 263                 return false;
 264             }
 265         }
 266         else
 267         {
 268             throw new RepositoryIndexException( indexPath + " is not a directory." );
 269         }
 270     }
 271
 272     /**
 273      * Checks if the object has already been indexed and deletes it if it is.
 274      *
 275      * @param object the object to be indexed.
 276      * @throws RepositoryIndexException
 277      * @throws IOException
 278      */
 279     abstract void deleteIfIndexed( Object object )
 280         throws RepositoryIndexException, IOException;
 281
 282     /**
 283      * @see org.apache.maven.repository.indexing.RepositoryIndex#getAnalyzer()
 284      */
 285     public Analyzer getAnalyzer()
 286     {
 287         if ( analyzer == null )
 288         {
 289             analyzer = new ArtifactRepositoryIndexAnalyzer( new SimpleAnalyzer() );
 290         }
 291
 292         return analyzer;
 293     }
 294
 295     /**
 296      * @see RepositoryIndex#isKeywordField(String)
 297      */
 298     public boolean isKeywordField( String field )
 299     {
 300         return KEYWORD_FIELDS.contains( field );
 301     }
 302
 303     /**
 304      * Method to test a zip entry if it is a java class, and adds it to the classes buffer
 305      *
 306      * @param entry   the zip entry to test for java class
 307      * @param classes the String buffer to add the java class if the test result as true
 308      * @return true if the zip entry is a java class and was successfully added to the buffer
 309      */
 310     protected boolean addIfClassEntry( ZipEntry entry, StringBuffer classes )
 311     {
 312         boolean isAdded = false;
 313
 314         String name = entry.getName();
 315         if ( name.endsWith( ".class" ) )
 316         {
 317             // TODO verify if class is public or protected
 318             if ( name.lastIndexOf( "$" ) == -1 )
 319             {
 320                 int idx = name.lastIndexOf( '/' );
 321                 if ( idx < 0 )
 322                 {
 323                     idx = 0;
 324                 }
 325                 String classname = name.substring( idx + 1, name.length() - 6 );
 326                 classes.append( classname ).append( "\n" );
 327                 isAdded = true;
 328             }
 329         }
 330
 331         return isAdded;
 332     }
 333
 334     private static class ArtifactRepositoryIndexAnalyzer
 335         extends Analyzer
 336     {
 337         private Analyzer defaultAnalyzer;
 338
 339         /**
 340          * constructor to for this analyzer
 341          *
 342          * @param defaultAnalyzer the analyzer to use as default for the general fields of the artifact indeces
 343          */
 344         ArtifactRepositoryIndexAnalyzer( Analyzer defaultAnalyzer )
 345         {
 346             this.defaultAnalyzer = defaultAnalyzer;
 347         }
 348
 349         /**
 350          * Method called by lucence during indexing operations
 351          *
 352          * @param fieldName the field name that the lucene object is currently processing
 353          * @param reader    a Reader object to the index stream
 354          * @return an analyzer to specific to the field name or the default analyzer if none is present
 355          */
 356         public TokenStream tokenStream( String fieldName, Reader reader )
 357         {
 358             TokenStream tokenStream;
 359
 360             if ( RepositoryIndex.FLD_VERSION.equals( fieldName ) || RepositoryIndex.FLD_LASTUPDATE.equals( fieldName ) )
 361             {
 362                 tokenStream = new VersionTokenizer( reader );
 363             }
 364             else
 365             {
 366                 tokenStream = defaultAnalyzer.tokenStream( fieldName, reader );
 367             }
 368
 369             return tokenStream;
 370         }
 371     }
 372
 373     /**
 374      * Class used to tokenize an artifact's version.
 375      */
 376     private static class VersionTokenizer
 377         extends CharTokenizer
 378     {
 379         /**
 380          * Constructor with the required reader to the index stream
 381          *
 382          * @param reader the Reader object of the index stream
 383          */
 384         VersionTokenizer( Reader reader )
 385         {
 386             super( reader );
 387         }
 388
 389         /**
 390          * method that lucene calls to check tokenization of a stream character
 391          *
 392          * @param character char currently being processed
 393          * @return true if the char is a token, false if the char is a stop char
 394          */
 395         protected boolean isTokenChar( char character )
 396         {
 397             return character != '.' && character != '-';
 398         }
 399     }
 400 }