source.dussan.org Git - archiva.git/blob

   1 package org.apache.maven.repository.indexing;
   2
   3 /*
   4  * Copyright 2005-2006 The Apache Software Foundation.
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *      http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import org.apache.lucene.analysis.Analyzer;
  20 import org.apache.lucene.analysis.SimpleAnalyzer;
  21 import org.apache.lucene.analysis.TokenStream;
  22 import org.apache.lucene.analysis.CharTokenizer;
  23 import org.apache.lucene.index.IndexReader;
  24 import org.apache.lucene.index.IndexWriter;
  25 import org.apache.lucene.index.Term;
  26 import org.apache.maven.artifact.repository.ArtifactRepository;
  27
  28 import java.io.File;
  29 import java.io.IOException;
  30 import java.io.Reader;
  31 import java.util.Collection;
  32
  33 /**
  34  * Abstract class for RepositoryIndexers
  35  *
  36  * @author Edwin Punzalan
  37  */
  38 public abstract class AbstractRepositoryIndex
  39     implements RepositoryIndex
  40 {
  41     private String indexPath;
  42
  43     private boolean indexOpen;
  44
  45     private IndexWriter indexWriter;
  46
  47     protected ArtifactRepository repository;
  48
  49     private Analyzer analyzer;
  50
  51     /**
  52      * Class constructor
  53      *
  54      * @param indexPath
  55      * @param repository
  56      * @throws RepositoryIndexException
  57      */
  58     protected AbstractRepositoryIndex( String indexPath, ArtifactRepository repository )
  59         throws RepositoryIndexException
  60     {
  61         this.repository = repository;
  62         this.indexPath = indexPath;
  63     }
  64
  65     /**
  66      * Method to open the IndexWriter
  67      *
  68      * @throws RepositoryIndexException
  69      */
  70     public void open()
  71         throws RepositoryIndexException
  72     {
  73         try
  74         {
  75             if ( indexExists() )
  76             {
  77                 indexWriter = new IndexWriter( indexPath, getAnalyzer(), false );
  78             }
  79             else
  80             {
  81                 indexWriter = new IndexWriter( indexPath, getAnalyzer(), true );
  82             }
  83         }
  84         catch ( IOException ie )
  85         {
  86             throw new RepositoryIndexException( ie );
  87         }
  88         indexOpen = true;
  89     }
  90
  91     /**
  92      * @see org.apache.maven.repository.indexing.RepositoryIndex#optimize()
  93      */
  94     public void optimize()
  95         throws RepositoryIndexException
  96     {
  97         if ( !indexOpen )
  98         {
  99             throw new RepositoryIndexException( "Unable to optimize index on a closed index" );
 100         }
 101
 102         try
 103         {
 104             indexWriter.optimize();
 105         }
 106         catch ( IOException ioe )
 107         {
 108             throw new RepositoryIndexException( "Failed to optimize index", ioe );
 109         }
 110     }
 111
 112     /**
 113      * @see org.apache.maven.repository.indexing.RepositoryIndex#isOpen()
 114      */
 115     public boolean isOpen()
 116     {
 117         return indexOpen;
 118     }
 119
 120     /**
 121      * @see org.apache.maven.repository.indexing.RepositoryIndex#close()
 122      */
 123     public void close()
 124         throws RepositoryIndexException
 125     {
 126         try
 127         {
 128             if ( indexWriter != null )
 129             {
 130                 indexWriter.close();
 131                 indexWriter = null;
 132             }
 133
 134             indexOpen = false;
 135         }
 136         catch ( IOException e )
 137         {
 138             throw new RepositoryIndexException( e.getMessage(), e );
 139         }
 140     }
 141
 142     /**
 143      * @see org.apache.maven.repository.indexing.RepositoryIndex#getIndexPath()
 144      */
 145     public String getIndexPath()
 146     {
 147         return indexPath;
 148     }
 149
 150     /**
 151      * Method to retrieve the lucene IndexWriter used in creating/updating the index
 152      *
 153      * @return the lucene IndexWriter object used to update the index
 154      * @throws IOException
 155      */
 156     protected IndexWriter getIndexWriter()
 157         throws IOException
 158     {
 159         if ( indexWriter == null )
 160         {
 161             indexWriter = new IndexWriter( indexPath, getAnalyzer(), false );
 162         }
 163         return indexWriter;
 164     }
 165
 166     /**
 167      * method for validating an index directory
 168      *
 169      * @param indexFields
 170      * @throws RepositoryIndexException if the given indexPath is not valid for this type of RepositoryIndex
 171      */
 172     protected void validateIndex( String[] indexFields )
 173         throws RepositoryIndexException, IOException
 174     {
 175         IndexReader indexReader = IndexReader.open( indexPath );
 176         try
 177         {
 178             if ( indexReader.numDocs() > 0 )
 179             {
 180                 Collection fields = indexReader.getFieldNames();
 181                 for ( int idx = 0; idx < indexFields.length; idx++ )
 182                 {
 183                     if ( !fields.contains( indexFields[idx] ) )
 184                     {
 185                         throw new RepositoryIndexException(
 186                             "The Field " + indexFields[idx] + " does not exist in index " + indexPath + "." );
 187                     }
 188                 }
 189             }
 190         }
 191         finally
 192         {
 193             indexReader.close();
 194         }
 195     }
 196
 197     /**
 198      * @see org.apache.maven.repository.indexing.RepositoryIndex#getRepository()
 199      */
 200     public ArtifactRepository getRepository()
 201     {
 202         return repository;
 203     }
 204
 205     /**
 206      * Delete the document(s) that contains the specified value on the specified field.
 207      *
 208      * @param field
 209      * @param value
 210      * @throws RepositoryIndexException
 211      * @throws IOException
 212      */
 213     protected void deleteDocument( String field, String value )
 214         throws RepositoryIndexException, IOException
 215     {
 216         IndexReader indexReader = null;
 217         try
 218         {
 219             indexReader = IndexReader.open( indexPath );
 220             indexReader.delete( new Term( field, value ) );
 221         }
 222         catch ( IOException ie )
 223         {
 224             throw new RepositoryIndexException( indexPath + "is not a valid directory." );
 225         }
 226         finally
 227         {
 228             if ( indexReader != null )
 229             {
 230                 indexReader.close();
 231             }
 232         }
 233     }
 234
 235     /**
 236      * Check if the index already exists.
 237      *
 238      * @return true if the index already exists
 239      * @throws IOException
 240      * @throws RepositoryIndexException
 241      */
 242     protected boolean indexExists()
 243         throws IOException, RepositoryIndexException
 244     {
 245         File indexDir = new File( indexPath );
 246
 247         if ( IndexReader.indexExists( indexDir ) )
 248         {
 249             return true;
 250         }
 251         else if ( !indexDir.exists() )
 252         {
 253             return false;
 254         }
 255         else if ( indexDir.isDirectory() )
 256         {
 257             throw new RepositoryIndexException( indexPath + " is not a valid index directory." );
 258         }
 259         else
 260         {
 261             throw new RepositoryIndexException( indexPath + " is not a directory." );
 262         }
 263     }
 264
 265     /**
 266      * Checks if the object has already been indexed and deletes it if it is.
 267      *
 268      * @param object the object to be indexed.
 269      * @throws RepositoryIndexException
 270      * @throws IOException
 271      */
 272     abstract void deleteIfIndexed( Object object )
 273         throws RepositoryIndexException, IOException;
 274
 275     /**
 276      * @see org.apache.maven.repository.indexing.RepositoryIndex#getAnalyzer()
 277      */
 278     public Analyzer getAnalyzer()
 279     {
 280         if ( analyzer == null )
 281         {
 282             analyzer = new ArtifactRepositoryIndexAnalyzer( new SimpleAnalyzer() );
 283         }
 284
 285         return analyzer;
 286     }
 287
 288     /**
 289      * @see RepositoryIndex#isKeywordField(String)
 290      */
 291     public boolean isKeywordField( String field )
 292     {
 293         return KEYWORD_FIELDS.contains( field );
 294     }
 295
 296     private class ArtifactRepositoryIndexAnalyzer
 297         extends Analyzer
 298     {
 299         private Analyzer defaultAnalyzer;
 300
 301         /**
 302          * constructor to for this analyzer
 303          *
 304          * @param defaultAnalyzer the analyzer to use as default for the general fields of the artifact indeces
 305          */
 306         public ArtifactRepositoryIndexAnalyzer( Analyzer defaultAnalyzer )
 307         {
 308             this.defaultAnalyzer = defaultAnalyzer;
 309         }
 310
 311         /**
 312          * Method called by lucence during indexing operations
 313          *
 314          * @param fieldName the field name that the lucene object is currently processing
 315          * @param reader    a Reader object to the index stream
 316          * @return an analyzer to specific to the field name or the default analyzer if none is present
 317          */
 318         public TokenStream tokenStream( String fieldName, Reader reader )
 319         {
 320             TokenStream tokenStream;
 321
 322             if ( RepositoryIndex.FLD_VERSION.equals( fieldName ) || RepositoryIndex.FLD_LASTUPDATE.equals( fieldName ) )
 323             {
 324                 tokenStream = new VersionTokenizer( reader );
 325             }
 326             else
 327             {
 328                 tokenStream = defaultAnalyzer.tokenStream( fieldName, reader );
 329             }
 330
 331             return tokenStream;
 332         }
 333
 334         /**
 335          * Class used to tokenize an artifact's version.
 336          */
 337         private class VersionTokenizer
 338             extends CharTokenizer
 339         {
 340             /**
 341              * Constructor with the required reader to the index stream
 342              *
 343              * @param reader the Reader object of the index stream
 344              */
 345             VersionTokenizer( Reader reader )
 346             {
 347                 super( reader );
 348             }
 349
 350             /**
 351              * method that lucene calls to check tokenization of a stream character
 352              *
 353              * @param character char currently being processed
 354              * @return true if the char is a token, false if the char is a stop char
 355              */
 356             protected boolean isTokenChar( char character )
 357             {
 358                 return character != '.' && character != '-';
 359             }
 360         }
 361     }
 362 }