1 package org.apache.maven.archiva.indexer.lucene;
4 * Copyright 2005-2006 The Apache Software Foundation.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.CharTokenizer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexModifier;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.index.TermEnum;
30 import org.apache.lucene.search.Hits;
31 import org.apache.lucene.search.IndexSearcher;
32 import org.apache.lucene.search.MatchAllDocsQuery;
33 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
34 import org.apache.maven.archiva.indexer.RepositoryIndexException;
35 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
36 import org.apache.maven.archiva.indexer.query.Query;
37 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
38 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
39 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
40 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
41 import org.apache.maven.artifact.Artifact;
42 import org.apache.maven.project.MavenProjectBuilder;
45 import java.io.IOException;
46 import java.io.Reader;
47 import java.text.ParseException;
48 import java.util.ArrayList;
49 import java.util.Collection;
50 import java.util.HashSet;
51 import java.util.Iterator;
52 import java.util.List;
57 * Lucene implementation of a repository index.
59 * @author <a href="mailto:brett@apache.org">Brett Porter</a>
61 public class LuceneRepositoryArtifactIndex
62 implements RepositoryArtifactIndex
65 * The location of the index on the file system.
67 private File indexLocation;
70 * Convert repository records to Lucene documents.
72 private LuceneIndexRecordConverter converter;
// Name of the Lucene field holding each record's primary key. It is added to every
// document that is written and is used to build the terms for deleting documents
// and for enumerating the stored keys.
74 private static final String FLD_PK = "pk";
// Single analyzer instance shared by all indexes; returned by getAnalyzer() and
// handed to the IndexWriter / IndexModifier when they are opened.
76 private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
// Optional project builder, only assigned by the three-argument constructor.
// When it is set, flushProjectBuilderCacheHack() reaches into it reflectively.
78 private MavenProjectBuilder projectBuilder;
/**
 * Creates an index backed by the given location, without a project builder.
 *
 * @param indexPath the location of the index on the file system
 * @param converter converts between repository records and Lucene documents
 */
public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
{
    // No project builder: the cache flushing hack becomes a no-op for this instance.
    this( indexPath, converter, null );
}
/**
 * Creates an index backed by the given location.
 *
 * @param indexLocation  the location of the index on the file system
 * @param converter      converts between repository records and Lucene documents
 * @param projectBuilder project builder whose caches are cleared periodically while artifacts
 *                       are being indexed; may be <code>null</code>
 */
public LuceneRepositoryArtifactIndex( File indexLocation, LuceneIndexRecordConverter converter,
                                      MavenProjectBuilder projectBuilder )
{
    this.projectBuilder = projectBuilder;
    this.converter = converter;
    this.indexLocation = indexLocation;
}
94 public void indexRecords( Collection records )
95 throws RepositoryIndexException
97 deleteRecords( records );
99 addRecords( records );
102 private void addRecords( Collection records )
103 throws RepositoryIndexException
105 IndexWriter indexWriter;
108 indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
110 catch ( IOException e )
112 throw new RepositoryIndexException( "Unable to open index", e );
117 for ( Iterator i = records.iterator(); i.hasNext(); )
119 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
121 if ( record != null )
123 Document document = converter.convert( record );
125 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
127 indexWriter.addDocument( document );
131 indexWriter.optimize();
133 catch ( IOException e )
135 throw new RepositoryIndexException( "Failed to add an index document", e );
139 closeQuietly( indexWriter );
143 public static Analyzer getAnalyzer()
145 return luceneAnalyzer;
// Analyzer that selects a tokenizer according to the name of the field being analyzed.
// Fields with no special rule fall through to a StandardAnalyzer.
// NOTE(review): no "extends Analyzer" clause is visible in this listing although an instance is
// assigned to an Analyzer field - confirm against the full file.
148 private static class LuceneAnalyzer
// Fallback analyzer for every field that has no dedicated tokenizer below.
151 private static final Analyzer STANDARD = new StandardAnalyzer();
// Returns the token stream for the given field name, reading the field text from reader.
153 public TokenStream tokenStream( String field, final Reader reader )
155 // NOTE(review): this comment used to read "do not tokenize field called 'element'", but the check below is on the DEPENDENCIES field - confirm which is intended
156 if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
158 return new CharTokenizer( reader )
// NOTE(review): the body of this isTokenChar is not visible in this listing.
160 protected boolean isTokenChar( char c )
// FILES: a token is any run of characters that contains neither a newline nor a '/'.
166 else if ( StandardIndexRecordFields.FILES.equals( field ) )
168 return new CharTokenizer( reader )
170 protected boolean isTokenChar( char c )
172 return c != '\n' && c != '/';
// CLASSES (standard and minimal records): tokens are split on newlines and '.',
// and every character is lower-cased.
177 if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
179 return new CharTokenizer( reader )
181 protected boolean isTokenChar( char c )
183 return c != '\n' && c != '.';
186 protected char normalize( char c )
188 return Character.toLowerCase( c );
// GROUPID: characters are lower-cased.
192 else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
194 return new CharTokenizer( reader )
// NOTE(review): the body of this isTokenChar is not visible in this listing.
196 protected boolean isTokenChar( char c )
201 protected char normalize( char c )
203 return Character.toLowerCase( c );
// VERSION and BASE_VERSION share one tokenizer.
207 else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
208 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
210 return new CharTokenizer( reader )
// NOTE(review): the body of this isTokenChar is not visible in this listing.
212 protected boolean isTokenChar( char c )
// FILENAME (standard and minimal records): tokens are split on '-', '.' and '/'.
218 else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
219 MinimalIndexRecordFields.FILENAME.equals( field ) )
221 return new CharTokenizer( reader )
223 protected boolean isTokenChar( char c )
225 return c != '-' && c != '.' && c != '/';
231 // use standard analyzer
232 return STANDARD.tokenStream( field, reader );
237 public void deleteRecords( Collection records )
238 throws RepositoryIndexException
242 IndexReader indexReader = null;
245 indexReader = IndexReader.open( indexLocation );
247 for ( Iterator i = records.iterator(); i.hasNext(); )
249 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
251 if ( record != null )
253 Term term = new Term( FLD_PK, record.getPrimaryKey() );
255 indexReader.deleteDocuments( term );
259 catch ( IOException e )
261 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
265 closeQuietly( indexReader );
270 public Collection getAllRecords()
271 throws RepositoryIndexSearchException
273 return search( new LuceneQuery( new MatchAllDocsQuery() ) );
276 public Collection getAllRecordKeys()
277 throws RepositoryIndexException
279 Set keys = new HashSet();
283 IndexReader indexReader = null;
284 TermEnum terms = null;
287 indexReader = IndexReader.open( indexLocation );
289 terms = indexReader.terms( new Term( FLD_PK, "" ) );
290 while ( FLD_PK.equals( terms.term().field() ) )
292 keys.add( terms.term().text() );
300 catch ( IOException e )
302 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
306 closeQuietly( indexReader );
307 closeQuietly( terms );
313 public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
314 throws RepositoryIndexException
316 IndexModifier indexModifier = null;
319 indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
322 for ( Iterator i = artifacts.iterator(); i.hasNext(); count++ )
324 Artifact artifact = (Artifact) i.next();
325 RepositoryIndexRecord record = factory.createRecord( artifact );
327 if ( record != null )
329 Term term = new Term( FLD_PK, record.getPrimaryKey() );
331 indexModifier.deleteDocuments( term );
333 Document document = converter.convert( record );
335 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
337 indexModifier.addDocument( document );
340 if ( count % 100 == 0 )
342 // MNG-142 - the project builder retains a lot of objects in its inflexible cache. This is a hack
343 // around that. TODO: remove when it is configurable
344 flushProjectBuilderCacheHack();
347 indexModifier.optimize();
349 catch ( IOException e )
351 throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
355 closeQuietly( indexModifier );
359 private void flushProjectBuilderCacheHack()
363 if ( projectBuilder != null )
365 java.lang.reflect.Field f = projectBuilder.getClass().getDeclaredField( "rawProjectCache" );
366 f.setAccessible( true );
367 Map cache = (Map) f.get( projectBuilder );
370 f = projectBuilder.getClass().getDeclaredField( "processedProjectCache" );
371 f.setAccessible( true );
372 cache = (Map) f.get( projectBuilder );
376 catch ( NoSuchFieldException e )
378 throw new RuntimeException( e );
380 catch ( IllegalAccessException e )
382 throw new RuntimeException( e );
386 public boolean exists()
387 throws RepositoryIndexException
389 if ( IndexReader.indexExists( indexLocation ) )
393 else if ( !indexLocation.exists() )
397 else if ( indexLocation.isDirectory() )
399 if ( indexLocation.listFiles().length > 1 )
401 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
410 throw new RepositoryIndexException( indexLocation + " is not a directory." );
414 public List search( Query query )
415 throws RepositoryIndexSearchException
417 LuceneQuery lQuery = (LuceneQuery) query;
419 org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
421 IndexSearcher searcher;
424 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
426 catch ( IOException e )
428 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
431 List records = new ArrayList();
434 Hits hits = searcher.search( luceneQuery );
435 for ( int i = 0; i < hits.length(); i++ )
437 Document doc = hits.doc( i );
439 records.add( converter.convert( doc ) );
442 catch ( IOException e )
444 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
446 catch ( ParseException e )
448 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
452 closeQuietly( searcher );
458 private static void closeQuietly( IndexSearcher searcher )
462 if ( searcher != null )
467 catch ( IOException e )
// Closes a term enumeration; used by getAllRecordKeys() once the primary keys have been read.
// NOTE(review): the method declares RepositoryIndexException, but the body of the IOException
// handler is not visible in this listing - confirm whether a close failure is rethrown or ignored.
473 private static void closeQuietly( TermEnum terms )
474 throws RepositoryIndexException
482 catch ( IOException e )
489 private static void closeQuietly( IndexWriter indexWriter )
490 throws RepositoryIndexException
494 if ( indexWriter != null )
499 catch ( IOException e )
501 // write should compain if it can't be closed, data probably not persisted
502 throw new RepositoryIndexException( e.getMessage(), e );
506 private static void closeQuietly( IndexModifier indexModifier )
508 if ( indexModifier != null )
512 indexModifier.close();
514 catch ( IOException e )
521 private static void closeQuietly( IndexReader reader )
525 if ( reader != null )
530 catch ( IOException e )