1 package org.apache.maven.archiva.indexer.lucene;
4 * Copyright 2005-2006 The Apache Software Foundation.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.CharTokenizer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexModifier;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.index.TermEnum;
30 import org.apache.lucene.search.BooleanClause;
31 import org.apache.lucene.search.BooleanQuery;
32 import org.apache.lucene.search.Hits;
33 import org.apache.lucene.search.IndexSearcher;
34 import org.apache.lucene.search.MatchAllDocsQuery;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
37 import org.apache.maven.archiva.indexer.RepositoryIndexException;
38 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
39 import org.apache.maven.archiva.indexer.query.Query;
40 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
41 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
42 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
43 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
44 import org.apache.maven.artifact.Artifact;
45 import org.apache.maven.project.MavenProjectBuilder;
48 import java.io.IOException;
49 import java.io.Reader;
50 import java.text.ParseException;
51 import java.util.ArrayList;
52 import java.util.Collection;
53 import java.util.Iterator;
54 import java.util.LinkedHashSet;
55 import java.util.List;
60 * Lucene implementation of a repository index.
62 * @author <a href="mailto:brett@apache.org">Brett Porter</a>
64 public class LuceneRepositoryArtifactIndex
65 implements RepositoryArtifactIndex
68 * The location of the index on the file system.
70 private File indexLocation;
73 * Convert repository records to Lucene documents.
75 private LuceneIndexRecordConverter converter;
/**
 * Name of the index field holding a record's primary key. It is added to each document as an
 * untokenized, non-stored field and is the term used to delete documents and to list all keys.
 */
77 private static final String FLD_PK = "pk";
/**
 * The analyzer instance handed out by getAnalyzer(); static, so shared by all instances of this class.
 */
79 private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
/**
 * Only assigned by the three-argument constructor and only read by flushProjectBuilderCacheHack(),
 * which skips its work when this is null.
 */
81 private MavenProjectBuilder projectBuilder;
/**
 * Value of System.currentTimeMillis() recorded by the most recent addRecords/indexArtifacts call.
 * Static, so the timestamp is shared by all instances of this class.
 */
83 private static long lastUpdatedTime = 0;
/**
 * Creates an index over the given location without a project builder.
 *
 * @param indexPath the location of the index on the file system
 * @param converter converts between repository records and Lucene documents
 */
public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
{
    // No project builder: the cache-flushing hack used while indexing artifacts becomes a no-op.
    this( indexPath, converter, null );
}
/**
 * Creates an index over the given location that can also flush the project builder's caches
 * while artifacts are being indexed.
 *
 * @param indexLocation  the location of the index on the file system
 * @param converter      converts between repository records and Lucene documents
 * @param projectBuilder the project builder whose caches are flushed periodically during
 *                       artifact indexing; may be <code>null</code>
 */
public LuceneRepositoryArtifactIndex( File indexLocation, LuceneIndexRecordConverter converter,
                                      MavenProjectBuilder projectBuilder )
{
    this.projectBuilder = projectBuilder;
    this.converter = converter;
    this.indexLocation = indexLocation;
}
99 public void indexRecords( Collection records )
100 throws RepositoryIndexException
102 deleteRecords( records );
104 addRecords( records );
107 private void addRecords( Collection records )
108 throws RepositoryIndexException
110 IndexWriter indexWriter;
113 indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
115 catch ( IOException e )
117 throw new RepositoryIndexException( "Unable to open index", e );
122 for ( Iterator i = records.iterator(); i.hasNext(); )
124 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
126 if ( record != null )
128 Document document = converter.convert( record );
130 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
132 indexWriter.addDocument( document );
136 indexWriter.optimize();
138 catch ( IOException e )
140 throw new RepositoryIndexException( "Failed to add an index document", e );
144 closeQuietly( indexWriter );
145 lastUpdatedTime = System.currentTimeMillis();
149 public static Analyzer getAnalyzer()
151 return luceneAnalyzer;
/**
 * Analyzer that picks a tokenizer according to the name of the field being analyzed and hands
 * every field without a dedicated rule to Lucene's StandardAnalyzer.
 */
154 private static class LuceneAnalyzer
/** Fallback for fields that have no dedicated tokenizer below. */
157 private static final Analyzer STANDARD = new StandardAnalyzer();
/**
 * Builds the token stream for one field.
 *
 * @param field  the name of the field being analyzed
 * @param reader supplies the field's text
 * @return a field-specific CharTokenizer, or the standard analyzer's stream for any other field
 */
159 public TokenStream tokenStream( String field, final Reader reader )
161 // do not tokenize field called 'element'
// NOTE(review): the comment above speaks of 'element', but the branch tests the DEPENDENCIES field - confirm.
162 if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
164 return new CharTokenizer( reader )
// NOTE(review): the separator rule for DEPENDENCIES is not shown here - confirm.
166 protected boolean isTokenChar( char c )
172 else if ( StandardIndexRecordFields.FILES.equals( field ) )
174 return new CharTokenizer( reader )
176 protected boolean isTokenChar( char c )
// FILES: newline and '/' are separators, every other character belongs to a token.
178 return c != '\n' && c != '/';
183 if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
185 return new CharTokenizer( reader )
187 protected boolean isTokenChar( char c )
// CLASSES: newline and '.' are separators.
189 return c != '\n' && c != '.';
192 protected char normalize( char c )
// CLASSES tokens are lower-cased.
194 return Character.toLowerCase( c );
198 else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
200 return new CharTokenizer( reader )
// NOTE(review): the separator rule for GROUPID is not shown here - confirm.
202 protected boolean isTokenChar( char c )
207 protected char normalize( char c )
// GROUPID tokens are lower-cased.
209 return Character.toLowerCase( c );
213 else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
214 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
216 return new CharTokenizer( reader )
// NOTE(review): the separator rule for VERSION / BASE_VERSION is not shown here - confirm.
218 protected boolean isTokenChar( char c )
224 else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
225 MinimalIndexRecordFields.FILENAME.equals( field ) )
227 return new CharTokenizer( reader )
229 protected boolean isTokenChar( char c )
// FILENAME: '-', '.' and '/' are separators; case is left untouched (no normalize override is visible).
231 return c != '-' && c != '.' && c != '/';
237 // use standard analyzer
238 return STANDARD.tokenStream( field, reader );
243 public void deleteRecords( Collection records )
244 throws RepositoryIndexException
248 IndexReader indexReader = null;
251 indexReader = IndexReader.open( indexLocation );
253 for ( Iterator i = records.iterator(); i.hasNext(); )
255 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
257 if ( record != null )
259 Term term = new Term( FLD_PK, record.getPrimaryKey() );
261 indexReader.deleteDocuments( term );
265 catch ( IOException e )
267 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
271 closeQuietly( indexReader );
276 public Collection getAllRecords()
277 throws RepositoryIndexSearchException
279 return search( new LuceneQuery( new MatchAllDocsQuery() ) );
282 public Collection getAllRecordKeys()
283 throws RepositoryIndexException
285 return getAllFieldValues( FLD_PK );
288 private List getAllFieldValues( String fieldName )
289 throws RepositoryIndexException
291 List keys = new ArrayList();
295 IndexReader indexReader = null;
296 TermEnum terms = null;
299 indexReader = IndexReader.open( indexLocation );
301 terms = indexReader.terms( new Term( fieldName, "" ) );
302 while ( fieldName.equals( terms.term().field() ) )
304 keys.add( terms.term().text() );
312 catch ( IOException e )
314 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
318 closeQuietly( indexReader );
319 closeQuietly( terms );
325 public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
326 throws RepositoryIndexException
328 IndexModifier indexModifier = null;
331 indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
334 for ( Iterator i = artifacts.iterator(); i.hasNext(); count++ )
336 Artifact artifact = (Artifact) i.next();
337 RepositoryIndexRecord record = factory.createRecord( artifact );
339 if ( record != null )
341 Term term = new Term( FLD_PK, record.getPrimaryKey() );
343 indexModifier.deleteDocuments( term );
345 Document document = converter.convert( record );
347 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
349 indexModifier.addDocument( document );
352 if ( count % 100 == 0 )
354 // MNG-142 - the project builder retains a lot of objects in its inflexible cache. This is a hack
355 // around that. TODO: remove when it is configurable
356 flushProjectBuilderCacheHack();
359 indexModifier.optimize();
361 catch ( IOException e )
363 throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
367 closeQuietly( indexModifier );
368 lastUpdatedTime = System.currentTimeMillis();
372 public List getAllGroupIds()
373 throws RepositoryIndexException
375 return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
378 public List getArtifactIds( String groupId )
379 throws RepositoryIndexSearchException
381 return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
382 StandardIndexRecordFields.ARTIFACTID );
385 public List getVersions( String groupId, String artifactId )
386 throws RepositoryIndexSearchException
388 BooleanQuery query = new BooleanQuery();
389 query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
390 BooleanClause.Occur.MUST );
391 query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
392 BooleanClause.Occur.MUST );
394 return searchField( query, StandardIndexRecordFields.VERSION );
397 public long getLastUpdatedTime()
399 return lastUpdatedTime;
402 private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
403 throws RepositoryIndexSearchException
405 Set results = new LinkedHashSet();
407 IndexSearcher searcher;
410 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
412 catch ( IOException e )
414 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
419 Hits hits = searcher.search( luceneQuery );
420 for ( int i = 0; i < hits.length(); i++ )
422 Document doc = hits.doc( i );
424 results.add( doc.get( fieldName ) );
427 catch ( IOException e )
429 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
433 closeQuietly( searcher );
435 return new ArrayList( results );
/**
 * Reaches into the project builder through reflection to get hold of its "rawProjectCache" and
 * "processedProjectCache" maps. Does nothing when no project builder was supplied.
 * NOTE(review): the statements acting on the retrieved maps are not shown here - presumably each
 * map is cleared; confirm.
 *
 * @throws RuntimeException if either field does not exist on the builder's class or cannot be accessed
 */
438 private void flushProjectBuilderCacheHack()
442 if ( projectBuilder != null )
// getDeclaredField only finds fields declared by the builder's runtime class itself, not inherited ones.
444 java.lang.reflect.Field f = projectBuilder.getClass().getDeclaredField( "rawProjectCache" );
// Lift the access check so the non-public field can be read.
445 f.setAccessible( true );
446 Map cache = (Map) f.get( projectBuilder );
// Same procedure for the second cache.
449 f = projectBuilder.getClass().getDeclaredField( "processedProjectCache" );
450 f.setAccessible( true );
451 cache = (Map) f.get( projectBuilder );
// Reflection failures are rethrown unchecked.
455 catch ( NoSuchFieldException e )
457 throw new RuntimeException( e );
459 catch ( IllegalAccessException e )
461 throw new RuntimeException( e );
/**
 * Determines whether an index is present at the index location.
 * NOTE(review): the value returned by each non-throwing branch is not shown here - presumably
 * true only when Lucene reports an index; confirm.
 *
 * @return whether the index exists
 * @throws RepositoryIndexException if the location is a directory holding more than one entry that
 *                                  Lucene does not recognise as an index, or is not a directory
 */
465 public boolean exists()
466 throws RepositoryIndexException
// Lucene recognises an index at the location.
468 if ( IndexReader.indexExists( indexLocation ) )
// Nothing on disk at the location.
472 else if ( !indexLocation.exists() )
476 else if ( indexLocation.isDirectory() )
// A directory with more than one entry that is not an index is rejected.
// NOTE(review): File.listFiles() returns null on an I/O error, which would raise a NullPointerException here.
478 if ( indexLocation.listFiles().length > 1 )
480 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
// Presumably the final branch: the location exists but is not a directory.
489 throw new RepositoryIndexException( indexLocation + " is not a directory." );
493 public List search( Query query )
494 throws RepositoryIndexSearchException
496 LuceneQuery lQuery = (LuceneQuery) query;
498 org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
500 IndexSearcher searcher;
503 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
505 catch ( IOException e )
507 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
510 List records = new ArrayList();
513 Hits hits = searcher.search( luceneQuery );
514 for ( int i = 0; i < hits.length(); i++ )
516 Document doc = hits.doc( i );
518 records.add( converter.convert( doc ) );
521 catch ( IOException e )
523 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
525 catch ( ParseException e )
527 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
531 closeQuietly( searcher );
/**
 * Releases an index searcher after a search.
 * NOTE(review): the close call and the body of the IOException handler are not shown here; the
 * signature declares no checked exception, so a failure presumably goes unreported - confirm.
 *
 * @param searcher the searcher to release; null is tolerated
 */
537 private static void closeQuietly( IndexSearcher searcher )
// Nothing to do when no searcher was opened.
541 if ( searcher != null )
546 catch ( IOException e )
/**
 * Releases a term enumeration.
 * NOTE(review): the close call and the body of the IOException handler are not shown here, so it
 * is unclear whether the declared RepositoryIndexException is ever actually thrown - confirm.
 *
 * @param terms the enumeration to release
 * @throws RepositoryIndexException declared by the signature; see the note above
 */
552 private static void closeQuietly( TermEnum terms )
553 throws RepositoryIndexException
561 catch ( IOException e )
568 private static void closeQuietly( IndexWriter indexWriter )
569 throws RepositoryIndexException
573 if ( indexWriter != null )
578 catch ( IOException e )
580 // write should compain if it can't be closed, data probably not persisted
581 throw new RepositoryIndexException( e.getMessage(), e );
/**
 * Closes an index modifier, if there is one.
 * NOTE(review): the body of the IOException handler is not shown here. The signature declares no
 * checked exception, so a failed close presumably goes unreported. That differs from the
 * IndexWriter variant, which rethrows because unflushed data is probably lost - confirm this
 * difference is intended.
 *
 * @param indexModifier the modifier to close; may be null
 */
585 private static void closeQuietly( IndexModifier indexModifier )
// Nothing to do when the modifier was never opened.
587 if ( indexModifier != null )
591 indexModifier.close();
593 catch ( IOException e )
600 private static void closeQuietly( IndexReader reader )
604 if ( reader != null )
609 catch ( IOException e )