1 package org.apache.maven.archiva.indexer.lucene;
4 * Copyright 2005-2006 The Apache Software Foundation.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.CharTokenizer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexModifier;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.index.TermEnum;
30 import org.apache.lucene.search.BooleanClause;
31 import org.apache.lucene.search.BooleanQuery;
32 import org.apache.lucene.search.Hits;
33 import org.apache.lucene.search.IndexSearcher;
34 import org.apache.lucene.search.MatchAllDocsQuery;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
37 import org.apache.maven.archiva.indexer.RepositoryIndexException;
38 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
39 import org.apache.maven.archiva.indexer.query.Query;
40 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
41 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
42 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
43 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
44 import org.apache.maven.artifact.Artifact;
47 import java.io.IOException;
48 import java.io.Reader;
49 import java.text.ParseException;
50 import java.util.ArrayList;
51 import java.util.Collection;
52 import java.util.Iterator;
53 import java.util.LinkedHashSet;
54 import java.util.List;
58 * Lucene implementation of a repository index.
60 * @author <a href="mailto:brett@apache.org">Brett Porter</a>
62 public class LuceneRepositoryArtifactIndex
63 implements RepositoryArtifactIndex
66 * The location of the index on the file system.
68 private File indexLocation;
71 * Convert repository records to Lucene documents.
73 private LuceneIndexRecordConverter converter;
75 private static final String FLD_PK = "pk";
77 private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
79 private static long lastUpdatedTime = 0;
/**
 * Creates an index that lives in the given location.
 *
 * @param indexPath the location of the index on the file system
 * @param converter the converter used to translate between repository records and Lucene documents
 */
public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
{
    this.converter = converter;
    this.indexLocation = indexPath;
}
87 public void indexRecords( Collection records )
88 throws RepositoryIndexException
90 deleteRecords( records );
92 addRecords( records );
95 private void addRecords( Collection records )
96 throws RepositoryIndexException
98 IndexWriter indexWriter;
101 indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
103 catch ( IOException e )
105 throw new RepositoryIndexException( "Unable to open index", e );
110 for ( Iterator i = records.iterator(); i.hasNext(); )
112 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
114 if ( record != null )
116 Document document = converter.convert( record );
118 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
120 indexWriter.addDocument( document );
124 indexWriter.optimize();
126 catch ( IOException e )
128 throw new RepositoryIndexException( "Failed to add an index document", e );
132 closeQuietly( indexWriter );
133 lastUpdatedTime = System.currentTimeMillis();
137 public static Analyzer getAnalyzer()
139 return luceneAnalyzer;
// Field-aware analyzer: tokenStream() selects a character tokenizer based on the name of the field being
// analyzed and hands every other field to a shared StandardAnalyzer.
142 private static class LuceneAnalyzer
// fallback for any field that has no dedicated tokenizer in tokenStream()
145 private static final Analyzer STANDARD = new StandardAnalyzer();
// Returns the token stream for the named field, reading the field content from the given reader.
147 public TokenStream tokenStream( String field, final Reader reader )
// NOTE(review): the next comment mentions a field called 'element', but the condition tests the dependencies field
149 // do not tokenize field called 'element'
150 if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
152 return new CharTokenizer( reader )
// NOTE(review): the body of this predicate is not visible in this view - confirm which characters delimit tokens
154 protected boolean isTokenChar( char c )
// files field: newline and '/' are not token characters
160 else if ( StandardIndexRecordFields.FILES.equals( field ) )
162 return new CharTokenizer( reader )
164 protected boolean isTokenChar( char c )
166 return c != '\n' && c != '/';
// classes field (standard and minimal records): newline and '.' are not token characters
171 if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
173 return new CharTokenizer( reader )
175 protected boolean isTokenChar( char c )
177 return c != '\n' && c != '.';
// characters of class name tokens are lower-cased
180 protected char normalize( char c )
182 return Character.toLowerCase( c );
// group id field: characters are lower-cased
186 else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
188 return new CharTokenizer( reader )
// NOTE(review): the body of this predicate is not visible in this view - confirm which characters delimit tokens
190 protected boolean isTokenChar( char c )
195 protected char normalize( char c )
197 return Character.toLowerCase( c );
// version and base version fields share one tokenizer
201 else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
202 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
204 return new CharTokenizer( reader )
// NOTE(review): the body of this predicate is not visible in this view - confirm which characters delimit tokens
206 protected boolean isTokenChar( char c )
// filename field (standard and minimal records): '-', '.' and '/' are not token characters
212 else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
213 MinimalIndexRecordFields.FILENAME.equals( field ) )
215 return new CharTokenizer( reader )
217 protected boolean isTokenChar( char c )
219 return c != '-' && c != '.' && c != '/';
225 // use standard analyzer
226 return STANDARD.tokenStream( field, reader );
231 public void deleteRecords( Collection records )
232 throws RepositoryIndexException
236 IndexReader indexReader = null;
239 indexReader = IndexReader.open( indexLocation );
241 for ( Iterator i = records.iterator(); i.hasNext(); )
243 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
245 if ( record != null )
247 Term term = new Term( FLD_PK, record.getPrimaryKey() );
249 indexReader.deleteDocuments( term );
253 catch ( IOException e )
255 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
259 closeQuietly( indexReader );
264 public Collection getAllRecords()
265 throws RepositoryIndexSearchException
267 return search( new LuceneQuery( new MatchAllDocsQuery() ) );
270 public Collection getAllRecordKeys()
271 throws RepositoryIndexException
273 return getAllFieldValues( FLD_PK );
276 private List getAllFieldValues( String fieldName )
277 throws RepositoryIndexException
279 List keys = new ArrayList();
283 IndexReader indexReader = null;
284 TermEnum terms = null;
287 indexReader = IndexReader.open( indexLocation );
289 terms = indexReader.terms( new Term( fieldName, "" ) );
290 while ( fieldName.equals( terms.term().field() ) )
292 keys.add( terms.term().text() );
300 catch ( IOException e )
302 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
306 closeQuietly( indexReader );
307 closeQuietly( terms );
313 public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
314 throws RepositoryIndexException
316 IndexModifier indexModifier = null;
319 indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
321 for ( Iterator i = artifacts.iterator(); i.hasNext(); )
323 Artifact artifact = (Artifact) i.next();
324 RepositoryIndexRecord record = factory.createRecord( artifact );
326 if ( record != null )
328 Term term = new Term( FLD_PK, record.getPrimaryKey() );
330 indexModifier.deleteDocuments( term );
332 Document document = converter.convert( record );
334 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
336 indexModifier.addDocument( document );
339 indexModifier.optimize();
341 catch ( IOException e )
343 throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
347 closeQuietly( indexModifier );
348 lastUpdatedTime = System.currentTimeMillis();
352 public List getAllGroupIds()
353 throws RepositoryIndexException
355 return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
358 public List getArtifactIds( String groupId )
359 throws RepositoryIndexSearchException
361 return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
362 StandardIndexRecordFields.ARTIFACTID );
365 public List getVersions( String groupId, String artifactId )
366 throws RepositoryIndexSearchException
368 BooleanQuery query = new BooleanQuery();
369 query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
370 BooleanClause.Occur.MUST );
371 query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
372 BooleanClause.Occur.MUST );
374 return searchField( query, StandardIndexRecordFields.VERSION );
377 public long getLastUpdatedTime()
379 return lastUpdatedTime;
382 private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
383 throws RepositoryIndexSearchException
385 Set results = new LinkedHashSet();
387 IndexSearcher searcher;
390 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
392 catch ( IOException e )
394 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
399 Hits hits = searcher.search( luceneQuery );
400 for ( int i = 0; i < hits.length(); i++ )
402 Document doc = hits.doc( i );
404 results.add( doc.get( fieldName ) );
407 catch ( IOException e )
409 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
413 closeQuietly( searcher );
415 return new ArrayList( results );
418 public boolean exists()
419 throws RepositoryIndexException
421 if ( IndexReader.indexExists( indexLocation ) )
425 else if ( !indexLocation.exists() )
429 else if ( indexLocation.isDirectory() )
431 if ( indexLocation.listFiles().length > 1 )
433 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
442 throw new RepositoryIndexException( indexLocation + " is not a directory." );
446 public List search( Query query )
447 throws RepositoryIndexSearchException
449 LuceneQuery lQuery = (LuceneQuery) query;
451 org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
453 IndexSearcher searcher;
456 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
458 catch ( IOException e )
460 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
463 List records = new ArrayList();
466 Hits hits = searcher.search( luceneQuery );
467 for ( int i = 0; i < hits.length(); i++ )
469 Document doc = hits.doc( i );
471 records.add( converter.convert( doc ) );
474 catch ( IOException e )
476 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
478 catch ( ParseException e )
480 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
484 closeQuietly( searcher );
490 private static void closeQuietly( IndexSearcher searcher )
494 if ( searcher != null )
499 catch ( IOException e )
// Overload of closeQuietly for a term enumeration; presumably closes it like the sibling overloads do.
// NOTE(review): the statements between the signature and the catch clause, and the body of the catch
// clause, are not visible in this view - confirm whether the IOException is ignored or rethrown as the
// declared RepositoryIndexException.
505 private static void closeQuietly( TermEnum terms )
506 throws RepositoryIndexException
514 catch ( IOException e )
521 private static void closeQuietly( IndexWriter indexWriter )
522 throws RepositoryIndexException
526 if ( indexWriter != null )
531 catch ( IOException e )
533 // write should compain if it can't be closed, data probably not persisted
534 throw new RepositoryIndexException( e.getMessage(), e );
538 private static void closeQuietly( IndexModifier indexModifier )
540 if ( indexModifier != null )
544 indexModifier.close();
546 catch ( IOException e )
553 private static void closeQuietly( IndexReader reader )
557 if ( reader != null )
562 catch ( IOException e )