1 package org.apache.maven.archiva.indexer.lucene;
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
22 import org.apache.lucene.analysis.Analyzer;
23 import org.apache.lucene.analysis.CharTokenizer;
24 import org.apache.lucene.analysis.TokenStream;
25 import org.apache.lucene.analysis.standard.StandardAnalyzer;
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.document.Field;
28 import org.apache.lucene.index.IndexModifier;
29 import org.apache.lucene.index.IndexReader;
30 import org.apache.lucene.index.IndexWriter;
31 import org.apache.lucene.index.Term;
32 import org.apache.lucene.index.TermEnum;
33 import org.apache.lucene.search.BooleanClause;
34 import org.apache.lucene.search.BooleanQuery;
35 import org.apache.lucene.search.Hits;
36 import org.apache.lucene.search.IndexSearcher;
37 import org.apache.lucene.search.MatchAllDocsQuery;
38 import org.apache.lucene.search.TermQuery;
39 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
40 import org.apache.maven.archiva.indexer.RepositoryIndexException;
41 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
42 import org.apache.maven.archiva.indexer.query.Query;
43 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
44 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
45 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
46 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
47 import org.apache.maven.artifact.Artifact;
50 import java.io.IOException;
51 import java.io.Reader;
52 import java.text.ParseException;
53 import java.util.ArrayList;
54 import java.util.Collection;
55 import java.util.Iterator;
56 import java.util.LinkedHashSet;
57 import java.util.List;
61 * Lucene implementation of a repository index.
63 * @author <a href="mailto:brett@apache.org">Brett Porter</a>
65 public class LuceneRepositoryArtifactIndex
66 implements RepositoryArtifactIndex
69 * The location of the index on the file system.
71 private File indexLocation;
74 * Convert repository records to Lucene documents.
76 private LuceneIndexRecordConverter converter;
78 private static final String FLD_PK = "pk";
80 private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
82 private static long lastUpdatedTime = 0;
/**
 * Creates an index that lives in the given directory.
 *
 * @param indexPath the location of the index on the file system
 * @param converter converts repository records to Lucene documents, and search hits back to records
 */
public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
{
    this.converter = converter;
    this.indexLocation = indexPath;
}
90 public void indexRecords( Collection records )
91 throws RepositoryIndexException
93 deleteRecords( records );
95 addRecords( records );
98 private void addRecords( Collection records )
99 throws RepositoryIndexException
101 IndexWriter indexWriter;
104 indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
106 catch ( IOException e )
108 throw new RepositoryIndexException( "Unable to open index", e );
113 for ( Iterator i = records.iterator(); i.hasNext(); )
115 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
117 if ( record != null )
119 Document document = converter.convert( record );
121 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
123 indexWriter.addDocument( document );
127 indexWriter.optimize();
129 catch ( IOException e )
131 throw new RepositoryIndexException( "Failed to add an index document", e );
135 closeQuietly( indexWriter );
136 lastUpdatedTime = System.currentTimeMillis();
140 public static Analyzer getAnalyzer()
142 return luceneAnalyzer;
// Analyzer that selects a tokenizer by field name. Fields that are not special-cased below are
// delegated to a shared StandardAnalyzer.
// NOTE(review): the superclass clause and several isTokenChar bodies are not visible in this view;
// only the rules that are visible are described in the comments below.
145 private static class LuceneAnalyzer
// Fallback analyzer for every field without a dedicated tokenizer.
148 private static final Analyzer STANDARD = new StandardAnalyzer();
// Returns the token stream to use for the named field.
150 public TokenStream tokenStream( String field, final Reader reader )
152 // dependencies field: dedicated tokenizer (the earlier note about a field called 'element' was stale)
153 if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
155 return new CharTokenizer( reader )
// NOTE(review): token rule for the dependencies field is not visible here - confirm.
157 protected boolean isTokenChar( char c )
// files field: tokens are separated by newlines and '/'
163 else if ( StandardIndexRecordFields.FILES.equals( field ) )
165 return new CharTokenizer( reader )
167 protected boolean isTokenChar( char c )
169 return c != '\n' && c != '/';
// classes fields (standard and minimal records): tokens are separated by newlines and '.', lower-cased
174 if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
176 return new CharTokenizer( reader )
178 protected boolean isTokenChar( char c )
180 return c != '\n' && c != '.';
183 protected char normalize( char c )
185 return Character.toLowerCase( c );
// groupId field: tokens are lower-cased
189 else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
191 return new CharTokenizer( reader )
// NOTE(review): token rule for the groupId field is not visible here - confirm.
193 protected boolean isTokenChar( char c )
198 protected char normalize( char c )
200 return Character.toLowerCase( c );
// version and base version fields share one tokenizer
204 else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
205 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
207 return new CharTokenizer( reader )
// NOTE(review): token rule for the version fields is not visible here - confirm.
209 protected boolean isTokenChar( char c )
// filename fields (standard and minimal records): tokens are separated by '-', '.' and '/'
215 else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
216 MinimalIndexRecordFields.FILENAME.equals( field ) )
218 return new CharTokenizer( reader )
220 protected boolean isTokenChar( char c )
222 return c != '-' && c != '.' && c != '/';
228 // every other field: use the standard analyzer
229 return STANDARD.tokenStream( field, reader );
234 public void deleteRecords( Collection records )
235 throws RepositoryIndexException
239 IndexReader indexReader = null;
242 indexReader = IndexReader.open( indexLocation );
244 for ( Iterator i = records.iterator(); i.hasNext(); )
246 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
248 if ( record != null )
250 Term term = new Term( FLD_PK, record.getPrimaryKey() );
252 indexReader.deleteDocuments( term );
256 catch ( IOException e )
258 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
262 closeQuietly( indexReader );
267 public Collection getAllRecords()
268 throws RepositoryIndexSearchException
270 return search( new LuceneQuery( new MatchAllDocsQuery() ) );
273 public Collection getAllRecordKeys()
274 throws RepositoryIndexException
276 return getAllFieldValues( FLD_PK );
279 private List getAllFieldValues( String fieldName )
280 throws RepositoryIndexException
282 List keys = new ArrayList();
286 IndexReader indexReader = null;
287 TermEnum terms = null;
290 indexReader = IndexReader.open( indexLocation );
292 terms = indexReader.terms( new Term( fieldName, "" ) );
293 while ( fieldName.equals( terms.term().field() ) )
295 keys.add( terms.term().text() );
303 catch ( IOException e )
305 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
309 closeQuietly( indexReader );
310 closeQuietly( terms );
316 public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
317 throws RepositoryIndexException
319 IndexModifier indexModifier = null;
322 indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
324 for ( Iterator i = artifacts.iterator(); i.hasNext(); )
326 Artifact artifact = (Artifact) i.next();
327 RepositoryIndexRecord record = factory.createRecord( artifact );
329 if ( record != null )
331 Term term = new Term( FLD_PK, record.getPrimaryKey() );
333 indexModifier.deleteDocuments( term );
335 Document document = converter.convert( record );
337 new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
339 indexModifier.addDocument( document );
342 indexModifier.optimize();
344 catch ( IOException e )
346 throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
350 closeQuietly( indexModifier );
351 lastUpdatedTime = System.currentTimeMillis();
355 public void indexArtifact( Artifact artifact, RepositoryIndexRecordFactory factory )
356 throws RepositoryIndexException
358 IndexModifier indexModifier = null;
361 indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
363 RepositoryIndexRecord record = factory.createRecord( artifact );
365 if ( record != null )
367 Term term = new Term( FLD_PK, record.getPrimaryKey() );
369 indexModifier.deleteDocuments( term );
371 Document document = converter.convert( record );
372 document.add( new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
374 indexModifier.addDocument( document );
376 indexModifier.optimize();
378 catch ( IOException e )
380 throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
384 closeQuietly( indexModifier );
385 lastUpdatedTime = System.currentTimeMillis();
389 public List getAllGroupIds()
390 throws RepositoryIndexException
392 return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
395 public List getArtifactIds( String groupId )
396 throws RepositoryIndexSearchException
398 return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
399 StandardIndexRecordFields.ARTIFACTID );
402 public List getVersions( String groupId, String artifactId )
403 throws RepositoryIndexSearchException
405 BooleanQuery query = new BooleanQuery();
406 query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
407 BooleanClause.Occur.MUST );
408 query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
409 BooleanClause.Occur.MUST );
411 return searchField( query, StandardIndexRecordFields.VERSION );
414 public long getLastUpdatedTime()
416 return lastUpdatedTime;
419 private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
420 throws RepositoryIndexSearchException
422 Set results = new LinkedHashSet();
424 IndexSearcher searcher;
427 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
429 catch ( IOException e )
431 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
436 Hits hits = searcher.search( luceneQuery );
437 for ( int i = 0; i < hits.length(); i++ )
439 Document doc = hits.doc( i );
441 results.add( doc.get( fieldName ) );
444 catch ( IOException e )
446 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
450 closeQuietly( searcher );
452 return new ArrayList( results );
455 public boolean exists()
456 throws RepositoryIndexException
458 if ( IndexReader.indexExists( indexLocation ) )
462 else if ( !indexLocation.exists() )
466 else if ( indexLocation.isDirectory() )
468 if ( indexLocation.listFiles().length > 1 )
470 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
479 throw new RepositoryIndexException( indexLocation + " is not a directory." );
483 public List search( Query query )
484 throws RepositoryIndexSearchException
486 LuceneQuery lQuery = (LuceneQuery) query;
488 org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
490 IndexSearcher searcher;
493 searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
495 catch ( IOException e )
497 throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
500 List records = new ArrayList();
503 Hits hits = searcher.search( luceneQuery );
504 for ( int i = 0; i < hits.length(); i++ )
506 Document doc = hits.doc( i );
508 records.add( converter.convert( doc ) );
511 catch ( IOException e )
513 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
515 catch ( ParseException e )
517 throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
521 closeQuietly( searcher );
527 private static void closeQuietly( IndexSearcher searcher )
531 if ( searcher != null )
536 catch ( IOException e )
// Closes the given term enumeration.
// NOTE(review): the handling inside the IOException catch is not visible in this view; the declared
// RepositoryIndexException means callers must be prepared for a failure - confirm whether it is ever thrown.
542 private static void closeQuietly( TermEnum terms )
543 throws RepositoryIndexException
551 catch ( IOException e )
558 private static void closeQuietly( IndexWriter indexWriter )
559 throws RepositoryIndexException
563 if ( indexWriter != null )
568 catch ( IOException e )
570 // write should compain if it can't be closed, data probably not persisted
571 throw new RepositoryIndexException( e.getMessage(), e );
575 private static void closeQuietly( IndexModifier indexModifier )
577 if ( indexModifier != null )
581 indexModifier.close();
583 catch ( IOException e )
590 private static void closeQuietly( IndexReader reader )
594 if ( reader != null )
599 catch ( IOException e )