]> source.dussan.org Git - archiva.git/blob
0b64ee53c3f3cf1cfe0f22a0a950b224df72796e
[archiva.git] /
1 package org.apache.maven.archiva.indexer.lucene;
2
3 /*
4  * Copyright 2005-2006 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.CharTokenizer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexModifier;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.index.TermEnum;
30 import org.apache.lucene.search.BooleanClause;
31 import org.apache.lucene.search.BooleanQuery;
32 import org.apache.lucene.search.Hits;
33 import org.apache.lucene.search.IndexSearcher;
34 import org.apache.lucene.search.MatchAllDocsQuery;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
37 import org.apache.maven.archiva.indexer.RepositoryIndexException;
38 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
39 import org.apache.maven.archiva.indexer.query.Query;
40 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
41 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
42 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
43 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
44 import org.apache.maven.artifact.Artifact;
45
46 import java.io.File;
47 import java.io.IOException;
48 import java.io.Reader;
49 import java.text.ParseException;
50 import java.util.ArrayList;
51 import java.util.Collection;
52 import java.util.Iterator;
53 import java.util.LinkedHashSet;
54 import java.util.List;
55 import java.util.Set;
56
57 /**
58  * Lucene implementation of a repository index.
59  *
60  * @author <a href="mailto:brett@apache.org">Brett Porter</a>
61  */
62 public class LuceneRepositoryArtifactIndex
63     implements RepositoryArtifactIndex
64 {
65     /**
66      * The location of the index on the file system.
67      */
68     private File indexLocation;
69
70     /**
71      * Convert repository records to Lucene documents.
72      */
73     private LuceneIndexRecordConverter converter;
74
75     private static final String FLD_PK = "pk";
76
77     private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
78
79     private static long lastUpdatedTime = 0;
80
81     public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
82     {
83         this.indexLocation = indexPath;
84         this.converter = converter;
85     }
86
87     public void indexRecords( Collection records )
88         throws RepositoryIndexException
89     {
90         deleteRecords( records );
91
92         addRecords( records );
93     }
94
95     private void addRecords( Collection records )
96         throws RepositoryIndexException
97     {
98         IndexWriter indexWriter;
99         try
100         {
101             indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
102         }
103         catch ( IOException e )
104         {
105             throw new RepositoryIndexException( "Unable to open index", e );
106         }
107
108         try
109         {
110             for ( Iterator i = records.iterator(); i.hasNext(); )
111             {
112                 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
113
114                 if ( record != null )
115                 {
116                     Document document = converter.convert( record );
117                     document.add(
118                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
119
120                     indexWriter.addDocument( document );
121                 }
122             }
123
124             indexWriter.optimize();
125         }
126         catch ( IOException e )
127         {
128             throw new RepositoryIndexException( "Failed to add an index document", e );
129         }
130         finally
131         {
132             closeQuietly( indexWriter );
133             lastUpdatedTime = System.currentTimeMillis();
134         }
135     }
136
137     public static Analyzer getAnalyzer()
138     {
139         return luceneAnalyzer;
140     }
141
142     private static class LuceneAnalyzer
143         extends Analyzer
144     {
145         private static final Analyzer STANDARD = new StandardAnalyzer();
146
147         public TokenStream tokenStream( String field, final Reader reader )
148         {
149             // do not tokenize field called 'element'
150             if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
151             {
152                 return new CharTokenizer( reader )
153                 {
154                     protected boolean isTokenChar( char c )
155                     {
156                         return c != '\n';
157                     }
158                 };
159             }
160             else if ( StandardIndexRecordFields.FILES.equals( field ) )
161             {
162                 return new CharTokenizer( reader )
163                 {
164                     protected boolean isTokenChar( char c )
165                     {
166                         return c != '\n' && c != '/';
167                     }
168                 };
169             }
170             else
171             if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
172             {
173                 return new CharTokenizer( reader )
174                 {
175                     protected boolean isTokenChar( char c )
176                     {
177                         return c != '\n' && c != '.';
178                     }
179
180                     protected char normalize( char c )
181                     {
182                         return Character.toLowerCase( c );
183                     }
184                 };
185             }
186             else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
187             {
188                 return new CharTokenizer( reader )
189                 {
190                     protected boolean isTokenChar( char c )
191                     {
192                         return c != '.';
193                     }
194
195                     protected char normalize( char c )
196                     {
197                         return Character.toLowerCase( c );
198                     }
199                 };
200             }
201             else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
202                 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
203             {
204                 return new CharTokenizer( reader )
205                 {
206                     protected boolean isTokenChar( char c )
207                     {
208                         return c != '-';
209                     }
210                 };
211             }
212             else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
213                 MinimalIndexRecordFields.FILENAME.equals( field ) )
214             {
215                 return new CharTokenizer( reader )
216                 {
217                     protected boolean isTokenChar( char c )
218                     {
219                         return c != '-' && c != '.' && c != '/';
220                     }
221                 };
222             }
223             else
224             {
225                 // use standard analyzer
226                 return STANDARD.tokenStream( field, reader );
227             }
228         }
229     }
230
231     public void deleteRecords( Collection records )
232         throws RepositoryIndexException
233     {
234         if ( exists() )
235         {
236             IndexReader indexReader = null;
237             try
238             {
239                 indexReader = IndexReader.open( indexLocation );
240
241                 for ( Iterator i = records.iterator(); i.hasNext(); )
242                 {
243                     RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
244
245                     if ( record != null )
246                     {
247                         Term term = new Term( FLD_PK, record.getPrimaryKey() );
248
249                         indexReader.deleteDocuments( term );
250                     }
251                 }
252             }
253             catch ( IOException e )
254             {
255                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
256             }
257             finally
258             {
259                 closeQuietly( indexReader );
260             }
261         }
262     }
263
264     public Collection getAllRecords()
265         throws RepositoryIndexSearchException
266     {
267         return search( new LuceneQuery( new MatchAllDocsQuery() ) );
268     }
269
270     public Collection getAllRecordKeys()
271         throws RepositoryIndexException
272     {
273         return getAllFieldValues( FLD_PK );
274     }
275
276     private List getAllFieldValues( String fieldName )
277         throws RepositoryIndexException
278     {
279         List keys = new ArrayList();
280
281         if ( exists() )
282         {
283             IndexReader indexReader = null;
284             TermEnum terms = null;
285             try
286             {
287                 indexReader = IndexReader.open( indexLocation );
288
289                 terms = indexReader.terms( new Term( fieldName, "" ) );
290                 while ( fieldName.equals( terms.term().field() ) )
291                 {
292                     keys.add( terms.term().text() );
293
294                     if ( !terms.next() )
295                     {
296                         break;
297                     }
298                 }
299             }
300             catch ( IOException e )
301             {
302                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
303             }
304             finally
305             {
306                 closeQuietly( indexReader );
307                 closeQuietly( terms );
308             }
309         }
310         return keys;
311     }
312
313     public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
314         throws RepositoryIndexException
315     {
316         IndexModifier indexModifier = null;
317         try
318         {
319             indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
320
321             for ( Iterator i = artifacts.iterator(); i.hasNext(); )
322             {
323                 Artifact artifact = (Artifact) i.next();
324                 RepositoryIndexRecord record = factory.createRecord( artifact );
325
326                 if ( record != null )
327                 {
328                     Term term = new Term( FLD_PK, record.getPrimaryKey() );
329
330                     indexModifier.deleteDocuments( term );
331
332                     Document document = converter.convert( record );
333                     document.add(
334                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
335
336                     indexModifier.addDocument( document );
337                 }
338             }
339             indexModifier.optimize();
340         }
341         catch ( IOException e )
342         {
343             throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
344         }
345         finally
346         {
347             closeQuietly( indexModifier );
348             lastUpdatedTime = System.currentTimeMillis();
349         }
350     }
351
352     public List getAllGroupIds()
353         throws RepositoryIndexException
354     {
355         return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
356     }
357
358     public List getArtifactIds( String groupId )
359         throws RepositoryIndexSearchException
360     {
361         return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
362                             StandardIndexRecordFields.ARTIFACTID );
363     }
364
365     public List getVersions( String groupId, String artifactId )
366         throws RepositoryIndexSearchException
367     {
368         BooleanQuery query = new BooleanQuery();
369         query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
370                    BooleanClause.Occur.MUST );
371         query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
372                    BooleanClause.Occur.MUST );
373
374         return searchField( query, StandardIndexRecordFields.VERSION );
375     }
376
377     public long getLastUpdatedTime()
378     {
379         return lastUpdatedTime;
380     }
381
382     private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
383         throws RepositoryIndexSearchException
384     {
385         Set results = new LinkedHashSet();
386
387         IndexSearcher searcher;
388         try
389         {
390             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
391         }
392         catch ( IOException e )
393         {
394             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
395         }
396
397         try
398         {
399             Hits hits = searcher.search( luceneQuery );
400             for ( int i = 0; i < hits.length(); i++ )
401             {
402                 Document doc = hits.doc( i );
403
404                 results.add( doc.get( fieldName ) );
405             }
406         }
407         catch ( IOException e )
408         {
409             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
410         }
411         finally
412         {
413             closeQuietly( searcher );
414         }
415         return new ArrayList( results );
416     }
417
418     public boolean exists()
419         throws RepositoryIndexException
420     {
421         if ( IndexReader.indexExists( indexLocation ) )
422         {
423             return true;
424         }
425         else if ( !indexLocation.exists() )
426         {
427             return false;
428         }
429         else if ( indexLocation.isDirectory() )
430         {
431             if ( indexLocation.listFiles().length > 1 )
432             {
433                 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
434             }
435             else
436             {
437                 return false;
438             }
439         }
440         else
441         {
442             throw new RepositoryIndexException( indexLocation + " is not a directory." );
443         }
444     }
445
446     public List search( Query query )
447         throws RepositoryIndexSearchException
448     {
449         LuceneQuery lQuery = (LuceneQuery) query;
450
451         org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
452
453         IndexSearcher searcher;
454         try
455         {
456             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
457         }
458         catch ( IOException e )
459         {
460             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
461         }
462
463         List records = new ArrayList();
464         try
465         {
466             Hits hits = searcher.search( luceneQuery );
467             for ( int i = 0; i < hits.length(); i++ )
468             {
469                 Document doc = hits.doc( i );
470
471                 records.add( converter.convert( doc ) );
472             }
473         }
474         catch ( IOException e )
475         {
476             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
477         }
478         catch ( ParseException e )
479         {
480             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
481         }
482         finally
483         {
484             closeQuietly( searcher );
485         }
486
487         return records;
488     }
489
490     private static void closeQuietly( IndexSearcher searcher )
491     {
492         try
493         {
494             if ( searcher != null )
495             {
496                 searcher.close();
497             }
498         }
499         catch ( IOException e )
500         {
501             // ignore
502         }
503     }
504
505     private static void closeQuietly( TermEnum terms )
506         throws RepositoryIndexException
507     {
508         if ( terms != null )
509         {
510             try
511             {
512                 terms.close();
513             }
514             catch ( IOException e )
515             {
516                 // ignore
517             }
518         }
519     }
520
521     private static void closeQuietly( IndexWriter indexWriter )
522         throws RepositoryIndexException
523     {
524         try
525         {
526             if ( indexWriter != null )
527             {
528                 indexWriter.close();
529             }
530         }
531         catch ( IOException e )
532         {
533             // write should compain if it can't be closed, data probably not persisted
534             throw new RepositoryIndexException( e.getMessage(), e );
535         }
536     }
537
538     private static void closeQuietly( IndexModifier indexModifier )
539     {
540         if ( indexModifier != null )
541         {
542             try
543             {
544                 indexModifier.close();
545             }
546             catch ( IOException e )
547             {
548                 // ignore
549             }
550         }
551     }
552
553     private static void closeQuietly( IndexReader reader )
554     {
555         try
556         {
557             if ( reader != null )
558             {
559                 reader.close();
560             }
561         }
562         catch ( IOException e )
563         {
564             // ignore
565         }
566     }
567 }