]> source.dussan.org Git - archiva.git/blob
c0a02935dd57a297e5b4279ee4e0bf8b42e6d0b1
[archiva.git] /
1 package org.apache.maven.archiva.indexer.lucene;
2
3 /*
4  * Licensed to the Apache Software Foundation (ASF) under one
5  * or more contributor license agreements.  See the NOTICE file
6  * distributed with this work for additional information
7  * regarding copyright ownership.  The ASF licenses this file
8  * to you under the Apache License, Version 2.0 (the
9  * "License"); you may not use this file except in compliance
10  * with the License.  You may obtain a copy of the License at
11  *
12  *   http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing,
15  * software distributed under the License is distributed on an
16  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  * KIND, either express or implied.  See the License for the
18  * specific language governing permissions and limitations
19  * under the License.
20  */
21
22 import org.apache.lucene.analysis.Analyzer;
23 import org.apache.lucene.analysis.CharTokenizer;
24 import org.apache.lucene.analysis.TokenStream;
25 import org.apache.lucene.analysis.standard.StandardAnalyzer;
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.document.Field;
28 import org.apache.lucene.index.IndexModifier;
29 import org.apache.lucene.index.IndexReader;
30 import org.apache.lucene.index.IndexWriter;
31 import org.apache.lucene.index.Term;
32 import org.apache.lucene.index.TermEnum;
33 import org.apache.lucene.search.BooleanClause;
34 import org.apache.lucene.search.BooleanQuery;
35 import org.apache.lucene.search.Hits;
36 import org.apache.lucene.search.IndexSearcher;
37 import org.apache.lucene.search.MatchAllDocsQuery;
38 import org.apache.lucene.search.TermQuery;
39 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
40 import org.apache.maven.archiva.indexer.RepositoryIndexException;
41 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
42 import org.apache.maven.archiva.indexer.query.Query;
43 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
44 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
45 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
46 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
47 import org.apache.maven.artifact.Artifact;
48
49 import java.io.File;
50 import java.io.IOException;
51 import java.io.Reader;
52 import java.text.ParseException;
53 import java.util.ArrayList;
54 import java.util.Collection;
55 import java.util.Iterator;
56 import java.util.LinkedHashSet;
57 import java.util.List;
58 import java.util.Set;
59
60 /**
61  * Lucene implementation of a repository index.
62  *
63  * @author <a href="mailto:brett@apache.org">Brett Porter</a>
64  */
65 public class LuceneRepositoryArtifactIndex
66     implements RepositoryArtifactIndex
67 {
    /**
     * The location of the index on the file system.
     */
    private File indexLocation;

    /**
     * Convert repository records to Lucene documents.
     */
    private LuceneIndexRecordConverter converter;

    /**
     * Name of the un-tokenized, non-stored Lucene field that carries each record's primary key. It is the
     * term used to locate and delete the existing document for a record.
     */
    private static final String FLD_PK = "pk";

    /**
     * Analyzer instance shared by all indexes and handed out by {@link #getAnalyzer()}.
     */
    private static Analyzer luceneAnalyzer = new LuceneAnalyzer();

    /**
     * Time, in milliseconds as reported by {@link System#currentTimeMillis()}, at which an add/update
     * operation last finished. The field is static, so the value is shared by every instance of this class
     * rather than being tracked per index location.
     */
    private static long lastUpdatedTime = 0;
83
84     public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
85     {
86         this.indexLocation = indexPath;
87         this.converter = converter;
88     }
89
90     public void indexRecords( Collection records )
91         throws RepositoryIndexException
92     {
93         deleteRecords( records );
94
95         addRecords( records );
96     }
97
98     private void addRecords( Collection records )
99         throws RepositoryIndexException
100     {
101         IndexWriter indexWriter;
102         try
103         {
104             indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
105         }
106         catch ( IOException e )
107         {
108             throw new RepositoryIndexException( "Unable to open index", e );
109         }
110
111         try
112         {
113             for ( Iterator i = records.iterator(); i.hasNext(); )
114             {
115                 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
116
117                 if ( record != null )
118                 {
119                     Document document = converter.convert( record );
120                     document.add(
121                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
122
123                     indexWriter.addDocument( document );
124                 }
125             }
126
127             indexWriter.optimize();
128         }
129         catch ( IOException e )
130         {
131             throw new RepositoryIndexException( "Failed to add an index document", e );
132         }
133         finally
134         {
135             closeQuietly( indexWriter );
136             lastUpdatedTime = System.currentTimeMillis();
137         }
138     }
139
140     public static Analyzer getAnalyzer()
141     {
142         return luceneAnalyzer;
143     }
144
145     private static class LuceneAnalyzer
146         extends Analyzer
147     {
148         private static final Analyzer STANDARD = new StandardAnalyzer();
149
150         public TokenStream tokenStream( String field, final Reader reader )
151         {
152             // do not tokenize field called 'element'
153             if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
154             {
155                 return new CharTokenizer( reader )
156                 {
157                     protected boolean isTokenChar( char c )
158                     {
159                         return c != '\n';
160                     }
161                 };
162             }
163             else if ( StandardIndexRecordFields.FILES.equals( field ) )
164             {
165                 return new CharTokenizer( reader )
166                 {
167                     protected boolean isTokenChar( char c )
168                     {
169                         return c != '\n' && c != '/';
170                     }
171                 };
172             }
173             else
174             if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
175             {
176                 return new CharTokenizer( reader )
177                 {
178                     protected boolean isTokenChar( char c )
179                     {
180                         return c != '\n' && c != '.';
181                     }
182
183                     protected char normalize( char c )
184                     {
185                         return Character.toLowerCase( c );
186                     }
187                 };
188             }
189             else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
190             {
191                 return new CharTokenizer( reader )
192                 {
193                     protected boolean isTokenChar( char c )
194                     {
195                         return c != '.';
196                     }
197
198                     protected char normalize( char c )
199                     {
200                         return Character.toLowerCase( c );
201                     }
202                 };
203             }
204             else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
205                 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
206             {
207                 return new CharTokenizer( reader )
208                 {
209                     protected boolean isTokenChar( char c )
210                     {
211                         return c != '-';
212                     }
213                 };
214             }
215             else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
216                 MinimalIndexRecordFields.FILENAME.equals( field ) )
217             {
218                 return new CharTokenizer( reader )
219                 {
220                     protected boolean isTokenChar( char c )
221                     {
222                         return c != '-' && c != '.' && c != '/';
223                     }
224                 };
225             }
226             else
227             {
228                 // use standard analyzer
229                 return STANDARD.tokenStream( field, reader );
230             }
231         }
232     }
233
234     public void deleteRecords( Collection records )
235         throws RepositoryIndexException
236     {
237         if ( exists() )
238         {
239             IndexReader indexReader = null;
240             try
241             {
242                 indexReader = IndexReader.open( indexLocation );
243
244                 for ( Iterator i = records.iterator(); i.hasNext(); )
245                 {
246                     RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
247
248                     if ( record != null )
249                     {
250                         Term term = new Term( FLD_PK, record.getPrimaryKey() );
251
252                         indexReader.deleteDocuments( term );
253                     }
254                 }
255             }
256             catch ( IOException e )
257             {
258                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
259             }
260             finally
261             {
262                 closeQuietly( indexReader );
263             }
264         }
265     }
266
267     public Collection getAllRecords()
268         throws RepositoryIndexSearchException
269     {
270         return search( new LuceneQuery( new MatchAllDocsQuery() ) );
271     }
272
273     public Collection getAllRecordKeys()
274         throws RepositoryIndexException
275     {
276         return getAllFieldValues( FLD_PK );
277     }
278
279     private List getAllFieldValues( String fieldName )
280         throws RepositoryIndexException
281     {
282         List keys = new ArrayList();
283
284         if ( exists() )
285         {
286             IndexReader indexReader = null;
287             TermEnum terms = null;
288             try
289             {
290                 indexReader = IndexReader.open( indexLocation );
291
292                 terms = indexReader.terms( new Term( fieldName, "" ) );
293                 while ( fieldName.equals( terms.term().field() ) )
294                 {
295                     keys.add( terms.term().text() );
296
297                     if ( !terms.next() )
298                     {
299                         break;
300                     }
301                 }
302             }
303             catch ( IOException e )
304             {
305                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
306             }
307             finally
308             {
309                 closeQuietly( indexReader );
310                 closeQuietly( terms );
311             }
312         }
313         return keys;
314     }
315
316     public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
317         throws RepositoryIndexException
318     {
319         IndexModifier indexModifier = null;
320         try
321         {
322             indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
323
324             for ( Iterator i = artifacts.iterator(); i.hasNext(); )
325             {
326                 Artifact artifact = (Artifact) i.next();
327                 RepositoryIndexRecord record = factory.createRecord( artifact );
328
329                 if ( record != null )
330                 {
331                     Term term = new Term( FLD_PK, record.getPrimaryKey() );
332
333                     indexModifier.deleteDocuments( term );
334
335                     Document document = converter.convert( record );
336                     document.add(
337                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
338
339                     indexModifier.addDocument( document );
340                 }
341             }
342             indexModifier.optimize();
343         }
344         catch ( IOException e )
345         {
346             throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
347         }
348         finally
349         {
350             closeQuietly( indexModifier );
351             lastUpdatedTime = System.currentTimeMillis();
352         }
353     }
354     
355     public void indexArtifact( Artifact artifact, RepositoryIndexRecordFactory factory )
356         throws RepositoryIndexException
357     {
358         IndexModifier indexModifier = null;
359         try
360         {
361             indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
362
363             RepositoryIndexRecord record = factory.createRecord( artifact );
364
365             if ( record != null )
366             {
367                 Term term = new Term( FLD_PK, record.getPrimaryKey() );
368
369                 indexModifier.deleteDocuments( term );
370
371                 Document document = converter.convert( record );
372                 document.add( new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
373
374                 indexModifier.addDocument( document );
375             }
376             indexModifier.optimize();
377         }
378         catch ( IOException e )
379         {
380             throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
381         }
382         finally
383         {
384             closeQuietly( indexModifier );
385             lastUpdatedTime = System.currentTimeMillis();
386         }
387     }    
388
389     public List getAllGroupIds()
390         throws RepositoryIndexException
391     {
392         return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
393     }
394
395     public List getArtifactIds( String groupId )
396         throws RepositoryIndexSearchException
397     {
398         return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
399                             StandardIndexRecordFields.ARTIFACTID );
400     }
401
402     public List getVersions( String groupId, String artifactId )
403         throws RepositoryIndexSearchException
404     {
405         BooleanQuery query = new BooleanQuery();
406         query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
407                    BooleanClause.Occur.MUST );
408         query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
409                    BooleanClause.Occur.MUST );
410
411         return searchField( query, StandardIndexRecordFields.VERSION );
412     }
413
414     public long getLastUpdatedTime()
415     {
416         return lastUpdatedTime;
417     }
418
419     private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
420         throws RepositoryIndexSearchException
421     {
422         Set results = new LinkedHashSet();
423
424         IndexSearcher searcher;
425         try
426         {
427             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
428         }
429         catch ( IOException e )
430         {
431             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
432         }
433
434         try
435         {
436             Hits hits = searcher.search( luceneQuery );
437             for ( int i = 0; i < hits.length(); i++ )
438             {
439                 Document doc = hits.doc( i );
440
441                 results.add( doc.get( fieldName ) );
442             }
443         }
444         catch ( IOException e )
445         {
446             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
447         }
448         finally
449         {
450             closeQuietly( searcher );
451         }
452         return new ArrayList( results );
453     }
454
455     public boolean exists()
456         throws RepositoryIndexException
457     {
458         if ( IndexReader.indexExists( indexLocation ) )
459         {
460             return true;
461         }
462         else if ( !indexLocation.exists() )
463         {
464             return false;
465         }
466         else if ( indexLocation.isDirectory() )
467         {
468             if ( indexLocation.listFiles().length > 1 )
469             {
470                 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
471             }
472             else
473             {
474                 return false;
475             }
476         }
477         else
478         {
479             throw new RepositoryIndexException( indexLocation + " is not a directory." );
480         }
481     }
482
483     public List search( Query query )
484         throws RepositoryIndexSearchException
485     {
486         LuceneQuery lQuery = (LuceneQuery) query;
487
488         org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
489
490         IndexSearcher searcher;
491         try
492         {
493             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
494         }
495         catch ( IOException e )
496         {
497             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
498         }
499
500         List records = new ArrayList();
501         try
502         {
503             Hits hits = searcher.search( luceneQuery );
504             for ( int i = 0; i < hits.length(); i++ )
505             {
506                 Document doc = hits.doc( i );
507
508                 records.add( converter.convert( doc ) );
509             }
510         }
511         catch ( IOException e )
512         {
513             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
514         }
515         catch ( ParseException e )
516         {
517             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
518         }
519         finally
520         {
521             closeQuietly( searcher );
522         }
523
524         return records;
525     }
526
527     private static void closeQuietly( IndexSearcher searcher )
528     {
529         try
530         {
531             if ( searcher != null )
532             {
533                 searcher.close();
534             }
535         }
536         catch ( IOException e )
537         {
538             // ignore
539         }
540     }
541
542     private static void closeQuietly( TermEnum terms )
543         throws RepositoryIndexException
544     {
545         if ( terms != null )
546         {
547             try
548             {
549                 terms.close();
550             }
551             catch ( IOException e )
552             {
553                 // ignore
554             }
555         }
556     }
557
558     private static void closeQuietly( IndexWriter indexWriter )
559         throws RepositoryIndexException
560     {
561         try
562         {
563             if ( indexWriter != null )
564             {
565                 indexWriter.close();
566             }
567         }
568         catch ( IOException e )
569         {
570             // write should compain if it can't be closed, data probably not persisted
571             throw new RepositoryIndexException( e.getMessage(), e );
572         }
573     }
574
575     private static void closeQuietly( IndexModifier indexModifier )
576     {
577         if ( indexModifier != null )
578         {
579             try
580             {
581                 indexModifier.close();
582             }
583             catch ( IOException e )
584             {
585                 // ignore
586             }
587         }
588     }
589
590     private static void closeQuietly( IndexReader reader )
591     {
592         try
593         {
594             if ( reader != null )
595             {
596                 reader.close();
597             }
598         }
599         catch ( IOException e )
600         {
601             // ignore
602         }
603     }
604 }