]> source.dussan.org Git - archiva.git/blob
ee5062542f6dedb2b4f8178c084f10455a3cca40
[archiva.git] /
1 package org.apache.maven.archiva.indexer.lucene;
2
3 /*
4  * Copyright 2005-2006 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.CharTokenizer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexModifier;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.index.TermEnum;
30 import org.apache.lucene.search.BooleanClause;
31 import org.apache.lucene.search.BooleanQuery;
32 import org.apache.lucene.search.Hits;
33 import org.apache.lucene.search.IndexSearcher;
34 import org.apache.lucene.search.MatchAllDocsQuery;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
37 import org.apache.maven.archiva.indexer.RepositoryIndexException;
38 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
39 import org.apache.maven.archiva.indexer.query.Query;
40 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
41 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
42 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
43 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
44 import org.apache.maven.artifact.Artifact;
45 import org.apache.maven.project.MavenProjectBuilder;
46
47 import java.io.File;
48 import java.io.IOException;
49 import java.io.Reader;
50 import java.text.ParseException;
51 import java.util.ArrayList;
52 import java.util.Collection;
53 import java.util.Iterator;
54 import java.util.LinkedHashSet;
55 import java.util.List;
56 import java.util.Map;
57 import java.util.Set;
58
59 /**
60  * Lucene implementation of a repository index.
61  *
62  * @author <a href="mailto:brett@apache.org">Brett Porter</a>
63  */
64 public class LuceneRepositoryArtifactIndex
65     implements RepositoryArtifactIndex
66 {
67     /**
68      * The location of the index on the file system.
69      */
70     private File indexLocation;
71
72     /**
73      * Convert repository records to Lucene documents.
74      */
75     private LuceneIndexRecordConverter converter;
76
77     private static final String FLD_PK = "pk";
78
79     private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
80
81     private MavenProjectBuilder projectBuilder;
82
83     private static long lastUpdatedTime = 0;
84
85     public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
86     {
87         this.indexLocation = indexPath;
88         this.converter = converter;
89     }
90
91     public LuceneRepositoryArtifactIndex( File indexLocation, LuceneIndexRecordConverter converter,
92                                           MavenProjectBuilder projectBuilder )
93     {
94         this.indexLocation = indexLocation;
95         this.converter = converter;
96         this.projectBuilder = projectBuilder;
97     }
98
99     public void indexRecords( Collection records )
100         throws RepositoryIndexException
101     {
102         deleteRecords( records );
103
104         addRecords( records );
105     }
106
107     private void addRecords( Collection records )
108         throws RepositoryIndexException
109     {
110         IndexWriter indexWriter;
111         try
112         {
113             indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
114         }
115         catch ( IOException e )
116         {
117             throw new RepositoryIndexException( "Unable to open index", e );
118         }
119
120         try
121         {
122             for ( Iterator i = records.iterator(); i.hasNext(); )
123             {
124                 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
125
126                 if ( record != null )
127                 {
128                     Document document = converter.convert( record );
129                     document.add(
130                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
131
132                     indexWriter.addDocument( document );
133                 }
134             }
135
136             indexWriter.optimize();
137         }
138         catch ( IOException e )
139         {
140             throw new RepositoryIndexException( "Failed to add an index document", e );
141         }
142         finally
143         {
144             closeQuietly( indexWriter );
145             lastUpdatedTime = System.currentTimeMillis();
146         }
147     }
148
149     public static Analyzer getAnalyzer()
150     {
151         return luceneAnalyzer;
152     }
153
154     private static class LuceneAnalyzer
155         extends Analyzer
156     {
157         private static final Analyzer STANDARD = new StandardAnalyzer();
158
159         public TokenStream tokenStream( String field, final Reader reader )
160         {
161             // do not tokenize field called 'element'
162             if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
163             {
164                 return new CharTokenizer( reader )
165                 {
166                     protected boolean isTokenChar( char c )
167                     {
168                         return c != '\n';
169                     }
170                 };
171             }
172             else if ( StandardIndexRecordFields.FILES.equals( field ) )
173             {
174                 return new CharTokenizer( reader )
175                 {
176                     protected boolean isTokenChar( char c )
177                     {
178                         return c != '\n' && c != '/';
179                     }
180                 };
181             }
182             else
183             if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
184             {
185                 return new CharTokenizer( reader )
186                 {
187                     protected boolean isTokenChar( char c )
188                     {
189                         return c != '\n' && c != '.';
190                     }
191
192                     protected char normalize( char c )
193                     {
194                         return Character.toLowerCase( c );
195                     }
196                 };
197             }
198             else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
199             {
200                 return new CharTokenizer( reader )
201                 {
202                     protected boolean isTokenChar( char c )
203                     {
204                         return c != '.';
205                     }
206
207                     protected char normalize( char c )
208                     {
209                         return Character.toLowerCase( c );
210                     }
211                 };
212             }
213             else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
214                 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
215             {
216                 return new CharTokenizer( reader )
217                 {
218                     protected boolean isTokenChar( char c )
219                     {
220                         return c != '-';
221                     }
222                 };
223             }
224             else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
225                 MinimalIndexRecordFields.FILENAME.equals( field ) )
226             {
227                 return new CharTokenizer( reader )
228                 {
229                     protected boolean isTokenChar( char c )
230                     {
231                         return c != '-' && c != '.' && c != '/';
232                     }
233                 };
234             }
235             else
236             {
237                 // use standard analyzer
238                 return STANDARD.tokenStream( field, reader );
239             }
240         }
241     }
242
243     public void deleteRecords( Collection records )
244         throws RepositoryIndexException
245     {
246         if ( exists() )
247         {
248             IndexReader indexReader = null;
249             try
250             {
251                 indexReader = IndexReader.open( indexLocation );
252
253                 for ( Iterator i = records.iterator(); i.hasNext(); )
254                 {
255                     RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
256
257                     if ( record != null )
258                     {
259                         Term term = new Term( FLD_PK, record.getPrimaryKey() );
260
261                         indexReader.deleteDocuments( term );
262                     }
263                 }
264             }
265             catch ( IOException e )
266             {
267                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
268             }
269             finally
270             {
271                 closeQuietly( indexReader );
272             }
273         }
274     }
275
276     public Collection getAllRecords()
277         throws RepositoryIndexSearchException
278     {
279         return search( new LuceneQuery( new MatchAllDocsQuery() ) );
280     }
281
282     public Collection getAllRecordKeys()
283         throws RepositoryIndexException
284     {
285         return getAllFieldValues( FLD_PK );
286     }
287
288     private List getAllFieldValues( String fieldName )
289         throws RepositoryIndexException
290     {
291         List keys = new ArrayList();
292
293         if ( exists() )
294         {
295             IndexReader indexReader = null;
296             TermEnum terms = null;
297             try
298             {
299                 indexReader = IndexReader.open( indexLocation );
300
301                 terms = indexReader.terms( new Term( fieldName, "" ) );
302                 while ( fieldName.equals( terms.term().field() ) )
303                 {
304                     keys.add( terms.term().text() );
305
306                     if ( !terms.next() )
307                     {
308                         break;
309                     }
310                 }
311             }
312             catch ( IOException e )
313             {
314                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
315             }
316             finally
317             {
318                 closeQuietly( indexReader );
319                 closeQuietly( terms );
320             }
321         }
322         return keys;
323     }
324
325     public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
326         throws RepositoryIndexException
327     {
328         IndexModifier indexModifier = null;
329         try
330         {
331             indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
332
333             int count = 0;
334             for ( Iterator i = artifacts.iterator(); i.hasNext(); count++ )
335             {
336                 Artifact artifact = (Artifact) i.next();
337                 RepositoryIndexRecord record = factory.createRecord( artifact );
338
339                 if ( record != null )
340                 {
341                     Term term = new Term( FLD_PK, record.getPrimaryKey() );
342
343                     indexModifier.deleteDocuments( term );
344
345                     Document document = converter.convert( record );
346                     document.add(
347                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
348
349                     indexModifier.addDocument( document );
350                 }
351
352                 if ( count % 100 == 0 )
353                 {
354                     // MNG-142 - the project builder retains a lot of objects in its inflexible cache. This is a hack
355                     // around that. TODO: remove when it is configurable
356                     flushProjectBuilderCacheHack();
357                 }
358             }
359             indexModifier.optimize();
360         }
361         catch ( IOException e )
362         {
363             throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
364         }
365         finally
366         {
367             closeQuietly( indexModifier );
368             lastUpdatedTime = System.currentTimeMillis();
369         }
370     }
371
372     public List getAllGroupIds()
373         throws RepositoryIndexException
374     {
375         return getAllFieldValues( StandardIndexRecordFields.GROUPID_EXACT );
376     }
377
378     public List getArtifactIds( String groupId )
379         throws RepositoryIndexSearchException
380     {
381         return searchField( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
382                             StandardIndexRecordFields.ARTIFACTID );
383     }
384
385     public List getVersions( String groupId, String artifactId )
386         throws RepositoryIndexSearchException
387     {
388         BooleanQuery query = new BooleanQuery();
389         query.add( new TermQuery( new Term( StandardIndexRecordFields.GROUPID_EXACT, groupId ) ),
390                    BooleanClause.Occur.MUST );
391         query.add( new TermQuery( new Term( StandardIndexRecordFields.ARTIFACTID_EXACT, artifactId ) ),
392                    BooleanClause.Occur.MUST );
393
394         return searchField( query, StandardIndexRecordFields.VERSION );
395     }
396
397     public long getLastUpdatedTime()
398     {
399         return lastUpdatedTime;
400     }
401
402     private List searchField( org.apache.lucene.search.Query luceneQuery, String fieldName )
403         throws RepositoryIndexSearchException
404     {
405         Set results = new LinkedHashSet();
406
407         IndexSearcher searcher;
408         try
409         {
410             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
411         }
412         catch ( IOException e )
413         {
414             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
415         }
416
417         try
418         {
419             Hits hits = searcher.search( luceneQuery );
420             for ( int i = 0; i < hits.length(); i++ )
421             {
422                 Document doc = hits.doc( i );
423
424                 results.add( doc.get( fieldName ) );
425             }
426         }
427         catch ( IOException e )
428         {
429             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
430         }
431         finally
432         {
433             closeQuietly( searcher );
434         }
435         return new ArrayList( results );
436     }
437
438     private void flushProjectBuilderCacheHack()
439     {
440         try
441         {
442             if ( projectBuilder != null )
443             {
444                 java.lang.reflect.Field f = projectBuilder.getClass().getDeclaredField( "rawProjectCache" );
445                 f.setAccessible( true );
446                 Map cache = (Map) f.get( projectBuilder );
447                 cache.clear();
448
449                 f = projectBuilder.getClass().getDeclaredField( "processedProjectCache" );
450                 f.setAccessible( true );
451                 cache = (Map) f.get( projectBuilder );
452                 cache.clear();
453             }
454         }
455         catch ( NoSuchFieldException e )
456         {
457             throw new RuntimeException( e );
458         }
459         catch ( IllegalAccessException e )
460         {
461             throw new RuntimeException( e );
462         }
463     }
464
465     public boolean exists()
466         throws RepositoryIndexException
467     {
468         if ( IndexReader.indexExists( indexLocation ) )
469         {
470             return true;
471         }
472         else if ( !indexLocation.exists() )
473         {
474             return false;
475         }
476         else if ( indexLocation.isDirectory() )
477         {
478             if ( indexLocation.listFiles().length > 1 )
479             {
480                 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
481             }
482             else
483             {
484                 return false;
485             }
486         }
487         else
488         {
489             throw new RepositoryIndexException( indexLocation + " is not a directory." );
490         }
491     }
492
493     public List search( Query query )
494         throws RepositoryIndexSearchException
495     {
496         LuceneQuery lQuery = (LuceneQuery) query;
497
498         org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
499
500         IndexSearcher searcher;
501         try
502         {
503             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
504         }
505         catch ( IOException e )
506         {
507             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
508         }
509
510         List records = new ArrayList();
511         try
512         {
513             Hits hits = searcher.search( luceneQuery );
514             for ( int i = 0; i < hits.length(); i++ )
515             {
516                 Document doc = hits.doc( i );
517
518                 records.add( converter.convert( doc ) );
519             }
520         }
521         catch ( IOException e )
522         {
523             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
524         }
525         catch ( ParseException e )
526         {
527             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
528         }
529         finally
530         {
531             closeQuietly( searcher );
532         }
533
534         return records;
535     }
536
537     private static void closeQuietly( IndexSearcher searcher )
538     {
539         try
540         {
541             if ( searcher != null )
542             {
543                 searcher.close();
544             }
545         }
546         catch ( IOException e )
547         {
548             // ignore
549         }
550     }
551
552     private static void closeQuietly( TermEnum terms )
553         throws RepositoryIndexException
554     {
555         if ( terms != null )
556         {
557             try
558             {
559                 terms.close();
560             }
561             catch ( IOException e )
562             {
563                 // ignore
564             }
565         }
566     }
567
568     private static void closeQuietly( IndexWriter indexWriter )
569         throws RepositoryIndexException
570     {
571         try
572         {
573             if ( indexWriter != null )
574             {
575                 indexWriter.close();
576             }
577         }
578         catch ( IOException e )
579         {
580             // write should compain if it can't be closed, data probably not persisted
581             throw new RepositoryIndexException( e.getMessage(), e );
582         }
583     }
584
585     private static void closeQuietly( IndexModifier indexModifier )
586     {
587         if ( indexModifier != null )
588         {
589             try
590             {
591                 indexModifier.close();
592             }
593             catch ( IOException e )
594             {
595                 // ignore
596             }
597         }
598     }
599
600     private static void closeQuietly( IndexReader reader )
601     {
602         try
603         {
604             if ( reader != null )
605             {
606                 reader.close();
607             }
608         }
609         catch ( IOException e )
610         {
611             // ignore
612         }
613     }
614 }