]> source.dussan.org Git - archiva.git/blob
dd6e81764b34de0e2d72822298c0fde56e6fcedc
[archiva.git] /
1 package org.apache.maven.archiva.indexer.lucene;
2
3 /*
4  * Copyright 2005-2006 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.CharTokenizer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexModifier;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.index.TermEnum;
30 import org.apache.lucene.search.Hits;
31 import org.apache.lucene.search.IndexSearcher;
32 import org.apache.lucene.search.MatchAllDocsQuery;
33 import org.apache.maven.archiva.indexer.RepositoryArtifactIndex;
34 import org.apache.maven.archiva.indexer.RepositoryIndexException;
35 import org.apache.maven.archiva.indexer.RepositoryIndexSearchException;
36 import org.apache.maven.archiva.indexer.query.Query;
37 import org.apache.maven.archiva.indexer.record.MinimalIndexRecordFields;
38 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecord;
39 import org.apache.maven.archiva.indexer.record.RepositoryIndexRecordFactory;
40 import org.apache.maven.archiva.indexer.record.StandardIndexRecordFields;
41 import org.apache.maven.artifact.Artifact;
42 import org.apache.maven.project.MavenProjectBuilder;
43
44 import java.io.File;
45 import java.io.IOException;
46 import java.io.Reader;
47 import java.text.ParseException;
48 import java.util.ArrayList;
49 import java.util.Collection;
50 import java.util.HashSet;
51 import java.util.Iterator;
52 import java.util.List;
53 import java.util.Map;
54 import java.util.Set;
55
56 /**
57  * Lucene implementation of a repository index.
58  *
59  * @author <a href="mailto:brett@apache.org">Brett Porter</a>
60  */
61 public class LuceneRepositoryArtifactIndex
62     implements RepositoryArtifactIndex
63 {
64     /**
65      * The location of the index on the file system.
66      */
67     private File indexLocation;
68
69     /**
70      * Convert repository records to Lucene documents.
71      */
72     private LuceneIndexRecordConverter converter;
73
74     private static final String FLD_PK = "pk";
75
76     private static Analyzer luceneAnalyzer = new LuceneAnalyzer();
77
78     private MavenProjectBuilder projectBuilder;
79
80     public LuceneRepositoryArtifactIndex( File indexPath, LuceneIndexRecordConverter converter )
81     {
82         this.indexLocation = indexPath;
83         this.converter = converter;
84     }
85
86     public LuceneRepositoryArtifactIndex( File indexLocation, LuceneIndexRecordConverter converter,
87                                           MavenProjectBuilder projectBuilder )
88     {
89         this.indexLocation = indexLocation;
90         this.converter = converter;
91         this.projectBuilder = projectBuilder;
92     }
93
94     public void indexRecords( Collection records )
95         throws RepositoryIndexException
96     {
97         deleteRecords( records );
98
99         addRecords( records );
100     }
101
102     private void addRecords( Collection records )
103         throws RepositoryIndexException
104     {
105         IndexWriter indexWriter;
106         try
107         {
108             indexWriter = new IndexWriter( indexLocation, getAnalyzer(), !exists() );
109         }
110         catch ( IOException e )
111         {
112             throw new RepositoryIndexException( "Unable to open index", e );
113         }
114
115         try
116         {
117             for ( Iterator i = records.iterator(); i.hasNext(); )
118             {
119                 RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
120
121                 if ( record != null )
122                 {
123                     Document document = converter.convert( record );
124                     document.add(
125                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
126
127                     indexWriter.addDocument( document );
128                 }
129             }
130
131             indexWriter.optimize();
132         }
133         catch ( IOException e )
134         {
135             throw new RepositoryIndexException( "Failed to add an index document", e );
136         }
137         finally
138         {
139             closeQuietly( indexWriter );
140         }
141     }
142
143     public static Analyzer getAnalyzer()
144     {
145         return luceneAnalyzer;
146     }
147
148     private static class LuceneAnalyzer
149         extends Analyzer
150     {
151         private static final Analyzer STANDARD = new StandardAnalyzer();
152
153         public TokenStream tokenStream( String field, final Reader reader )
154         {
155             // do not tokenize field called 'element'
156             if ( StandardIndexRecordFields.DEPENDENCIES.equals( field ) )
157             {
158                 return new CharTokenizer( reader )
159                 {
160                     protected boolean isTokenChar( char c )
161                     {
162                         return c != '\n';
163                     }
164                 };
165             }
166             else if ( StandardIndexRecordFields.FILES.equals( field ) )
167             {
168                 return new CharTokenizer( reader )
169                 {
170                     protected boolean isTokenChar( char c )
171                     {
172                         return c != '\n' && c != '/';
173                     }
174                 };
175             }
176             else
177             if ( StandardIndexRecordFields.CLASSES.equals( field ) || MinimalIndexRecordFields.CLASSES.equals( field ) )
178             {
179                 return new CharTokenizer( reader )
180                 {
181                     protected boolean isTokenChar( char c )
182                     {
183                         return c != '\n' && c != '.';
184                     }
185
186                     protected char normalize( char c )
187                     {
188                         return Character.toLowerCase( c );
189                     }
190                 };
191             }
192             else if ( StandardIndexRecordFields.GROUPID.equals( field ) )
193             {
194                 return new CharTokenizer( reader )
195                 {
196                     protected boolean isTokenChar( char c )
197                     {
198                         return c != '.';
199                     }
200
201                     protected char normalize( char c )
202                     {
203                         return Character.toLowerCase( c );
204                     }
205                 };
206             }
207             else if ( StandardIndexRecordFields.VERSION.equals( field ) ||
208                 StandardIndexRecordFields.BASE_VERSION.equals( field ) )
209             {
210                 return new CharTokenizer( reader )
211                 {
212                     protected boolean isTokenChar( char c )
213                     {
214                         return c != '-';
215                     }
216                 };
217             }
218             else if ( StandardIndexRecordFields.FILENAME.equals( field ) ||
219                 MinimalIndexRecordFields.FILENAME.equals( field ) )
220             {
221                 return new CharTokenizer( reader )
222                 {
223                     protected boolean isTokenChar( char c )
224                     {
225                         return c != '-' && c != '.' && c != '/';
226                     }
227                 };
228             }
229             else
230             {
231                 // use standard analyzer
232                 return STANDARD.tokenStream( field, reader );
233             }
234         }
235     }
236
237     public void deleteRecords( Collection records )
238         throws RepositoryIndexException
239     {
240         if ( exists() )
241         {
242             IndexReader indexReader = null;
243             try
244             {
245                 indexReader = IndexReader.open( indexLocation );
246
247                 for ( Iterator i = records.iterator(); i.hasNext(); )
248                 {
249                     RepositoryIndexRecord record = (RepositoryIndexRecord) i.next();
250
251                     if ( record != null )
252                     {
253                         Term term = new Term( FLD_PK, record.getPrimaryKey() );
254
255                         indexReader.deleteDocuments( term );
256                     }
257                 }
258             }
259             catch ( IOException e )
260             {
261                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
262             }
263             finally
264             {
265                 closeQuietly( indexReader );
266             }
267         }
268     }
269
270     public Collection getAllRecords()
271         throws RepositoryIndexSearchException
272     {
273         return search( new LuceneQuery( new MatchAllDocsQuery() ) );
274     }
275
276     public Collection getAllRecordKeys()
277         throws RepositoryIndexException
278     {
279         Set keys = new HashSet();
280
281         if ( exists() )
282         {
283             IndexReader indexReader = null;
284             TermEnum terms = null;
285             try
286             {
287                 indexReader = IndexReader.open( indexLocation );
288
289                 terms = indexReader.terms( new Term( FLD_PK, "" ) );
290                 while ( FLD_PK.equals( terms.term().field() ) )
291                 {
292                     keys.add( terms.term().text() );
293
294                     if ( !terms.next() )
295                     {
296                         break;
297                     }
298                 }
299             }
300             catch ( IOException e )
301             {
302                 throw new RepositoryIndexException( "Error deleting document: " + e.getMessage(), e );
303             }
304             finally
305             {
306                 closeQuietly( indexReader );
307                 closeQuietly( terms );
308             }
309         }
310         return keys;
311     }
312
313     public void indexArtifacts( List artifacts, RepositoryIndexRecordFactory factory )
314         throws RepositoryIndexException
315     {
316         IndexModifier indexModifier = null;
317         try
318         {
319             indexModifier = new IndexModifier( indexLocation, getAnalyzer(), !exists() );
320
321             int count = 0;
322             for ( Iterator i = artifacts.iterator(); i.hasNext(); count++ )
323             {
324                 Artifact artifact = (Artifact) i.next();
325                 RepositoryIndexRecord record = factory.createRecord( artifact );
326
327                 if ( record != null )
328                 {
329                     Term term = new Term( FLD_PK, record.getPrimaryKey() );
330
331                     indexModifier.deleteDocuments( term );
332
333                     Document document = converter.convert( record );
334                     document.add(
335                         new Field( FLD_PK, record.getPrimaryKey(), Field.Store.NO, Field.Index.UN_TOKENIZED ) );
336
337                     indexModifier.addDocument( document );
338                 }
339
340                 if ( count % 100 == 0 )
341                 {
342                     // MNG-142 - the project builder retains a lot of objects in its inflexible cache. This is a hack
343                     // around that. TODO: remove when it is configurable
344                     flushProjectBuilderCacheHack();
345                 }
346             }
347             indexModifier.optimize();
348         }
349         catch ( IOException e )
350         {
351             throw new RepositoryIndexException( "Error updating index: " + e.getMessage(), e );
352         }
353         finally
354         {
355             closeQuietly( indexModifier );
356         }
357     }
358
359     private void flushProjectBuilderCacheHack()
360     {
361         try
362         {
363             if ( projectBuilder != null )
364             {
365                 java.lang.reflect.Field f = projectBuilder.getClass().getDeclaredField( "rawProjectCache" );
366                 f.setAccessible( true );
367                 Map cache = (Map) f.get( projectBuilder );
368                 cache.clear();
369
370                 f = projectBuilder.getClass().getDeclaredField( "processedProjectCache" );
371                 f.setAccessible( true );
372                 cache = (Map) f.get( projectBuilder );
373                 cache.clear();
374             }
375         }
376         catch ( NoSuchFieldException e )
377         {
378             throw new RuntimeException( e );
379         }
380         catch ( IllegalAccessException e )
381         {
382             throw new RuntimeException( e );
383         }
384     }
385
386     public boolean exists()
387         throws RepositoryIndexException
388     {
389         if ( IndexReader.indexExists( indexLocation ) )
390         {
391             return true;
392         }
393         else if ( !indexLocation.exists() )
394         {
395             return false;
396         }
397         else if ( indexLocation.isDirectory() )
398         {
399             if ( indexLocation.listFiles().length > 1 )
400             {
401                 throw new RepositoryIndexException( indexLocation + " is not a valid index directory." );
402             }
403             else
404             {
405                 return false;
406             }
407         }
408         else
409         {
410             throw new RepositoryIndexException( indexLocation + " is not a directory." );
411         }
412     }
413
414     public List search( Query query )
415         throws RepositoryIndexSearchException
416     {
417         LuceneQuery lQuery = (LuceneQuery) query;
418
419         org.apache.lucene.search.Query luceneQuery = lQuery.getLuceneQuery();
420
421         IndexSearcher searcher;
422         try
423         {
424             searcher = new IndexSearcher( indexLocation.getAbsolutePath() );
425         }
426         catch ( IOException e )
427         {
428             throw new RepositoryIndexSearchException( "Unable to open index: " + e.getMessage(), e );
429         }
430
431         List records = new ArrayList();
432         try
433         {
434             Hits hits = searcher.search( luceneQuery );
435             for ( int i = 0; i < hits.length(); i++ )
436             {
437                 Document doc = hits.doc( i );
438
439                 records.add( converter.convert( doc ) );
440             }
441         }
442         catch ( IOException e )
443         {
444             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
445         }
446         catch ( ParseException e )
447         {
448             throw new RepositoryIndexSearchException( "Unable to search index: " + e.getMessage(), e );
449         }
450         finally
451         {
452             closeQuietly( searcher );
453         }
454
455         return records;
456     }
457
458     private static void closeQuietly( IndexSearcher searcher )
459     {
460         try
461         {
462             if ( searcher != null )
463             {
464                 searcher.close();
465             }
466         }
467         catch ( IOException e )
468         {
469             // ignore
470         }
471     }
472
473     private static void closeQuietly( TermEnum terms )
474         throws RepositoryIndexException
475     {
476         if ( terms != null )
477         {
478             try
479             {
480                 terms.close();
481             }
482             catch ( IOException e )
483             {
484                 // ignore
485             }
486         }
487     }
488
489     private static void closeQuietly( IndexWriter indexWriter )
490         throws RepositoryIndexException
491     {
492         try
493         {
494             if ( indexWriter != null )
495             {
496                 indexWriter.close();
497             }
498         }
499         catch ( IOException e )
500         {
501             // write should compain if it can't be closed, data probably not persisted
502             throw new RepositoryIndexException( e.getMessage(), e );
503         }
504     }
505
506     private static void closeQuietly( IndexModifier indexModifier )
507     {
508         if ( indexModifier != null )
509         {
510             try
511             {
512                 indexModifier.close();
513             }
514             catch ( IOException e )
515             {
516                 // ignore
517             }
518         }
519     }
520
521     private static void closeQuietly( IndexReader reader )
522     {
523         try
524         {
525             if ( reader != null )
526             {
527                 reader.close();
528             }
529         }
530         catch ( IOException e )
531         {
532             // ignore
533         }
534     }
535 }