]> source.dussan.org Git - archiva.git/commitdiff
Search now has good results. We removed the content indexing and actually included...
authorJames William Dumay <jdumay@apache.org>
Fri, 5 Dec 2008 06:35:48 +0000 (06:35 +0000)
committerJames William Dumay <jdumay@apache.org>
Fri, 5 Dec 2008 06:35:48 +0000 (06:35 +0000)
git-svn-id: https://svn.apache.org/repos/asf/archiva/branches/archiva-search-improvements@723612 13f79535-47bb-0310-9956-ffa450edef68

archiva-modules/archiva-base/archiva-consumers/archiva-lucene-consumers/src/main/java/org/apache/maven/archiva/consumers/lucene/IndexContentConsumer.java
archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentAnalyzer.java
archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentConverter.java
archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentHandlers.java
archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentKeys.java
archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/filecontent/FileContentRecord.java
archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java [new file with mode: 0644]
archiva-modules/archiva-web/archiva-xmlrpc/archiva-xmlrpc-services/src/test/java/org/apache/archiva/web/xmlrpc/services/SearchServiceImplTest.java

index ea3908772ff23a4eade6b961878612048ae22ae2..2c805f64cbc2da79a06bdfbe06790947bcc0c9b0 100644 (file)
@@ -159,10 +159,8 @@ public class IndexContentConsumer
         FileContentRecord record = new FileContentRecord();
         try
         {
-            File file = new File( repositoryDir, path );
             record.setRepositoryId( this.repository.getId() );
             record.setFilename( path );
-            record.setContents( FileUtils.readFileToString( file, null ) );
 
             // Test for possible artifact reference syntax.
             try
@@ -179,10 +177,6 @@ public class IndexContentConsumer
 
             index.modifyRecord( record );
         }
-        catch ( IOException e )
-        {
-            triggerConsumerError( READ_CONTENT, "Unable to read file contents: " + e.getMessage() );
-        }
         catch ( RepositoryIndexException e )
         {
             triggerConsumerError( INDEX_ERROR, "Unable to index file contents: " + e.getMessage() );
index 855d2259132f6ceefab94f0fd98c4b7374e3e0d1..21518080e880445e74632065206bc43a1fcaec6f 100644 (file)
@@ -23,6 +23,8 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.maven.archiva.indexer.lucene.analyzers.FilenamesTokenizer;
+import org.apache.maven.archiva.indexer.lucene.analyzers.ArtifactIdTokenizer;
+import org.apache.maven.archiva.indexer.lucene.analyzers.GroupIdTokenizer;
 
 import java.io.Reader;
 
@@ -42,6 +44,16 @@ public class FileContentAnalyzer extends Analyzer
             return new FilenamesTokenizer( reader );
         }
 
+        if ( FileContentKeys.ARTIFACTID.equals( field ))
+        {
+            return new ArtifactIdTokenizer(reader);
+        }
+
+        if ( FileContentKeys.GROUPID.equals( field ) )
+        {
+            return new GroupIdTokenizer(reader);
+        }
+
         return STANDARD.tokenStream( field, reader );
     }
 }
index ad191f673ad18f0a5a7dd5d07512495e611d777e..68edf4555ad7775c1149b5d1884541b137706f08 100644 (file)
@@ -37,7 +37,6 @@ import java.text.ParseException;
 public class FileContentConverter
     implements LuceneEntryConverter
 {
-
     public Document convert( LuceneRepositoryContentRecord record )
     {
         if ( !( record instanceof FileContentRecord ) )
@@ -62,9 +61,8 @@ public class FileContentConverter
             doc.addFieldTokenized( ArtifactKeys.TYPE, filecontent.getArtifact().getType() );
             doc.addFieldUntokenized( ArtifactKeys.CLASSIFIER, filecontent.getArtifact().getClassifier() );
         }
-        
+
         doc.addFieldTokenized( FileContentKeys.FILENAME, filecontent.getFilename() );
-        doc.addFieldTokenized( FileContentKeys.CONTENT, filecontent.getContents() );
 
         return doc.getDocument();
     }
@@ -91,7 +89,6 @@ public class FileContentConverter
 
         // Filecontent Specifics
         record.setFilename( document.get( FileContentKeys.FILENAME ) );
-        record.setContents( document.get( FileContentKeys.CONTENT ) );
 
         return record;
     }
index 70a95c9f469ef6099485b88806829a8f47a86b00..f3058dda09ba6d2f67cccf3a5d11a0f3b45f4a81 100644 (file)
@@ -43,8 +43,17 @@ public class FileContentHandlers
     {
         analyzer = new FileContentAnalyzer();
         converter = new FileContentConverter();
-        queryParser = new MultiFieldQueryParser( new String[] { FileContentKeys.FILENAME, FileContentKeys.CONTENT },
-                                                 analyzer );
+        queryParser = new MultiFieldQueryParser( new String[] {
+                                                FileContentKeys.FILENAME,
+                                                FileContentKeys.ARTIFACTID,
+                                                FileContentKeys.GROUPID,
+                                                FileContentKeys.ARTIFACTID_EXACT,
+                                                FileContentKeys.GROUPID_EXACT,
+                                                FileContentKeys.VERSION,
+                                                FileContentKeys.VERSION_EXACT},
+                                                analyzer );
+        //We prefer the narrowing approach to search results.
+        queryParser.setDefaultOperator(MultiFieldQueryParser.Operator.AND);
     }
 
     public String getId()
index 1b9e6260c015dd19efcca96250235411d929ac4d..343f359a3f0bc1db6369c0cb01129ec28da21659 100644 (file)
@@ -32,6 +32,4 @@ public class FileContentKeys
     public static final String ID = "filecontent";
 
     public static final String FILENAME = "filename";
-
-    public static final String CONTENT = "content";
 }
index 991f7b0a21be385e6c5355a43c63b7bc1eaea71d..0a1221e1e702c1a76c770ed7118f99f5ed5d3138 100644 (file)
@@ -39,8 +39,6 @@ public class FileContentRecord
      */
     private ArchivaArtifact artifact;
 
-    private String contents;
-
     public String getRepositoryId()
     {
         return repositoryId;
@@ -51,16 +49,6 @@ public class FileContentRecord
         this.repositoryId = repositoryId;
     }
 
-    public String getContents()
-    {
-        return contents;
-    }
-
-    public void setContents( String contents )
-    {
-        this.contents = contents;
-    }
-
     public String getPrimaryKey()
     {
         return repositoryId + ":" + filename;
diff --git a/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java b/archiva-modules/archiva-base/archiva-indexer/src/main/java/org/apache/maven/archiva/indexer/lucene/analyzers/ArtifactIdTokenizer.java
new file mode 100644 (file)
index 0000000..2e99c26
--- /dev/null
@@ -0,0 +1,45 @@
+package org.apache.maven.archiva.indexer.lucene.analyzers;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+
+/**
+ * Lucene {@link CharTokenizer} for {@link ArtifactKeys#ARTIFACTID} fields; only '-' is a non-token character.
+ */
+public class ArtifactIdTokenizer extends CharTokenizer
+{
+    public ArtifactIdTokenizer( Reader reader )
+    {
+        super( reader );
+    }
+
+    /**
+     * Treats "-" as the token boundary, so an artifactId such as "atlassian-plugins-core" is broken at each hyphen.
+     * @param c the character to classify
+     * @return <code>true</code> for every character except '-', <code>false</code> for '-'
+     */
+    @Override
+    protected boolean isTokenChar(char c)
+    {
+        return (c != '-');
+    }
+}
index 2deb6f1c2e4ef9a4be2c9f5be9c0b94bd3a888cb..2db1b931ce709aff092e16664e655718d46efcaa 100644 (file)
@@ -152,7 +152,6 @@ public class SearchServiceImplTest
         FileContentRecord record = new FileContentRecord();
         record.setRepositoryId( "repo1.mirror" );
         record.setArtifact( artifact );
-        record.setContents( "org.apache.archiva:archiva-test:1.0:jar org.apache.archiva.test.MyClassName" );
         record.setFilename( "archiva-test-1.0.jar" );
                 
         results.addHit( record );
@@ -198,7 +197,6 @@ public class SearchServiceImplTest
         FileContentRecord record = new FileContentRecord();
         record.setRepositoryId( "repo1.mirror" );
         record.setArtifact( artifact );
-        record.setContents( "org.apache.archiva:archiva-test:1.0:jar" );
         record.setFilename( "archiva-test-1.0.jar" );
                 
         results.addHit( record );