source.dussan.org Git - sonarqube.git/commitdiff
DAOv.2 - Updated analyzer & Fixed tests
author Stephane Gamard <stephane.gamard@searchbox.com>
Wed, 28 May 2014 21:45:38 +0000 (23:45 +0200)
committer Stephane Gamard <stephane.gamard@searchbox.com>
Wed, 28 May 2014 22:18:09 +0000 (00:18 +0200)
sonar-server/src/main/java/org/sonar/server/search/ESNode.java
sonar-server/src/test/java/org/sonar/server/search/ESNodeTest.java

index dd63920e9b0f6e9bc0d218ea2e4f5cff4f464a19..389772e1f04174dfb05b5ade57dae3efa06c8fe2 100644 (file)
@@ -29,7 +29,6 @@ import org.elasticsearch.common.logging.ESLoggerFactory;
 import org.elasticsearch.common.logging.slf4j.Slf4jESLoggerFactory;
 import org.elasticsearch.common.network.NetworkUtils;
 import org.elasticsearch.common.settings.ImmutableSettings;
-import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.node.Node;
 import org.elasticsearch.node.NodeBuilder;
 import org.picocontainer.Startable;
@@ -107,7 +106,7 @@ public class ESNode implements Startable {
     if (
       node.client().admin().cluster().prepareHealth()
         .setWaitForYellowStatus()
-        .get(TimeValue.timeValueMillis(3000))
+        .get()
         .getStatus() == ClusterHealthStatus.RED) {
       throw new IllegalStateException(
         String.format("Elasticsearch index is corrupt, please delete directory '%s/%s' and relaunch the SonarQube server.", fileSystem.getHomeDir().getAbsolutePath(), DATA_DIR));
@@ -163,19 +162,52 @@ public class ESNode implements Startable {
     esSettings
       .put("index.mapper.dynamic", false)
 
+        // Sortable text analyzer
       .put("index.analysis.analyzer.sortable.type", "custom")
       .put("index.analysis.analyzer.sortable.tokenizer", "keyword")
       .putArray("index.analysis.analyzer.sortable.filter", "trim", "lowercase", "truncate")
 
-      .put("index.analysis.analyzer.string_gram.type", "custom")
-      .put("index.analysis.analyzer.string_gram.tokenizer", "whitespace")
-      .putArray("index.analysis.analyzer.string_gram.filter", "lowercase", "code_gram")
-
-      .put("index.analysis.filter.code_gram.type", "edgeNGram")
-      .put("index.analysis.filter.code_gram.min_gram", 2)
-      .put("index.analysis.filter.code_gram.max_gram", 15)
-      .putArray("index.analysis.filter.code_gram.token_chars", "letter", "digit", "punctuation", "symbol")
-
+        // Edge NGram index-analyzer
+      .put("index.analysis.analyzer.index_grams.type", "custom")
+      .put("index.analysis.analyzer.index_grams.tokenizer", "whitespace")
+      .putArray("index.analysis.analyzer.index_grams.filter", "trim", "lowercase", "gram_filter")
+
+        // Edge NGram search-analyzer
+      .put("index.analysis.analyzer.search_grams.type", "custom")
+      .put("index.analysis.analyzer.search_grams.tokenizer", "whitespace")
+      .putArray("index.analysis.analyzer.search_grams.filter", "trim", "lowercase")
+
+        // Word index-analyzer
+      .put("index.analysis.analyzer.index_words.type", "custom")
+      .put("index.analysis.analyzer.index_words.tokenizer", "standard")
+      .putArray("index.analysis.analyzer.index_words.filter",
+        "standard", "word_filter", "lowercase", "stop", "asciifolding", "porter_stem")
+
+        // Word search-analyzer
+      .put("index.analysis.analyzer.search_words.type", "custom")
+      .put("index.analysis.analyzer.search_words.tokenizer", "standard")
+      .putArray("index.analysis.analyzer.search_words.filter",
+        "standard", "lowercase", "stop", "asciifolding", "porter_stem")
+
+
+        // Edge NGram filter
+      .put("index.analysis.filter.gram_filter.type", "edgeNGram")
+      .put("index.analysis.filter.gram_filter.min_gram", 2)
+      .put("index.analysis.filter.gram_filter.max_gram", 15)
+      .putArray("index.analysis.filter.gram_filter.token_chars", "letter", "digit", "punctuation", "symbol")
+
+        // Word filter
+      .put("index.analysis.filter.word_filter.type", "word_delimiter")
+      .put("index.analysis.filter.word_filter.generate_word_parts", true)
+      .put("index.analysis.filter.word_filter.catenate_words", true)
+      .put("index.analysis.filter.word_filter.catenate_numbers", true)
+      .put("index.analysis.filter.word_filter.catenate_all", true)
+      .put("index.analysis.filter.word_filter.split_on_case_change", true)
+      .put("index.analysis.filter.word_filter.preserve_original", true)
+      .put("index.analysis.filter.word_filter.split_on_numerics", true)
+      .put("index.analysis.filter.word_filter.stem_english_possessive", true)
+
+        // Path Analyzer
       .put("index.analysis.analyzer.path_analyzer.type", "custom")
       .put("index.analysis.analyzer.path_analyzer.tokenizer", "path_hierarchy");
 
index 6a6ac75efec65d4032f6f06c03da79252cbca550..de378fe0b2dbd8ddacee887581ed1e8178dc9ac7 100644 (file)
@@ -31,7 +31,6 @@ import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.mapper.StrictDynamicMappingException;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
@@ -115,7 +114,35 @@ public class ESNodeTest {
   }
 
   @Test
-  public void check_analyzer() throws Exception {
+  public void check_path_analyzer() throws Exception {
+    ESNode node = new ESNode(fs, new Settings());
+    node.start();
+
+    node.client().admin().indices().prepareCreate("polop")
+      .addMapping("type1", "{\"type1\": {\"properties\": {\"value\": {\"type\": \"string\"}}}}")
+      .execute().actionGet();
+    node.client().admin().cluster().prepareHealth("polop").setWaitForYellowStatus().get(TimeValue.timeValueMillis(1000));
+
+    // default "path_analyzer" analyzer is defined for all indices
+    AnalyzeResponse response = node.client().admin().indices()
+      .prepareAnalyze("polop", "/temp/65236/test path/MyFile.java").setAnalyzer("path_analyzer").get();
+    // expect one token per cumulative path prefix: 4 path segments give 4 tokens
+    assertThat(response.getTokens()).hasSize(4);
+    assertThat(response.getTokens().get(0).getTerm()).isEqualTo("/temp");
+    assertThat(response.getTokens().get(1).getTerm()).isEqualTo("/temp/65236");
+    assertThat(response.getTokens().get(2).getTerm()).isEqualTo("/temp/65236/test path");
+    assertThat(response.getTokens().get(3).getTerm()).isEqualTo("/temp/65236/test path/MyFile.java");
+
+    node.stop();
+  }
+
+  @Test
+  public void check_word_analyzer() throws Exception {
+    // TODO: empty test, passes vacuously - add assertions for the "index_words" and "search_words" analyzers
+  }
+
+  @Test
+  public void check_sortable_analyzer() throws Exception {
     ESNode node = new ESNode(fs, new Settings());
     node.start();
 
@@ -129,22 +156,26 @@ public class ESNodeTest {
       .prepareAnalyze("polop", "This Is A Wonderful Text").setAnalyzer("sortable").get()
       .getTokens().get(0).getTerm()).isEqualTo("this is a ");
 
+    node.stop();
+  }
+  @Test
+  public void check_gram_analyzer() throws Exception {
+    ESNode node = new ESNode(fs, new Settings());
+    node.start();
+
+    node.client().admin().indices().prepareCreate("polop")
+      .addMapping("type1", "{\"type1\": {\"properties\": {\"value\": {\"type\": \"string\"}}}}")
+      .execute().actionGet();
+    node.client().admin().cluster().prepareHealth("polop").setWaitForYellowStatus().get(TimeValue.timeValueMillis(1000));
+
-    // default "string_gram" analyzer is defined for all indices
+    // default "index_grams" analyzer is defined for all indices
     AnalyzeResponse response = node.client().admin().indices()
-      .prepareAnalyze("polop", "he.llo w@rl#d").setAnalyzer("string_gram").get();
+      .prepareAnalyze("polop", "he.llo w@rl#d").setAnalyzer("index_grams").get();
     assertThat(response.getTokens()).hasSize(10);
     assertThat(response.getTokens().get(0).getTerm()).isEqualTo("he");
     assertThat(response.getTokens().get(7).getTerm()).isEqualTo("w@rl");
 
-    // default "path_analyzer" analyzer is defined for all indices
-    response = node.client().admin().indices()
-      .prepareAnalyze("polop", "/temp/65236/test path/MyFile.java").setAnalyzer("path_analyzer").get();
-    // default "path_analyzer" analyzer is defined for all indices
-    assertThat(response.getTokens()).hasSize(4);
-    assertThat(response.getTokens().get(0).getTerm()).isEqualTo("/temp");
-    assertThat(response.getTokens().get(1).getTerm()).isEqualTo("/temp/65236");
-    assertThat(response.getTokens().get(2).getTerm()).isEqualTo("/temp/65236/test path");
-    assertThat(response.getTokens().get(3).getTerm()).isEqualTo("/temp/65236/test path/MyFile.java");
+    node.stop();
   }
 
   @Test
@@ -163,7 +194,6 @@ public class ESNodeTest {
   }
 
   @Test(expected = IllegalStateException.class)
-  @Ignore //TODO should use the Mng Index
   public void should_fail_on_corrupt_index() throws Exception {
     File zip = new File(Resources.getResource(getClass(), "ESNodeTest/data-es-corrupt.zip").toURI());
     ZipUtils.unzip(zip, dataDir);