From: Stephane Gamard Date: Wed, 28 May 2014 21:45:38 +0000 (+0200) Subject: DAOv.2 - Updated analyzer & Fixed tests X-Git-Tag: 4.4-RC1~758 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=6712c2176821aced7a0478106d2ccc1114979ca5;p=sonarqube.git DAOv.2 - Updated analyzer & Fixed tests --- diff --git a/sonar-server/src/main/java/org/sonar/server/search/ESNode.java b/sonar-server/src/main/java/org/sonar/server/search/ESNode.java index dd63920e9b0..389772e1f04 100644 --- a/sonar-server/src/main/java/org/sonar/server/search/ESNode.java +++ b/sonar-server/src/main/java/org/sonar/server/search/ESNode.java @@ -29,7 +29,6 @@ import org.elasticsearch.common.logging.ESLoggerFactory; import org.elasticsearch.common.logging.slf4j.Slf4jESLoggerFactory; import org.elasticsearch.common.network.NetworkUtils; import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeBuilder; import org.picocontainer.Startable; @@ -107,7 +106,7 @@ public class ESNode implements Startable { if ( node.client().admin().cluster().prepareHealth() .setWaitForYellowStatus() - .get(TimeValue.timeValueMillis(3000)) + .get() .getStatus() == ClusterHealthStatus.RED) { throw new IllegalStateException( String.format("Elasticsearch index is corrupt, please delete directory '%s/%s' and relaunch the SonarQube server.", fileSystem.getHomeDir().getAbsolutePath(), DATA_DIR)); @@ -163,19 +162,52 @@ public class ESNode implements Startable { esSettings .put("index.mapper.dynamic", false) + // Sortable text analyzer .put("index.analysis.analyzer.sortable.type", "custom") .put("index.analysis.analyzer.sortable.tokenizer", "keyword") .putArray("index.analysis.analyzer.sortable.filter", "trim", "lowercase", "truncate") - .put("index.analysis.analyzer.string_gram.type", "custom") - .put("index.analysis.analyzer.string_gram.tokenizer", "whitespace") - .putArray("index.analysis.analyzer.string_gram.filter", "lowercase", "code_gram") - - .put("index.analysis.filter.code_gram.type", "edgeNGram") - .put("index.analysis.filter.code_gram.min_gram", 2) - .put("index.analysis.filter.code_gram.max_gram", 15) - .putArray("index.analysis.filter.code_gram.token_chars", "letter", "digit", "punctuation", "symbol") - + // Edge NGram index-analyzer + .put("index.analysis.analyzer.index_grams.type", "custom") + .put("index.analysis.analyzer.index_grams.tokenizer", "whitespace") + .putArray("index.analysis.analyzer.index_grams.filter", "trim", "lowercase", "gram_filter") + + // Edge NGram search-analyzer + .put("index.analysis.analyzer.search_grams.type", "custom") + .put("index.analysis.analyzer.search_grams.tokenizer", "whitespace") + .putArray("index.analysis.analyzer.search_grams.filter", "trim", "lowercase") + + // Word index-analyzer + .put("index.analysis.analyzer.index_words.type", "custom") + .put("index.analysis.analyzer.index_words.tokenizer", "standard") + .putArray("index.analysis.analyzer.index_words.filter", + "standard", "word_filter", "lowercase", "stop", "asciifolding", "porter_stem") + + // Word search-analyzer + .put("index.analysis.analyzer.search_words.type", "custom") + .put("index.analysis.analyzer.search_words.tokenizer", "standard") + .putArray("index.analysis.analyzer.search_words.filter", + "standard", "lowercase", "stop", "asciifolding", "porter_stem") + + + // Edge NGram filter + .put("index.analysis.filter.gram_filter.type", "edgeNGram") + .put("index.analysis.filter.gram_filter.min_gram", 2) + .put("index.analysis.filter.gram_filter.max_gram", 15) + .putArray("index.analysis.filter.gram_filter.token_chars", "letter", "digit", "punctuation", "symbol") + + // Word filter + .put("index.analysis.filter.word_filter.type", "word_delimiter") + .put("index.analysis.filter.word_filter.generate_word_parts", true) + .put("index.analysis.filter.word_filter.catenate_words", true) + .put("index.analysis.filter.word_filter.catenate_numbers", true) + .put("index.analysis.filter.word_filter.catenate_all", true) + .put("index.analysis.filter.word_filter.split_on_case_change", true) + .put("index.analysis.filter.word_filter.preserve_original", true) + .put("index.analysis.filter.word_filter.split_on_numerics", true) + .put("index.analysis.filter.word_filter.stem_english_possessive", true) + + // Path Analyzer .put("index.analysis.analyzer.path_analyzer.type", "custom") .put("index.analysis.analyzer.path_analyzer.tokenizer", "path_hierarchy"); diff --git a/sonar-server/src/test/java/org/sonar/server/search/ESNodeTest.java b/sonar-server/src/test/java/org/sonar/server/search/ESNodeTest.java index 6a6ac75efec..de378fe0b2d 100644 --- a/sonar-server/src/test/java/org/sonar/server/search/ESNodeTest.java +++ b/sonar-server/src/test/java/org/sonar/server/search/ESNodeTest.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.StrictDynamicMappingException; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -115,7 +114,35 @@ public class ESNodeTest { } @Test - public void check_analyzer() throws Exception { + public void check_path_analyzer() throws Exception { + ESNode node = new ESNode(fs, new Settings()); + node.start(); + + node.client().admin().indices().prepareCreate("polop") + .addMapping("type1", "{\"type1\": {\"properties\": {\"value\": {\"type\": \"string\"}}}}") + .execute().actionGet(); + node.client().admin().cluster().prepareHealth("polop").setWaitForYellowStatus().get(TimeValue.timeValueMillis(1000)); + + // default "path_analyzer" analyzer is defined for all indices + AnalyzeResponse response = node.client().admin().indices() + .prepareAnalyze("polop", "/temp/65236/test path/MyFile.java").setAnalyzer("path_analyzer").get(); + // default "path_analyzer" analyzer is defined for all indices + assertThat(response.getTokens()).hasSize(4); + assertThat(response.getTokens().get(0).getTerm()).isEqualTo("/temp"); + assertThat(response.getTokens().get(1).getTerm()).isEqualTo("/temp/65236"); + assertThat(response.getTokens().get(2).getTerm()).isEqualTo("/temp/65236/test path"); + assertThat(response.getTokens().get(3).getTerm()).isEqualTo("/temp/65236/test path/MyFile.java"); + + node.stop(); + } + + @Test + public void check_word_analyzer() throws Exception { + + } + + @Test + public void check_sortable_analyzer() throws Exception { ESNode node = new ESNode(fs, new Settings()); node.start(); @@ -129,22 +156,26 @@ public class ESNodeTest { .prepareAnalyze("polop", "This Is A Wonderful Text").setAnalyzer("sortable").get() .getTokens().get(0).getTerm()).isEqualTo("this is a "); + node.stop(); + } + @Test + public void check_gram_analyzer() throws Exception { + ESNode node = new ESNode(fs, new Settings()); + node.start(); + + node.client().admin().indices().prepareCreate("polop") + .addMapping("type1", "{\"type1\": {\"properties\": {\"value\": {\"type\": \"string\"}}}}") + .execute().actionGet(); + node.client().admin().cluster().prepareHealth("polop").setWaitForYellowStatus().get(TimeValue.timeValueMillis(1000)); + // default "string_gram" analyzer is defined for all indices AnalyzeResponse response = node.client().admin().indices() - .prepareAnalyze("polop", "he.llo w@rl#d").setAnalyzer("string_gram").get(); + .prepareAnalyze("polop", "he.llo w@rl#d").setAnalyzer("index_grams").get(); assertThat(response.getTokens()).hasSize(10); assertThat(response.getTokens().get(0).getTerm()).isEqualTo("he"); assertThat(response.getTokens().get(7).getTerm()).isEqualTo("w@rl"); - // default "path_analyzer" analyzer is defined for all indices - response = node.client().admin().indices() - .prepareAnalyze("polop", "/temp/65236/test path/MyFile.java").setAnalyzer("path_analyzer").get(); - // default "path_analyzer" analyzer is defined for all indices - assertThat(response.getTokens()).hasSize(4); - assertThat(response.getTokens().get(0).getTerm()).isEqualTo("/temp"); - assertThat(response.getTokens().get(1).getTerm()).isEqualTo("/temp/65236"); - assertThat(response.getTokens().get(2).getTerm()).isEqualTo("/temp/65236/test path"); - assertThat(response.getTokens().get(3).getTerm()).isEqualTo("/temp/65236/test path/MyFile.java"); + node.stop(); } @Test @@ -163,7 +194,6 @@ public class ESNodeTest { } @Test(expected = IllegalStateException.class) - @Ignore //TODO should use the Mng Index public void should_fail_on_corrupt_index() throws Exception { File zip = new File(Resources.getResource(getClass(), "ESNodeTest/data-es-corrupt.zip").toURI()); ZipUtils.unzip(zip, dataDir);