From: Simon Brandhof Date: Mon, 16 May 2016 10:12:25 +0000 (+0200) Subject: SONAR-6632 Fail to index rule if description is more than 32kb X-Git-Tag: 5.6-RC1~76 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=23ee784e6d44f8462c8d160c7a113a64424bfb67;p=sonarqube.git SONAR-6632 Fail to index rule if description is more than 32kb --- diff --git a/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java b/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java index be7fd204737..1432b5c5b6e 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java +++ b/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java @@ -62,6 +62,13 @@ class DefaultIndexSettings { .putArray("index.analysis.analyzer.search_words.filter", "standard", "lowercase", "stop", "asciifolding", "porter_stem") + // English HTML analyzer + .put("index.analysis.analyzer.html_analyzer.type", "custom") + .put("index.analysis.analyzer.html_analyzer.tokenizer", "standard") + .putArray("index.analysis.analyzer.html_analyzer.filter", + "standard", "lowercase", "stop", "asciifolding", "porter_stem") + .putArray("index.analysis.analyzer.html_analyzer.char_filter", "html_strip") + // Edge NGram filter .put("index.analysis.filter.gram_filter.type", "edgeNGram") .put("index.analysis.filter.gram_filter.min_gram", 2) diff --git a/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java b/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java index ecfce0892e3..333427452da 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java +++ b/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java @@ -233,7 +233,7 @@ public class EsClient implements Startable { } } - protected Client nativeClient() { + public Client nativeClient() { return nativeClient; } } diff --git a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java 
b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java index 9578a8c0979..ae5c60bd87d 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java +++ b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java @@ -180,7 +180,7 @@ public class RuleIndex extends BaseIndex { // Human readable type of querying qb.should(simpleQueryStringQuery(query.getQueryText()) .field(FIELD_RULE_NAME + "." + SEARCH_WORDS_SUFFIX, 20f) - .field(FIELD_RULE_HTML_DESCRIPTION + "." + SEARCH_WORDS_SUFFIX, 3f) + .field(FIELD_RULE_HTML_DESCRIPTION, 3f) .defaultOperator(SimpleQueryStringBuilder.Operator.AND) ).boost(20f); diff --git a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java index eb8e554d2d5..5b5db55752b 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java +++ b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java @@ -21,6 +21,7 @@ package org.sonar.server.rule.index; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedMap; import java.util.Set; import org.sonar.api.config.Settings; import org.sonar.server.es.IndexDefinition; @@ -94,7 +95,13 @@ public class RuleIndexDefinition implements IndexDefinition { ruleMapping.stringFieldBuilder(FIELD_RULE_INTERNAL_KEY).disableSearch().docValues().build(); ruleMapping.stringFieldBuilder(FIELD_RULE_NAME).enableSorting().enableWordSearch().build(); - ruleMapping.stringFieldBuilder(FIELD_RULE_HTML_DESCRIPTION).enableWordSearch().build(); + ruleMapping.setProperty(FIELD_RULE_HTML_DESCRIPTION, ImmutableSortedMap.of( + "type", "string", + "index", "analyzed", + "doc_values", "false", + "index_analyzer", "html_analyzer", + "search_analyzer", "html_analyzer" + )); 
ruleMapping.stringFieldBuilder(FIELD_RULE_SEVERITY).docValues().build(); ruleMapping.stringFieldBuilder(FIELD_RULE_STATUS).docValues().build(); ruleMapping.stringFieldBuilder(FIELD_RULE_LANGUAGE).build(); diff --git a/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java b/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java index dfbc11f13f8..6810d5f073a 100644 --- a/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java +++ b/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java @@ -19,24 +19,39 @@ */ package org.sonar.server.rule.index; +import com.google.common.collect.ImmutableMap; +import java.util.List; +import org.apache.commons.lang.StringUtils; +import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.junit.Rule; import org.junit.Test; import org.sonar.api.config.Settings; +import org.sonar.process.ProcessProperties; +import org.sonar.server.es.EsTester; import org.sonar.server.es.IndexDefinition; import org.sonar.server.es.NewIndex; import static org.assertj.core.api.Assertions.assertThat; +import static org.sonar.server.rule.index.RuleIndexDefinition.FIELD_RULE_HTML_DESCRIPTION; +import static org.sonar.server.rule.index.RuleIndexDefinition.FIELD_RULE_KEY; +import static org.sonar.server.rule.index.RuleIndexDefinition.FIELD_RULE_REPOSITORY; +import static org.sonar.server.rule.index.RuleIndexDefinition.INDEX; public class RuleIndexDefinitionTest { - IndexDefinition.IndexDefinitionContext underTest = new IndexDefinition.IndexDefinitionContext(); + Settings settings = new Settings(); + RuleIndexDefinition underTest = new RuleIndexDefinition(settings); + + @Rule + public EsTester tester = new EsTester().addDefinitions(underTest); @Test - public void define() { - RuleIndexDefinition def = new RuleIndexDefinition(new Settings()); - def.define(underTest); + public void 
test_definition_of_index() { + IndexDefinition.IndexDefinitionContext context = new IndexDefinition.IndexDefinitionContext(); + underTest.define(context); - assertThat(underTest.getIndices()).hasSize(1); - NewIndex ruleIndex = underTest.getIndices().get("rules"); + assertThat(context.getIndices()).hasSize(1); + NewIndex ruleIndex = context.getIndices().get("rules"); assertThat(ruleIndex).isNotNull(); assertThat(ruleIndex.getTypes().keySet()).containsOnly("rule", "activeRule"); @@ -45,4 +60,53 @@ public class RuleIndexDefinitionTest { assertThat(ruleIndex.getSettings().get("index.number_of_replicas")).isEqualTo("0"); } + @Test + public void enable_replica_if_clustering_is_enabled() { + settings.setProperty(ProcessProperties.CLUSTER_ACTIVATE, true); + IndexDefinition.IndexDefinitionContext context = new IndexDefinition.IndexDefinitionContext(); + underTest.define(context); + + NewIndex ruleIndex = context.getIndices().get("rules"); + assertThat(ruleIndex.getSettings().get("index.number_of_replicas")).isEqualTo("1"); + } + + @Test + public void support_long_html_description() throws Exception { + String longText = StringUtils.repeat("hello ", 10_000); + // the following method fails if PUT fails + tester.putDocuments(INDEX, RuleIndexDefinition.TYPE_RULE, ImmutableMap.of( + FIELD_RULE_HTML_DESCRIPTION, longText, + FIELD_RULE_REPOSITORY, "squid", + FIELD_RULE_KEY, "S001")); + assertThat(tester.countDocuments(INDEX, RuleIndexDefinition.TYPE_RULE)).isEqualTo(1); + + List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(longText); + for (AnalyzeResponse.AnalyzeToken token : tokens) { + assertThat(token.getTerm().length()).isEqualTo("hello".length()); + } + } + + @Test + public void remove_html_characters_of_html_description() { + String text = "<p>html <i>line</i></p>"; + List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(text); + + assertThat(tokens).extracting("term").containsOnly("html", "line"); + } + + @Test + public void sanitize_html_description_as_it_is_english() { + String text = "this is a small list of words"; + // "this", "is", "a" and "of" are not indexed. + // Plural "words" is converted to singular "word" + List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(text); + assertThat(tokens).extracting("term").containsOnly("small", "list", "word"); + } + + private List<AnalyzeResponse.AnalyzeToken> analyzeIndexedTokens(String text) { + return tester.client().nativeClient().admin().indices().prepareAnalyze(INDEX, + text) + .setField(FIELD_RULE_HTML_DESCRIPTION) + .execute().actionGet().getTokens(); + } }