about | summary | refs | log | tree | commit | diff | stats
path: root/server
diff options
context:
space:
mode:
author Simon Brandhof <simon.brandhof@sonarsource.com> 2016-05-16 12:12:25 +0200
committer Simon Brandhof <simon.brandhof@sonarsource.com> 2016-05-16 14:29:04 +0200
commit 23ee784e6d44f8462c8d160c7a113a64424bfb67 (patch)
tree 8aa9b59d7d1b89f737a445db8e40a9ccf994d32d /server
parent 74686b7e63cfd31166e6745fa6a81de3082d29a1 (diff)
download sonarqube-23ee784e6d44f8462c8d160c7a113a64424bfb67.tar.gz
download sonarqube-23ee784e6d44f8462c8d160c7a113a64424bfb67.zip
SONAR-6632 Fail to index rule if description is more than 32kb
Diffstat (limited to 'server')
-rw-r--r-- server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java 7
-rw-r--r-- server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java 2
-rw-r--r-- server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java 2
-rw-r--r-- server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java 9
-rw-r--r-- server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java 76
5 files changed, 87 insertions, 9 deletions
diff --git a/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java b/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java
index be7fd204737..1432b5c5b6e 100644
--- a/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java
+++ b/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettings.java
@@ -62,6 +62,13 @@ class DefaultIndexSettings {
.putArray("index.analysis.analyzer.search_words.filter",
"standard", "lowercase", "stop", "asciifolding", "porter_stem")
+ // English HTML analyzer
+ .put("index.analysis.analyzer.html_analyzer.type", "custom")
+ .put("index.analysis.analyzer.html_analyzer.tokenizer", "standard")
+ .putArray("index.analysis.analyzer.html_analyzer.filter",
+ "standard", "lowercase", "stop", "asciifolding", "porter_stem")
+ .putArray("index.analysis.analyzer.html_analyzer.char_filter", "html_strip")
+
// Edge NGram filter
.put("index.analysis.filter.gram_filter.type", "edgeNGram")
.put("index.analysis.filter.gram_filter.min_gram", 2)
diff --git a/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java b/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java
index ecfce0892e3..333427452da 100644
--- a/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java
+++ b/server/sonar-server/src/main/java/org/sonar/server/es/EsClient.java
@@ -233,7 +233,7 @@ public class EsClient implements Startable {
}
}
- protected Client nativeClient() {
+ public Client nativeClient() {
return nativeClient;
}
}
diff --git a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java
index 9578a8c0979..ae5c60bd87d 100644
--- a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java
+++ b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndex.java
@@ -180,7 +180,7 @@ public class RuleIndex extends BaseIndex {
// Human readable type of querying
qb.should(simpleQueryStringQuery(query.getQueryText())
.field(FIELD_RULE_NAME + "." + SEARCH_WORDS_SUFFIX, 20f)
- .field(FIELD_RULE_HTML_DESCRIPTION + "." + SEARCH_WORDS_SUFFIX, 3f)
+ .field(FIELD_RULE_HTML_DESCRIPTION, 3f)
.defaultOperator(SimpleQueryStringBuilder.Operator.AND)
).boost(20f);
diff --git a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java
index eb8e554d2d5..5b5db55752b 100644
--- a/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java
+++ b/server/sonar-server/src/main/java/org/sonar/server/rule/index/RuleIndexDefinition.java
@@ -21,6 +21,7 @@ package org.sonar.server.rule.index;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableSortedMap;
import java.util.Set;
import org.sonar.api.config.Settings;
import org.sonar.server.es.IndexDefinition;
@@ -94,7 +95,13 @@ public class RuleIndexDefinition implements IndexDefinition {
ruleMapping.stringFieldBuilder(FIELD_RULE_INTERNAL_KEY).disableSearch().docValues().build();
ruleMapping.stringFieldBuilder(FIELD_RULE_NAME).enableSorting().enableWordSearch().build();
- ruleMapping.stringFieldBuilder(FIELD_RULE_HTML_DESCRIPTION).enableWordSearch().build();
+ ruleMapping.setProperty(FIELD_RULE_HTML_DESCRIPTION, ImmutableSortedMap.of(
+ "type", "string",
+ "index", "analyzed",
+ "doc_values", "false",
+ "index_analyzer", "html_analyzer",
+ "search_analyzer", "html_analyzer"
+ ));
ruleMapping.stringFieldBuilder(FIELD_RULE_SEVERITY).docValues().build();
ruleMapping.stringFieldBuilder(FIELD_RULE_STATUS).docValues().build();
ruleMapping.stringFieldBuilder(FIELD_RULE_LANGUAGE).build();
diff --git a/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java b/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java
index dfbc11f13f8..6810d5f073a 100644
--- a/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java
+++ b/server/sonar-server/src/test/java/org/sonar/server/rule/index/RuleIndexDefinitionTest.java
@@ -19,24 +19,39 @@
*/
package org.sonar.server.rule.index;
+import com.google.common.collect.ImmutableMap;
+import java.util.List;
+import org.apache.commons.lang.StringUtils;
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
+import org.junit.Rule;
import org.junit.Test;
import org.sonar.api.config.Settings;
+import org.sonar.process.ProcessProperties;
+import org.sonar.server.es.EsTester;
import org.sonar.server.es.IndexDefinition;
import org.sonar.server.es.NewIndex;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.sonar.server.rule.index.RuleIndexDefinition.FIELD_RULE_HTML_DESCRIPTION;
+import static org.sonar.server.rule.index.RuleIndexDefinition.FIELD_RULE_KEY;
+import static org.sonar.server.rule.index.RuleIndexDefinition.FIELD_RULE_REPOSITORY;
+import static org.sonar.server.rule.index.RuleIndexDefinition.INDEX;
public class RuleIndexDefinitionTest {
- IndexDefinition.IndexDefinitionContext underTest = new IndexDefinition.IndexDefinitionContext();
+ Settings settings = new Settings();
+ RuleIndexDefinition underTest = new RuleIndexDefinition(settings);
+
+ @Rule
+ public EsTester tester = new EsTester().addDefinitions(underTest);
@Test
- public void define() {
- RuleIndexDefinition def = new RuleIndexDefinition(new Settings());
- def.define(underTest);
+ public void test_definition_of_index() {
+ IndexDefinition.IndexDefinitionContext context = new IndexDefinition.IndexDefinitionContext();
+ underTest.define(context);
- assertThat(underTest.getIndices()).hasSize(1);
- NewIndex ruleIndex = underTest.getIndices().get("rules");
+ assertThat(context.getIndices()).hasSize(1);
+ NewIndex ruleIndex = context.getIndices().get("rules");
assertThat(ruleIndex).isNotNull();
assertThat(ruleIndex.getTypes().keySet()).containsOnly("rule", "activeRule");
@@ -45,4 +60,53 @@ public class RuleIndexDefinitionTest {
assertThat(ruleIndex.getSettings().get("index.number_of_replicas")).isEqualTo("0");
}
+ @Test
+ public void enable_replica_if_clustering_is_enabled() {
+ settings.setProperty(ProcessProperties.CLUSTER_ACTIVATE, true);
+ IndexDefinition.IndexDefinitionContext context = new IndexDefinition.IndexDefinitionContext();
+ underTest.define(context);
+
+ NewIndex ruleIndex = context.getIndices().get("rules");
+ assertThat(ruleIndex.getSettings().get("index.number_of_replicas")).isEqualTo("1");
+ }
+
+ @Test
+ public void support_long_html_description() throws Exception {
+ String longText = StringUtils.repeat("hello ", 10_000);
+ // the following method fails if PUT fails
+ tester.putDocuments(INDEX, RuleIndexDefinition.TYPE_RULE, ImmutableMap.<String, Object>of(
+ FIELD_RULE_HTML_DESCRIPTION, longText,
+ FIELD_RULE_REPOSITORY, "squid",
+ FIELD_RULE_KEY, "S001"));
+ assertThat(tester.countDocuments(INDEX, RuleIndexDefinition.TYPE_RULE)).isEqualTo(1);
+
+ List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(longText);
+ for (AnalyzeResponse.AnalyzeToken token : tokens) {
+ assertThat(token.getTerm().length()).isEqualTo("hello".length());
+ }
+ }
+
+ @Test
+ public void remove_html_characters_of_html_description() {
+ String text = "<p>html <i>line</i></p>";
+ List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(text);
+
+ assertThat(tokens).extracting("term").containsOnly("html", "line");
+ }
+
+ @Test
+ public void sanitize_html_description_as_it_is_english() {
+ String text = "this is a small list of words";
+ // "this", "is", "a" and "of" are not indexed.
+ // Plural "words" is converted to singular "word"
+ List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(text);
+ assertThat(tokens).extracting("term").containsOnly("small", "list", "word");
+ }
+
+ private List<AnalyzeResponse.AnalyzeToken> analyzeIndexedTokens(String text) {
+ return tester.client().nativeClient().admin().indices().prepareAnalyze(INDEX,
+ text)
+ .setField(FIELD_RULE_HTML_DESCRIPTION)
+ .execute().actionGet().getTokens();
+ }
}