From 38030307f50b11df0be8872ee6402fdaf634fab8 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 10 Apr 2017 15:59:16 +0200 Subject: [PATCH] SONAR-8725 ws/components/suggestions emphasises matching for prefixes --- .../index/ComponentIndexDefinition.java | 4 +- .../es/DefaultIndexSettingsElement.java | 59 +++++++++++++++++++ .../ComponentTextSearchFeature.java | 24 ++++---- .../index/ComponentIndexFeatureExactTest.java | 48 +++++++++++++++ .../ComponentIndexFeaturePrefixTest.java | 5 ++ .../index/ComponentIndexScoreTest.java | 14 +++++ 6 files changed, 142 insertions(+), 12 deletions(-) create mode 100644 server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeatureExactTest.java diff --git a/server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentIndexDefinition.java b/server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentIndexDefinition.java index d345a29f753..e2d3f0cb3a0 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentIndexDefinition.java +++ b/server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentIndexDefinition.java @@ -26,6 +26,8 @@ import org.sonar.server.es.IndexType; import org.sonar.server.es.NewIndex; import static org.sonar.server.es.DefaultIndexSettingsElement.SEARCH_GRAMS_ANALYZER; +import static org.sonar.server.es.DefaultIndexSettingsElement.SEARCH_PREFIX_ANALYZER; +import static org.sonar.server.es.DefaultIndexSettingsElement.SEARCH_PREFIX_CASE_INSENSITIVE_ANALYZER; import static org.sonar.server.es.DefaultIndexSettingsElement.SORTABLE_ANALYZER; public class ComponentIndexDefinition implements IndexDefinition { @@ -38,7 +40,7 @@ public class ComponentIndexDefinition implements IndexDefinition { private static final int DEFAULT_NUMBER_OF_SHARDS = 5; - static final DefaultIndexSettingsElement[] NAME_ANALYZERS = {SORTABLE_ANALYZER, SEARCH_GRAMS_ANALYZER}; + static final DefaultIndexSettingsElement[] NAME_ANALYZERS = 
{SORTABLE_ANALYZER, SEARCH_PREFIX_ANALYZER, SEARCH_PREFIX_CASE_INSENSITIVE_ANALYZER, SEARCH_GRAMS_ANALYZER}; private final Settings settings; diff --git a/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettingsElement.java b/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettingsElement.java index d1b2d425352..77f35b6fd8c 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettingsElement.java +++ b/server/sonar-server/src/main/java/org/sonar/server/es/DefaultIndexSettingsElement.java @@ -93,6 +93,15 @@ public enum DefaultIndexSettingsElement { setArray("token_chars", "letter", "digit", "punctuation", "symbol"); } }, + PREFIX_TOKENIZER(TOKENIZER) { + + @Override + protected void setup() { + set(TYPE, "edgeNGram"); + set(MIN_GRAM, MINIMUM_NGRAM_LENGTH); + set(MAX_GRAM, MAXIMUM_NGRAM_LENGTH); + } + }, UUID_MODULE_TOKENIZER(TOKENIZER) { @Override @@ -145,6 +154,56 @@ public enum DefaultIndexSettingsElement { SEARCH_ANALYZER, getName()); } }, + INDEX_PREFIX_ANALYZER(ANALYZER) { + + @Override + protected void setup() { + set(TOKENIZER, PREFIX_TOKENIZER); + setArray(FILTER, TRIM); + } + }, + SEARCH_PREFIX_ANALYZER(ANALYZER) { + + @Override + protected void setup() { + set(TOKENIZER, WHITESPACE); + setArray(FILTER, TRIM); + } + + @Override + public SortedMap fieldMapping() { + return ImmutableSortedMap.of( + TYPE, STRING, + INDEX, ANALYZED, + ANALYZER, INDEX_PREFIX_ANALYZER.getName(), + SEARCH_ANALYZER, getName()); + } + }, + INDEX_PREFIX_CASE_INSENSITIVE_ANALYZER(ANALYZER) { + + @Override + protected void setup() { + set(TOKENIZER, PREFIX_TOKENIZER); + setArray(FILTER, TRIM, LOWERCASE); + } + }, + SEARCH_PREFIX_CASE_INSENSITIVE_ANALYZER(ANALYZER) { + + @Override + protected void setup() { + set(TOKENIZER, WHITESPACE); + setArray(FILTER, TRIM, LOWERCASE); + } + + @Override + public SortedMap fieldMapping() { + return ImmutableSortedMap.of( + TYPE, STRING, + INDEX, ANALYZED, + ANALYZER, 
INDEX_PREFIX_CASE_INSENSITIVE_ANALYZER.getName(), + SEARCH_ANALYZER, getName()); + } + }, USER_INDEX_GRAMS_ANALYZER(ANALYZER) { @Override diff --git a/server/sonar-server/src/main/java/org/sonar/server/es/textsearch/ComponentTextSearchFeature.java b/server/sonar-server/src/main/java/org/sonar/server/es/textsearch/ComponentTextSearchFeature.java index 2cfd5e363f7..82c397c8bab 100644 --- a/server/sonar-server/src/main/java/org/sonar/server/es/textsearch/ComponentTextSearchFeature.java +++ b/server/sonar-server/src/main/java/org/sonar/server/es/textsearch/ComponentTextSearchFeature.java @@ -28,12 +28,14 @@ import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.sonar.server.es.DefaultIndexSettings; +import org.sonar.server.es.DefaultIndexSettingsElement; import org.sonar.server.es.textsearch.ComponentTextSearchQueryFactory.ComponentTextSearchQuery; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; -import static org.elasticsearch.index.query.QueryBuilders.prefixQuery; import static org.sonar.server.es.DefaultIndexSettingsElement.SEARCH_GRAMS_ANALYZER; +import static org.sonar.server.es.DefaultIndexSettingsElement.SEARCH_PREFIX_ANALYZER; +import static org.sonar.server.es.DefaultIndexSettingsElement.SEARCH_PREFIX_CASE_INSENSITIVE_ANALYZER; import static org.sonar.server.es.DefaultIndexSettingsElement.SORTABLE_ANALYZER; public enum ComponentTextSearchFeature { @@ -48,16 +50,16 @@ public enum ComponentTextSearchFeature { PREFIX { @Override public QueryBuilder getQuery(ComponentTextSearchQuery query) { - return prefixAndPartialQuery(query.getQueryText(), query.getFieldName(), query.getFieldName()) - .boost(2f); + return prefixAndPartialQuery(query.getQueryText(), query.getFieldName(), SEARCH_PREFIX_ANALYZER) + .boost(3f); } }, PREFIX_IGNORE_CASE { @Override public 
QueryBuilder getQuery(ComponentTextSearchQuery query) { String lowerCaseQueryText = query.getQueryText().toLowerCase(Locale.getDefault()); - return prefixAndPartialQuery(lowerCaseQueryText, SORTABLE_ANALYZER.subField(query.getFieldName()), query.getFieldName()) - .boost(3f); + return prefixAndPartialQuery(lowerCaseQueryText, query.getFieldName(), SEARCH_PREFIX_CASE_INSENSITIVE_ANALYZER) + .boost(2f); } }, PARTIAL { @@ -65,7 +67,7 @@ public enum ComponentTextSearchFeature { public QueryBuilder getQuery(ComponentTextSearchQuery query) { BoolQueryBuilder queryBuilder = boolQuery(); split(query.getQueryText()) - .map(text -> partialTermQuery(text, query.getFieldName())) + .map(text -> tokenQuery(text, query.getFieldName(), SEARCH_GRAMS_ANALYZER)) .forEach(queryBuilder::must); return queryBuilder .boost(0.5f); @@ -87,7 +89,7 @@ public enum ComponentTextSearchFeature { .filter(StringUtils::isNotEmpty); } - protected BoolQueryBuilder prefixAndPartialQuery(String queryText, String fieldName, String originalFieldName) { + protected BoolQueryBuilder prefixAndPartialQuery(String queryText, String originalFieldName, DefaultIndexSettingsElement analyzer) { BoolQueryBuilder queryBuilder = boolQuery(); AtomicBoolean first = new AtomicBoolean(true); @@ -95,19 +97,19 @@ public enum ComponentTextSearchFeature { .map(queryTerm -> { if (first.getAndSet(false)) { - return prefixQuery(fieldName, queryTerm); + return tokenQuery(queryTerm, originalFieldName, analyzer); } - return partialTermQuery(queryTerm, originalFieldName); + return tokenQuery(queryTerm, originalFieldName, SEARCH_GRAMS_ANALYZER); }) .forEach(queryBuilder::must); return queryBuilder; } - protected MatchQueryBuilder partialTermQuery(String queryTerm, String fieldName) { + protected MatchQueryBuilder tokenQuery(String queryTerm, String fieldName, DefaultIndexSettingsElement analyzer) { // We will truncate the search to the maximum length of nGrams in the index. // Otherwise the search would for sure not find any results. 
String truncatedQuery = StringUtils.left(queryTerm, DefaultIndexSettings.MAXIMUM_NGRAM_LENGTH); - return matchQuery(SEARCH_GRAMS_ANALYZER.subField(fieldName), truncatedQuery); + return matchQuery(analyzer.subField(fieldName), truncatedQuery); } } diff --git a/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeatureExactTest.java b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeatureExactTest.java new file mode 100644 index 00000000000..28411c39cdb --- /dev/null +++ b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeatureExactTest.java @@ -0,0 +1,48 @@ +/* + * SonarQube + * Copyright (C) 2009-2017 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package org.sonar.server.component.index; + +import org.junit.Before; +import org.junit.Test; +import org.sonar.server.es.textsearch.ComponentTextSearchFeature; + +public class ComponentIndexFeatureExactTest extends ComponentIndexTest { + + @Before + public void before() { + features.set(ComponentTextSearchFeature.EXACT_IGNORE_CASE); + } + + @Test + public void should_find_perfect_match() { + assertFileMatches("ComponentX", "ComponentX"); + } + + @Test + public void should_not_find_partial_match() { + assertNoFileMatches("omp", "ComponentX"); + } + + @Test + public void should_not_find_prefix_match() { + assertNoFileMatches("Comp", "ComponentX"); + } +} diff --git a/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeaturePrefixTest.java b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeaturePrefixTest.java index 55b8fe0d235..ed010379f5d 100644 --- a/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeaturePrefixTest.java +++ b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexFeaturePrefixTest.java @@ -30,6 +30,11 @@ public class ComponentIndexFeaturePrefixTest extends ComponentIndexTest { features.set(ComponentTextSearchFeature.PREFIX, ComponentTextSearchFeature.PREFIX_IGNORE_CASE); } + @Test + public void should_find_prefix() { + assertResultOrder("comp", "component"); + } + @Test public void should_find_exact_match() { assertResultOrder("component.js", "component.js"); diff --git a/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexScoreTest.java b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexScoreTest.java index 2851892cc03..bad8ea15400 100644 --- a/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexScoreTest.java +++ b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexScoreTest.java @@ -83,6 +83,13 @@ 
public class ComponentIndexScoreTest extends ComponentIndexTest { "ThisIsAVeryLongNameToSearchForAndItEndsDifferently.java"); } + @Test + public void scoring_perfect_match() { + assertResultOrder("SonarQube", + "SonarQube", + "SonarQube SCM Git"); + } + @Test public void scoring_perfect_match_dispite_case_changes() { assertResultOrder("sonarqube", @@ -90,6 +97,13 @@ public class ComponentIndexScoreTest extends ComponentIndexTest { "SonarQube SCM Git"); } + @Test + public void scoring_perfect_match_with_matching_case_higher_than_without_matching_case() { + assertResultOrder("sonarqube", + "sonarqube", + "SonarQube"); + } + @Test public void do_not_match_wrong_file_extension() { ComponentDto file1 = indexFile("MyClass.java"); -- 2.39.5