source.dussan.org Git - sonarqube.git/commitdiff
SONAR-9077 calculate matching characters for component
author: Daniel Schwarz <daniel.schwarz@sonarsource.com>
Mon, 10 Apr 2017 12:17:14 +0000 (14:17 +0200)
committer: Daniel Schwarz <bartfastiel@users.noreply.github.com>
Thu, 20 Apr 2017 07:48:52 +0000 (09:48 +0200)
server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentHit.java
server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentHitsPerQualifier.java
server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentIndex.java
server/sonar-server/src/main/java/org/sonar/server/component/index/ComponentIndexDefinition.java
server/sonar-server/src/main/java/org/sonar/server/es/NewIndex.java
server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexHighlightTest.java [new file with mode: 0644]

index e17eeaa619bcf789e5af8ce38797e4e7ae3a3e5d..1968f6693e471336bcb4587c2f6f53831d552a02 100644 (file)
  */
 package org.sonar.server.component.index;
 
-import java.util.Arrays;
 import java.util.List;
+import java.util.Optional;
+import org.elasticsearch.common.text.Text;
 import org.elasticsearch.search.SearchHit;
 import org.sonar.core.util.stream.MoreCollectors;
 
+import static java.util.Arrays.stream;
+import static java.util.Optional.ofNullable;
+import static org.sonar.server.component.index.ComponentIndexDefinition.FIELD_NAME;
+
 public class ComponentHit {
 
   private final String uuid;
+  private final Optional<String> highlightedText;
+
+  /**
+   * Builds a component hit from a search hit: the hit id is kept as the uuid, and the
+   * highlighted fragment of the name field (if any) is extracted once and stored.
+   */
+  private ComponentHit(SearchHit hit) {
+    this.uuid = hit.getId();
+    this.highlightedText = getHighlightedText(hit);
+  }
 
-  private ComponentHit(String uuid) {
-    this.uuid = uuid;
+  /**
+   * Extracts the first highlighted fragment of the name field from the given search hit.
+   * The result is empty when the hit has no highlight fields, no entry for the name field,
+   * no fragments, or an empty fragment array.
+   */
+  private static Optional<String> getHighlightedText(SearchHit hit) {
+    // Optional.map already turns a null result into an empty Optional.
+    Optional<Text> firstFragment = ofNullable(hit.getHighlightFields())
+      .map(highlightFields -> highlightFields.get(FIELD_NAME))
+      .map(nameHighlight -> nameHighlight.getFragments())
+      .flatMap(nameFragments -> stream(nameFragments).findFirst());
+    return firstFragment.map(Text::string);
   }
 
   public String getUuid() {
@@ -37,11 +52,12 @@ public class ComponentHit {
   }
 
   public static List<ComponentHit> fromSearchHits(SearchHit... hits) {
-    return Arrays.stream(hits).map(ComponentHit::fromSearchHit)
+    // One ComponentHit is built per search hit; the constructor also extracts the highlighted text.
+    return stream(hits)
+      .map(ComponentHit::new)
       .collect(MoreCollectors.toList(hits.length));
   }
 
-  public static ComponentHit fromSearchHit(SearchHit hit) {
-    return new ComponentHit(hit.getId());
+  /**
+   * @return the highlighted text that was extracted from the search hit when this object was
+   *         built, or an empty Optional if none was available
+   */
+  public Optional<String> getHighlightedText() {
+    return highlightedText;
   }
 }
index 4aa32e1f3baa9bbf71747f5dd26ee350f7ec6402..be3ad4ee3be22483e19f5594ba9aef3bf78a2bd2 100644 (file)
@@ -42,6 +42,10 @@ public class ComponentHitsPerQualifier {
     return hits.stream().map(ComponentHit::getUuid).collect(MoreCollectors.toList(hits.size()));
   }
 
+  /**
+   * @return the component hits held by this object (the backing list itself, not a copy)
+   */
+  public List<ComponentHit> getHits() {
+    return hits;
+  }
+
   public long getTotalHits() {
     return totalHits;
   }
index 86f5fbb28cb2b65416ae5cd03b07d274ad53b3cd..d9176c40a13abe91d089af8d5a0043acdcf97865 100644 (file)
 package org.sonar.server.component.index;
 
 import com.google.common.annotations.VisibleForTesting;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.stream.Stream;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.index.query.BoolQueryBuilder;
@@ -35,6 +37,7 @@ import org.elasticsearch.search.aggregations.bucket.filters.InternalFilters;
 import org.elasticsearch.search.aggregations.bucket.filters.InternalFilters.Bucket;
 import org.elasticsearch.search.aggregations.metrics.tophits.InternalTopHits;
 import org.elasticsearch.search.aggregations.metrics.tophits.TopHitsBuilder;
+import org.elasticsearch.search.highlight.HighlightBuilder;
 import org.sonar.core.util.stream.MoreCollectors;
 import org.sonar.server.es.EsClient;
 import org.sonar.server.es.textsearch.ComponentTextSearchFeature;
@@ -48,6 +51,7 @@ import static org.sonar.server.component.index.ComponentIndexDefinition.FIELD_KE
 import static org.sonar.server.component.index.ComponentIndexDefinition.FIELD_NAME;
 import static org.sonar.server.component.index.ComponentIndexDefinition.FIELD_QUALIFIER;
 import static org.sonar.server.component.index.ComponentIndexDefinition.INDEX_TYPE_COMPONENT;
+import static org.sonar.server.component.index.ComponentIndexDefinition.NAME_ANALYZERS;
 
 public class ComponentIndex {
 
@@ -62,6 +66,19 @@ public class ComponentIndex {
     this.authorizationTypeSupport = authorizationTypeSupport;
   }
 
+  /**
+   * Creates the highlighter definition for the component name field. It uses the "fvh"
+   * highlighter type and combines matches from the name field itself and from the
+   * sub-field of each analyzer listed in NAME_ANALYZERS.
+   */
+  private static HighlightBuilder.Field createHighlighter() {
+    // The name field comes first, followed by one sub-field per name analyzer.
+    String[] matchedFields = Stream.concat(
+      Stream.of(FIELD_NAME),
+      Arrays.stream(NAME_ANALYZERS).map(analyzer -> analyzer.subField(FIELD_NAME)))
+      .toArray(String[]::new);
+
+    HighlightBuilder.Field highlighter = new HighlightBuilder.Field(FIELD_NAME);
+    highlighter.highlighterType("fvh");
+    highlighter.matchedFields(matchedFields);
+    return highlighter;
+  }
+
   public List<ComponentHitsPerQualifier> search(ComponentIndexQuery query) {
     return search(query, ComponentTextSearchFeature.values());
   }
@@ -94,7 +111,11 @@ public class ComponentIndex {
   }
 
   private static TopHitsBuilder createSubAggregation(ComponentIndexQuery query) {
-    TopHitsBuilder sub = AggregationBuilders.topHits(DOCS_AGGREGATION_NAME);
+    // Top hits are returned with name highlighting: the "html" encoder is used for the
+    // fragments and each match is wrapped in <mark>...</mark>.
+    TopHitsBuilder sub = AggregationBuilders.topHits(DOCS_AGGREGATION_NAME)
+      .setHighlighterEncoder("html")
+      .setHighlighterPreTags("<mark>")
+      .setHighlighterPostTags("</mark>")
+      .addHighlightedField(createHighlighter());
     query.getLimit().ifPresent(sub::setSize);
     return sub.setFetchSource(false);
   }
index a273241ffb7b263fde6a02ed80a5fa70ea65c9d1..d345a29f753110b85ace0ef2555561b8c1bbad93 100644 (file)
@@ -20,6 +20,7 @@
 package org.sonar.server.component.index;
 
 import org.sonar.api.config.Settings;
+import org.sonar.server.es.DefaultIndexSettingsElement;
 import org.sonar.server.es.IndexDefinition;
 import org.sonar.server.es.IndexType;
 import org.sonar.server.es.NewIndex;
@@ -37,6 +38,8 @@ public class ComponentIndexDefinition implements IndexDefinition {
 
   private static final int DEFAULT_NUMBER_OF_SHARDS = 5;
 
+  static final DefaultIndexSettingsElement[] NAME_ANALYZERS = {SORTABLE_ANALYZER, SEARCH_GRAMS_ANALYZER};
+
   private final Settings settings;
 
   public ComponentIndexDefinition(Settings settings) {
@@ -54,8 +57,11 @@ public class ComponentIndexDefinition implements IndexDefinition {
 
     mapping.stringFieldBuilder(FIELD_PROJECT_UUID).build();
     mapping.stringFieldBuilder(FIELD_KEY).addSubFields(SORTABLE_ANALYZER).build();
-    mapping.stringFieldBuilder(FIELD_NAME).addSubFields(SORTABLE_ANALYZER, SEARCH_GRAMS_ANALYZER).build();
+    mapping.stringFieldBuilder(FIELD_NAME)
+      .termVectorWithPositionOffsets()
+      .addSubFields(NAME_ANALYZERS)
+      .build();
+
     mapping.stringFieldBuilder(FIELD_QUALIFIER).build();
-    mapping.setEnableSource(false);
   }
 }
index 096a45ebf5abbce252a735862af523619c1c9fca..9e0b9d7f60adaa259fda4141fbd8db42bff775b3 100644 (file)
@@ -202,6 +202,7 @@ public class NewIndex {
     private final String fieldName;
     private boolean disableSearch = false;
     private boolean disableNorms = false;
+    private boolean termVectorWithPositionOffsets = false;
     private SortedMap<String, Object> subFields = Maps.newTreeMap();
 
     private StringFieldBuilder(NewIndexType indexType, String fieldName) {
@@ -238,6 +239,14 @@ public class NewIndex {
       return this;
     }
 
+    /**
+     * Requests term vectors with positions and offsets for this field.
+     * Position offset term vectors are required for the fast_vector_highlighter (fvh).
+     *
+     * @return this builder, to allow call chaining
+     */
+    public StringFieldBuilder termVectorWithPositionOffsets() {
+      this.termVectorWithPositionOffsets = true;
+      return this;
+    }
+
     /**
      * "index: no" -> Don’t index this field at all. This field will not be searchable.
      * By default field is "not_analyzed": it is searchable, but index the value exactly
@@ -257,16 +266,38 @@ public class NewIndex {
           "norms", ImmutableMap.of("enabled", String.valueOf(!disableNorms))));
       } else {
         hash.put("type", "multi_field");
+
         Map<String, Object> multiFields = new TreeMap<>(subFields);
+
+        if (termVectorWithPositionOffsets) {
+          multiFields.entrySet().forEach(entry -> {
+            Object subFieldMapping = entry.getValue();
+            if (subFieldMapping instanceof Map) {
+              entry.setValue(
+                addFieldToMapping(
+                  (Map<String, String>) subFieldMapping,
+                  "term_vector", "with_positions_offsets"));
+            }
+          });
+        }
+
         multiFields.put(fieldName, ImmutableMap.of(
           "type", "string",
           "index", "not_analyzed",
+          "term_vector", termVectorWithPositionOffsets ? "with_positions_offsets" : "no",
           "norms", ImmutableMap.of("enabled", "false")));
+
         hash.put("fields", multiFields);
       }
 
       return indexType.setProperty(fieldName, hash);
     }
+
+    /**
+     * Returns an immutable sorted copy of {@code source} in which {@code key} is mapped to
+     * {@code value}, replacing any existing entry for that key. {@code source} is not modified.
+     */
+    private static SortedMap<String, String> addFieldToMapping(Map<String, String> source, String key, String value) {
+      // Copy first: the given mapping may be immutable.
+      SortedMap<String, String> mutable = new TreeMap<>(source);
+      mutable.put(key, value);
+      return ImmutableSortedMap.copyOf(mutable);
+    }
   }
 
   public static class NestedFieldBuilder {
diff --git a/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexHighlightTest.java b/server/sonar-server/src/test/java/org/sonar/server/component/index/ComponentIndexHighlightTest.java
new file mode 100644 (file)
index 0000000..cef70eb
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * SonarQube
+ * Copyright (C) 2009-2016 SonarSource SA
+ * mailto:contact AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.server.component.index;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import org.junit.Test;
+import org.sonar.api.resources.Qualifiers;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Checks the highlighted text returned with component search hits: the matching part of the
+ * name is wrapped in {@code <mark>} tags and HTML special characters are escaped.
+ */
+public class ComponentIndexHighlightTest extends ComponentIndexTest {
+
+  @Test
+  public void should_highlight_prefix() {
+    assertHighlighting("quick brown fox", "brown", "quick <mark>brown</mark> fox");
+  }
+
+  @Test
+  public void should_escape_html() {
+    assertHighlighting("quick< brown fox", "brown", "quick&lt; <mark>brown</mark> fox");
+  }
+
+  /**
+   * Indexes a file named {@code fileName}, searches for {@code search} among components with the
+   * FILE qualifier, and asserts that the highlighted texts of the returned hits are exactly
+   * {@code expectedHighlighting}.
+   * NOTE(review): indexFile, index and features are presumably inherited from ComponentIndexTest - confirm.
+   */
+  private void assertHighlighting(String fileName, String search, String expectedHighlighting) {
+    indexFile(fileName);
+
+    ComponentIndexQuery query = ComponentIndexQuery.builder()
+      .setQuery(search)
+      .setQualifiers(Collections.singletonList(Qualifiers.FILE))
+      .build();
+    List<ComponentHitsPerQualifier> results = index.search(query, features.get());
+
+    // Optional::get fails the test if any hit carries no highlighted text.
+    assertThat(results).flatExtracting(ComponentHitsPerQualifier::getHits)
+      .extracting(ComponentHit::getHighlightedText)
+      .extracting(Optional::get)
+      .containsExactly(expectedHighlighting);
+  }
+}