import javax.annotation.Nullable;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.join.ScoreMode;
+import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
// NOTE(review): the three arrays below are presumably bucket boundaries for range facets
// (compare RangeMeasureFacet(NCLOC_KEY, LINES_THRESHOLDS)); their use sites are not visible here — confirm.
private static final double[] COVERAGE_THRESHOLDS = {30D, 50D, 70D, 80D};
private static final double[] SECURITY_REVIEW_RATING_THRESHOLDS = {30D, 50D, 70D, 80D};
private static final double[] DUPLICATIONS_THRESHOLDS = {3D, 5D, 10D, 20D};
+ private static final int SCROLL_SIZE = 5000; // passed to SearchSourceBuilder.size(...) when the telemetry search request is built
+ private static final TimeValue KEEP_ALIVE_SCROLL_DURATION = TimeValue.timeValueMinutes(1L); // one-minute keep-alive given to .scroll(...) on the telemetry search request
public enum Facet {
NCLOC(new RangeMeasureFacet(NCLOC_KEY, LINES_THRESHOLDS)),
}
/**
 * Runs the telemetry search built by {@code buildProjectMeasureSearchRequest()} and converts the
 * single response into a {@link ProjectMeasuresStatistics} via {@code buildProjectMeasuresStatistics}.
 *
 * <p>NOTE(review): the request is created with a scroll keep-alive, but only this first response is
 * consumed and nothing in this method clears the scroll afterwards. Confirm whether the scroll
 * context should be released explicitly, or whether the scroll is only there to obtain an exact
 * total hit count (in which case exact total-hits tracking on a plain search may be a better fit).
 *
 * @return the statistics derived from the search response
 */
public ProjectMeasuresStatistics searchTelemetryStatistics() {
+ SearchRequest projectMeasuresSearchRequest = buildProjectMeasureSearchRequest(); // static helper; takes no arguments
+ SearchResponse projectMeasures = client.search(projectMeasuresSearchRequest); // single round trip; no follow-up scroll requests are issued here
+ return buildProjectMeasuresStatistics(projectMeasures);
+ }
+
+ private static SearchRequest buildProjectMeasureSearchRequest() {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.fetchSource(false)
.size(0);
.minDocCount(1)
.order(BucketOrder.count(false))
.subAggregation(sum(FIELD_NCLOC_DISTRIBUTION_NCLOC).field(FIELD_NCLOC_DISTRIBUTION_NCLOC))));
-
searchSourceBuilder.aggregation(AggregationBuilders.nested(NCLOC_KEY, FIELD_MEASURES)
.subAggregation(AggregationBuilders.filter(NCLOC_KEY + "_filter", termQuery(FIELD_MEASURES_MEASURE_KEY, NCLOC_KEY))
.subAggregation(sum(NCLOC_KEY + "_filter_sum").field(FIELD_MEASURES_MEASURE_VALUE))));
+ searchSourceBuilder.size(SCROLL_SIZE);
- ProjectMeasuresStatistics.Builder statistics = ProjectMeasuresStatistics.builder();
-
- SearchResponse response = client.search(EsClient.prepareSearch(TYPE_PROJECT_MEASURES.getMainType())
- .source(searchSourceBuilder));
+ return EsClient.prepareSearch(TYPE_PROJECT_MEASURES.getMainType()).source(searchSourceBuilder).scroll(KEEP_ALIVE_SCROLL_DURATION);
+ }
+ private static ProjectMeasuresStatistics buildProjectMeasuresStatistics(SearchResponse response) {
+ ProjectMeasuresStatistics.Builder statistics = ProjectMeasuresStatistics.builder();
statistics.setProjectCount(getTotalHits(response.getHits().getTotalHits()).value);
statistics.setProjectCountByLanguage(termsToMap(response.getAggregations().get(FIELD_LANGUAGES)));
+
Function<Terms.Bucket, Long> bucketToNcloc = bucket -> Math.round(((Sum) bucket.getAggregations().get(FIELD_NCLOC_DISTRIBUTION_NCLOC)).getValue());
Map<String, Long> nclocByLanguage = Stream.of((Nested) response.getAggregations().get(FIELD_NCLOC_DISTRIBUTION))
.map(nested -> (Terms) nested.getAggregations().get(nested.getName() + "_terms"))
entry("java", 500L), entry("cs", 250L), entry("js", 50L), entry("python", 100L), entry("kotlin", 404L));
}
+ @Test
+ public void search_statistics_for_large_instances() { // indexes 25000 identical projects and checks that counts and per-language NCLOC sums cover all of them
+ int nbProjects = 25000; // NOTE(review): presumably chosen to exceed both the 5000 search size and Elasticsearch's default total-hits cap — confirm
+ int javaLocByProjects = 100;
+ int jsLocByProjects = 900;
+ int csLocByProjects = 2;
+
+ ProjectMeasuresDoc[] documents = IntStream.range(0, nbProjects).mapToObj(i -> // index i is unused: every document carries the same measures and languages
+ newDoc("lines", 10, "coverage", 80)
+ .setLanguages(asList("java", "cs", "js"))
+ .setNclocLanguageDistributionFromMap(ImmutableMap.of("java", javaLocByProjects, "cs", csLocByProjects, "js", jsLocByProjects))).toArray(ProjectMeasuresDoc[]::new);
+
+ es.putDocuments(TYPE_PROJECT_MEASURES, documents);
+
+ ProjectMeasuresStatistics result = underTest.searchTelemetryStatistics();
+
+ assertThat(result.getProjectCount()).isEqualTo(nbProjects); // the total must equal the number of indexed documents
+ assertThat(result.getProjectCountByLanguage())
+ .hasSize(3)
+ .containsEntry("java", (long) nbProjects) // each project declares all three languages, so every count equals nbProjects
+ .containsEntry("cs", (long) nbProjects)
+ .containsEntry("js", (long) nbProjects);
+
+ assertThat(result.getNclocByLanguage())
+ .hasSize(3)
+ .containsEntry("java",(long) nbProjects * javaLocByProjects) // the cast binds to nbProjects first, so the product is computed as a long
+ .containsEntry("cs",(long) nbProjects * csLocByProjects)
+ .containsEntry("js",(long) nbProjects * jsLocByProjects);
+ }
+
@Test
public void search_statistics_should_ignore_applications() {
es.putDocuments(TYPE_PROJECT_MEASURES,