Browse Source

SONAR-11873 Memory leak when Compute Engine persists file duplications

tags/7.8
Simon Brandhof 5 years ago
parent
commit
251688c711

server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/step/DuplicationDataMeasuresStep.java → server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/step/PersistDuplicationDataStep.java View File

@@ -19,6 +19,8 @@
*/
package org.sonar.ce.task.projectanalysis.step;

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringEscapeUtils;
import org.sonar.ce.task.projectanalysis.component.Component;
import org.sonar.ce.task.projectanalysis.component.CrawlerDepthLimit;
@@ -34,10 +36,13 @@ import org.sonar.ce.task.projectanalysis.duplication.InProjectDuplicate;
import org.sonar.ce.task.projectanalysis.duplication.InnerDuplicate;
import org.sonar.ce.task.projectanalysis.duplication.TextBlock;
import org.sonar.ce.task.projectanalysis.measure.Measure;
import org.sonar.ce.task.projectanalysis.measure.MeasureRepository;
import org.sonar.ce.task.projectanalysis.measure.MeasureToMeasureDto;
import org.sonar.ce.task.projectanalysis.metric.Metric;
import org.sonar.ce.task.projectanalysis.metric.MetricRepository;
import org.sonar.ce.task.step.ComputationStep;
import org.sonar.db.DbClient;
import org.sonar.db.DbSession;
import org.sonar.db.measure.LiveMeasureDto;

import static com.google.common.collect.Iterables.isEmpty;
import static org.sonar.api.measures.CoreMetrics.DUPLICATIONS_DATA_KEY;
@@ -46,32 +51,50 @@ import static org.sonar.ce.task.projectanalysis.component.ComponentVisitor.Order
/**
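* Compute the duplication data measures of files, based on the {@link DuplicationRepository}, and persist them
* directly into the database by small batches, so that the generated XML documents are not all kept in memory.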
*/
public class DuplicationDataMeasuresStep implements ComputationStep {
public class PersistDuplicationDataStep implements ComputationStep {

private final MeasureRepository measureRepository;
private final DbClient dbClient;
private final TreeRootHolder treeRootHolder;
private final DuplicationRepository duplicationRepository;
private final MeasureToMeasureDto measureToMeasureDto;
private final Metric duplicationDataMetric;

public DuplicationDataMeasuresStep(TreeRootHolder treeRootHolder, MetricRepository metricRepository, MeasureRepository measureRepository,
DuplicationRepository duplicationRepository) {
this.measureRepository = measureRepository;
public PersistDuplicationDataStep(DbClient dbClient, TreeRootHolder treeRootHolder, MetricRepository metricRepository,
DuplicationRepository duplicationRepository, MeasureToMeasureDto measureToMeasureDto) {
this.dbClient = dbClient;
this.treeRootHolder = treeRootHolder;
this.duplicationRepository = duplicationRepository;
this.measureToMeasureDto = measureToMeasureDto;
this.duplicationDataMetric = metricRepository.getByKey(DUPLICATIONS_DATA_KEY);
}

/**
 * Crawls the component tree (down to {@code CrawlerDepthLimit.FILE}) with a {@link DuplicationVisitor} that
 * persists duplication data measures, then records the number of persisted measures in the
 * {@code insertsOrUpdates} statistic of the step.
 *
 * @param context the step context, used to publish the {@code insertsOrUpdates} statistic
 */
@Override
public void execute(ComputationStep.Context context) {
  boolean supportUpsert = dbClient.getDatabase().getDialect().supportsUpsert();

  // batch mode of DB session does not have benefits:
  // - on postgres the multi-row upserts are the major optimization and have exactly the same
  // performance between batch and non-batch sessions
  // - on other dbs the sequence of inserts and updates, in order to emulate upserts,
  // breaks the constraint of batch sessions (consecutive requests should have the same
  // structure (same PreparedStatement))
  try (DbSession dbSession = dbClient.openSession(false);
    DuplicationVisitor visitor = new DuplicationVisitor(dbSession, supportUpsert)) {
    new DepthTraversalTypeAwareCrawler(visitor).visit(treeRootHolder.getRoot());
    // Flush the measures still waiting in the buffer BEFORE reading the counter. Resources of a
    // try-with-resources are closed only after the body has completed, so relying on close() alone
    // would publish a statistic that misses the last batch (and is always 0 on small projects).
    // The later call made by close() is harmless: persisting an empty buffer does nothing.
    visitor.persist(true);
    context.getStatistics().add("insertsOrUpdates", visitor.insertsOrUpdates);
  }
}

private class DuplicationVisitor extends TypeAwareVisitorAdapter {
private class DuplicationVisitor extends TypeAwareVisitorAdapter implements AutoCloseable {
private final DbSession dbSession;
private final boolean supportUpsert;
private final List<LiveMeasureDto> nonPersistedBuffer = new ArrayList<>();
private int insertsOrUpdates = 0;

private DuplicationVisitor() {
private DuplicationVisitor(DbSession dbSession, boolean supportUpsert) {
super(CrawlerDepthLimit.FILE, PRE_ORDER);
this.dbSession = dbSession;
this.supportUpsert = supportUpsert;
}

@Override
@@ -83,14 +106,41 @@ public class DuplicationDataMeasuresStep implements ComputationStep {
}

private void computeDuplications(Component component, Iterable<Duplication> duplications) {
String duplicationXml = createXmlDuplications(component.getDbKey(), duplications);
measureRepository.add(
component,
duplicationDataMetric,
Measure.newMeasureBuilder().create(duplicationXml));
Measure measure = generateMeasure(component.getDbKey(), duplications);
LiveMeasureDto dto = measureToMeasureDto.toLiveMeasureDto(measure, duplicationDataMetric, component);
nonPersistedBuffer.add(dto);
persist(false);
}

/**
 * Writes the buffered measures to the database, commits the session and empties the buffer.
 * Does nothing when the buffer is empty, or when it is not full yet and {@code force} is false.
 *
 * @param force {@code true} to persist whatever is in the buffer, even if the batch is not full
 */
private void persist(boolean force) {
  // Persist a bunch of 100 or less measures. That prevents from having more than 100 XML documents
  // in memory. Consumption of memory does not explode with the number of duplications and is kept
  // under control.
  // Measures are upserted and transactions are committed every 100 rows (arbitrary number to
  // maximize the performance of a multi-rows request on PostgreSQL).
  // On PostgreSQL, a bunch of 100 measures is persisted into a single request (multi-rows upsert).
  // On other DBs, measures are persisted one by one, with update-or-insert requests.
  final int maxBufferSize = 100;
  if (nonPersistedBuffer.isEmpty()) {
    return;
  }
  // ">=" and not ">": the buffer must be flushed as soon as it holds 100 measures, otherwise
  // batches contain 101 rows, which contradicts the limit documented above.
  boolean bufferIsFull = nonPersistedBuffer.size() >= maxBufferSize;
  if (!force && !bufferIsFull) {
    return;
  }
  if (supportUpsert) {
    dbClient.liveMeasureDao().upsert(dbSession, nonPersistedBuffer);
  } else {
    nonPersistedBuffer.forEach(d -> dbClient.liveMeasureDao().insertOrUpdate(dbSession, d));
  }
  insertsOrUpdates += nonPersistedBuffer.size();
  nonPersistedBuffer.clear();
  dbSession.commit();
}

/**
 * Closing the visitor forces the persistence of the measures that are still waiting in the buffer.
 */
@Override
public void close() {
  final boolean force = true;
  persist(force);
}

private String createXmlDuplications(String componentDbKey, Iterable<Duplication> duplications) {
private Measure generateMeasure(String componentDbKey, Iterable<Duplication> duplications) {
StringBuilder xml = new StringBuilder();
xml.append("<duplications>");
for (Duplication duplication : duplications) {
@@ -102,7 +152,7 @@ public class DuplicationDataMeasuresStep implements ComputationStep {
xml.append("</g>");
}
xml.append("</duplications>");
return xml.toString();
return Measure.newMeasureBuilder().create(xml.toString());
}

private void processDuplicationBlock(StringBuilder xml, Duplicate duplicate, String componentDbKey) {
@@ -140,7 +190,7 @@ public class DuplicationDataMeasuresStep implements ComputationStep {

@Override
public String getDescription() {
return "Compute duplication data measures";
return "Persist duplication data";
}

}

+ 1
- 1
server/sonar-ce-task-projectanalysis/src/main/java/org/sonar/ce/task/projectanalysis/step/ReportComputationSteps.java View File

@@ -67,7 +67,6 @@ public class ReportComputationSteps extends AbstractComputationSteps {
CommentMeasuresStep.class,
CustomMeasuresCopyStep.class,
DuplicationMeasuresStep.class,
DuplicationDataMeasuresStep.class,
NewSizeMeasuresStep.class,
LanguageDistributionMeasuresStep.class,
UnitTestMeasuresStep.class,
@@ -94,6 +93,7 @@ public class ReportComputationSteps extends AbstractComputationSteps {
PersistAnalysisPropertiesStep.class,
PersistMeasuresStep.class,
PersistLiveMeasuresStep.class,
PersistDuplicationDataStep.class,
PersistAdHocRulesStep.class,
PersistIssuesStep.class,
PersistProjectLinksStep.class,

+ 0
- 139
server/sonar-ce-task-projectanalysis/src/test/java/org/sonar/ce/task/projectanalysis/step/DuplicationDataMeasuresStepTest.java View File

@@ -1,139 +0,0 @@
/*
* SonarQube
* Copyright (C) 2009-2019 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.ce.task.projectanalysis.step;

import org.junit.Rule;
import org.junit.Test;
import org.sonar.ce.task.projectanalysis.component.TreeRootHolderRule;
import org.sonar.ce.task.projectanalysis.duplication.DuplicationRepositoryRule;
import org.sonar.ce.task.projectanalysis.duplication.TextBlock;
import org.sonar.ce.task.projectanalysis.measure.MeasureRepositoryRule;
import org.sonar.ce.task.projectanalysis.metric.MetricRepositoryRule;
import org.sonar.ce.task.step.ComputationStep;
import org.sonar.ce.task.step.TestComputationStepContext;

import static org.assertj.core.api.Assertions.assertThat;
import static org.sonar.api.measures.CoreMetrics.DUPLICATIONS_DATA;
import static org.sonar.api.measures.CoreMetrics.DUPLICATIONS_DATA_KEY;
import static org.sonar.ce.task.projectanalysis.component.Component.Type.FILE;
import static org.sonar.ce.task.projectanalysis.component.Component.Type.PROJECT;
import static org.sonar.ce.task.projectanalysis.component.ReportComponent.builder;

/**
 * Tests of {@link DuplicationDataMeasuresStep}: the duplication data measure is expected to be added on
 * the first file only, as an XML document describing the original block and its duplicate.
 */
public class DuplicationDataMeasuresStepTest extends BaseStepTest {

  private static final int ROOT_REF = 1;
  private static final String PROJECT_KEY = "PROJECT_KEY";

  private static final int FILE_1_REF = 2;
  private static final String FILE_1_KEY = "FILE_1_KEY";

  private static final int FILE_2_REF = 3;
  private static final String FILE_2_KEY = "FILE_2_KEY";

  // Declaration order matters: the following rules are built from the tree root holder.
  @Rule
  public TreeRootHolderRule treeRootHolder = new TreeRootHolderRule()
    .setRoot(
      builder(PROJECT, ROOT_REF).setKey(PROJECT_KEY)
        .addChildren(
          builder(FILE, FILE_1_REF).setKey(FILE_1_KEY)
            .build(),
          builder(FILE, FILE_2_REF).setKey(FILE_2_KEY)
            .build())
        .build());

  @Rule
  public DuplicationRepositoryRule duplicationRepository = DuplicationRepositoryRule.create(treeRootHolder);

  @Rule
  public MetricRepositoryRule metricRepository = new MetricRepositoryRule()
    .add(DUPLICATIONS_DATA);

  @Rule
  public MeasureRepositoryRule measureRepository = MeasureRepositoryRule.create(treeRootHolder, metricRepository);

  private DuplicationDataMeasuresStep underTest = new DuplicationDataMeasuresStep(treeRootHolder, metricRepository, measureRepository, duplicationRepository);

  @Override
  protected ComputationStep step() {
    return underTest;
  }

  @Test
  public void nothing_to_do_when_no_duplication() {
    underTest.execute(new TestComputationStepContext());

    assertNoDuplicationData(FILE_1_REF);
    assertNoDuplicationData(FILE_2_REF);
  }

  @Test
  public void compute_duplications_on_same_file() {
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addDuplication(FILE_1_REF, original, duplicate);

    underTest.execute(new TestComputationStepContext());

    assertDuplicationData(FILE_1_REF, expectedXml(FILE_1_KEY, false));
    assertNoDuplicationData(FILE_2_REF);
  }

  @Test
  public void compute_duplications_on_different_files() {
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addDuplication(FILE_1_REF, original, FILE_2_REF, duplicate);

    underTest.execute(new TestComputationStepContext());

    assertDuplicationData(FILE_1_REF, expectedXml(FILE_2_KEY, false));
    assertNoDuplicationData(FILE_2_REF);
  }

  @Test
  public void compute_duplications_on_unchanged_file() {
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addExtendedProjectDuplication(FILE_1_REF, original, FILE_2_REF, duplicate);

    underTest.execute(new TestComputationStepContext());

    assertDuplicationData(FILE_1_REF, expectedXml(FILE_2_KEY, true));
    assertNoDuplicationData(FILE_2_REF);
  }

  @Test
  public void compute_duplications_on_different_projects() {
    String fileKeyFromOtherProject = "PROJECT2_KEY:file2";
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addCrossProjectDuplication(FILE_1_REF, original, fileKeyFromOtherProject, duplicate);

    underTest.execute(new TestComputationStepContext());

    assertDuplicationData(FILE_1_REF, expectedXml(fileKeyFromOtherProject, false));
    assertNoDuplicationData(FILE_2_REF);
  }

  /**
   * XML expected on FILE_1 for a block at lines 1-5 duplicated at lines 6-10 of the file with the given key.
   */
  private static String expectedXml(String duplicateFileKey, boolean duplicateInUnchangedFile) {
    return "<duplications><g><b s=\"1\" l=\"5\" t=\"false\" r=\"" + FILE_1_KEY + "\"/><b s=\"6\" l=\"5\" t=\"" + duplicateInUnchangedFile + "\" r=\""
      + duplicateFileKey + "\"/></g></duplications>";
  }

  private void assertDuplicationData(int fileRef, String expectedXml) {
    assertThat(measureRepository.getAddedRawMeasure(fileRef, DUPLICATIONS_DATA_KEY)).isPresent();
    assertThat(measureRepository.getAddedRawMeasure(fileRef, DUPLICATIONS_DATA_KEY).get().getData()).isEqualTo(expectedXml);
  }

  private void assertNoDuplicationData(int fileRef) {
    assertThat(measureRepository.getAddedRawMeasure(fileRef, DUPLICATIONS_DATA_KEY)).isNotPresent();
  }

}

+ 191
- 0
server/sonar-ce-task-projectanalysis/src/test/java/org/sonar/ce/task/projectanalysis/step/PersistDuplicationDataStepTest.java View File

@@ -0,0 +1,191 @@
/*
* SonarQube
* Copyright (C) 2009-2019 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.ce.task.projectanalysis.step;

import java.util.Optional;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.sonar.api.measures.Metric;
import org.sonar.api.utils.System2;
import org.sonar.ce.task.projectanalysis.analysis.MutableAnalysisMetadataHolderRule;
import org.sonar.ce.task.projectanalysis.component.TreeRootHolderRule;
import org.sonar.ce.task.projectanalysis.duplication.DuplicationRepositoryRule;
import org.sonar.ce.task.projectanalysis.duplication.TextBlock;
import org.sonar.ce.task.projectanalysis.measure.MeasureToMeasureDto;
import org.sonar.ce.task.projectanalysis.metric.MetricRepositoryRule;
import org.sonar.ce.task.step.ComputationStep;
import org.sonar.ce.task.step.TestComputationStepContext;
import org.sonar.db.DbTester;
import org.sonar.db.component.ComponentDto;
import org.sonar.db.measure.LiveMeasureDto;
import org.sonar.db.metric.MetricDto;

import static org.assertj.core.api.Assertions.assertThat;
import static org.sonar.api.measures.CoreMetrics.DUPLICATIONS_DATA_KEY;
import static org.sonar.ce.task.projectanalysis.component.Component.Type.FILE;
import static org.sonar.ce.task.projectanalysis.component.Component.Type.PROJECT;
import static org.sonar.ce.task.projectanalysis.component.ReportComponent.builder;

/**
 * Tests of {@link PersistDuplicationDataStep} against a real database: the duplication data measure is
 * expected to be persisted as a live measure of the first file only.
 */
public class PersistDuplicationDataStepTest extends BaseStepTest {

  private static final int ROOT_REF = 1;
  private static final String PROJECT_KEY = "PROJECT_KEY";
  private static final String PROJECT_UUID = "u1";

  private static final int FILE_1_REF = 2;
  private static final String FILE_1_KEY = "FILE_1_KEY";
  private static final String FILE_1_UUID = "u2";

  private static final int FILE_2_REF = 3;
  private static final String FILE_2_KEY = "FILE_2_KEY";
  private static final String FILE_2_UUID = "u3";

  @Rule
  public DbTester db = DbTester.create(System2.INSTANCE);
  // Declaration order matters: the duplication repository rule is built from the tree root holder.
  @Rule
  public TreeRootHolderRule treeRootHolder = new TreeRootHolderRule()
    .setRoot(
      builder(PROJECT, ROOT_REF).setKey(PROJECT_KEY).setUuid(PROJECT_UUID)
        .addChildren(
          builder(FILE, FILE_1_REF).setKey(FILE_1_KEY).setUuid(FILE_1_UUID)
            .build(),
          builder(FILE, FILE_2_REF).setKey(FILE_2_KEY).setUuid(FILE_2_UUID)
            .build())
        .build());

  @Rule
  public MutableAnalysisMetadataHolderRule analysisMetadataHolder = new MutableAnalysisMetadataHolderRule();
  @Rule
  public DuplicationRepositoryRule duplicationRepository = DuplicationRepositoryRule.create(treeRootHolder);
  @Rule
  public MetricRepositoryRule metricRepository = new MetricRepositoryRule();

  @Before
  public void setUp() throws Exception {
    String stringType = Metric.ValueType.STRING.name();
    MetricDto metric = db.measures().insertMetric(m -> m.setKey(DUPLICATIONS_DATA_KEY).setValueType(stringType));
    insertComponent(PROJECT_KEY, PROJECT_UUID);
    insertComponent(FILE_1_KEY, FILE_1_UUID);
    insertComponent(FILE_2_KEY, FILE_2_UUID);
    db.commit();
    Metric duplicationsData = new Metric.Builder(DUPLICATIONS_DATA_KEY, DUPLICATIONS_DATA_KEY, Metric.ValueType.STRING).create();
    metricRepository.add(metric.getId(), duplicationsData);
  }

  @Override
  protected ComputationStep step() {
    return underTest();
  }

  @Test
  public void nothing_to_persist_when_no_duplication() {
    TestComputationStepContext context = new TestComputationStepContext();

    underTest().execute(context);

    assertThatNothingPersisted();
    verifyStatistics(context, 0);
  }

  @Test
  public void compute_duplications_on_same_file() {
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addDuplication(FILE_1_REF, original, duplicate);

    underTest().execute(new TestComputationStepContext());

    assertThatOnlyFile1HasDuplicationData(expectedXml(FILE_1_KEY, false));
  }

  @Test
  public void compute_duplications_on_different_files() {
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addDuplication(FILE_1_REF, original, FILE_2_REF, duplicate);

    underTest().execute(new TestComputationStepContext());

    assertThatOnlyFile1HasDuplicationData(expectedXml(FILE_2_KEY, false));
  }

  @Test
  public void compute_duplications_on_unchanged_file() {
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addExtendedProjectDuplication(FILE_1_REF, original, FILE_2_REF, duplicate);

    underTest().execute(new TestComputationStepContext());

    assertThatOnlyFile1HasDuplicationData(expectedXml(FILE_2_KEY, true));
  }

  @Test
  public void compute_duplications_on_different_projects() {
    String fileKeyFromOtherProject = "PROJECT2_KEY:file2";
    TextBlock original = new TextBlock(1, 5);
    TextBlock duplicate = new TextBlock(6, 10);
    duplicationRepository.addCrossProjectDuplication(FILE_1_REF, original, fileKeyFromOtherProject, duplicate);

    underTest().execute(new TestComputationStepContext());

    assertThatOnlyFile1HasDuplicationData(expectedXml(fileKeyFromOtherProject, false));
  }

  /**
   * A fresh instance of the step, wired on the test database and on the rules of this test.
   */
  private PersistDuplicationDataStep underTest() {
    MeasureToMeasureDto measureToMeasureDto = new MeasureToMeasureDto(analysisMetadataHolder, treeRootHolder);
    return new PersistDuplicationDataStep(db.getDbClient(), treeRootHolder, metricRepository, duplicationRepository,
      measureToMeasureDto);
  }

  /**
   * XML expected on FILE_1 for a block at lines 1-5 duplicated at lines 6-10 of the file with the given key.
   */
  private static String expectedXml(String duplicateFileKey, boolean duplicateInUnchangedFile) {
    return "<duplications><g><b s=\"1\" l=\"5\" t=\"false\" r=\"" + FILE_1_KEY + "\"/><b s=\"6\" l=\"5\" t=\"" + duplicateInUnchangedFile + "\" r=\""
      + duplicateFileKey + "\"/></g></duplications>";
  }

  private void assertThatOnlyFile1HasDuplicationData(String expectedXml) {
    assertThat(selectMeasureData(FILE_1_UUID)).hasValue(expectedXml);
    assertThat(selectMeasureData(FILE_2_UUID)).isEmpty();
    assertThat(selectMeasureData(PROJECT_UUID)).isEmpty();
  }

  private void assertThatNothingPersisted() {
    int persistedMeasures = db.countRowsOfTable(db.getSession(), "live_measures");
    assertThat(persistedMeasures).isEqualTo(0);
  }

  private Optional<String> selectMeasureData(String componentUuid) {
    Optional<LiveMeasureDto> measure = db.getDbClient().liveMeasureDao().selectMeasure(db.getSession(), componentUuid, "duplications_data");
    return measure.map(LiveMeasureDto::getTextValue);
  }

  private ComponentDto insertComponent(String key, String uuid) {
    ComponentDto componentDto = new ComponentDto();
    componentDto.setOrganizationUuid("org1");
    componentDto.setDbKey(key);
    componentDto.setUuid(uuid);
    componentDto.setUuidPath(uuid + ".");
    componentDto.setRootUuid(uuid);
    componentDto.setProjectUuid(uuid);
    db.getDbClient().componentDao().insert(db.getSession(), componentDto);
    return componentDto;
  }

  private static void verifyStatistics(TestComputationStepContext context, int expectedInsertsOrUpdates) {
    context.getStatistics().assertValue("insertsOrUpdates", expectedInsertsOrUpdates);
  }
}

Loading…
Cancel
Save