SONAR-5801 Create search index structure and initial indexation method for source lines

This commit is contained in:
Jean-Baptiste Lievremont 2014-11-19 17:57:41 +01:00
parent 3d927716ac
commit 8626de845c
7 changed files with 414 additions and 0 deletions

View File

@ -695,6 +695,11 @@
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>

View File

@ -137,6 +137,10 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>

View File

@ -162,6 +162,8 @@ import org.sonar.server.source.CodeColorizers;
import org.sonar.server.source.DeprecatedSourceDecorator;
import org.sonar.server.source.HtmlSourceDecorator;
import org.sonar.server.source.SourceService;
import org.sonar.server.source.index.SourceLineIndexDefinition;
import org.sonar.server.source.index.SourceLineIndexer;
import org.sonar.server.source.ws.*;
import org.sonar.server.source.ws.ShowAction;
import org.sonar.server.startup.*;
@ -548,6 +550,8 @@ class ServerComponents {
pico.addSingleton(ScmWriter.class);
pico.addSingleton(RawAction.class);
pico.addSingleton(ScmAction.class);
pico.addSingleton(SourceLineIndexDefinition.class);
pico.addSingleton(SourceLineIndexer.class);
// Duplications
pico.addSingleton(DuplicationsParser.class);

View File

@ -0,0 +1,109 @@
/*
* SonarQube, open source software quality management tool.
* Copyright (C) 2008-2014 SonarSource
* mailto:contact AT sonarsource DOT com
*
* SonarQube is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* SonarQube is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.server.source.index;
import org.sonar.server.search.BaseDoc;
import org.sonar.server.search.BaseNormalizer;
import java.util.Date;
import java.util.Map;
/**
 * Search document describing a single line of a source file.
 *
 * <p>Every accessor reads or writes one entry of the field map handed to the
 * constructor, using the field names declared in {@link SourceLineIndexDefinition}
 * (plus {@link BaseNormalizer#UPDATED_AT_FIELD} for the update date).</p>
 */
public class SourceLineDoc extends BaseDoc {

  /**
   * @param fields backing map of field name to value; passed as-is to the parent class
   */
  public SourceLineDoc(Map<String, Object> fields) {
    super(fields);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_PROJECT_UUID} */
  public String projectUuid() {
    return getField(SourceLineIndexDefinition.FIELD_PROJECT_UUID);
  }

  public void setProjectUuid(String projectUuid) {
    setField(SourceLineIndexDefinition.FIELD_PROJECT_UUID, projectUuid);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_FILE_UUID} */
  public String fileUuid() {
    return getField(SourceLineIndexDefinition.FIELD_FILE_UUID);
  }

  public void setFileUuid(String fileUuid) {
    setField(SourceLineIndexDefinition.FIELD_FILE_UUID, fileUuid);
  }

  /** @return the line number stored under {@link SourceLineIndexDefinition#FIELD_LINE} */
  public int line() {
    return getField(SourceLineIndexDefinition.FIELD_LINE);
  }

  public void setLine(int line) {
    setField(SourceLineIndexDefinition.FIELD_LINE, line);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_SCM_REVISION} */
  public String scmRevision() {
    return getField(SourceLineIndexDefinition.FIELD_SCM_REVISION);
  }

  public void setScmRevision(String scmRevision) {
    setField(SourceLineIndexDefinition.FIELD_SCM_REVISION, scmRevision);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_SCM_AUTHOR} */
  public String scmAuthor() {
    return getField(SourceLineIndexDefinition.FIELD_SCM_AUTHOR);
  }

  public void setScmAuthor(String scmAuthor) {
    setField(SourceLineIndexDefinition.FIELD_SCM_AUTHOR, scmAuthor);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_SCM_DATE} */
  public Date scmDate() {
    return getField(SourceLineIndexDefinition.FIELD_SCM_DATE);
  }

  public void setScmDate(Date scmDate) {
    setField(SourceLineIndexDefinition.FIELD_SCM_DATE, scmDate);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_HIGHLIGHTING} */
  public String highlighting() {
    return getField(SourceLineIndexDefinition.FIELD_HIGHLIGHTING);
  }

  /**
   * Stores the highlighting data of the line.
   *
   * @param highlighting value to store under {@link SourceLineIndexDefinition#FIELD_HIGHLIGHTING}
   */
  public void setHighlighting(String highlighting) {
    // Must target FIELD_HIGHLIGHTING: writing to FIELD_FILE_UUID would overwrite the
    // file UUID (and therefore the document key) and leave highlighting() unset.
    setField(SourceLineIndexDefinition.FIELD_HIGHLIGHTING, highlighting);
  }

  /** @return the value stored under {@link SourceLineIndexDefinition#FIELD_SOURCE} */
  public String source() {
    return getField(SourceLineIndexDefinition.FIELD_SOURCE);
  }

  public void setSource(String source) {
    setField(SourceLineIndexDefinition.FIELD_SOURCE, source);
  }

  /** @return the value stored under {@link BaseNormalizer#UPDATED_AT_FIELD} */
  public Date updateDate() {
    return getField(BaseNormalizer.UPDATED_AT_FIELD);
  }

  public void setUpdateDate(Date updatedAt) {
    setField(BaseNormalizer.UPDATED_AT_FIELD, updatedAt);
  }

  /**
   * @return the document key, built as {@code <fileUuid>_<line>}
   */
  public String key() {
    return String.format("%s_%d", fileUuid(), line());
  }
}

View File

@ -0,0 +1,75 @@
/*
* SonarQube, open source software quality management tool.
* Copyright (C) 2008-2014 SonarSource
* mailto:contact AT sonarsource DOT com
*
* SonarQube is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* SonarQube is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.server.source.index;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.sonar.api.config.Settings;
import org.sonar.process.ProcessConstants;
import org.sonar.server.es.IndexDefinition;
import org.sonar.server.es.NewIndex;
import org.sonar.server.search.BaseNormalizer;
/**
 * Declares the "sourcelines" search index and its single "sourceLine" type,
 * along with the names of the fields stored for each source line.
 */
public class SourceLineIndexDefinition implements IndexDefinition {

  public static final String FIELD_PROJECT_UUID = "projectUuid";
  public static final String FIELD_FILE_UUID = "fileUuid";
  public static final String FIELD_LINE = "line";
  public static final String FIELD_SCM_REVISION = "scm_revision";
  public static final String FIELD_SCM_AUTHOR = "scm_author";
  public static final String FIELD_SCM_DATE = "scm_date";
  public static final String FIELD_HIGHLIGHTING = "highlighting";
  public static final String FIELD_SOURCE = "source";

  public static final String INDEX_SOURCE_LINES = "sourcelines";
  public static final String TYPE_SOURCE_LINE = "sourceLine";

  private final Settings settings;

  public SourceLineIndexDefinition(Settings settings) {
    this.settings = settings;
  }

  /**
   * Creates the index in the given context, tunes its sharding and declares
   * the mapping of the "sourceLine" type.
   */
  @Override
  public void define(IndexDefinitionContext context) {
    NewIndex sourceLinesIndex = context.create(INDEX_SOURCE_LINES);
    configureShards(sourceLinesIndex);
    defineSourceLineType(sourceLinesIndex);
  }

  /** Sets 4 shards and 1 replica when cluster mode is activated. */
  private void configureShards(NewIndex sourceLinesIndex) {
    if (settings.getBoolean(ProcessConstants.CLUSTER_ACTIVATE)) {
      sourceLinesIndex.getSettings().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 4);
      sourceLinesIndex.getSettings().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1);
    }
    // otherwise the index settings are left untouched (defaults)
  }

  /** Declares the fields of type "sourceLine", in the same order as before. */
  private void defineSourceLineType(NewIndex sourceLinesIndex) {
    NewIndex.NewIndexType mapping = sourceLinesIndex.createType(TYPE_SOURCE_LINE);

    // identification of the line
    mapping.stringFieldBuilder(FIELD_PROJECT_UUID).build();
    mapping.stringFieldBuilder(FIELD_FILE_UUID).build();
    mapping.createIntegerField(FIELD_LINE);

    // SCM information
    mapping.stringFieldBuilder(FIELD_SCM_REVISION).build();
    mapping.stringFieldBuilder(FIELD_SCM_AUTHOR).build();
    mapping.createDateTimeField(FIELD_SCM_DATE);

    // content of the line
    mapping.stringFieldBuilder(FIELD_HIGHLIGHTING).build();
    mapping.stringFieldBuilder(FIELD_SOURCE).build();

    mapping.createDateTimeField(BaseNormalizer.UPDATED_AT_FIELD);
  }
}

View File

@ -0,0 +1,100 @@
/*
* SonarQube, open source software quality management tool.
* Copyright (C) 2008-2014 SonarSource
* mailto:contact AT sonarsource DOT com
*
* SonarQube is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* SonarQube is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.server.source.index;
import org.elasticsearch.action.update.UpdateRequest;
import org.sonar.api.ServerComponent;
import org.sonar.core.persistence.DbSession;
import org.sonar.server.db.DbClient;
import org.sonar.server.es.BulkIndexer;
import org.sonar.server.es.EsClient;
import org.sonar.server.es.IssueIndexDefinition;
import java.sql.Connection;
import java.util.Collection;
import java.util.Iterator;
/**
 * Pushes source lines read from the database into the
 * {@link SourceLineIndexDefinition#INDEX_SOURCE_LINES} search index.
 *
 * <p>Not thread-safe: the date of the most recently indexed document is cached
 * in a plain instance field.</p>
 */
public class SourceLineIndexer implements ServerComponent {

  private final DbClient dbClient;
  private final EsClient esClient;

  /** Highest update date (epoch milliseconds) seen so far, 0 when nothing has been indexed yet. */
  private long lastUpdatedAt = 0L;

  public SourceLineIndexer(DbClient dbClient, EsClient esClient) {
    this.dbClient = dbClient;
    this.esClient = esClient;
  }

  /**
   * Reads source lines from the database, starting from the last known update
   * date, and sends them to the search index.
   *
   * @param large forwarded to {@link BulkIndexer#setLarge(boolean)}
   */
  public void indexSourceLines(boolean large) {
    // TODO support timezones
    final BulkIndexer bulk = new BulkIndexer(esClient, SourceLineIndexDefinition.INDEX_SOURCE_LINES);
    bulk.setLarge(large);

    DbSession dbSession = dbClient.openSession(false);
    try {
      Connection dbConnection = dbSession.getConnection();
      SourceLineResultSetIterator rowIt = SourceLineResultSetIterator.create(dbClient, dbConnection, getLastUpdatedAt());
      try {
        indexSourceLines(bulk, rowIt);
      } finally {
        // release the iterator even when indexing fails
        rowIt.close();
      }
    } finally {
      dbSession.close();
    }
  }

  /**
   * Sends one upsert request per document to the bulk indexer and keeps track
   * of the most recent update date encountered.
   *
   * @param bulk        indexer receiving the requests; started and stopped by this method
   * @param sourceLines iterator where each element holds a collection of line documents
   */
  public void indexSourceLines(BulkIndexer bulk, Iterator<Collection<SourceLineDoc>> sourceLines) {
    bulk.start();
    while (sourceLines.hasNext()) {
      Collection<SourceLineDoc> lineDocs = sourceLines.next();
      for (SourceLineDoc sourceLine : lineDocs) {
        bulk.add(newUpsertRequest(sourceLine));
        long dtoUpdatedAt = sourceLine.updateDate().getTime();
        if (lastUpdatedAt < dtoUpdatedAt) {
          lastUpdatedAt = dtoUpdatedAt;
        }
      }
    }
    bulk.stop();
  }

  /**
   * @return the cached update date when one is available, otherwise the value
   *         returned by {@link EsClient#getLastUpdatedAt} for the source lines index
   */
  private long getLastUpdatedAt() {
    long result;
    if (lastUpdatedAt <= 0L) {
      // request ES to get the max(updatedAt)
      result = esClient.getLastUpdatedAt(SourceLineIndexDefinition.INDEX_SOURCE_LINES, SourceLineIndexDefinition.TYPE_SOURCE_LINE);
    } else {
      // use cache. Will not work with Tomcat cluster.
      result = lastUpdatedAt;
    }
    return result;
  }

  /**
   * Builds the upsert request for a line document. The document key is used as
   * the request id and the project UUID as the routing value.
   */
  private UpdateRequest newUpsertRequest(SourceLineDoc lineDoc) {
    String projectUuid = lineDoc.projectUuid();
    // Target the source lines index/type, not the issues ones.
    return new UpdateRequest(SourceLineIndexDefinition.INDEX_SOURCE_LINES, SourceLineIndexDefinition.TYPE_SOURCE_LINE, lineDoc.key())
      .routing(projectUuid)
      .doc(lineDoc.getFields())
      .upsert(lineDoc.getFields());
  }
}

View File

@ -0,0 +1,117 @@
/*
* SonarQube, open source software quality management tool.
* Copyright (C) 2008-2014 SonarSource
* mailto:contact AT sonarsource DOT com
*
* SonarQube is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* SonarQube is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.server.source.index;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.sonar.api.utils.DateUtils;
import org.sonar.server.db.DbClient;
import org.sonar.server.db.ResultSetIterator;
import org.sonar.server.db.migrations.SqlUtil;
import java.io.IOException;
import java.io.Reader;
import java.sql.*;
import java.util.Collection;
import java.util.Date;
import java.util.List;
/**
 * Scrolls over table FILE_SOURCES and converts each row into the collection
 * of {@link SourceLineDoc} required to post index requests.
 *
 * <p>The "data" column is parsed as CSV; each CSV record becomes one document,
 * with line numbers starting at 1.</p>
 */
class SourceLineResultSetIterator extends ResultSetIterator<Collection<SourceLineDoc>> {

  private static final String[] FIELDS = {
    // column 1
    "project_uuid",
    "file_uuid",
    "created_at",
    "updated_at",
    "data",
    "data_hash"
  };

  private static final String SQL_ALL = "select " + StringUtils.join(FIELDS, ",") + " from file_sources";

  // No table alias is declared in SQL_ALL, so the column must be referenced unqualified.
  private static final String SQL_AFTER_DATE = SQL_ALL + " where updated_at>=?";

  /**
   * Prepares the scrolling statement over file sources.
   *
   * @param afterDate epoch milliseconds; when greater than 0 only rows with
   *                  {@code updated_at >= afterDate} are selected, otherwise all rows
   * @throws IllegalStateException if the statement cannot be prepared
   */
  static SourceLineResultSetIterator create(DbClient dbClient, Connection connection, long afterDate) {
    try {
      String sql = afterDate > 0L ? SQL_AFTER_DATE : SQL_ALL;
      PreparedStatement stmt = dbClient.newScrollingSelectStatement(connection, sql);
      if (afterDate > 0L) {
        // JDBC parameter indexes are 1-based
        stmt.setTimestamp(1, new Timestamp(afterDate));
      }
      return new SourceLineResultSetIterator(stmt);
    } catch (SQLException e) {
      throw new IllegalStateException("Fail to prepare SQL request to select all file sources", e);
    }
  }

  private SourceLineResultSetIterator(PreparedStatement stmt) throws SQLException {
    super(stmt);
  }

  /**
   * Converts the current row into one document per CSV record of the "data" column.
   * CSV columns are read in this order: SCM revision, SCM author, SCM date,
   * highlighting, source.
   *
   * @throws IllegalStateException if the CSV data cannot be read
   */
  @Override
  protected Collection<SourceLineDoc> read(ResultSet rs) throws SQLException {
    String projectUuid = rs.getString(1);
    String fileUuid = rs.getString(2);
    // createdAt = rs.getDate(3);
    Date updatedAt = SqlUtil.getDate(rs, 4);
    Reader dataStream = rs.getClob(5).getCharacterStream();
    // String dataHash = rs.getString(6);

    int line = 1;
    List<SourceLineDoc> lines = Lists.newArrayList();
    CSVParser csvParser = null;
    try {
      csvParser = new CSVParser(dataStream, CSVFormat.DEFAULT);

      for (CSVRecord record : csvParser) {
        SourceLineDoc doc = new SourceLineDoc(Maps.<String, Object>newHashMapWithExpectedSize(8));

        doc.setProjectUuid(projectUuid);
        doc.setFileUuid(fileUuid);
        doc.setLine(line++);
        doc.setUpdateDate(updatedAt);
        doc.setScmRevision(record.get(0));
        doc.setScmAuthor(record.get(1));
        doc.setScmDate(DateUtils.parseDateTimeQuietly(record.get(2)));
        doc.setHighlighting(record.get(3));
        doc.setSource(record.get(4));

        // without this the method would always return an empty collection
        lines.add(doc);
      }
    } catch (IOException ioError) {
      throw new IllegalStateException(
        String.format("Impossible to parse source line data, stuck at line %d", line), ioError);
    } finally {
      IOUtils.closeQuietly(csvParser);
      IOUtils.closeQuietly(dataStream);
    }

    return lines;
  }
}