]> source.dussan.org Git - sonarqube.git/commitdiff
SONAR-5801 Create search index structure and initial indexation method for source...
authorJean-Baptiste Lievremont <jean-baptiste.lievremont@sonarsource.com>
Wed, 19 Nov 2014 16:57:41 +0000 (17:57 +0100)
committerJean-Baptiste Lievremont <jean-baptiste.lievremont@sonarsource.com>
Fri, 21 Nov 2014 11:31:43 +0000 (12:31 +0100)
pom.xml
server/sonar-server/pom.xml
server/sonar-server/src/main/java/org/sonar/server/platform/ServerComponents.java
server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineDoc.java [new file with mode: 0644]
server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineIndexDefinition.java [new file with mode: 0644]
server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineIndexer.java [new file with mode: 0644]
server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineResultSetIterator.java [new file with mode: 0644]

diff --git a/pom.xml b/pom.xml
index 95a61e66533f4021314e8a34ab7d374d969dd673..e10fbdcf014d98c65c38eae04073ed20759113e7 100644 (file)
--- a/pom.xml
+++ b/pom.xml
         <artifactId>commons-io</artifactId>
         <version>2.4</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-csv</artifactId>
+       <version>1.0</version>
+      </dependency>
       <dependency>
         <groupId>commons-codec</groupId>
         <artifactId>commons-codec</artifactId>
index 4c826b8542f5a383c8a29b6960a3e3289de009ff..96a59b97e9746e2746ed9d82ce5308c4202f5ea6 100644 (file)
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-csv</artifactId>
+    </dependency>
     <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>
index 9702cf63942e9c411e4f703bbf0813024d2a638e..dd0a25b7c1b756aa66407cc853a0c092ebe78869 100644 (file)
@@ -162,6 +162,8 @@ import org.sonar.server.source.CodeColorizers;
 import org.sonar.server.source.DeprecatedSourceDecorator;
 import org.sonar.server.source.HtmlSourceDecorator;
 import org.sonar.server.source.SourceService;
+import org.sonar.server.source.index.SourceLineIndexDefinition;
+import org.sonar.server.source.index.SourceLineIndexer;
 import org.sonar.server.source.ws.*;
 import org.sonar.server.source.ws.ShowAction;
 import org.sonar.server.startup.*;
@@ -548,6 +550,8 @@ class ServerComponents {
     pico.addSingleton(ScmWriter.class);
     pico.addSingleton(RawAction.class);
     pico.addSingleton(ScmAction.class);
+    pico.addSingleton(SourceLineIndexDefinition.class);
+    pico.addSingleton(SourceLineIndexer.class);
 
     // Duplications
     pico.addSingleton(DuplicationsParser.class);
diff --git a/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineDoc.java b/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineDoc.java
new file mode 100644 (file)
index 0000000..225aa26
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * SonarQube, open source software quality management tool.
+ * Copyright (C) 2008-2014 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * SonarQube is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * SonarQube is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.server.source.index;
+
+import org.sonar.server.search.BaseDoc;
+import org.sonar.server.search.BaseNormalizer;
+
+import java.util.Date;
+import java.util.Map;
+
+public class SourceLineDoc extends BaseDoc {
+
+  public SourceLineDoc(Map<String, Object> fields) {
+    super(fields);
+  }
+
+  public String projectUuid() {
+    return getField(SourceLineIndexDefinition.FIELD_PROJECT_UUID);
+  }
+
+  public void setProjectUuid(String projectUuid) {
+    setField(SourceLineIndexDefinition.FIELD_PROJECT_UUID, projectUuid);
+  }
+
+  public String fileUuid() {
+    return getField(SourceLineIndexDefinition.FIELD_FILE_UUID);
+  }
+
+  public void setFileUuid(String fileUuid) {
+    setField(SourceLineIndexDefinition.FIELD_FILE_UUID, fileUuid);
+  }
+
+  public int line() {
+    return getField(SourceLineIndexDefinition.FIELD_LINE);
+  }
+
+  public void setLine(int line) {
+    setField(SourceLineIndexDefinition.FIELD_LINE, line);
+  }
+
+  public String scmRevision() {
+    return getField(SourceLineIndexDefinition.FIELD_SCM_REVISION);
+  }
+
+  public void setScmRevision(String scmRevision) {
+    setField(SourceLineIndexDefinition.FIELD_SCM_REVISION, scmRevision);
+  }
+
+  public String scmAuthor() {
+    return getField(SourceLineIndexDefinition.FIELD_SCM_AUTHOR);
+  }
+
+  public void setScmAuthor(String scmAuthor) {
+    setField(SourceLineIndexDefinition.FIELD_SCM_AUTHOR, scmAuthor);
+  }
+
+  public Date scmDate() {
+    return getField(SourceLineIndexDefinition.FIELD_SCM_DATE);
+  }
+
+  public void setScmDate(Date scmDate) {
+    setField(SourceLineIndexDefinition.FIELD_SCM_DATE, scmDate);
+  }
+
+  public String highlighting() {
+    return getField(SourceLineIndexDefinition.FIELD_HIGHLIGHTING);
+  }
+
+  public void setHighlighting(String fileUuid) {
+    setField(SourceLineIndexDefinition.FIELD_FILE_UUID, fileUuid);
+  }
+
+  public String source() {
+    return getField(SourceLineIndexDefinition.FIELD_SOURCE);
+  }
+
+  public void setSource(String source) {
+    setField(SourceLineIndexDefinition.FIELD_SOURCE, source);
+  }
+
+  public Date updateDate() {
+    return getField(BaseNormalizer.UPDATED_AT_FIELD);
+  }
+
+  public void setUpdateDate(Date updatedAt) {
+    setField(BaseNormalizer.UPDATED_AT_FIELD, updatedAt);
+  }
+
+  public String key() {
+    return String.format("%s_%d", fileUuid(), line());
+  }
+}
diff --git a/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineIndexDefinition.java b/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineIndexDefinition.java
new file mode 100644 (file)
index 0000000..c473e62
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * SonarQube, open source software quality management tool.
+ * Copyright (C) 2008-2014 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * SonarQube is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * SonarQube is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.server.source.index;
+
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.sonar.api.config.Settings;
+import org.sonar.process.ProcessConstants;
+import org.sonar.server.es.IndexDefinition;
+import org.sonar.server.es.NewIndex;
+import org.sonar.server.search.BaseNormalizer;
+
+public class SourceLineIndexDefinition implements IndexDefinition {
+
+  public static final String FIELD_PROJECT_UUID = "projectUuid";
+  public static final String FIELD_FILE_UUID = "fileUuid";
+  public static final String FIELD_LINE = "line";
+  public static final String FIELD_SCM_REVISION = "scm_revision";
+  public static final String FIELD_SCM_AUTHOR = "scm_author";
+  public static final String FIELD_SCM_DATE = "scm_date";
+  public static final String FIELD_HIGHLIGHTING = "highlighting";
+  public static final String FIELD_SOURCE = "source";
+
+  public static final String INDEX_SOURCE_LINES = "sourcelines";
+
+  public static final String TYPE_SOURCE_LINE = "sourceLine";
+
+
+  private final Settings settings;
+
+  public SourceLineIndexDefinition(Settings settings) {
+    this.settings = settings;
+  }
+
+  @Override
+  public void define(IndexDefinitionContext context) {
+    NewIndex index = context.create(INDEX_SOURCE_LINES);
+
+    // shards
+    boolean clusterMode = settings.getBoolean(ProcessConstants.CLUSTER_ACTIVATE);
+    if (clusterMode) {
+      index.getSettings().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 4);
+      index.getSettings().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1);
+      // else keep defaults (one shard)
+    }
+
+    // type "sourceLine"
+    NewIndex.NewIndexType sourceLineMapping = index.createType(TYPE_SOURCE_LINE);
+    sourceLineMapping.stringFieldBuilder(FIELD_PROJECT_UUID).build();
+    sourceLineMapping.stringFieldBuilder(FIELD_FILE_UUID).build();
+    sourceLineMapping.createIntegerField(FIELD_LINE);
+    sourceLineMapping.stringFieldBuilder(FIELD_SCM_REVISION).build();
+    sourceLineMapping.stringFieldBuilder(FIELD_SCM_AUTHOR).build();
+    sourceLineMapping.createDateTimeField(FIELD_SCM_DATE);
+    sourceLineMapping.stringFieldBuilder(FIELD_HIGHLIGHTING).build();
+    sourceLineMapping.stringFieldBuilder(FIELD_SOURCE).build();
+    sourceLineMapping.createDateTimeField(BaseNormalizer.UPDATED_AT_FIELD);
+  }
+}
diff --git a/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineIndexer.java b/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineIndexer.java
new file mode 100644 (file)
index 0000000..27227fa
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * SonarQube, open source software quality management tool.
+ * Copyright (C) 2008-2014 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * SonarQube is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * SonarQube is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.server.source.index;
+
+import org.elasticsearch.action.update.UpdateRequest;
+import org.sonar.api.ServerComponent;
+import org.sonar.core.persistence.DbSession;
+import org.sonar.server.db.DbClient;
+import org.sonar.server.es.BulkIndexer;
+import org.sonar.server.es.EsClient;
+import org.sonar.server.es.IssueIndexDefinition;
+
+import java.sql.Connection;
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * Not thread-safe
+ */
+public class SourceLineIndexer implements ServerComponent {
+
+  private final DbClient dbClient;
+  private final EsClient esClient;
+  private long lastUpdatedAt = 0L;
+
+  public SourceLineIndexer(DbClient dbClient, EsClient esClient) {
+    this.dbClient = dbClient;
+    this.esClient = esClient;
+  }
+
+  public void indexSourceLines(boolean large) {
+    // TODO support timezones
+    final BulkIndexer bulk = new BulkIndexer(esClient, SourceLineIndexDefinition.INDEX_SOURCE_LINES);
+    bulk.setLarge(large);
+
+    DbSession dbSession = dbClient.openSession(false);
+    Connection dbConnection = dbSession.getConnection();
+    try {
+      SourceLineResultSetIterator rowIt = SourceLineResultSetIterator.create(dbClient, dbConnection, getLastUpdatedAt());
+      indexSourceLines(bulk, rowIt);
+      rowIt.close();
+
+    } finally {
+      dbSession.close();
+    }
+  }
+
+  public void indexSourceLines(BulkIndexer bulk, Iterator<Collection<SourceLineDoc>> sourceLines) {
+    bulk.start();
+    while (sourceLines.hasNext()) {
+      Collection<SourceLineDoc> lineDocs = sourceLines.next();
+      for (SourceLineDoc sourceLine: lineDocs) {
+        bulk.add(newUpsertRequest(sourceLine));
+        long dtoUpdatedAt = sourceLine.updateDate().getTime();
+        if (lastUpdatedAt < dtoUpdatedAt) {
+          lastUpdatedAt = dtoUpdatedAt;
+        }
+      }
+
+    }
+    bulk.stop();
+  }
+
+  private long getLastUpdatedAt() {
+    long result;
+    if (lastUpdatedAt <= 0L) {
+      // request ES to get the max(updatedAt)
+      result = esClient.getLastUpdatedAt(SourceLineIndexDefinition.INDEX_SOURCE_LINES, SourceLineIndexDefinition.TYPE_SOURCE_LINE);
+    } else {
+      // use cache. Will not work with Tomcat cluster.
+      result = lastUpdatedAt;
+    }
+    return result;
+  }
+
+  private UpdateRequest newUpsertRequest(SourceLineDoc lineDoc) {
+    String projectUuid = lineDoc.projectUuid();
+    return new UpdateRequest(IssueIndexDefinition.INDEX_ISSUES, IssueIndexDefinition.TYPE_ISSUE, lineDoc.key())
+      .routing(projectUuid)
+      .doc(lineDoc.getFields())
+      .upsert(lineDoc.getFields());
+  }
+}
diff --git a/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineResultSetIterator.java b/server/sonar-server/src/main/java/org/sonar/server/source/index/SourceLineResultSetIterator.java
new file mode 100644 (file)
index 0000000..47cb2ef
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * SonarQube, open source software quality management tool.
+ * Copyright (C) 2008-2014 SonarSource
+ * mailto:contact AT sonarsource DOT com
+ *
+ * SonarQube is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * SonarQube is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.server.source.index;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+import org.sonar.api.utils.DateUtils;
+import org.sonar.server.db.DbClient;
+import org.sonar.server.db.ResultSetIterator;
+import org.sonar.server.db.migrations.SqlUtil;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.sql.*;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Scroll over table ISSUES and directly read the maps required to
+ * post index requests
+ */
+class SourceLineResultSetIterator extends ResultSetIterator<Collection<SourceLineDoc>> {
+
+  private static final String[] FIELDS = {
+    // column 1
+    "project_uuid",
+    "file_uuid",
+    "created_at",
+    "updated_at",
+    "data",
+    "data_hash"
+  };
+
+  private static final String SQL_ALL = "select " + StringUtils.join(FIELDS, ",") + " from file_sources";
+
+  private static final String SQL_AFTER_DATE = SQL_ALL + " where i.updated_at>=?";
+
+  static SourceLineResultSetIterator create(DbClient dbClient, Connection connection, long afterDate) {
+    try {
+      String sql = afterDate > 0L ? SQL_AFTER_DATE : SQL_ALL;
+      PreparedStatement stmt = dbClient.newScrollingSelectStatement(connection, sql);
+      if (afterDate > 0L) {
+        stmt.setTimestamp(0, new Timestamp(afterDate));
+      }
+      return new SourceLineResultSetIterator(stmt);
+    } catch (SQLException e) {
+      throw new IllegalStateException("Fail to prepare SQL request to select all issues", e);
+    }
+  }
+
+  private SourceLineResultSetIterator(PreparedStatement stmt) throws SQLException {
+    super(stmt);
+  }
+
+  @Override
+  protected Collection<SourceLineDoc> read(ResultSet rs) throws SQLException {
+
+    String projectUuid = rs.getString(1);
+    String fileUuid = rs.getString(2);
+    // createdAt = rs.getDate(3);
+    Date updatedAt = SqlUtil.getDate(rs, 4);
+    Reader dataStream = rs.getClob(5).getCharacterStream();
+    // String dataHash = rs.getString(6);
+
+    int line = 1;
+    List<SourceLineDoc> lines = Lists.newArrayList();
+    CSVParser csvParser = null;
+    try {
+      csvParser = new CSVParser(dataStream, CSVFormat.DEFAULT);
+
+      for(CSVRecord record: csvParser) {
+        SourceLineDoc doc = new SourceLineDoc(Maps.<String, Object>newHashMapWithExpectedSize(8));
+  
+        doc.setProjectUuid(projectUuid);
+        doc.setFileUuid(fileUuid);
+        doc.setLine(line ++);
+        doc.setUpdateDate(updatedAt);
+        doc.setScmRevision(record.get(0));
+        doc.setScmAuthor(record.get(1));
+        doc.setScmDate(DateUtils.parseDateTimeQuietly(record.get(2)));
+        doc.setHighlighting(record.get(3));
+        doc.setSource(record.get(4));
+      }
+    } catch(IOException ioError) {
+      throw new IllegalStateException(
+        String.format("Impossible to parse source line data, stuck at line %d", line), ioError);
+    } finally {
+      IOUtils.closeQuietly(csvParser);
+      IOUtils.closeQuietly(dataStream);
+    }
+
+    return lines;
+  }
+}