*/ | */ | ||||
package org.sonar.batch.index; | package org.sonar.batch.index; | ||||
import org.apache.commons.codec.binary.Hex; | |||||
import org.apache.commons.codec.digest.DigestUtils; | import org.apache.commons.codec.digest.DigestUtils; | ||||
import org.apache.ibatis.session.ResultContext; | import org.apache.ibatis.session.ResultContext; | ||||
import org.apache.ibatis.session.ResultHandler; | import org.apache.ibatis.session.ResultHandler; | ||||
import org.sonar.api.batch.fs.internal.DefaultInputFile; | import org.sonar.api.batch.fs.internal.DefaultInputFile; | ||||
import org.sonar.api.utils.System2; | import org.sonar.api.utils.System2; | ||||
import org.sonar.batch.ProjectTree; | import org.sonar.batch.ProjectTree; | ||||
import org.sonar.batch.scan.filesystem.FileMetadata; | |||||
import org.sonar.batch.scan.filesystem.FileMetadata.LineHashConsumer; | |||||
import org.sonar.batch.scan.filesystem.InputFileMetadata; | import org.sonar.batch.scan.filesystem.InputFileMetadata; | ||||
import org.sonar.batch.scan.filesystem.InputPathCache; | import org.sonar.batch.scan.filesystem.InputPathCache; | ||||
import org.sonar.core.persistence.DbSession; | import org.sonar.core.persistence.DbSession; | ||||
import org.sonar.core.source.db.FileSourceMapper; | import org.sonar.core.source.db.FileSourceMapper; | ||||
import javax.annotation.CheckForNull; | import javax.annotation.CheckForNull; | ||||
import javax.annotation.Nullable; | |||||
import java.io.BufferedReader; | |||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.nio.file.Files; | |||||
import java.util.HashMap; | import java.util.HashMap; | ||||
import java.util.Map; | import java.util.Map; | ||||
.setBinaryData(data) | .setBinaryData(data) | ||||
.setDataHash(dataHash) | .setDataHash(dataHash) | ||||
.setSrcHash(metadata.hash()) | .setSrcHash(metadata.hash()) | ||||
.setLineHashes(lineHashesAsMd5Hex(inputFile, metadata)) | |||||
.setLineHashes(lineHashesAsMd5Hex(inputFile)) | |||||
.setCreatedAt(system2.now()) | .setCreatedAt(system2.now()) | ||||
.setUpdatedAt(system2.now()); | .setUpdatedAt(system2.now()); | ||||
mapper.insert(dto); | mapper.insert(dto); | ||||
.setBinaryData(data) | .setBinaryData(data) | ||||
.setDataHash(dataHash) | .setDataHash(dataHash) | ||||
.setSrcHash(metadata.hash()) | .setSrcHash(metadata.hash()) | ||||
.setLineHashes(lineHashesAsMd5Hex(inputFile, metadata)) | |||||
.setLineHashes(lineHashesAsMd5Hex(inputFile)) | |||||
.setUpdatedAt(system2.now()); | .setUpdatedAt(system2.now()); | ||||
mapper.update(previousDto); | mapper.update(previousDto); | ||||
session.commit(); | session.commit(); | ||||
} | } | ||||
@CheckForNull | @CheckForNull | ||||
private String lineHashesAsMd5Hex(DefaultInputFile f, InputFileMetadata metadata) { | |||||
private String lineHashesAsMd5Hex(DefaultInputFile f) { | |||||
if (f.lines() == 0) { | if (f.lines() == 0) { | ||||
return null; | return null; | ||||
} | } | ||||
// A md5 string is 32 char long + '\n' = 33 | // A md5 string is 32 char long + '\n' = 33 | ||||
StringBuilder result = new StringBuilder(f.lines() * (32 + 1)); | |||||
final StringBuilder result = new StringBuilder(f.lines() * (32 + 1)); | |||||
try { | |||||
BufferedReader reader = Files.newBufferedReader(f.path(), f.charset()); | |||||
StringBuilder sb = new StringBuilder(); | |||||
for (int i = 0; i < f.lines(); i++) { | |||||
String lineStr = reader.readLine(); | |||||
lineStr = lineStr == null ? "" : lineStr; | |||||
for (int j = 0; j < lineStr.length(); j++) { | |||||
char c = lineStr.charAt(j); | |||||
if (!Character.isWhitespace(c)) { | |||||
sb.append(c); | |||||
} | |||||
} | |||||
if (i > 0) { | |||||
FileMetadata.computeLineHashesForIssueTracking(f, new LineHashConsumer() { | |||||
@Override | |||||
public void consume(int lineIdx, @Nullable byte[] hash) { | |||||
if (lineIdx > 0) { | |||||
result.append("\n"); | result.append("\n"); | ||||
} | } | ||||
result.append(sb.length() > 0 ? DigestUtils.md5Hex(sb.toString()) : ""); | |||||
sb.setLength(0); | |||||
result.append(hash != null ? Hex.encodeHexString(hash) : ""); | |||||
} | } | ||||
} catch (Exception e) { | |||||
throw new IllegalStateException("Unable to compute line hashes of file " + f, e); | |||||
} | |||||
}); | |||||
return result.toString(); | return result.toString(); | ||||
} | } |
*/ | */ | ||||
package org.sonar.batch.issue.tracking; | package org.sonar.batch.issue.tracking; | ||||
import com.google.common.base.Charsets; | |||||
import com.google.common.collect.LinkedHashMultimap; | import com.google.common.collect.LinkedHashMultimap; | ||||
import com.google.common.collect.Multimap; | import com.google.common.collect.Multimap; | ||||
import org.apache.commons.codec.binary.Hex; | import org.apache.commons.codec.binary.Hex; | ||||
import org.apache.commons.codec.digest.DigestUtils; | |||||
import org.apache.commons.lang.ObjectUtils; | import org.apache.commons.lang.ObjectUtils; | ||||
import org.sonar.api.batch.fs.internal.DefaultInputFile; | import org.sonar.api.batch.fs.internal.DefaultInputFile; | ||||
import org.sonar.batch.scan.filesystem.FileMetadata; | |||||
import org.sonar.batch.scan.filesystem.FileMetadata.LineHashConsumer; | |||||
import javax.annotation.Nullable; | |||||
import java.io.BufferedReader; | |||||
import java.nio.file.Files; | |||||
import java.security.MessageDigest; | |||||
import java.util.Collection; | import java.util.Collection; | ||||
/** | /** | ||||
} | } | ||||
public static FileHashes create(DefaultInputFile f) { | public static FileHashes create(DefaultInputFile f) { | ||||
byte[][] hashes = new byte[f.lines()][]; | |||||
try { | |||||
BufferedReader reader = Files.newBufferedReader(f.path(), f.charset()); | |||||
MessageDigest lineMd5Digest = DigestUtils.getMd5Digest(); | |||||
StringBuilder sb = new StringBuilder(); | |||||
for (int i = 0; i < f.lines(); i++) { | |||||
String lineStr = reader.readLine(); | |||||
if (lineStr != null) { | |||||
for (int j = 0; j < lineStr.length(); j++) { | |||||
char c = lineStr.charAt(j); | |||||
if (!Character.isWhitespace(c)) { | |||||
sb.append(c); | |||||
} | |||||
} | |||||
} | |||||
hashes[i] = sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(Charsets.UTF_8)) : null; | |||||
sb.setLength(0); | |||||
final byte[][] hashes = new byte[f.lines()][]; | |||||
FileMetadata.computeLineHashesForIssueTracking(f, new LineHashConsumer() { | |||||
@Override | |||||
public void consume(int lineIdx, @Nullable byte[] hash) { | |||||
hashes[lineIdx - 1] = hash; | |||||
} | } | ||||
} catch (Exception e) { | |||||
throw new IllegalStateException("Unable to compute line hashes of file " + f, e); | |||||
} | |||||
}); | |||||
int size = hashes.length; | int size = hashes.length; | ||||
Multimap<String, Integer> linesByHash = LinkedHashMultimap.create(); | Multimap<String, Integer> linesByHash = LinkedHashMultimap.create(); |
import org.apache.commons.codec.digest.DigestUtils; | import org.apache.commons.codec.digest.DigestUtils; | ||||
import org.apache.commons.io.ByteOrderMark; | import org.apache.commons.io.ByteOrderMark; | ||||
import org.apache.commons.io.input.BOMInputStream; | import org.apache.commons.io.input.BOMInputStream; | ||||
import org.slf4j.Logger; | |||||
import org.slf4j.LoggerFactory; | |||||
import org.sonar.api.BatchComponent; | import org.sonar.api.BatchComponent; | ||||
import org.sonar.api.CoreProperties; | |||||
import org.sonar.api.batch.AnalysisMode; | import org.sonar.api.batch.AnalysisMode; | ||||
import org.sonar.api.batch.fs.internal.DefaultInputFile; | |||||
import javax.annotation.CheckForNull; | import javax.annotation.CheckForNull; | ||||
import javax.annotation.Nullable; | |||||
import java.io.BufferedReader; | import java.io.BufferedReader; | ||||
import java.io.File; | import java.io.File; | ||||
*/ | */ | ||||
public class FileMetadata implements BatchComponent { | public class FileMetadata implements BatchComponent { | ||||
private static final Logger LOG = LoggerFactory.getLogger(FileMetadata.class); | |||||
private static final char LINE_FEED = '\n'; | private static final char LINE_FEED = '\n'; | ||||
private static final char CARRIAGE_RETURN = '\r'; | private static final char CARRIAGE_RETURN = '\r'; | ||||
private final AnalysisMode analysisMode; | private final AnalysisMode analysisMode; | ||||
this.analysisMode = analysisMode; | this.analysisMode = analysisMode; | ||||
} | } | ||||
private abstract class CharHandler { | |||||
private static abstract class CharHandler { | |||||
void handleAll(char c) { | void handleAll(char c) { | ||||
} | } | ||||
} | } | ||||
} | } | ||||
private class LineCounter extends CharHandler { | |||||
private static class LineCounter extends CharHandler { | |||||
private boolean empty = true; | private boolean empty = true; | ||||
private int lines = 1; | private int lines = 1; | ||||
private int nonBlankLines = 0; | private int nonBlankLines = 0; | ||||
private boolean blankLine = true; | private boolean blankLine = true; | ||||
boolean alreadyLoggedInvalidCharacter = false; | |||||
private final File file; | |||||
private final Charset encoding; | |||||
LineCounter(File file, Charset encoding) { | |||||
this.file = file; | |||||
this.encoding = encoding; | |||||
} | |||||
@Override | @Override | ||||
void handleAll(char c) { | void handleAll(char c) { | ||||
this.empty = false; | this.empty = false; | ||||
if (!alreadyLoggedInvalidCharacter && c == '\ufffd') { | |||||
LOG.warn("Invalid character encountered in file " + file + " at line " + lines | |||||
+ " for encoding " + encoding + ". Please fix file content or configure the encoding to be used using property '" + CoreProperties.ENCODING_PROPERTY + "'."); | |||||
alreadyLoggedInvalidCharacter = true; | |||||
} | |||||
} | } | ||||
@Override | @Override | ||||
} | } | ||||
} | } | ||||
private class FileHashComputer extends CharHandler { | |||||
private static class FileHashComputer extends CharHandler { | |||||
private MessageDigest globalMd5Digest = DigestUtils.getMd5Digest(); | private MessageDigest globalMd5Digest = DigestUtils.getMd5Digest(); | ||||
StringBuffer sb = new StringBuffer(); | |||||
private StringBuffer sb = new StringBuffer(); | |||||
@Override | @Override | ||||
void handleIgnoreEoL(char c) { | void handleIgnoreEoL(char c) { | ||||
} | } | ||||
} | } | ||||
private class LineOffsetCounter extends CharHandler { | |||||
/**
 * {@code CharHandler} that computes one MD5 hash per line, taking only the
 * non-whitespace characters of the line into account, and hands every result
 * to the supplied {@link LineHashConsumer}.
 *
 * <p>A line that contains no non-whitespace character is reported with a
 * {@code null} hash instead of the MD5 of an empty input.
 */
private static class LineHashComputer extends CharHandler { | |||||
// One digest instance is reused for every line of the scanned file.
private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest(); | |||||
// Non-whitespace characters collected for the line currently being read.
// NOTE(review): StringBuffer is synchronized; a StringBuilder looks sufficient
// if an instance is only ever driven by one thread - confirm before changing.
private final StringBuffer sb = new StringBuffer(); | |||||
private final LineHashConsumer consumer; | |||||
// Number reported to the consumer for the current line; the first line is 1.
private int line = 1; | |||||
/**
 * @param consumer callback that receives the hash of each line
 */
public LineHashComputer(LineHashConsumer consumer) { | |||||
this.consumer = consumer; | |||||
} | |||||
/**
 * Buffers {@code c} unless {@link Character#isWhitespace(char)} reports it as
 * whitespace, so that blank characters never influence the line hash.
 * Presumably invoked for every character except end-of-line markers (going
 * by the method name) - the calling loop is not visible here, confirm.
 */
@Override | |||||
void handleIgnoreEoL(char c) { | |||||
if (!Character.isWhitespace(c)) { | |||||
sb.append(c); | |||||
} | |||||
} | |||||
/**
 * Reports the line that just ended: the MD5 of its buffered characters
 * encoded as UTF-8, or {@code null} when nothing was buffered. The buffer is
 * then cleared and the line number advanced.
 */
@Override | |||||
void newLine() { | |||||
consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(Charsets.UTF_8)) : null); | |||||
sb.setLength(0); | |||||
line++; | |||||
} | |||||
/**
 * Reports the last line using the same rule as {@link #newLine()}. The call
 * is unconditional, so the consumer is invoked once more than
 * {@code newLine()} was, even when the final line is empty.
 * NOTE(review): the digest expression is duplicated from newLine(); a shared
 * private helper would remove the repetition.
 */
@Override | |||||
void eof() { | |||||
consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(Charsets.UTF_8)) : null); | |||||
} | |||||
} | |||||
private static class LineOffsetCounter extends CharHandler { | |||||
private int currentOriginalOffset = 0; | private int currentOriginalOffset = 0; | ||||
private List<Integer> originalLineOffsets = new ArrayList<Integer>(); | private List<Integer> originalLineOffsets = new ArrayList<Integer>(); | ||||
* Maximum performance is needed. | * Maximum performance is needed. | ||||
*/ | */ | ||||
Metadata read(File file, Charset encoding) { | Metadata read(File file, Charset encoding) { | ||||
char c = (char) 0; | |||||
LineCounter lineCounter = new LineCounter(); | |||||
LineCounter lineCounter = new LineCounter(file, encoding); | |||||
FileHashComputer fileHashComputer = new FileHashComputer(); | FileHashComputer fileHashComputer = new FileHashComputer(); | ||||
LineOffsetCounter lineOffsetCounter = new LineOffsetCounter(); | LineOffsetCounter lineOffsetCounter = new LineOffsetCounter(); | ||||
CharHandler[] handlers; | |||||
if (analysisMode.isPreview()) { | |||||
// No need to compute line offsets in preview mode since there is no syntax highlighting | |||||
handlers = new CharHandler[] {lineCounter, fileHashComputer}; | |||||
if (!analysisMode.isPreview()) { | |||||
scanFile(file, encoding, lineCounter, fileHashComputer, lineOffsetCounter); | |||||
} else { | } else { | ||||
handlers = new CharHandler[] {lineCounter, fileHashComputer, lineOffsetCounter}; | |||||
// No need to compute line offsets in preview mode since there is no syntax highlighting | |||||
scanFile(file, encoding, lineCounter, fileHashComputer); | |||||
} | } | ||||
return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(), lineOffsetCounter.getOriginalLineOffsets(), | |||||
lineCounter.isEmpty()); | |||||
} | |||||
private static void scanFile(File file, Charset encoding, CharHandler... handlers) { | |||||
char c = (char) 0; | |||||
try (BOMInputStream bomIn = new BOMInputStream(new FileInputStream(file), | try (BOMInputStream bomIn = new BOMInputStream(new FileInputStream(file), | ||||
ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); | ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); | ||||
Reader reader = new BufferedReader(new InputStreamReader(bomIn, encoding))) { | Reader reader = new BufferedReader(new InputStreamReader(bomIn, encoding))) { | ||||
for (CharHandler handler : handlers) { | for (CharHandler handler : handlers) { | ||||
handler.eof(); | handler.eof(); | ||||
} | } | ||||
return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(), lineOffsetCounter.getOriginalLineOffsets(), | |||||
lineCounter.isEmpty()); | |||||
} catch (IOException e) { | } catch (IOException e) { | ||||
throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", file.getAbsolutePath(), encoding), e); | throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", file.getAbsolutePath(), encoding), e); | ||||
} | } | ||||
this.originalLineOffsets = Ints.toArray(originalLineOffsets); | this.originalLineOffsets = Ints.toArray(originalLineOffsets); | ||||
} | } | ||||
} | } | ||||
/**
 * Callback receiving the hash computed for each line of a scanned file.
 */
public static interface LineHashConsumer { | |||||
/**
 * Receives the hash of one line.
 *
 * @param lineIdx line number; the visible producer, {@code LineHashComputer},
 *                starts numbering at 1
 * @param hash    hash bytes of the line, or {@code null} when the producer
 *                found no non-whitespace character on it
 */
void consume(int lineIdx, @Nullable byte[] hash); | |||||
} | |||||
/**
 * Computes an MD5 hash for each line of the file after removing all
 * whitespace characters, and passes every hash to {@code consumer}.
 *
 * <p>The file is read through {@code scanFile} using {@code f.file()} and
 * {@code f.charset()}.
 *
 * @param f        input file whose lines are hashed
 * @param consumer callback invoked once per line with the line number and
 *                 the hash ({@code null} for a line without any
 *                 non-whitespace character)
 * @throws IllegalStateException if the file cannot be read ({@code scanFile}
 *                               wraps the {@code IOException})
 */ | |||||
public static void computeLineHashesForIssueTracking(DefaultInputFile f, LineHashConsumer consumer) { | |||||
scanFile(f.file(), f.charset(), new LineHashComputer(consumer)); | |||||
} | |||||
} | } |
import com.google.common.base.Charsets; | import com.google.common.base.Charsets; | ||||
import org.apache.commons.codec.digest.DigestUtils; | import org.apache.commons.codec.digest.DigestUtils; | ||||
import org.apache.commons.io.FileUtils; | import org.apache.commons.io.FileUtils; | ||||
import org.apache.commons.lang.StringUtils; | |||||
import org.junit.Before; | import org.junit.Before; | ||||
import org.junit.Test; | import org.junit.Test; | ||||
import org.junit.rules.TemporaryFolder; | import org.junit.rules.TemporaryFolder; | ||||
DefaultInputFile inputFile = mock(DefaultInputFile.class); | DefaultInputFile inputFile = mock(DefaultInputFile.class); | ||||
java.io.File f = temp.newFile(); | java.io.File f = temp.newFile(); | ||||
when(inputFile.path()).thenReturn(f.toPath()); | when(inputFile.path()).thenReturn(f.toPath()); | ||||
when(inputFile.file()).thenReturn(f); | |||||
when(inputFile.charset()).thenReturn(Charsets.UTF_8); | when(inputFile.charset()).thenReturn(Charsets.UTF_8); | ||||
when(inputFile.lines()).thenReturn(newSource.split("\n").length); | |||||
when(inputFile.lines()).thenReturn(StringUtils.countMatches(newSource, "\n") + 1); | |||||
FileUtils.write(f, newSource, Charsets.UTF_8); | FileUtils.write(f, newSource, Charsets.UTF_8); | ||||
when(inputFile.key()).thenReturn("foo:Action.java"); | when(inputFile.key()).thenReturn("foo:Action.java"); | ||||
when(inputPathCache.getFile("foo", "Action.java")).thenReturn(inputFile); | when(inputPathCache.getFile("foo", "Action.java")).thenReturn(inputFile); |
import com.google.common.io.Resources; | import com.google.common.io.Resources; | ||||
import org.apache.commons.codec.digest.DigestUtils; | import org.apache.commons.codec.digest.DigestUtils; | ||||
import org.apache.commons.io.FileUtils; | import org.apache.commons.io.FileUtils; | ||||
import org.apache.commons.lang.StringUtils; | |||||
import org.junit.Before; | import org.junit.Before; | ||||
import org.junit.Rule; | import org.junit.Rule; | ||||
import org.junit.Test; | import org.junit.Test; | ||||
DefaultInputFile inputFile = mock(DefaultInputFile.class); | DefaultInputFile inputFile = mock(DefaultInputFile.class); | ||||
File f = temp.newFile(); | File f = temp.newFile(); | ||||
when(inputFile.path()).thenReturn(f.toPath()); | when(inputFile.path()).thenReturn(f.toPath()); | ||||
when(inputFile.file()).thenReturn(f); | |||||
when(inputFile.charset()).thenReturn(Charsets.UTF_8); | when(inputFile.charset()).thenReturn(Charsets.UTF_8); | ||||
String data = load(newSource); | String data = load(newSource); | ||||
when(inputFile.lines()).thenReturn(data.split("\n").length); | |||||
when(inputFile.lines()).thenReturn(StringUtils.countMatches(data, "\n") + 1); | |||||
FileUtils.write(f, data, Charsets.UTF_8); | FileUtils.write(f, data, Charsets.UTF_8); | ||||
when(inputFile.key()).thenReturn("foo:Action.java"); | when(inputFile.key()).thenReturn("foo:Action.java"); | ||||
when(lastSnapshots.getLineHashes("foo:Action.java")).thenReturn(computeHexHashes(load(reference))); | when(lastSnapshots.getLineHashes("foo:Action.java")).thenReturn(computeHexHashes(load(reference))); |
import org.sonar.api.batch.AnalysisMode; | import org.sonar.api.batch.AnalysisMode; | ||||
import java.io.File; | import java.io.File; | ||||
import java.nio.charset.Charset; | |||||
import static org.apache.commons.codec.digest.DigestUtils.md5Hex; | import static org.apache.commons.codec.digest.DigestUtils.md5Hex; | ||||
import static org.assertj.core.api.Assertions.assertThat; | import static org.assertj.core.api.Assertions.assertThat; | ||||
assertThat(metadata.empty).isFalse(); | assertThat(metadata.empty).isFalse(); | ||||
} | } | ||||
/**
 * Reads a file written as cp1252 while declaring UTF-8: metadata must still be
 * produced, with the undecodable character showing up as U+FFFD.
 */
@Test | |||||
public void read_with_wrong_encoding() throws Exception { | |||||
File tempFile = temp.newFile(); | |||||
// The acute accent is written with cp1252, deliberately not the encoding used to read the file back.
FileUtils.write(tempFile, "marker´s\n", Charset.forName("cp1252")); | |||||
FileMetadata.Metadata metadata = new FileMetadata(mode).read(tempFile, Charsets.UTF_8); | |||||
// The trailing line feed opens a second, empty line.
assertThat(metadata.lines).isEqualTo(2); | |||||
// The hash is expected to be computed on the decoded text, where the accent became the replacement character.
assertThat(metadata.hash).isEqualTo(md5Hex("marker\ufffds\n")); | |||||
// Second line starts at offset 9: "marker" (6) + U+FFFD (1) + "s" (1) + line feed (1).
assertThat(metadata.originalLineOffsets).containsOnly(0, 9); | |||||
} | |||||
@Test | @Test | ||||
public void non_ascii_utf_8() throws Exception { | public void non_ascii_utf_8() throws Exception { | ||||
File tempFile = temp.newFile(); | File tempFile = temp.newFile(); |