author     Julien HENRY <julien.henry@sonarsource.com>  2017-05-09 14:17:18 +0200
committer  Julien HENRY <henryju@yahoo.fr>              2017-05-09 18:02:07 +0200
commit     88bb8230b1bfd3e6ec923c35890c1daba93fece4 (patch)
tree       d51186a8ea7d6c1402ec221ac5c18bff66963beb /sonar-duplications
parent     53caac9fa3f2c97ca67936fe9d11ae47ae55c6ca (diff)
SONAR-9204 Files with a BOM are not correctly parsed by the CPD tokenizer
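The root cause: CPD previously re-opened and decoded each file itself (see the removed InputStreamReader/FileInputStream code below), so a leading byte order mark could end up in the token stream. With this change the tokenizer is handed an already-decoded java.io.Reader, which lets the caller handle the BOM before tokenization. A minimal sketch of that idea, assuming a hypothetical skipBom helper that is not part of this commit:

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.StringReader;

public class BomStrippingReaderExample {

  // Hypothetical helper: returns a Reader positioned after a leading U+FEFF, if one is present.
  static Reader skipBom(Reader source) throws IOException {
    PushbackReader pushback = new PushbackReader(source, 1);
    int first = pushback.read();
    if (first != -1 && first != '\uFEFF') {
      pushback.unread(first); // no BOM: put the first character back
    }
    return pushback;
  }

  public static void main(String[] args) throws IOException {
    Reader withBom = new StringReader("\uFEFFclass Foo {}");
    Reader clean = skipBom(withBom);
    System.out.println((char) clean.read()); // prints 'c'; the BOM never reaches the tokenizer
  }
}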
Diffstat (limited to 'sonar-duplications')
-rw-r--r--  sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java   | 17
-rw-r--r--  sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java     | 20
-rw-r--r--  sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java       | 20
-rw-r--r--  sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java | 16
4 files changed, 35 insertions, 38 deletions
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java
index b24bafa56a1..b3ac2c69cd8 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java
@@ -19,28 +19,25 @@
*/
package org.sonar.duplications.cpd;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
import java.io.Reader;
public class FileCodeLoaderWithoutCache extends CodeLoaderWithoutCache {
- private File file;
- private String encoding;
+ private final String fileName;
+ private final Reader fileReader;
- public FileCodeLoaderWithoutCache(File file, String encoding) {
- this.file = file;
- this.encoding = encoding;
+ public FileCodeLoaderWithoutCache(String fileName, Reader fileReader) {
+ this.fileName = fileName;
+ this.fileReader = fileReader;
}
@Override
public Reader getReader() throws Exception {
- return new InputStreamReader(new FileInputStream(file), encoding);
+ return fileReader;
}
@Override
public String getFileName() {
- return this.file.getAbsolutePath();
+ return fileName;
}
}
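After this change the loader no longer opens or decodes the file; it simply hands back the Reader and display name it was given, so charset and BOM handling are decided by the caller. A rough construction sketch (path and charset are illustrative, not from this commit):

import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import net.sourceforge.pmd.cpd.SourceCode;
import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;

public class LoaderExample {
  public static void main(String[] args) throws Exception {
    String fileName = "src/Foo.java";
    // The caller decides how the file is decoded; the loader only wraps the result.
    Reader reader = Files.newBufferedReader(Paths.get(fileName), StandardCharsets.UTF_8);
    FileCodeLoaderWithoutCache loader = new FileCodeLoaderWithoutCache(fileName, reader);
    SourceCode sourceCode = new SourceCode(loader);
    System.out.println(loader.getFileName()); // "src/Foo.java"
  }
}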
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
index a32fe6054e1..0fc08827663 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
@@ -21,6 +21,9 @@ package org.sonar.duplications.internal.pmd;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.List;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
@@ -28,32 +31,25 @@ import net.sourceforge.pmd.cpd.Tokens;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
/**
* Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
*/
public class TokenizerBridge {
private final Tokenizer tokenizer;
- private final String encoding;
private final PmdBlockChunker blockBuilder;
- public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) {
+ public TokenizerBridge(Tokenizer tokenizer, int blockSize) {
this.tokenizer = tokenizer;
- this.encoding = encoding;
this.blockBuilder = new PmdBlockChunker(blockSize);
}
- // TODO remove from here
- public List<Block> chunk(String resourceId, File file) {
- return blockBuilder.chunk(resourceId, chunk(file));
+ public List<Block> chunk(String resourceId, String fileName, Reader fileReader) {
+ return blockBuilder.chunk(resourceId, chunk(fileName, fileReader));
}
- public List<TokensLine> chunk(File file) {
- SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
+ public List<TokensLine> chunk(String fileName, Reader fileReader) {
+ SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(fileName, fileReader));
Tokens tokens = new Tokens();
TokenEntry.clearImages();
try {
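Callers now pass the file name and a Reader directly to chunk(), and the encoding argument is gone from the constructor. A usage sketch mirroring the new signature (resource id, path and charset are assumptions, not taken from the commit):

import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import net.sourceforge.pmd.cpd.JavaTokenizer;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.internal.pmd.TokenizerBridge;

public class ChunkExample {
  public static void main(String[] args) throws Exception {
    // Block size 10, as in the tests below.
    TokenizerBridge bridge = new TokenizerBridge(new JavaTokenizer(), 10);
    try (Reader reader = Files.newBufferedReader(Paths.get("src/Foo.java"), StandardCharsets.UTF_8)) {
      List<Block> blocks = bridge.chunk("moduleKey:src/Foo.java", "src/Foo.java", reader);
      System.out.println(blocks.size() + " blocks produced");
    }
  }
}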
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
index d3bac18a7f4..0d142829d23 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
@@ -19,6 +19,12 @@
*/
package org.sonar.duplications.internal.pmd;
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.Collection;
+import java.util.List;
import net.sourceforge.pmd.cpd.JavaTokenizer;
import org.junit.Before;
import org.junit.Test;
@@ -29,10 +35,6 @@ import org.sonar.duplications.index.CloneIndex;
import org.sonar.duplications.index.ClonePart;
import org.sonar.duplications.index.PackedMemoryCloneIndex;
-import java.io.File;
-import java.util.Collection;
-import java.util.List;
-
import static org.assertj.core.api.Assertions.assertThat;
public class PmdBridgeTest {
@@ -43,11 +45,11 @@ public class PmdBridgeTest {
@Before
public void setUp() {
index = new PackedMemoryCloneIndex();
- bridge = new TokenizerBridge(new JavaTokenizer(), "UTF-8", 10);
+ bridge = new TokenizerBridge(new JavaTokenizer(), 10);
}
@Test
- public void testDuplicationInSingleFile() {
+ public void testDuplicationInSingleFile() throws IOException {
File file = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile3.java");
addToIndex(file);
@@ -66,7 +68,7 @@ public class PmdBridgeTest {
}
@Test
- public void testDuplicationBetweenTwoFiles() {
+ public void testDuplicationBetweenTwoFiles() throws IOException {
File file1 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile1.java");
File file2 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile2.java");
addToIndex(file1);
@@ -88,8 +90,8 @@ public class PmdBridgeTest {
return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
}
- private void addToIndex(File file) {
- List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file);
+ private void addToIndex(File file) throws IOException {
+ List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file.getAbsolutePath(), Files.newBufferedReader(file.toPath(), StandardCharsets.UTF_8));
for (Block block : blocks) {
index.insert(block);
}
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
index 8e3d0c9c630..f21a8195a83 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
@@ -19,6 +19,11 @@
*/
package org.sonar.duplications.internal.pmd;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
@@ -26,9 +31,6 @@ import net.sourceforge.pmd.cpd.Tokens;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.List;
-
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
@@ -49,12 +51,12 @@ public class TokenizerBridgeTest {
tokenEntries.add(TokenEntry.getEOF());
}
};
- bridge = new TokenizerBridge(tokenizer, "UTF-8", 10);
+ bridge = new TokenizerBridge(tokenizer, 10);
}
@Test
public void shouldClearCacheInTokenEntry() {
- bridge.chunk(null);
+ bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
TokenEntry token = new TokenEntry("image", "srcId", 0);
assertThat(token.getIndex(), is(0));
assertThat(token.getIdentifier(), is(1));
@@ -63,8 +65,8 @@ public class TokenizerBridgeTest {
@Test
public void test() {
// To be sure that token index will be relative to file - run twice:
- bridge.chunk(null);
- List<TokensLine> lines = bridge.chunk(null);
+ bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
+ List<TokensLine> lines = bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
assertThat(lines.size(), is(3));