author | Julien HENRY <julien.henry@sonarsource.com> | 2017-05-09 14:17:18 +0200
committer | Julien HENRY <henryju@yahoo.fr> | 2017-05-09 18:02:07 +0200
commit | 88bb8230b1bfd3e6ec923c35890c1daba93fece4 (patch)
tree | d51186a8ea7d6c1402ec221ac5c18bff66963beb /sonar-duplications
parent | 53caac9fa3f2c97ca67936fe9d11ae47ae55c6ca (diff)
download | sonarqube-88bb8230b1bfd3e6ec923c35890c1daba93fece4.tar.gz, sonarqube-88bb8230b1bfd3e6ec923c35890c1daba93fece4.zip
SONAR-9204 Files with a BOM are not correctly parsed by the CPD tokenizer
Diffstat (limited to 'sonar-duplications')
4 files changed, 35 insertions, 38 deletions
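The root cause is that the tokenizer used to open files itself with `new InputStreamReader(new FileInputStream(file), encoding)`, which does not strip a UTF-8 byte order mark, so U+FEFF leaked into the first token of each affected file. After this commit the caller hands a `Reader` (plus the file name) to `TokenizerBridge` and `FileCodeLoaderWithoutCache`, so it can supply a reader that has already dealt with the BOM. A minimal caller-side sketch of such a reader; the helper class and method names are illustrative, not part of this commit:

```java
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.Reader;
import java.nio.charset.Charset;

// Hypothetical helper, not part of this commit: opens a file and drops a
// leading U+FEFF so the CPD tokenizer never sees the BOM as token text.
public final class BomStrippingReaders {

  private BomStrippingReaders() {
  }

  public static Reader open(String path, Charset charset) throws IOException {
    PushbackReader reader = new PushbackReader(
      new InputStreamReader(new FileInputStream(path), charset), 1);
    int first = reader.read();
    if (first != -1 && first != '\uFEFF') {
      // Not a BOM: push the character back so it is tokenized normally.
      reader.unread(first);
    }
    return reader;
  }
}
```

A reader built this way can then be passed to the new API, e.g. `bridge.chunk(resourceId, path, BomStrippingReaders.open(path, StandardCharsets.UTF_8))`.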
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java
index b24bafa56a1..b3ac2c69cd8 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/FileCodeLoaderWithoutCache.java
@@ -19,28 +19,25 @@
  */
 package org.sonar.duplications.cpd;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
 
 public class FileCodeLoaderWithoutCache extends CodeLoaderWithoutCache {
 
-  private File file;
-  private String encoding;
+  private final String fileName;
+  private final Reader fileReader;
 
-  public FileCodeLoaderWithoutCache(File file, String encoding) {
-    this.file = file;
-    this.encoding = encoding;
+  public FileCodeLoaderWithoutCache(String fileName, Reader fileReader) {
+    this.fileName = fileName;
+    this.fileReader = fileReader;
   }
 
   @Override
   public Reader getReader() throws Exception {
-    return new InputStreamReader(new FileInputStream(file), encoding);
+    return fileReader;
   }
 
   @Override
   public String getFileName() {
-    return this.file.getAbsolutePath();
+    return fileName;
   }
 }
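After this change the loader no longer opens or decodes anything itself: `getReader()` hands back the injected `Reader` and `getFileName()` echoes the name it was given. A short sketch of the resulting call pattern, using an in-memory `StringReader` purely for illustration:

```java
import java.io.Reader;
import java.io.StringReader;
import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;

public class LoaderSketch {
  public static void main(String[] args) throws Exception {
    // The caller decides how the characters are produced (and whether a BOM
    // was stripped beforehand); the loader is now only a thin adapter.
    FileCodeLoaderWithoutCache loader =
      new FileCodeLoaderWithoutCache("Foo.java", new StringReader("class Foo {}"));
    try (Reader reader = loader.getReader()) {
      System.out.println(loader.getFileName() + " starts with '" + (char) reader.read() + "'");
    }
  }
}
```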
diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
index a32fe6054e1..0fc08827663 100644
--- a/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
+++ b/sonar-duplications/src/main/java/org/sonar/duplications/internal/pmd/TokenizerBridge.java
@@ -21,6 +21,9 @@ package org.sonar.duplications.internal.pmd;
 
 import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.List;
 import net.sourceforge.pmd.cpd.SourceCode;
 import net.sourceforge.pmd.cpd.TokenEntry;
 import net.sourceforge.pmd.cpd.Tokenizer;
@@ -28,32 +31,25 @@ import net.sourceforge.pmd.cpd.Tokens;
 import org.sonar.duplications.block.Block;
 import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
 
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
 /**
  * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
  */
 public class TokenizerBridge {
 
   private final Tokenizer tokenizer;
-  private final String encoding;
   private final PmdBlockChunker blockBuilder;
 
-  public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) {
+  public TokenizerBridge(Tokenizer tokenizer, int blockSize) {
     this.tokenizer = tokenizer;
-    this.encoding = encoding;
     this.blockBuilder = new PmdBlockChunker(blockSize);
   }
 
-  // TODO remove from here
-  public List<Block> chunk(String resourceId, File file) {
-    return blockBuilder.chunk(resourceId, chunk(file));
+  public List<Block> chunk(String resourceId, String fileName, Reader fileReader) {
+    return blockBuilder.chunk(resourceId, chunk(fileName, fileReader));
   }
 
-  public List<TokensLine> chunk(File file) {
-    SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
+  public List<TokensLine> chunk(String fileName, Reader fileReader) {
+    SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(fileName, fileReader));
     Tokens tokens = new Tokens();
     TokenEntry.clearImages();
     try {
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
index d3bac18a7f4..0d142829d23 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/PmdBridgeTest.java
@@ -19,6 +19,12 @@
  */
 package org.sonar.duplications.internal.pmd;
 
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.Collection;
+import java.util.List;
 import net.sourceforge.pmd.cpd.JavaTokenizer;
 import org.junit.Before;
 import org.junit.Test;
@@ -29,10 +35,6 @@ import org.sonar.duplications.index.CloneIndex;
 import org.sonar.duplications.index.ClonePart;
 import org.sonar.duplications.index.PackedMemoryCloneIndex;
 
-import java.io.File;
-import java.util.Collection;
-import java.util.List;
-
 import static org.assertj.core.api.Assertions.assertThat;
 
 public class PmdBridgeTest {
@@ -43,11 +45,11 @@ public class PmdBridgeTest {
   @Before
   public void setUp() {
     index = new PackedMemoryCloneIndex();
-    bridge = new TokenizerBridge(new JavaTokenizer(), "UTF-8", 10);
+    bridge = new TokenizerBridge(new JavaTokenizer(), 10);
   }
 
   @Test
-  public void testDuplicationInSingleFile() {
+  public void testDuplicationInSingleFile() throws IOException {
     File file = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile3.java");
     addToIndex(file);
 
@@ -66,7 +68,7 @@
   }
 
   @Test
-  public void testDuplicationBetweenTwoFiles() {
+  public void testDuplicationBetweenTwoFiles() throws IOException {
     File file1 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile1.java");
     File file2 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile2.java");
     addToIndex(file1);
@@ -88,8 +90,8 @@
     return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
   }
 
-  private void addToIndex(File file) {
-    List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file);
+  private void addToIndex(File file) throws IOException {
+    List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file.getAbsolutePath(), Files.newBufferedReader(file.toPath(), StandardCharsets.UTF_8));
     for (Block block : blocks) {
       index.insert(block);
     }
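Note that `Files.newBufferedReader(...)` used in the updated test does not strip a UTF-8 BOM either; the existing fixtures simply do not contain one. A sketch of how the BOM case itself could be exercised against the new `chunk(fileName, fileReader)` signature; this snippet does not exist in the commit, and the class name and the in-memory source are assumptions:

```java
package org.sonar.duplications.internal.pmd;

import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import net.sourceforge.pmd.cpd.JavaTokenizer;

public class BomChunkSketch {
  public static void main(String[] args) throws Exception {
    // Source text prefixed with a UTF-8 byte order mark (bytes EF BB BF).
    byte[] source = ("\uFEFF" + "class Foo { int a; }").getBytes(StandardCharsets.UTF_8);

    // Caller-side BOM stripping, since the new API expects a ready-to-use Reader.
    PushbackReader reader = new PushbackReader(
      new InputStreamReader(new ByteArrayInputStream(source), StandardCharsets.UTF_8), 1);
    int first = reader.read();
    if (first != -1 && first != '\uFEFF') {
      reader.unread(first);
    }

    TokenizerBridge bridge = new TokenizerBridge(new JavaTokenizer(), 10);
    List<TokensLine> lines = bridge.chunk("Foo.java", reader);
    System.out.println("tokenized lines: " + lines.size());
  }
}
```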
diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
index 8e3d0c9c630..f21a8195a83 100644
--- a/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
+++ b/sonar-duplications/src/test/java/org/sonar/duplications/internal/pmd/TokenizerBridgeTest.java
@@ -19,6 +19,11 @@
  */
 package org.sonar.duplications.internal.pmd;
 
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
 import net.sourceforge.pmd.cpd.SourceCode;
 import net.sourceforge.pmd.cpd.TokenEntry;
 import net.sourceforge.pmd.cpd.Tokenizer;
@@ -26,9 +31,6 @@ import net.sourceforge.pmd.cpd.Tokens;
 import org.junit.Before;
 import org.junit.Test;
 
-import java.io.IOException;
-import java.util.List;
-
 import static org.hamcrest.Matchers.is;
 import static org.junit.Assert.assertThat;
 
@@ -49,12 +51,12 @@ public class TokenizerBridgeTest {
         tokenEntries.add(TokenEntry.getEOF());
       }
     };
-    bridge = new TokenizerBridge(tokenizer, "UTF-8", 10);
+    bridge = new TokenizerBridge(tokenizer, 10);
   }
 
   @Test
   public void shouldClearCacheInTokenEntry() {
-    bridge.chunk(null);
+    bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
     TokenEntry token = new TokenEntry("image", "srcId", 0);
     assertThat(token.getIndex(), is(0));
     assertThat(token.getIdentifier(), is(1));
@@ -63,8 +65,8 @@
   @Test
   public void test() {
     // To be sure that token index will be relative to file - run twice:
-    bridge.chunk(null);
-    List<TokensLine> lines = bridge.chunk(null);
+    bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
+    List<TokensLine> lines = bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
     assertThat(lines.size(), is(3));
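The updated `TokenizerBridgeTest` builds the same empty UTF-8 reader twice inline; a tiny test utility, not part of this commit, could keep those call sites shorter:

```java
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

// Hypothetical test utility: an empty UTF-8 Reader for exercising the bridge
// without touching the file system.
final class EmptyReaders {

  private EmptyReaders() {
  }

  static Reader empty() {
    return new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8);
  }
}
```

The calls in the tests would then read `bridge.chunk("file.txt", EmptyReaders.empty())`.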