String fileName = source.getFileName();
LOG.info("Using deprecated tokenizer extension point to tokenize {}", fileName);
int lineIdx = 1;
- try {
- for (String line : FileUtils.readLines(new File(fileName), fs.encoding())) {
- for (String token : Splitter.on(" ").split(line)) {
- TokenEntry cpdToken = new TokenEntry(token, fileName, lineIdx);
- cpdTokens.add(cpdToken);
- }
- lineIdx++;
+ for (String line : source.getCode()) {
+ for (String token : Splitter.on(" ").split(line)) {
+ TokenEntry cpdToken = new TokenEntry(token, fileName, lineIdx);
+ cpdTokens.add(cpdToken);
}
- } catch (IOException e) {
- throw new IllegalStateException("Unable to tokenize", e);
+ lineIdx++;
}
cpdTokens.add(TokenEntry.getEOF());
}
*/
package org.sonar.xoo.lang;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokens;
import org.sonar.api.batch.fs.internal.DefaultInputFile;
import org.sonar.api.batch.fs.internal.TestInputFileBuilder;
import org.sonar.api.batch.sensor.SensorContext;
-import org.sonar.api.config.Settings;
-
-import java.io.File;
-import java.io.IOException;
import org.sonar.api.config.MapSettings;
+import org.sonar.api.config.Settings;
+import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.mock;
@Test
public void testExecution() throws IOException {
File source = new File(baseDir, "src/foo.xoo");
- FileUtils.write(source, "token1 token2 token3\ntoken4");
+ FileUtils.write(source, "token1 token2 token3\ntoken4", StandardCharsets.UTF_8);
DefaultInputFile inputFile = new TestInputFileBuilder("foo", "src/foo.xoo")
.setLanguage("xoo")
.setModuleBaseDir(baseDir.toPath())
+ .setCharset(StandardCharsets.UTF_8)
.build();
fileSystem.add(inputFile);
XooTokenizer tokenizer = new XooTokenizer(fileSystem);
- SourceCode sourceCode = mock(SourceCode.class);
- when(sourceCode.getFileName()).thenReturn(inputFile.absolutePath());
Tokens cpdTokens = new Tokens();
- tokenizer.tokenize(sourceCode, cpdTokens);
+ try (InputStreamReader reader = new InputStreamReader(inputFile.inputStream(), inputFile.charset())) {
+ SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(inputFile.absolutePath(), reader));
+ tokenizer.tokenize(sourceCode, cpdTokens);
+ }
// 4 tokens + EOF
assertThat(cpdTokens.getTokens()).hasSize(5);
*/
package org.sonar.duplications.cpd;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
import java.io.Reader;
public class FileCodeLoaderWithoutCache extends CodeLoaderWithoutCache {
- private File file;
- private String encoding;
+ private final String fileName;
+ private final Reader fileReader;
- public FileCodeLoaderWithoutCache(File file, String encoding) {
- this.file = file;
- this.encoding = encoding;
+ public FileCodeLoaderWithoutCache(String fileName, Reader fileReader) {
+ this.fileName = fileName;
+ this.fileReader = fileReader;
}
@Override
public Reader getReader() throws Exception {
- return new InputStreamReader(new FileInputStream(file), encoding);
+ return fileReader;
}
@Override
public String getFileName() {
- return this.file.getAbsolutePath();
+ return fileName;
}
}
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.List;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
/**
* Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
*/
public class TokenizerBridge {
private final Tokenizer tokenizer;
- private final String encoding;
private final PmdBlockChunker blockBuilder;
- public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) {
+ public TokenizerBridge(Tokenizer tokenizer, int blockSize) {
this.tokenizer = tokenizer;
- this.encoding = encoding;
this.blockBuilder = new PmdBlockChunker(blockSize);
}
- // TODO remove from here
- public List<Block> chunk(String resourceId, File file) {
- return blockBuilder.chunk(resourceId, chunk(file));
+ public List<Block> chunk(String resourceId, String fileName, Reader fileReader) {
+ return blockBuilder.chunk(resourceId, chunk(fileName, fileReader));
}
- public List<TokensLine> chunk(File file) {
- SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
+ public List<TokensLine> chunk(String fileName, Reader fileReader) {
+ SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(fileName, fileReader));
Tokens tokens = new Tokens();
TokenEntry.clearImages();
try {
*/
package org.sonar.duplications.internal.pmd;
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.Collection;
+import java.util.List;
import net.sourceforge.pmd.cpd.JavaTokenizer;
import org.junit.Before;
import org.junit.Test;
import org.sonar.duplications.index.ClonePart;
import org.sonar.duplications.index.PackedMemoryCloneIndex;
-import java.io.File;
-import java.util.Collection;
-import java.util.List;
-
import static org.assertj.core.api.Assertions.assertThat;
public class PmdBridgeTest {
@Before
public void setUp() {
  // One bridge and one clone index per test: TokenEntry keeps static state,
  // so sharing across tests would leak token identifiers between cases.
  bridge = new TokenizerBridge(new JavaTokenizer(), 10);
  index = new PackedMemoryCloneIndex();
}
@Test
- public void testDuplicationInSingleFile() {
+ public void testDuplicationInSingleFile() throws IOException {
File file = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile3.java");
addToIndex(file);
}
@Test
- public void testDuplicationBetweenTwoFiles() {
+ public void testDuplicationBetweenTwoFiles() throws IOException {
File file1 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile1.java");
File file2 = new File("test-resources/org/sonar/duplications/cpd/CPDTest/CPDFile2.java");
addToIndex(file1);
return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks);
}
- private void addToIndex(File file) {
- List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file);
+ private void addToIndex(File file) throws IOException {
+ List<Block> blocks = bridge.chunk(file.getAbsolutePath(), file.getAbsolutePath(), Files.newBufferedReader(file.toPath(), StandardCharsets.UTF_8));
for (Block block : blocks) {
index.insert(block);
}
*/
package org.sonar.duplications.internal.pmd;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.List;
-
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
tokenEntries.add(TokenEntry.getEOF());
}
};
- bridge = new TokenizerBridge(tokenizer, "UTF-8", 10);
+ bridge = new TokenizerBridge(tokenizer, 10);
}
@Test
public void shouldClearCacheInTokenEntry() {
- bridge.chunk(null);
+ bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
TokenEntry token = new TokenEntry("image", "srcId", 0);
assertThat(token.getIndex(), is(0));
assertThat(token.getIdentifier(), is(1));
@Test
public void test() {
// To be sure that token index will be relative to file - run twice:
- bridge.chunk(null);
- List<TokensLine> lines = bridge.chunk(null);
+ bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
+ List<TokensLine> lines = bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
assertThat(lines.size(), is(3));
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.io.InputStreamReader;
import java.util.List;
import org.sonar.api.CoreProperties;
import org.sonar.api.batch.CpdMapping;
}
private void populateIndex(String languageKey, List<InputFile> sourceFiles, CpdMapping mapping) {
- TokenizerBridge bridge = new TokenizerBridge(mapping.getTokenizer(), fs.encoding().name(), getBlockSize(languageKey));
+ TokenizerBridge bridge = new TokenizerBridge(mapping.getTokenizer(), getBlockSize(languageKey));
for (InputFile inputFile : sourceFiles) {
if (!index.isIndexed(inputFile)) {
LOG.debug("Populating index from {}", inputFile.absolutePath());
String resourceEffectiveKey = ((DefaultInputFile) inputFile).key();
- List<Block> blocks = bridge.chunk(resourceEffectiveKey, inputFile.file());
+ List<Block> blocks;
+ try (InputStreamReader isr = new InputStreamReader(inputFile.inputStream(), inputFile.charset())) {
+ blocks = bridge.chunk(resourceEffectiveKey, inputFile.absolutePath(), isr);
+ } catch (IOException e) {
+ throw new IllegalStateException("Unable to read content of file " + inputFile.absolutePath(), e);
+ }
index.insert(inputFile, blocks);
}
}
LOG.debug("Detection of duplicated code is not supported for {}", language);
continue;
}
- LOG.info("{} is used for {}", blockIndexer, language);
+ LOG.debug("{} is used for {}", blockIndexer.getClass().getName(), language);
blockIndexer.index(language);
}
}
package org.sonar.scanner.cpd.deprecated;
import com.google.common.collect.Lists;
-import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
List<Statement> statements;
- try (InputStream is = new FileInputStream(inputFile.file());
- Reader reader = new InputStreamReader(is, fs.encoding())) {
+ try (InputStream is = inputFile.inputStream();
+ Reader reader = new InputStreamReader(is, inputFile.charset())) {
statements = statementChunker.chunk(tokenChunker.chunk(reader));
} catch (FileNotFoundException e) {
throw new IllegalStateException("Cannot find file " + inputFile.file(), e);
throw new IllegalStateException("Exception handling file: " + inputFile.file(), e);
}
- List<Block> blocks = blockChunker.chunk(resourceEffectiveKey, statements);
+ List<Block> blocks;
+ try {
+ blocks = blockChunker.chunk(resourceEffectiveKey, statements);
+ } catch (Exception e) {
+ throw new IllegalStateException("Cannot process file " + inputFile.file(), e);
+ }
index.insert(inputFile, blocks);
}
}
*/
package org.sonar.scanner.report;
-import org.apache.commons.io.ByteOrderMark;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.input.BOMInputStream;
-import org.sonar.api.batch.fs.internal.DefaultInputFile;
-import org.sonar.scanner.protocol.output.ScannerReportWriter;
-import org.sonar.scanner.scan.filesystem.InputComponentStore;
-
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
+import org.apache.commons.io.IOUtils;
+import org.sonar.api.batch.fs.internal.DefaultInputFile;
+import org.sonar.scanner.protocol.output.ScannerReportWriter;
+import org.sonar.scanner.scan.filesystem.InputComponentStore;
public class SourcePublisher implements ReportPublisherStep {
File iofile = writer.getSourceFile(inputFile.batchId());
try (FileOutputStream output = new FileOutputStream(iofile);
- BOMInputStream bomIn = new BOMInputStream(new FileInputStream(inputFile.file()),
- ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
- BufferedReader reader = new BufferedReader(new InputStreamReader(bomIn, inputFile.charset()))) {
+ InputStream in = inputFile.inputStream();
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in, inputFile.charset()))) {
writeSource(reader, output, inputFile.lines());
} catch (IOException e) {
throw new IllegalStateException("Unable to store file source in the report", e);
import java.io.File;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.batch.fs.internal.DefaultFileSystem;
import org.sonar.api.batch.fs.internal.TestInputFileBuilder;
-import org.sonar.api.config.Settings;
import org.sonar.api.config.MapSettings;
+import org.sonar.api.config.Settings;
import org.sonar.duplications.block.Block;
import org.sonar.scanner.cpd.index.SonarCpdBlockIndex;
DefaultFileSystem fs = new DefaultFileSystem(baseDir);
file = new TestInputFileBuilder("foo", "src/ManyStatements.java")
.setModuleBaseDir(baseDir.toPath())
+ .setCharset(StandardCharsets.UTF_8)
.setLanguage(JAVA).build();
fs.add(file);
File ioFile = file.file();