1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
/*
* Sonar, open source software quality management tool.
* Copyright (C) 2009 SonarSource SA
* mailto:contact AT sonarsource DOT com
*
* Sonar is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* Sonar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Sonar; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package org.sonar.duplications.cpd;
import net.sourceforge.pmd.cpd.*;
import net.sourceforge.pmd.util.FileFinder;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
/**
 * Driver for copy/paste detection: registers source files, tokenizes each one with the
 * tokenizer of the configured {@link Language}, and delegates duplicate search to a
 * {@link MatchAlgorithm}.
 *
 * <p>Usage as shown by this class: construct, register files through one of the
 * {@code add*} methods, call {@link #go()}, then read the results with
 * {@link #getMatches()}.
 */
public class CPD {

  /** Global debug switch; this class only declares it and never reads it. */
  public static boolean debugEnable = false;

  /** Tokenized sources, keyed by {@code SourceCode.getFileName()}. */
  private Map<String, SourceCode> sourcesByFileName = new HashMap<String, SourceCode>();

  /** Receives an {@code addedFile} callback per registered file; also passed to the match algorithm. */
  private CPDListener listener = new CPDNullListener();

  /** Token stream shared by every file tokenized through this instance. */
  private Tokens tokens = new Tokens();

  /** Minimum tile size handed to {@link MatchAlgorithm}. */
  private int minimumTileSize;

  /** Created by {@link #go()}; stays {@code null} until then. */
  private MatchAlgorithm matchAlgorithm;

  /** Supplies the file filter used for directory scans and the tokenizer used for every file. */
  private Language language;

  /** When {@code true}, files whose name and length were already seen are not added again. */
  private boolean skipDuplicates;

  /** Forwarded to {@code MatchAlgorithm.setLoadSourceCodeSlices} by {@link #go()}. */
  private boolean loadSourceCodeSlices = true;

  /** Encoding given to the file loader; defaults to the {@code file.encoding} system property. */
  private String encoding = System.getProperty("file.encoding");

  /** Signatures ({@code name + '_' + length}) of the files seen while duplicate skipping is on. */
  private Set<String> seenSignatures = new HashSet<String>();

  /**
   * Creates a detector for one language.
   *
   * @param minimumTileSize minimum tile size passed on to the match algorithm
   * @param language language whose file filter and tokenizer are used
   */
  public CPD(int minimumTileSize, Language language) {
    // workaround for bug 1947823
    TokenEntry.clearImages();
    this.minimumTileSize = minimumTileSize;
    this.language = language;
  }

  /** Turns on duplicate-file skipping for all files added from now on. */
  public void skipDuplicates() {
    skipDuplicates = true;
  }

  /**
   * Replaces the listener that is notified about added files.
   *
   * @param cpdListener the new listener
   */
  public void setCpdListener(CPDListener cpdListener) {
    listener = cpdListener;
  }

  /**
   * Sets the encoding used to load files added after this call.
   *
   * @param encoding encoding name handed to the file loader
   */
  public void setEncoding(String encoding) {
    this.encoding = encoding;
  }

  /**
   * Sets the flag that {@link #go()} forwards to the match algorithm.
   *
   * @param loadSourceCodeSlices value passed to {@code MatchAlgorithm.setLoadSourceCodeSlices}
   */
  public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
    this.loadSourceCodeSlices = loadSourceCodeSlices;
  }

  /**
   * Runs the duplicate search over everything added so far. Clears the {@link TokenEntry}
   * images first, then builds a fresh {@link MatchAlgorithm} and lets it find the matches.
   */
  public void go() {
    TokenEntry.clearImages();
    MatchAlgorithm algorithm =
        new MatchAlgorithm(sourcesByFileName, tokens, minimumTileSize, listener);
    matchAlgorithm = algorithm;
    algorithm.setLoadSourceCodeSlices(loadSourceCodeSlices);
    algorithm.findMatches();
  }

  /**
   * Returns the matches produced by the last {@link #go()} run.
   *
   * <p>The match algorithm is only created inside {@link #go()}, so calling this method
   * earlier fails with a {@link NullPointerException}.
   *
   * @return iterator over the matches of the current match algorithm
   */
  public Iterator<Match> getMatches() {
    return matchAlgorithm.matches();
  }

  /**
   * Adds a single file; the listener is told that the batch holds one file.
   *
   * @param file file to tokenize and register
   * @throws IOException propagated from path resolution, file loading or tokenizing
   */
  public void add(File file) throws IOException {
    add(1, file);
  }

  /**
   * Adds the files found directly in {@code dir} that pass the language's file filter.
   *
   * @param dir path of the directory to scan, without recursion
   * @throws IOException if {@code dir} does not exist or a file cannot be processed
   */
  public void addAllInDirectory(String dir) throws IOException {
    addDirectory(dir, false);
  }

  /**
   * Adds the files found in {@code dir} and below that pass the language's file filter.
   *
   * @param dir path of the directory to scan recursively
   * @throws IOException if {@code dir} does not exist or a file cannot be processed
   */
  public void addRecursively(String dir) throws IOException {
    addDirectory(dir, true);
  }

  /**
   * Adds every file of the list in iteration order; the list size is reported to the
   * listener as the file count for each file.
   *
   * @param files files to tokenize and register
   * @throws IOException propagated from the first file that cannot be processed
   */
  public void add(List<File> files) throws IOException {
    Iterator<File> remaining = files.iterator();
    while (remaining.hasNext()) {
      File next = remaining.next();
      add(files.size(), next);
    }
  }

  /**
   * Collects the files under {@code dir} with a {@link FileFinder} and adds them all.
   *
   * @param dir path to scan
   * @param recurse whether sub-directories are scanned as well
   * @throws FileNotFoundException if nothing exists at {@code dir}
   * @throws IOException propagated from adding the individual files
   */
  private void addDirectory(String dir, boolean recurse) throws IOException {
    File root = new File(dir);
    if (!root.exists()) {
      throw new FileNotFoundException("Couldn't find directory " + dir);
    }
    FileFinder finder = new FileFinder();
    // TODO - could use SourceFileSelector here
    List<File> found = finder.findFilesFrom(dir, language.getFileFilter(), recurse);
    add(found);
  }

  /**
   * Tokenizes one file and registers its source, unless it is skipped as a duplicate or by
   * the symlink guard. Skips are reported on {@code System.err}.
   *
   * @param fileCount number of files in the current batch, reported to the listener
   * @param file file to process
   * @throws IOException propagated from path resolution, file loading or tokenizing
   */
  private void add(int fileCount, File file) throws IOException {
    if (skipDuplicates) {
      // TODO refactor this thing into a separate class
      String signature = file.getName() + '_' + file.length();
      // Set.add returns false when the signature was already recorded.
      boolean firstOccurrence = seenSignatures.add(signature);
      if (!firstOccurrence) {
        System.err.println("Skipping " + file.getAbsolutePath()
            + " since it appears to be a duplicate file and --skip-duplicate-files is set");
        return;
      }
    }

    // NOTE(review): both sides are canonical paths of the same file, so this comparison
    // looks like it can never differ and the symlink guard may be a no-op - confirm.
    String canonicalPath = file.getCanonicalPath();
    String canonicalOfAbsolute = new File(file.getAbsolutePath()).getCanonicalPath();
    if (!canonicalPath.equals(canonicalOfAbsolute)) {
      System.err.println("Skipping " + file + " since it appears to be a symlink");
      return;
    }

    listener.addedFile(fileCount, file);
    FileCodeLoaderWithoutCache loader = new FileCodeLoaderWithoutCache(file, encoding);
    SourceCode sourceCode = new SourceCode(loader);
    language.getTokenizer().tokenize(sourceCode, tokens);
    sourcesByFileName.put(sourceCode.getFileName(), sourceCode);
  }
}
|