From 298ece1b781be58ca70d4406a5df84b26869dbe7 Mon Sep 17 00:00:00 2001 From: Evgeny Mandrikov Date: Mon, 30 Apr 2012 12:33:15 +0600 Subject: SONAR-3182 Do not use PMD CPD --- .../java/net/sourceforge/pmd/cpd/CPDListener.java | 39 ---- .../net/sourceforge/pmd/cpd/CPDNullListener.java | 34 ---- .../main/java/net/sourceforge/pmd/cpd/Tokens.java | 14 -- .../java/net/sourceforge/pmd/util/FileFinder.java | 62 ------- .../main/java/org/sonar/duplications/cpd/CPD.java | 120 ------------- .../java/org/sonar/duplications/cpd/Match.java | 199 --------------------- .../org/sonar/duplications/cpd/MatchAlgorithm.java | 150 ---------------- .../org/sonar/duplications/cpd/MatchCollector.java | 180 ------------------- 8 files changed, 798 deletions(-) delete mode 100644 sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java delete mode 100644 sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java delete mode 100644 sonar-duplications/src/main/java/net/sourceforge/pmd/util/FileFinder.java delete mode 100644 sonar-duplications/src/main/java/org/sonar/duplications/cpd/CPD.java delete mode 100644 sonar-duplications/src/main/java/org/sonar/duplications/cpd/Match.java delete mode 100644 sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchAlgorithm.java delete mode 100644 sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchCollector.java (limited to 'sonar-duplications/src/main/java') diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java deleted file mode 100644 index 8832c31ea2f..00000000000 --- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDListener.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ -package net.sourceforge.pmd.cpd; - -import java.io.File; - -public interface CPDListener { - - int INIT = 0; - int HASH = 1; - int MATCH = 2; - int GROUPING = 3; - int DONE = 4; - - void addedFile(int fileCount, File file); - - void phaseUpdate(int phase); -} diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java deleted file mode 100644 index 8bf581b1fef..00000000000 --- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/CPDNullListener.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ -package net.sourceforge.pmd.cpd; - -import java.io.File; - -public class CPDNullListener implements CPDListener { - public void addedFile(int fileCount, File file) { - } - - public void phaseUpdate(int phase) { - } -} diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokens.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokens.java index 1f63af26923..a9398dba4d9 100644 --- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokens.java +++ b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/Tokens.java @@ -23,8 +23,6 @@ */ package net.sourceforge.pmd.cpd; -import org.sonar.duplications.cpd.Match; - import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -46,22 +44,10 @@ public class Tokens { return tokens.iterator(); } - private TokenEntry get(int index) { - return tokens.get(index); - } - public int size() { return tokens.size(); } - public int getLineCount(TokenEntry mark, Match match) { - TokenEntry endTok = get(mark.getIndex() + match.getTokenCount() - 1); - if (endTok == TokenEntry.EOF) { - endTok = get(mark.getIndex() + match.getTokenCount() - 2); - } - return endTok.getBeginLine() - mark.getBeginLine() + 1; - } - public List getTokens() { return tokens; } diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/util/FileFinder.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/util/FileFinder.java deleted file mode 100644 index fc6b365e5f9..00000000000 --- a/sonar-duplications/src/main/java/net/sourceforge/pmd/util/FileFinder.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -package net.sourceforge.pmd.util; - -import java.io.File; -import java.io.FilenameFilter; -import java.util.ArrayList; -import java.util.List; - -/** - * A utility class for finding files within a directory. - */ -public class FileFinder { - - private FilenameFilter filter; - private static final String FILE_SEP = System.getProperty("file.separator"); - - public List findFilesFrom(String dir, FilenameFilter filter, boolean recurse) { - this.filter = filter; - List files = new ArrayList(); - scanDirectory(new File(dir), files, recurse); - return files; - } - - /** - * Implements a tail recursive file scanner - */ - private void scanDirectory(File dir, List list, boolean recurse) { - String[] candidates = dir.list(filter); - if (candidates == null) { - return; - } - for (int i = 0; i < candidates.length; i++) { - File tmp = new File(dir + FILE_SEP + candidates[i]); - if (tmp.isDirectory()) { - if (recurse) { - scanDirectory(tmp, list, true); - } - } else { - list.add(new File(dir + FILE_SEP + candidates[i])); - } - } - } -} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/CPD.java b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/CPD.java deleted file mode 100644 index 61ec8107555..00000000000 --- a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/CPD.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ -package org.sonar.duplications.cpd; - -import net.sourceforge.pmd.cpd.*; -import net.sourceforge.pmd.util.FileFinder; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -/** - * @deprecated since 2.14, will be removed soon, and in any case should not be used for unit tests in Sonar plugins: - * instead of using this class for tests, you should test only your implementation of {@link Tokenizer} - */ -@Deprecated -public class CPD { - - private Map source = new HashMap(); - private CPDListener listener = new CPDNullListener(); - private Tokens tokens = new Tokens(); - private int minimumTileSize; - private MatchAlgorithm matchAlgorithm; - private Language language; - private boolean loadSourceCodeSlices = true; - private String encoding = System.getProperty("file.encoding"); - - public CPD(int minimumTileSize, Language language) { - TokenEntry.clearImages(); // workaround for bug 1947823 - this.minimumTileSize = minimumTileSize; - this.language = language; - } - - public void setCpdListener(CPDListener cpdListener) { - this.listener = cpdListener; - } - - public void setEncoding(String encoding) { - this.encoding = encoding; - } - - public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) { - this.loadSourceCodeSlices = loadSourceCodeSlices; - } - - public void go() { - TokenEntry.clearImages(); - matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener); - matchAlgorithm.setLoadSourceCodeSlices(loadSourceCodeSlices); - matchAlgorithm.findMatches(); - } - - public Iterator getMatches() { - return matchAlgorithm.matches(); - } - - public void add(File file) throws IOException { - add(1, file); - } - - public void addAllInDirectory(String dir) throws IOException { - addDirectory(dir, false); - } - - public void addRecursively(String dir) throws IOException { - addDirectory(dir, true); - } - - public void add(List files) throws IOException { - for (File f : files) { - add(files.size(), f); - } - } - - private void addDirectory(String dir, boolean recurse) throws IOException { - if (!(new File(dir)).exists()) { - throw new FileNotFoundException("Couldn't find directory " + dir); - } - FileFinder finder = new FileFinder(); - add(finder.findFilesFrom(dir, language.getFileFilter(), recurse)); - } - - private void add(int fileCount, File file) throws IOException { - if (!file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) { - System.out.println("Skipping " + file + " since it appears to be a symlink"); - return; - } - - listener.addedFile(fileCount, file); - SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding)); - language.getTokenizer().tokenize(sourceCode, tokens); - source.put(sourceCode.getFileName(), sourceCode); - } - -} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/Match.java b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/Match.java deleted file mode 100644 index 02012a0d6fe..00000000000 --- a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/Match.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ -package org.sonar.duplications.cpd; - -import net.sourceforge.pmd.cpd.TokenEntry; - -import java.util.Comparator; -import java.util.Iterator; -import java.util.Set; -import java.util.TreeSet; - -public class Match implements Comparable { - - public static final String EOL = System.getProperty("line.separator", "\n"); - - private int tokenCount; - private int lineCount; - private Set markSet = new TreeSet(); - private TokenEntry[] marks = new TokenEntry[2]; - private String code; - private MatchCode mc; - private String label; - - public static final Comparator MatchesComparator = new Comparator() { - - public int compare(Match ma, Match mb) { - return mb.getMarkCount() - ma.getMarkCount(); - } - }; - - public static final Comparator LinesComparator = new Comparator() { - - public int compare(Match ma, Match mb) { - return mb.getLineCount() - ma.getLineCount(); - } - }; - - public static final Comparator LabelComparator = new Comparator() { - - public int compare(Match ma, Match mb) { - if (ma.getLabel() == null) { - return 1; - } - if (mb.getLabel() == null) { - return -1; - } - return mb.getLabel().compareTo(ma.getLabel()); - } - }; - - public static final Comparator LengthComparator = new Comparator() { - - public int compare(Match ma, Match mb) { - return mb.getLineCount() - ma.getLineCount(); - } - }; - - public static class MatchCode { - - private int first; - private int second; - - public MatchCode() { - } - - public MatchCode(TokenEntry m1, TokenEntry m2) { - first = m1.getIndex(); - second = m2.getIndex(); - } - - @Override - public int hashCode() { - return first + 37 * second; - } - - @Override - public boolean equals(Object other) { - if (!(other instanceof MatchCode)) { - return false; - } - MatchCode mc = (MatchCode) other; - return mc.first == first && mc.second == second; - } - - public void setFirst(int first) { - this.first = first; - } - - public void setSecond(int second) { - this.second = second; - } - - } - - public Match(int tokenCount, TokenEntry first, TokenEntry second) { - markSet.add(first); - markSet.add(second); - marks[0] = first; - marks[1] = second; - this.tokenCount = tokenCount; - } - - public int getMarkCount() { - return markSet.size(); - } - - public void setLineCount(int lineCount) { - this.lineCount = lineCount; - } - - public int getLineCount() { - return this.lineCount; - } - - public int getTokenCount() { - return this.tokenCount; - } - - public String getSourceCodeSlice() { - return this.code; - } - - public void setSourceCodeSlice(String code) { - this.code = code; - } - - public Iterator iterator() { - return markSet.iterator(); - } - - public int compareTo(Match other) { - int diff = other.getTokenCount() - getTokenCount(); // NOSONAR Bad practice - Class defines compareTo(...) and uses Object.equals() - if (diff != 0) { - return diff; - } - return other.getFirstMark().getIndex() - getFirstMark().getIndex(); - } - - public TokenEntry getFirstMark() { - return marks[0]; - } - - public TokenEntry getSecondMark() { - return marks[1]; - } - - @Override - public String toString() { - return "Match: " + EOL + "tokenCount = " + tokenCount + EOL + "marks = " + markSet.size(); - } - - public Set getMarkSet() { - return markSet; - } - - public MatchCode getMatchCode() { - if (mc == null) { - mc = new MatchCode(marks[0], marks[1]); - } - return mc; - } - - public int getEndIndex() { - return marks[1].getIndex() + getTokenCount() - 1; - } - - public void setMarkSet(Set markSet) { - this.markSet = markSet; - } - - public void setLabel(String aLabel) { - label = aLabel; - } - - public String getLabel() { - return label; - } -} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchAlgorithm.java b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchAlgorithm.java deleted file mode 100644 index 0b25bbcfca5..00000000000 --- a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchAlgorithm.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ -package org.sonar.duplications.cpd; - -import net.sourceforge.pmd.cpd.*; - -import java.util.*; - -public class MatchAlgorithm { - - private static final int MOD = 37; - private int lastHash; - private int lastMod = 1; - - private List matches; - private Map source; - private Tokens tokens; - private List code; - private CPDListener cpdListener; - private int min; - private boolean loadSourceCodeSlices = true; - - public MatchAlgorithm(Map sourceCode, Tokens tokens, int min) { - this(sourceCode, tokens, min, new CPDNullListener()); - } - - public MatchAlgorithm(Map sourceCode, Tokens tokens, int min, CPDListener listener) { - this.source = sourceCode; - this.tokens = tokens; - this.code = tokens.getTokens(); - this.min = min; - this.cpdListener = listener; - for (int i = 0; i < min; i++) { - lastMod *= MOD; - } - } - - public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) { - this.loadSourceCodeSlices = loadSourceCodeSlices; - } - - public void setListener(CPDListener listener) { - this.cpdListener = listener; - } - - public Iterator matches() { - return matches.iterator(); - } - - public TokenEntry tokenAt(int offset, TokenEntry m) { - return code.get(offset + m.getIndex()); - } - - public int getMinimumTileSize() { - return this.min; - } - - public void findMatches() { - cpdListener.phaseUpdate(CPDListener.HASH); - Map markGroups = hash(); - - cpdListener.phaseUpdate(CPDListener.MATCH); - MatchCollector matchCollector = new MatchCollector(this); - for (Iterator i = markGroups.values().iterator(); i.hasNext();) { - Object o = i.next(); - if (o instanceof List) { - List l = (List) o; - - Collections.reverse(l); - matchCollector.collect(l); - } - i.remove(); - } - cpdListener.phaseUpdate(CPDListener.GROUPING); - matches = matchCollector.getMatches(); - - for (Match match : matches) { - for (Iterator occurrences = match.iterator(); occurrences.hasNext();) { - TokenEntry mark = occurrences.next(); - match.setLineCount(tokens.getLineCount(mark, match)); - if (loadSourceCodeSlices && !occurrences.hasNext()) { - int start = mark.getBeginLine(); - int end = start + match.getLineCount() - 1; - SourceCode sourceCode = source.get(mark.getTokenSrcID()); - match.setSourceCodeSlice(sourceCode.getSlice(start, end)); - } - } - } - cpdListener.phaseUpdate(CPDListener.DONE); - } - - private Map hash() { - Map markGroups = new HashMap(tokens.size()); - for (int i = code.size() - 1; i >= 0; i--) { - TokenEntry token = code.get(i); - if (token != TokenEntry.EOF) { - int last = tokenAt(min, token).getIdentifier(); - lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last; - token.setHashCode(lastHash); - Object o = markGroups.get(token); - - // Note that this insertion method is worthwhile since the vast majority - // markGroup keys will have only one value. - if (o == null) { - markGroups.put(token, token); - } else if (o instanceof TokenEntry) { - List l = new ArrayList(); - l.add((TokenEntry) o); - l.add(token); - markGroups.put(token, l); - } else { - List l = (List) o; - l.add(token); - } - } else { - lastHash = 0; - for (int end = Math.max(0, i - min + 1); i > end; i--) { - token = code.get(i - 1); - lastHash = MOD * lastHash + token.getIdentifier(); - if (token == TokenEntry.EOF) { - break; - } - } - } - } - return markGroups; - } - -} diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchCollector.java b/sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchCollector.java deleted file mode 100644 index f83b183b5b2..00000000000 --- a/sonar-duplications/src/main/java/org/sonar/duplications/cpd/MatchCollector.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Sonar, open source software quality management tool. - * Copyright (C) 2008-2012 SonarSource - * mailto:contact AT sonarsource DOT com - * - * Sonar is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 3 of the License, or (at your option) any later version. - * - * Sonar is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Sonar; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 - */ - -/** - * BSD-style license; for more info see http://pmd.sourceforge.net/license.html - */ -package org.sonar.duplications.cpd; - -import net.sourceforge.pmd.cpd.TokenEntry; - -import java.util.*; - -public class MatchCollector { - - private MatchAlgorithm ma; - private Map startMap = new HashMap(); - private Map> fileMap = new HashMap>(); - - public MatchCollector(MatchAlgorithm ma) { - this.ma = ma; - } - - public void collect(List marks) { - // first get a pairwise collection of all maximal matches - for (int i = 0; i < marks.size() - 1; i++) { - TokenEntry mark1 = marks.get(i); - for (int j = i + 1; j < marks.size(); j++) { - TokenEntry mark2 = marks.get(j); - int diff = mark1.getIndex() - mark2.getIndex(); - if (-diff < ma.getMinimumTileSize()) { - continue; - } - if (hasPreviousDupe(mark1, mark2)) { - continue; - } - - // "match too small" check - int dupes = countDuplicateTokens(mark1, mark2); - if (dupes < ma.getMinimumTileSize()) { - continue; - } - // is it still too close together - if (diff + dupes >= 1) { - continue; - } - determineMatch(mark1, mark2, dupes); - } - } - } - - @SuppressWarnings("PMD.CompareObjectsWithEquals") - public List getMatches() { - List matchList = new ArrayList(startMap.values()); - Collections.sort(matchList); - Set matchSet = new HashSet(); - Match.MatchCode matchCode = new Match.MatchCode(); - for (int i = matchList.size(); i > 1; i--) { - Match match1 = matchList.get(i - 1); - TokenEntry mark1 = match1.getMarkSet().iterator().next(); - matchSet.clear(); - matchSet.add(match1.getMatchCode()); - for (int j = i - 1; j > 0; j--) { - Match match2 = matchList.get(j - 1); - if (match1.getTokenCount() != match2.getTokenCount()) { - break; - } - TokenEntry mark2 = null; - for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) { - mark2 = iter.next(); - if (mark2 != mark1) { - break; - } - } - int dupes = countDuplicateTokens(mark1, mark2); - if (dupes < match1.getTokenCount()) { - break; - } - matchSet.add(match2.getMatchCode()); - match1.getMarkSet().addAll(match2.getMarkSet()); - matchList.remove(i - 2); - i--; - } - if (matchSet.size() == 1) { - continue; - } - // prune the mark set - Set pruned = match1.getMarkSet(); - boolean done = false; - ArrayList a1 = new ArrayList(match1.getMarkSet()); - Collections.sort(a1); - for (int outer = 0; outer < a1.size() - 1 && !done; outer++) { - TokenEntry cmark1 = a1.get(outer); - for (int inner = outer + 1; inner < a1.size() && !done; inner++) { - TokenEntry cmark2 = a1.get(inner); - matchCode.setFirst(cmark1.getIndex()); - matchCode.setSecond(cmark2.getIndex()); - if (!matchSet.contains(matchCode)) { - if (pruned.size() > 2) { - pruned.remove(cmark2); - } - if (pruned.size() == 2) { - done = true; - } - } - } - } - } - return matchList; - } - - /** - * A greedy algorithm for determining non-overlapping matches - */ - private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) { - Match match = new Match(dupes, mark1, mark2); - String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID(); - List pairMatches = fileMap.get(fileKey); - if (pairMatches == null) { - pairMatches = new ArrayList(); - fileMap.put(fileKey, pairMatches); - } - boolean add = true; - for (int i = 0; i < pairMatches.size(); i++) { - Match other = pairMatches.get(i); - if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex() > 0) { - boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0; - if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0)) - || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { - if (other.getTokenCount() >= match.getTokenCount()) { - add = false; - break; - } else { - pairMatches.remove(i); - startMap.remove(other.getMatchCode()); - } - } - } - } - if (add) { - pairMatches.add(match); - startMap.put(match.getMatchCode(), match); - } - } - - private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) { - if (mark1.getIndex() == 0) { - return false; - } - return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2)); - } - - private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) { - int index = 0; - while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) { - index++; - } - return index; - } - - private boolean matchEnded(TokenEntry token1, TokenEntry token2) { - return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF; - } -} -- cgit v1.2.3