From f0d96c1053f6345e82351522c5bd8ce5633008bd Mon Sep 17 00:00:00 2001
From: Evgeny Mandrikov
+ * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link #CLONEPART_COMPARATOR}), + * so running time - O(|A|+|B|). + *
+ */ + private static boolean containsIn(CloneGroup first, CloneGroup second) { + // TODO Godin: must prove that this is unused + // if (!first.getOriginPart().getResourceId().equals(second.getOriginPart().getResourceId())) { + // return false; + // } + if (first.getCloneUnitLength() > second.getCloneUnitLength()) { + return false; + } + List+ * Note that this is not equal to number of nodes from root to this node, + * because in a compact suffix-tree edge can span multiple symbols - see {@link Edge#getSpan()}. + *
+ * Depth of {@link #suffixNode} is always equal to this depth minus one. + *
+ */ + int depth; + + int startSize, endSize; + + public Node(Node node, Node suffixNode) { + this(node.suffixTree, suffixNode); + } + + public Node(SuffixTree suffixTree, Node suffixNode) { + this.suffixTree = suffixTree; + this.suffixNode = suffixNode; + edges = new HashMap+ * y: 2 3 4 5 + * z: 3 4 + * x: 1 2 3 4 5 6 + *+ * Expected: + *
+ * x-y (2 3 4 5) + * x-y-z (3 4) + *+ */ + @Test + public void exampleFromPaper() { + CloneIndex index = createIndex( + newBlocks("y", "2 3 4 5"), + newBlocks("z", "3 4")); + List
+ * a: 2 3 4 5 + * b: 3 4 + * c: 1 2 3 4 5 6 + *+ * Expected: + *
+ * c-a (2 3 4 5) + * c-a-b (3 4) + *+ */ + @Test + public void exampleFromPaperWithModifiedResourceIds() { + CloneIndex cloneIndex = createIndex( + newBlocks("a", "2 3 4 5"), + newBlocks("b", "3 4")); + List
+ * b: 3 4 5 6 + * c: 5 6 7 + * a: 1 2 3 4 5 6 7 8 9 + *+ * Expected: + *
+ * a-b (3 4 5 6) + * a-b-c (5 6) + * a-c (5 6 7) + *+ */ + @Test + public void example1() { + CloneIndex index = createIndex( + newBlocks("b", "3 4 5 6"), + newBlocks("c", "5 6 7")); + List
+ * b: 1 2 3 4 1 2 3 4 1 2 3 4 + * c: 1 2 3 4 + * a: 1 2 3 5 + *+ * Expected: + *
+ * a-b-b-b-c (1 2 3) + *+ */ + @Test + public void example2() { + CloneIndex index = createIndex( + newBlocks("b", "1 2 3 4 1 2 3 4 1 2 3 4"), + newBlocks("c", "1 2 3 4")); + List
+ * a: 1 2 3 1 2 4 + *+ * Expected only one clone: + *
+ * a-a (1 2) + *+ */ + @Test + public void clonesInFileItself() { + CloneIndex index = createIndex(); + List
+ * b: 1 2 1 2 + * a: 1 2 1 + *+ * Expected: + *
+ * a-b-b (1 2) + * a-b (1 2 1) + *+ * "a-a-b-b (1)" should not be reported, because fully covered by "a-b (1 2 1)" + */ + @Test + public void covered() { + CloneIndex index = createIndex( + newBlocks("b", "1 2 1 2")); + List
+ * b: 1 2 1 2 1 2 1 + * a: 1 2 1 2 1 2 + *+ * Expected: + *
+ * a-b-b (1 2 1 2 1) - note that there is overlapping among parts for "b" + * a-b (1 2 1 2 1 2) + *+ */ + @Test + public void problemWithNestedCloneGroups() { + CloneIndex index = createIndex( + newBlocks("b", "1 2 1 2 1 2 1")); + List
+ * a: 1 2 3 + * b: 1 2 4 + * a: 1 2 5 + *+ * Expected: + *
+ * a-b (1 2) - instead of "a-a-b", which will be the case if file from index not ignored + *+ */ + @Test + public void fileAlreadyInIndex() { + CloneIndex index = createIndex( + newBlocks("a", "1 2 3"), + newBlocks("b", "1 2 4")); + // Note about blocks with hashes "3", "4" and "5": those blocks here in order to not face another problem - with EOF (see separate test) + List
+ * b: 1 2 3 4 + * a: 1 2 3 + *+ * Expected clone which ends at the end of file "a": + *
+ * a-b (1 2 3) + *+ */ + @Test + public void problemWithEndOfFile() { + CloneIndex cloneIndex = createIndex( + newBlocks("b", "1 2 3 4")); + List
+ * 0: A,B,A,B + * 1: A,B,A + *+ * with block size 5 each block will span both lines, and hashes will be: + *
+ * A,B,A,B,A=1 + * B,A,B,A,B=2 + * A,B,A,B,A=1 + *+ * Expected: one clone with two parts, which contain exactly the same lines + */ + @Test + public void same_lines_but_different_indexes() { + CloneIndex cloneIndex = createIndex(); + List
- * y: 2 3 4 5 - * z: 3 4 - * x: 1 2 3 4 5 6 - *- * Expected: - *
- * x-y (2 3 4 5) - * x-y-z (3 4) - *- */ - @Test - public void exampleFromPaper() { - CloneIndex cloneIndex = createIndex( - blocksForResource("y").withHashes("2", "3", "4", "5"), - blocksForResource("z").withHashes("3", "4")); - List
- * a: 2 3 4 5 - * b: 3 4 - * c: 1 2 3 4 5 6 - *- * Expected: - *
- * c-a (2 3 4 5) - * c-a-b (3 4) - *- */ - @Test - public void exampleFromPaperWithModifiedResourceIds() { - CloneIndex cloneIndex = createIndex( - blocksForResource("a").withHashes("2", "3", "4", "5"), - blocksForResource("b").withHashes("3", "4")); - List
- * b: 3 4 5 6 - * c: 5 6 7 - * a: 1 2 3 4 5 6 7 8 9 - *- * Expected: - *
- * a-b (3 4 5 6) - * a-b-c (5 6) - * a-c (5 6 7) - *- */ - @Test - public void example1() { - CloneIndex cloneIndex = createIndex( - blocksForResource("b").withHashes("3", "4", "5", "6"), - blocksForResource("c").withHashes("5", "6", "7")); - List
- * b: 1 2 3 4 1 2 3 4 1 2 3 4 - * c: 1 2 3 4 - * a: 1 2 3 4 5 - *- * Expected: - *
- * a-b-b-b-c (1 2 3 4) - *- */ - @Test - public void example2() { - CloneIndex cloneIndex = createIndex( - blocksForResource("b").withHashes("1", "2", "3", "4", "1", "2", "3", "4", "1", "2", "3", "4"), - blocksForResource("c").withHashes("1", "2", "3", "4")); - List
- * b: 1 2 3 4 - * a: 1 2 3 - *- * Expected clone which ends at the end of file "a": - *
- * a-b (1 2 3) - *- */ - @Test - public void problemWithEndOfFile() { - CloneIndex cloneIndex = createIndex( - blocksForResource("b").withHashes("1", "2", "3", "4")); - List
- * a: 1 2 3 1 2 4 - *- * Expected only one clone: - *
- * a-a (1 2) - *- */ - @Test - public void clonesInFileItself() { - CloneIndex cloneIndex = createIndex(); - List
- * b: 1 2 1 2 - * a: 1 2 1 - *- * Expected: - *
- * a-b-b (1 2) - * a-b (1 2 1) - *- * "a-a-b-b (1)" should not be reported, because fully covered by "a-b (1 2 1)" - */ - @Test - public void covered() { - CloneIndex cloneIndex = createIndex( - blocksForResource("b").withHashes("1", "2", "1", "2")); - List
- * b: 1 2 1 2 1 2 1 - * a: 1 2 1 2 1 2 - *- * Expected: - *
- * a-b-b (1 2 1 2 1) - note that there is overlapping among parts for "b" - * a-b (1 2 1 2 1 2) - *- */ - @Test - public void problemWithNestedCloneGroups() { - CloneIndex cloneIndex = createIndex( - blocksForResource("b").withHashes("1", "2", "1", "2", "1", "2", "1")); - List
- * a: 1 2 3 - * b: 1 2 4 - * a: 1 2 5 - *- * Expected: - *
- * a-b (1 2) - instead of "a-a-b", which will be the case if file from index not ignored - *- */ - @Test - public void fileAlreadyInIndex() { - CloneIndex cloneIndex = createIndex( - blocksForResource("a").withHashes("1", "2", "3"), - blocksForResource("b").withHashes("1", "2", "4")); - // Note about blocks with hashes "3", "4" and "5": those blocks here in order to not face another problem - with EOF (see separate test) - List
- * 0: A,B,A,B - * 1: A,B,A - *- * with block size 5 each block will span both lines, and hashes will be: - *
- * A,B,A,B,A=1 - * B,A,B,A,B=2 - * A,B,A,B,A=1 - *- * Expected: one clone with two parts, which contain exactly the same lines - */ - @Test - public void same_lines_but_different_indexes() { - CloneIndex cloneIndex = createIndex(); - List
+ * x: a 2 b 2 c 2 2 2 + *+ * Expected: + *
+ * x-x (2 2) + * x-x-x-x-x (2) + *+ */ + @Test + public void myTest() { + CloneIndex index = createIndex(); + ListfileBlocks = newBlocks("x", "a 2 b 2 c 2 2 2"); + List result = detect(index, fileBlocks); + + print(result); + assertEquals(2, result.size()); + + assertThat(result, hasCloneGroup(2, + newClonePart("x", 5, 2), + newClonePart("x", 6, 2))); + + assertThat(result, hasCloneGroup(1, + newClonePart("x", 1, 1), + newClonePart("x", 3, 1), + newClonePart("x", 5, 1), + newClonePart("x", 6, 1), + newClonePart("x", 7, 1))); + } + + /** + * Given: + * + * x: a 2 3 b 2 3 c 2 3 d 2 3 2 3 2 3 + *+ * Expected: + *+ * x-x (2 3 2 3) + * x-x-x-x-x-x (2 3) + *+ */ + @Test + public void myTest2() { + CloneIndex index = createIndex(); + ListfileBlocks = newBlocks("x", "a 2 3 b 2 3 c 2 3 d 2 3 2 3 2 3"); + List result = detect(index, fileBlocks); + + print(result); + assertEquals(2, result.size()); + + assertThat(result, hasCloneGroup(4, + newClonePart("x", 10, 4), + newClonePart("x", 12, 4))); + + assertThat(result, hasCloneGroup(2, + newClonePart("x", 1, 2), + newClonePart("x", 4, 2), + newClonePart("x", 7, 2), + newClonePart("x", 10, 2), + newClonePart("x", 12, 2), + newClonePart("x", 14, 2))); + } + + @Override + protected List detect(CloneIndex index, List fileBlocks) { + return SuffixTreeCloneDetectionAlgorithm.detect(index, fileBlocks); + } + +} -- cgit v1.2.3