You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

SimilarityIndexTest.java 4.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. /*
  2. * Copyright (C) 2010, Google Inc. and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.diff;
  11. import static java.nio.charset.StandardCharsets.UTF_8;
  12. import static org.junit.Assert.assertEquals;
  13. import static org.junit.Assert.assertTrue;
  14. import java.io.ByteArrayInputStream;
  15. import java.io.IOException;
  16. import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
  17. import org.eclipse.jgit.lib.Constants;
  18. import org.junit.Test;
  19. public class SimilarityIndexTest {
  20. @Test
  21. public void testIndexingSmallObject() throws TableFullException {
  22. SimilarityIndex si = hash("" //
  23. + "A\n" //
  24. + "B\n" //
  25. + "D\n" //
  26. + "B\n" //
  27. );
  28. int key_A = keyFor("A\n");
  29. int key_B = keyFor("B\n");
  30. int key_D = keyFor("D\n");
  31. assertTrue(key_A != key_B && key_A != key_D && key_B != key_D);
  32. assertEquals(3, si.size());
  33. assertEquals(2, si.count(si.findIndex(key_A)));
  34. assertEquals(4, si.count(si.findIndex(key_B)));
  35. assertEquals(2, si.count(si.findIndex(key_D)));
  36. }
  37. @Test
  38. public void testIndexingLargeObject() throws IOException,
  39. TableFullException {
  40. byte[] in = ("" //
  41. + "A\n" //
  42. + "B\n" //
  43. + "B\n" //
  44. + "B\n").getBytes(UTF_8);
  45. SimilarityIndex si = new SimilarityIndex();
  46. si.hash(new ByteArrayInputStream(in), in.length, false);
  47. assertEquals(2, si.size());
  48. }
  49. @Test
  50. public void testCommonScore_SameFiles() throws TableFullException {
  51. String text = "" //
  52. + "A\n" //
  53. + "B\n" //
  54. + "D\n" //
  55. + "B\n";
  56. SimilarityIndex src = hash(text);
  57. SimilarityIndex dst = hash(text);
  58. assertEquals(8, src.common(dst));
  59. assertEquals(8, dst.common(src));
  60. assertEquals(100, src.score(dst, 100));
  61. assertEquals(100, dst.score(src, 100));
  62. }
  63. @Test
  64. public void testCommonScore_SameFiles_CR_canonicalization()
  65. throws TableFullException {
  66. String text = "" //
  67. + "A\r\n" //
  68. + "B\r\n" //
  69. + "D\r\n" //
  70. + "B\r\n";
  71. SimilarityIndex src = hash(text);
  72. SimilarityIndex dst = hash(text.replace("\r", ""));
  73. assertEquals(8, src.common(dst));
  74. assertEquals(8, dst.common(src));
  75. assertEquals(100, src.score(dst, 100));
  76. assertEquals(100, dst.score(src, 100));
  77. }
  78. @Test
  79. public void testCommonScoreLargeObject_SameFiles_CR_canonicalization()
  80. throws TableFullException, IOException {
  81. String text = "" //
  82. + "A\r\n" //
  83. + "B\r\n" //
  84. + "D\r\n" //
  85. + "B\r\n";
  86. SimilarityIndex src = new SimilarityIndex();
  87. byte[] bytes1 = text.getBytes(UTF_8);
  88. src.hash(new ByteArrayInputStream(bytes1), bytes1.length, true);
  89. src.sort();
  90. SimilarityIndex dst = new SimilarityIndex();
  91. byte[] bytes2 = text.replace("\r", "").getBytes(UTF_8);
  92. dst.hash(new ByteArrayInputStream(bytes2), bytes2.length, true);
  93. dst.sort();
  94. assertEquals(8, src.common(dst));
  95. assertEquals(8, dst.common(src));
  96. assertEquals(100, src.score(dst, 100));
  97. assertEquals(100, dst.score(src, 100));
  98. }
  99. @Test
  100. public void testCommonScore_EmptyFiles() throws TableFullException {
  101. SimilarityIndex src = hash("");
  102. SimilarityIndex dst = hash("");
  103. assertEquals(0, src.common(dst));
  104. assertEquals(0, dst.common(src));
  105. }
  106. @Test
  107. public void testCommonScore_TotallyDifferentFiles()
  108. throws TableFullException {
  109. SimilarityIndex src = hash("A\n");
  110. SimilarityIndex dst = hash("D\n");
  111. assertEquals(0, src.common(dst));
  112. assertEquals(0, dst.common(src));
  113. }
  114. @Test
  115. public void testCommonScore_SimiliarBy75() throws TableFullException {
  116. SimilarityIndex src = hash("A\nB\nC\nD\n");
  117. SimilarityIndex dst = hash("A\nB\nC\nQ\n");
  118. assertEquals(6, src.common(dst));
  119. assertEquals(6, dst.common(src));
  120. assertEquals(75, src.score(dst, 100));
  121. assertEquals(75, dst.score(src, 100));
  122. }
  123. private static SimilarityIndex hash(String text) throws TableFullException {
  124. SimilarityIndex src = new SimilarityIndex();
  125. byte[] raw = Constants.encode(text);
  126. src.hash(raw, 0, raw.length);
  127. src.sort();
  128. return src;
  129. }
  130. private static int keyFor(String line) throws TableFullException {
  131. SimilarityIndex si = hash(line);
  132. assertEquals("single line scored", 1, si.size());
  133. return si.key(0);
  134. }
  135. }