You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

SimilarityIndexTest.java 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * Copyright (C) 2010, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.diff;
  44. import static java.nio.charset.StandardCharsets.UTF_8;
  45. import static org.junit.Assert.assertEquals;
  46. import static org.junit.Assert.assertTrue;
  47. import java.io.ByteArrayInputStream;
  48. import java.io.IOException;
  49. import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
  50. import org.eclipse.jgit.lib.Constants;
  51. import org.junit.Test;
  52. public class SimilarityIndexTest {
  53. @Test
  54. public void testIndexingSmallObject() throws TableFullException {
  55. SimilarityIndex si = hash("" //
  56. + "A\n" //
  57. + "B\n" //
  58. + "D\n" //
  59. + "B\n" //
  60. );
  61. int key_A = keyFor("A\n");
  62. int key_B = keyFor("B\n");
  63. int key_D = keyFor("D\n");
  64. assertTrue(key_A != key_B && key_A != key_D && key_B != key_D);
  65. assertEquals(3, si.size());
  66. assertEquals(2, si.count(si.findIndex(key_A)));
  67. assertEquals(4, si.count(si.findIndex(key_B)));
  68. assertEquals(2, si.count(si.findIndex(key_D)));
  69. }
  70. @Test
  71. public void testIndexingLargeObject() throws IOException,
  72. TableFullException {
  73. byte[] in = ("" //
  74. + "A\n" //
  75. + "B\n" //
  76. + "B\n" //
  77. + "B\n").getBytes(UTF_8);
  78. SimilarityIndex si = new SimilarityIndex();
  79. si.hash(new ByteArrayInputStream(in), in.length, false);
  80. assertEquals(2, si.size());
  81. }
  82. @Test
  83. public void testCommonScore_SameFiles() throws TableFullException {
  84. String text = "" //
  85. + "A\n" //
  86. + "B\n" //
  87. + "D\n" //
  88. + "B\n";
  89. SimilarityIndex src = hash(text);
  90. SimilarityIndex dst = hash(text);
  91. assertEquals(8, src.common(dst));
  92. assertEquals(8, dst.common(src));
  93. assertEquals(100, src.score(dst, 100));
  94. assertEquals(100, dst.score(src, 100));
  95. }
  96. @Test
  97. public void testCommonScore_SameFiles_CR_canonicalization()
  98. throws TableFullException {
  99. String text = "" //
  100. + "A\r\n" //
  101. + "B\r\n" //
  102. + "D\r\n" //
  103. + "B\r\n";
  104. SimilarityIndex src = hash(text);
  105. SimilarityIndex dst = hash(text.replace("\r", ""));
  106. assertEquals(8, src.common(dst));
  107. assertEquals(8, dst.common(src));
  108. assertEquals(100, src.score(dst, 100));
  109. assertEquals(100, dst.score(src, 100));
  110. }
  111. @Test
  112. public void testCommonScoreLargeObject_SameFiles_CR_canonicalization()
  113. throws TableFullException, IOException {
  114. String text = "" //
  115. + "A\r\n" //
  116. + "B\r\n" //
  117. + "D\r\n" //
  118. + "B\r\n";
  119. SimilarityIndex src = new SimilarityIndex();
  120. byte[] bytes1 = text.getBytes(UTF_8);
  121. src.hash(new ByteArrayInputStream(bytes1), bytes1.length, true);
  122. src.sort();
  123. SimilarityIndex dst = new SimilarityIndex();
  124. byte[] bytes2 = text.replace("\r", "").getBytes(UTF_8);
  125. dst.hash(new ByteArrayInputStream(bytes2), bytes2.length, true);
  126. dst.sort();
  127. assertEquals(8, src.common(dst));
  128. assertEquals(8, dst.common(src));
  129. assertEquals(100, src.score(dst, 100));
  130. assertEquals(100, dst.score(src, 100));
  131. }
  132. @Test
  133. public void testCommonScore_EmptyFiles() throws TableFullException {
  134. SimilarityIndex src = hash("");
  135. SimilarityIndex dst = hash("");
  136. assertEquals(0, src.common(dst));
  137. assertEquals(0, dst.common(src));
  138. }
  139. @Test
  140. public void testCommonScore_TotallyDifferentFiles()
  141. throws TableFullException {
  142. SimilarityIndex src = hash("A\n");
  143. SimilarityIndex dst = hash("D\n");
  144. assertEquals(0, src.common(dst));
  145. assertEquals(0, dst.common(src));
  146. }
  147. @Test
  148. public void testCommonScore_SimiliarBy75() throws TableFullException {
  149. SimilarityIndex src = hash("A\nB\nC\nD\n");
  150. SimilarityIndex dst = hash("A\nB\nC\nQ\n");
  151. assertEquals(6, src.common(dst));
  152. assertEquals(6, dst.common(src));
  153. assertEquals(75, src.score(dst, 100));
  154. assertEquals(75, dst.score(src, 100));
  155. }
  156. private static SimilarityIndex hash(String text) throws TableFullException {
  157. SimilarityIndex src = new SimilarityIndex();
  158. byte[] raw = Constants.encode(text);
  159. src.hash(raw, 0, raw.length);
  160. src.sort();
  161. return src;
  162. }
  163. private static int keyFor(String line) throws TableFullException {
  164. SimilarityIndex si = hash(line);
  165. assertEquals("single line scored", 1, si.size());
  166. return si.key(0);
  167. }
  168. }