You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RawTextComparator.java 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. /*
  2. * Copyright (C) 2009-2010, Google Inc.
  3. * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.diff;
  45. import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
  46. import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
  47. import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
  48. import org.eclipse.jgit.util.IntList;
  49. /** Equivalence function for {@link RawText}. */
  50. public abstract class RawTextComparator extends SequenceComparator<RawText> {
  51. /** No special treatment. */
  52. public static final RawTextComparator DEFAULT = new RawTextComparator() {
  53. @Override
  54. public boolean equals(RawText a, int ai, RawText b, int bi) {
  55. ai++;
  56. bi++;
  57. int as = a.lines.get(ai);
  58. int bs = b.lines.get(bi);
  59. final int ae = a.lines.get(ai + 1);
  60. final int be = b.lines.get(bi + 1);
  61. if (ae - as != be - bs)
  62. return false;
  63. while (as < ae) {
  64. if (a.content[as++] != b.content[bs++])
  65. return false;
  66. }
  67. return true;
  68. }
  69. @Override
  70. protected int hashRegion(final byte[] raw, int ptr, final int end) {
  71. int hash = 5381;
  72. for (; ptr < end; ptr++)
  73. hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
  74. return hash;
  75. }
  76. };
  77. /** Ignores all whitespace. */
  78. public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
  79. @Override
  80. public boolean equals(RawText a, int ai, RawText b, int bi) {
  81. ai++;
  82. bi++;
  83. int as = a.lines.get(ai);
  84. int bs = b.lines.get(bi);
  85. int ae = a.lines.get(ai + 1);
  86. int be = b.lines.get(bi + 1);
  87. ae = trimTrailingWhitespace(a.content, as, ae);
  88. be = trimTrailingWhitespace(b.content, bs, be);
  89. while (as < ae && bs < be) {
  90. byte ac = a.content[as];
  91. byte bc = b.content[bs];
  92. while (as < ae - 1 && isWhitespace(ac)) {
  93. as++;
  94. ac = a.content[as];
  95. }
  96. while (bs < be - 1 && isWhitespace(bc)) {
  97. bs++;
  98. bc = b.content[bs];
  99. }
  100. if (ac != bc)
  101. return false;
  102. as++;
  103. bs++;
  104. }
  105. return as == ae && bs == be;
  106. }
  107. @Override
  108. protected int hashRegion(byte[] raw, int ptr, int end) {
  109. int hash = 5381;
  110. for (; ptr < end; ptr++) {
  111. byte c = raw[ptr];
  112. if (!isWhitespace(c))
  113. hash = ((hash << 5) + hash) + (c & 0xff);
  114. }
  115. return hash;
  116. }
  117. };
  118. /** Ignores leading whitespace. */
  119. public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
  120. @Override
  121. public boolean equals(RawText a, int ai, RawText b, int bi) {
  122. ai++;
  123. bi++;
  124. int as = a.lines.get(ai);
  125. int bs = b.lines.get(bi);
  126. int ae = a.lines.get(ai + 1);
  127. int be = b.lines.get(bi + 1);
  128. as = trimLeadingWhitespace(a.content, as, ae);
  129. bs = trimLeadingWhitespace(b.content, bs, be);
  130. if (ae - as != be - bs)
  131. return false;
  132. while (as < ae) {
  133. if (a.content[as++] != b.content[bs++])
  134. return false;
  135. }
  136. return true;
  137. }
  138. @Override
  139. protected int hashRegion(final byte[] raw, int ptr, int end) {
  140. int hash = 5381;
  141. ptr = trimLeadingWhitespace(raw, ptr, end);
  142. for (; ptr < end; ptr++)
  143. hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
  144. return hash;
  145. }
  146. };
  147. /** Ignores trailing whitespace. */
  148. public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
  149. @Override
  150. public boolean equals(RawText a, int ai, RawText b, int bi) {
  151. ai++;
  152. bi++;
  153. int as = a.lines.get(ai);
  154. int bs = b.lines.get(bi);
  155. int ae = a.lines.get(ai + 1);
  156. int be = b.lines.get(bi + 1);
  157. ae = trimTrailingWhitespace(a.content, as, ae);
  158. be = trimTrailingWhitespace(b.content, bs, be);
  159. if (ae - as != be - bs)
  160. return false;
  161. while (as < ae) {
  162. if (a.content[as++] != b.content[bs++])
  163. return false;
  164. }
  165. return true;
  166. }
  167. @Override
  168. protected int hashRegion(final byte[] raw, int ptr, int end) {
  169. int hash = 5381;
  170. end = trimTrailingWhitespace(raw, ptr, end);
  171. for (; ptr < end; ptr++)
  172. hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
  173. return hash;
  174. }
  175. };
  176. /** Ignores whitespace occurring between non-whitespace characters. */
  177. public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
  178. @Override
  179. public boolean equals(RawText a, int ai, RawText b, int bi) {
  180. ai++;
  181. bi++;
  182. int as = a.lines.get(ai);
  183. int bs = b.lines.get(bi);
  184. int ae = a.lines.get(ai + 1);
  185. int be = b.lines.get(bi + 1);
  186. ae = trimTrailingWhitespace(a.content, as, ae);
  187. be = trimTrailingWhitespace(b.content, bs, be);
  188. while (as < ae && bs < be) {
  189. byte ac = a.content[as];
  190. byte bc = b.content[bs];
  191. if (ac != bc)
  192. return false;
  193. if (isWhitespace(ac))
  194. as = trimLeadingWhitespace(a.content, as, ae);
  195. else
  196. as++;
  197. if (isWhitespace(bc))
  198. bs = trimLeadingWhitespace(b.content, bs, be);
  199. else
  200. bs++;
  201. }
  202. return as == ae && bs == be;
  203. }
  204. @Override
  205. protected int hashRegion(final byte[] raw, int ptr, int end) {
  206. int hash = 5381;
  207. end = trimTrailingWhitespace(raw, ptr, end);
  208. while (ptr < end) {
  209. byte c = raw[ptr];
  210. hash = ((hash << 5) + hash) + (c & 0xff);
  211. if (isWhitespace(c))
  212. ptr = trimLeadingWhitespace(raw, ptr, end);
  213. else
  214. ptr++;
  215. }
  216. return hash;
  217. }
  218. };
  219. @Override
  220. public int hash(RawText seq, int lno) {
  221. final int begin = seq.lines.get(lno + 1);
  222. final int end = seq.lines.get(lno + 2);
  223. return hashRegion(seq.content, begin, end);
  224. }
  225. @Override
  226. public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
  227. // This is a faster exact match based form that tries to improve
  228. // performance for the common case of the header and trailer of
  229. // a text file not changing at all. After this fast path we use
  230. // the slower path based on the super class' using equals() to
  231. // allow for whitespace ignore modes to still work.
  232. if (e.beginA == e.endA || e.beginB == e.endB)
  233. return e;
  234. byte[] aRaw = a.content;
  235. byte[] bRaw = b.content;
  236. int aPtr = a.lines.get(e.beginA + 1);
  237. int bPtr = a.lines.get(e.beginB + 1);
  238. int aEnd = a.lines.get(e.endA + 1);
  239. int bEnd = b.lines.get(e.endB + 1);
  240. // This can never happen, but the JIT doesn't know that. If we
  241. // define this assertion before the tight while loops below it
  242. // should be able to skip the array bound checks on access.
  243. //
  244. if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
  245. throw new ArrayIndexOutOfBoundsException();
  246. while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
  247. aPtr++;
  248. bPtr++;
  249. }
  250. while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
  251. aEnd--;
  252. bEnd--;
  253. }
  254. e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
  255. e.beginB = findForwardLine(b.lines, e.beginB, bPtr);
  256. e.endA = findReverseLine(a.lines, e.endA, aEnd);
  257. final boolean partialA = aEnd < a.lines.get(e.endA + 1);
  258. if (partialA)
  259. bEnd += a.lines.get(e.endA + 1) - aEnd;
  260. e.endB = findReverseLine(b.lines, e.endB, bEnd);
  261. if (!partialA && bEnd < b.lines.get(e.endB + 1))
  262. e.endA++;
  263. return super.reduceCommonStartEnd(a, b, e);
  264. }
  265. private static int findForwardLine(IntList lines, int idx, int ptr) {
  266. final int end = lines.size() - 2;
  267. while (idx < end && lines.get(idx + 2) < ptr)
  268. idx++;
  269. return idx;
  270. }
  271. private static int findReverseLine(IntList lines, int idx, int ptr) {
  272. while (0 < idx && ptr <= lines.get(idx))
  273. idx--;
  274. return idx;
  275. }
  276. /**
  277. * Compute a hash code for a region.
  278. *
  279. * @param raw
  280. * the raw file content.
  281. * @param ptr
  282. * first byte of the region to hash.
  283. * @param end
  284. * 1 past the last byte of the region.
  285. * @return hash code for the region <code>[ptr, end)</code> of raw.
  286. */
  287. protected abstract int hashRegion(byte[] raw, int ptr, int end);
  288. }