You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

RawTextComparator.java 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /*
  2. * Copyright (C) 2009-2010, Google Inc.
  3. * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.diff;
  45. import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
  46. import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
  47. import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
  48. import org.eclipse.jgit.util.IntList;
  49. /**
  50. * Equivalence function for {@link org.eclipse.jgit.diff.RawText}.
  51. */
  52. public abstract class RawTextComparator extends SequenceComparator<RawText> {
  53. /** No special treatment. */
  54. public static final RawTextComparator DEFAULT = new RawTextComparator() {
  55. @Override
  56. public boolean equals(RawText a, int ai, RawText b, int bi) {
  57. ai++;
  58. bi++;
  59. int as = a.lines.get(ai);
  60. int bs = b.lines.get(bi);
  61. final int ae = a.lines.get(ai + 1);
  62. final int be = b.lines.get(bi + 1);
  63. if (ae - as != be - bs)
  64. return false;
  65. while (as < ae) {
  66. if (a.content[as++] != b.content[bs++])
  67. return false;
  68. }
  69. return true;
  70. }
  71. @Override
  72. protected int hashRegion(byte[] raw, int ptr, int end) {
  73. int hash = 5381;
  74. for (; ptr < end; ptr++)
  75. hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
  76. return hash;
  77. }
  78. };
  79. /** Ignores all whitespace. */
  80. public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
  81. @Override
  82. public boolean equals(RawText a, int ai, RawText b, int bi) {
  83. ai++;
  84. bi++;
  85. int as = a.lines.get(ai);
  86. int bs = b.lines.get(bi);
  87. int ae = a.lines.get(ai + 1);
  88. int be = b.lines.get(bi + 1);
  89. ae = trimTrailingWhitespace(a.content, as, ae);
  90. be = trimTrailingWhitespace(b.content, bs, be);
  91. while (as < ae && bs < be) {
  92. byte ac = a.content[as];
  93. byte bc = b.content[bs];
  94. while (as < ae - 1 && isWhitespace(ac)) {
  95. as++;
  96. ac = a.content[as];
  97. }
  98. while (bs < be - 1 && isWhitespace(bc)) {
  99. bs++;
  100. bc = b.content[bs];
  101. }
  102. if (ac != bc)
  103. return false;
  104. as++;
  105. bs++;
  106. }
  107. return as == ae && bs == be;
  108. }
  109. @Override
  110. protected int hashRegion(byte[] raw, int ptr, int end) {
  111. int hash = 5381;
  112. for (; ptr < end; ptr++) {
  113. byte c = raw[ptr];
  114. if (!isWhitespace(c))
  115. hash = ((hash << 5) + hash) + (c & 0xff);
  116. }
  117. return hash;
  118. }
  119. };
  120. /**
  121. * Ignore leading whitespace.
  122. **/
  123. public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
  124. @Override
  125. public boolean equals(RawText a, int ai, RawText b, int bi) {
  126. ai++;
  127. bi++;
  128. int as = a.lines.get(ai);
  129. int bs = b.lines.get(bi);
  130. int ae = a.lines.get(ai + 1);
  131. int be = b.lines.get(bi + 1);
  132. as = trimLeadingWhitespace(a.content, as, ae);
  133. bs = trimLeadingWhitespace(b.content, bs, be);
  134. if (ae - as != be - bs)
  135. return false;
  136. while (as < ae) {
  137. if (a.content[as++] != b.content[bs++])
  138. return false;
  139. }
  140. return true;
  141. }
  142. @Override
  143. protected int hashRegion(byte[] raw, int ptr, int end) {
  144. int hash = 5381;
  145. ptr = trimLeadingWhitespace(raw, ptr, end);
  146. for (; ptr < end; ptr++)
  147. hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
  148. return hash;
  149. }
  150. };
  151. /** Ignores trailing whitespace. */
  152. public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
  153. @Override
  154. public boolean equals(RawText a, int ai, RawText b, int bi) {
  155. ai++;
  156. bi++;
  157. int as = a.lines.get(ai);
  158. int bs = b.lines.get(bi);
  159. int ae = a.lines.get(ai + 1);
  160. int be = b.lines.get(bi + 1);
  161. ae = trimTrailingWhitespace(a.content, as, ae);
  162. be = trimTrailingWhitespace(b.content, bs, be);
  163. if (ae - as != be - bs)
  164. return false;
  165. while (as < ae) {
  166. if (a.content[as++] != b.content[bs++])
  167. return false;
  168. }
  169. return true;
  170. }
  171. @Override
  172. protected int hashRegion(byte[] raw, int ptr, int end) {
  173. int hash = 5381;
  174. end = trimTrailingWhitespace(raw, ptr, end);
  175. for (; ptr < end; ptr++)
  176. hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
  177. return hash;
  178. }
  179. };
  180. /** Ignores whitespace occurring between non-whitespace characters. */
  181. public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
  182. @Override
  183. public boolean equals(RawText a, int ai, RawText b, int bi) {
  184. ai++;
  185. bi++;
  186. int as = a.lines.get(ai);
  187. int bs = b.lines.get(bi);
  188. int ae = a.lines.get(ai + 1);
  189. int be = b.lines.get(bi + 1);
  190. ae = trimTrailingWhitespace(a.content, as, ae);
  191. be = trimTrailingWhitespace(b.content, bs, be);
  192. while (as < ae && bs < be) {
  193. byte ac = a.content[as];
  194. byte bc = b.content[bs];
  195. if (ac != bc)
  196. return false;
  197. if (isWhitespace(ac))
  198. as = trimLeadingWhitespace(a.content, as, ae);
  199. else
  200. as++;
  201. if (isWhitespace(bc))
  202. bs = trimLeadingWhitespace(b.content, bs, be);
  203. else
  204. bs++;
  205. }
  206. return as == ae && bs == be;
  207. }
  208. @Override
  209. protected int hashRegion(byte[] raw, int ptr, int end) {
  210. int hash = 5381;
  211. end = trimTrailingWhitespace(raw, ptr, end);
  212. while (ptr < end) {
  213. byte c = raw[ptr];
  214. hash = ((hash << 5) + hash) + (c & 0xff);
  215. if (isWhitespace(c))
  216. ptr = trimLeadingWhitespace(raw, ptr, end);
  217. else
  218. ptr++;
  219. }
  220. return hash;
  221. }
  222. };
  223. @Override
  224. public int hash(RawText seq, int lno) {
  225. final int begin = seq.lines.get(lno + 1);
  226. final int end = seq.lines.get(lno + 2);
  227. return hashRegion(seq.content, begin, end);
  228. }
  229. /** {@inheritDoc} */
  230. @Override
  231. public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
  232. // This is a faster exact match based form that tries to improve
  233. // performance for the common case of the header and trailer of
  234. // a text file not changing at all. After this fast path we use
  235. // the slower path based on the super class' using equals() to
  236. // allow for whitespace ignore modes to still work.
  237. if (e.beginA == e.endA || e.beginB == e.endB)
  238. return e;
  239. byte[] aRaw = a.content;
  240. byte[] bRaw = b.content;
  241. int aPtr = a.lines.get(e.beginA + 1);
  242. int bPtr = a.lines.get(e.beginB + 1);
  243. int aEnd = a.lines.get(e.endA + 1);
  244. int bEnd = b.lines.get(e.endB + 1);
  245. // This can never happen, but the JIT doesn't know that. If we
  246. // define this assertion before the tight while loops below it
  247. // should be able to skip the array bound checks on access.
  248. //
  249. if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
  250. throw new ArrayIndexOutOfBoundsException();
  251. while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
  252. aPtr++;
  253. bPtr++;
  254. }
  255. while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
  256. aEnd--;
  257. bEnd--;
  258. }
  259. e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
  260. e.beginB = findForwardLine(b.lines, e.beginB, bPtr);
  261. e.endA = findReverseLine(a.lines, e.endA, aEnd);
  262. final boolean partialA = aEnd < a.lines.get(e.endA + 1);
  263. if (partialA)
  264. bEnd += a.lines.get(e.endA + 1) - aEnd;
  265. e.endB = findReverseLine(b.lines, e.endB, bEnd);
  266. if (!partialA && bEnd < b.lines.get(e.endB + 1))
  267. e.endA++;
  268. return super.reduceCommonStartEnd(a, b, e);
  269. }
  270. private static int findForwardLine(IntList lines, int idx, int ptr) {
  271. final int end = lines.size() - 2;
  272. while (idx < end && lines.get(idx + 2) < ptr)
  273. idx++;
  274. return idx;
  275. }
  276. private static int findReverseLine(IntList lines, int idx, int ptr) {
  277. while (0 < idx && ptr <= lines.get(idx))
  278. idx--;
  279. return idx;
  280. }
  281. /**
  282. * Compute a hash code for a region.
  283. *
  284. * @param raw
  285. * the raw file content.
  286. * @param ptr
  287. * first byte of the region to hash.
  288. * @param end
  289. * 1 past the last byte of the region.
  290. * @return hash code for the region <code>[ptr, end)</code> of raw.
  291. */
  292. protected abstract int hashRegion(byte[] raw, int ptr, int end);
  293. }