You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DeltaEncoder.java 8.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /*
  2. * Copyright (C) 2010, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.internal.storage.pack;
  44. import java.io.IOException;
  45. import java.io.OutputStream;
  46. import org.eclipse.jgit.lib.Constants;
  47. /** Encodes an instruction stream for {@link BinaryDelta}. */
  48. public class DeltaEncoder {
  49. /**
  50. * Maximum number of bytes to be copied in pack v2 format.
  51. * <p>
  52. * Historical limitations have this at 64k, even though current delta
  53. * decoders recognize larger copy instructions.
  54. */
  55. private static final int MAX_V2_COPY = 0x10000;
  56. /*
  57. * Maximum number of bytes to be copied in pack v3 format.
  58. *
  59. * Current delta decoders can recognize a copy instruction with a count that
  60. * is this large, but the historical limitation of {@link MAX_V2_COPY} is
  61. * still used.
  62. */
  63. // private static final int MAX_V3_COPY = (0xff << 16) | (0xff << 8) | 0xff;
  64. /** Maximum number of bytes used by a copy instruction. */
  65. private static final int MAX_COPY_CMD_SIZE = 8;
  66. /** Maximum length that an an insert command can encode at once. */
  67. private static final int MAX_INSERT_DATA_SIZE = 127;
  68. private final OutputStream out;
  69. private final byte[] buf = new byte[MAX_COPY_CMD_SIZE * 4];
  70. private final int limit;
  71. private int size;
  72. /**
  73. * Create an encoder with no upper bound on the instruction stream size.
  74. *
  75. * @param out
  76. * buffer to store the instructions written.
  77. * @param baseSize
  78. * size of the base object, in bytes.
  79. * @param resultSize
  80. * size of the resulting object, after applying this instruction
  81. * stream to the base object, in bytes.
  82. * @throws IOException
  83. * the output buffer cannot store the instruction stream's
  84. * header with the size fields.
  85. */
  86. public DeltaEncoder(OutputStream out, long baseSize, long resultSize)
  87. throws IOException {
  88. this(out, baseSize, resultSize, 0);
  89. }
  90. /**
  91. * Create an encoder with an upper limit on the instruction size.
  92. *
  93. * @param out
  94. * buffer to store the instructions written.
  95. * @param baseSize
  96. * size of the base object, in bytes.
  97. * @param resultSize
  98. * size of the resulting object, after applying this instruction
  99. * stream to the base object, in bytes.
  100. * @param limit
  101. * maximum number of bytes to write to the out buffer declaring
  102. * the stream is over limit and should be discarded. May be 0 to
  103. * specify an infinite limit.
  104. * @throws IOException
  105. * the output buffer cannot store the instruction stream's
  106. * header with the size fields.
  107. */
  108. public DeltaEncoder(OutputStream out, long baseSize, long resultSize,
  109. int limit) throws IOException {
  110. this.out = out;
  111. this.limit = limit;
  112. writeVarint(baseSize);
  113. writeVarint(resultSize);
  114. }
  115. private void writeVarint(long sz) throws IOException {
  116. int p = 0;
  117. while (sz >= 0x80) {
  118. buf[p++] = (byte) (0x80 | (((int) sz) & 0x7f));
  119. sz >>>= 7;
  120. }
  121. buf[p++] = (byte) (((int) sz) & 0x7f);
  122. size += p;
  123. if (limit == 0 || size < limit)
  124. out.write(buf, 0, p);
  125. }
  126. /** @return current size of the delta stream, in bytes. */
  127. public int getSize() {
  128. return size;
  129. }
  130. /**
  131. * Insert a literal string of text, in UTF-8 encoding.
  132. *
  133. * @param text
  134. * the string to insert.
  135. * @return true if the insert fits within the limit; false if the insert
  136. * would cause the instruction stream to exceed the limit.
  137. * @throws IOException
  138. * the instruction buffer can't store the instructions.
  139. */
  140. public boolean insert(String text) throws IOException {
  141. return insert(Constants.encode(text));
  142. }
  143. /**
  144. * Insert a literal binary sequence.
  145. *
  146. * @param text
  147. * the binary to insert.
  148. * @return true if the insert fits within the limit; false if the insert
  149. * would cause the instruction stream to exceed the limit.
  150. * @throws IOException
  151. * the instruction buffer can't store the instructions.
  152. */
  153. public boolean insert(byte[] text) throws IOException {
  154. return insert(text, 0, text.length);
  155. }
  156. /**
  157. * Insert a literal binary sequence.
  158. *
  159. * @param text
  160. * the binary to insert.
  161. * @param off
  162. * offset within {@code text} to start copying from.
  163. * @param cnt
  164. * number of bytes to insert.
  165. * @return true if the insert fits within the limit; false if the insert
  166. * would cause the instruction stream to exceed the limit.
  167. * @throws IOException
  168. * the instruction buffer can't store the instructions.
  169. */
  170. public boolean insert(byte[] text, int off, int cnt)
  171. throws IOException {
  172. if (cnt <= 0)
  173. return true;
  174. if (limit != 0) {
  175. int hdrs = cnt / MAX_INSERT_DATA_SIZE;
  176. if (cnt % MAX_INSERT_DATA_SIZE != 0)
  177. hdrs++;
  178. if (limit < size + hdrs + cnt)
  179. return false;
  180. }
  181. do {
  182. int n = Math.min(MAX_INSERT_DATA_SIZE, cnt);
  183. out.write((byte) n);
  184. out.write(text, off, n);
  185. off += n;
  186. cnt -= n;
  187. size += 1 + n;
  188. } while (0 < cnt);
  189. return true;
  190. }
  191. /**
  192. * Create a copy instruction to copy from the base object.
  193. *
  194. * @param offset
  195. * position in the base object to copy from. This is absolute,
  196. * from the beginning of the base.
  197. * @param cnt
  198. * number of bytes to copy.
  199. * @return true if the copy fits within the limit; false if the copy
  200. * would cause the instruction stream to exceed the limit.
  201. * @throws IOException
  202. * the instruction buffer cannot store the instructions.
  203. */
  204. public boolean copy(long offset, int cnt) throws IOException {
  205. if (cnt == 0)
  206. return true;
  207. int p = 0;
  208. // We cannot encode more than MAX_V2_COPY bytes in a single
  209. // command, so encode that much and start a new command.
  210. // This limit is imposed by the pack file format rules.
  211. //
  212. while (MAX_V2_COPY < cnt) {
  213. p = encodeCopy(p, offset, MAX_V2_COPY);
  214. offset += MAX_V2_COPY;
  215. cnt -= MAX_V2_COPY;
  216. if (buf.length < p + MAX_COPY_CMD_SIZE) {
  217. if (limit != 0 && limit < size + p)
  218. return false;
  219. out.write(buf, 0, p);
  220. size += p;
  221. p = 0;
  222. }
  223. }
  224. p = encodeCopy(p, offset, cnt);
  225. if (limit != 0 && limit < size + p)
  226. return false;
  227. out.write(buf, 0, p);
  228. size += p;
  229. return true;
  230. }
  231. private int encodeCopy(int p, long offset, int cnt) {
  232. int cmd = 0x80;
  233. final int cmdPtr = p++; // save room for the command
  234. byte b;
  235. if ((b = (byte) (offset & 0xff)) != 0) {
  236. cmd |= 0x01;
  237. buf[p++] = b;
  238. }
  239. if ((b = (byte) ((offset >>> 8) & 0xff)) != 0) {
  240. cmd |= 0x02;
  241. buf[p++] = b;
  242. }
  243. if ((b = (byte) ((offset >>> 16) & 0xff)) != 0) {
  244. cmd |= 0x04;
  245. buf[p++] = b;
  246. }
  247. if ((b = (byte) ((offset >>> 24) & 0xff)) != 0) {
  248. cmd |= 0x08;
  249. buf[p++] = b;
  250. }
  251. if (cnt != MAX_V2_COPY) {
  252. if ((b = (byte) (cnt & 0xff)) != 0) {
  253. cmd |= 0x10;
  254. buf[p++] = b;
  255. }
  256. if ((b = (byte) ((cnt >>> 8) & 0xff)) != 0) {
  257. cmd |= 0x20;
  258. buf[p++] = b;
  259. }
  260. if ((b = (byte) ((cnt >>> 16) & 0xff)) != 0) {
  261. cmd |= 0x40;
  262. buf[p++] = b;
  263. }
  264. }
  265. buf[cmdPtr] = (byte) cmd;
  266. return p;
  267. }
  268. }