You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BinaryDeltaInputStream.java 5.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. /*
  2. * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.util.io;
  11. import java.io.EOFException;
  12. import java.io.IOException;
  13. import java.io.InputStream;
  14. import java.io.StreamCorruptedException;
  15. import java.text.MessageFormat;
  16. import org.eclipse.jgit.internal.JGitText;
  17. /**
  18. * An {@link InputStream} that applies a binary delta to a base on the fly.
  19. * <p>
  20. * Delta application to a base needs random access to the base data. The delta
  21. * is expressed as a sequence of copy and insert instructions. A copy
  22. * instruction has the form "COPY fromOffset length" and says "copy length bytes
  23. * from the base, starting at offset fromOffset, to the result". An insert
  24. * instruction has the form "INSERT length" followed by length bytes and says
  25. * "copy the next length bytes from the delta to the result".
  26. * </p>
  27. * <p>
  28. * These instructions are generated using a content-defined chunking algorithm
  29. * (currently C git uses the standard Rabin variant; but there are others that
  30. * could be used) that identifies equal chunks. It is entirely possible that a
  31. * later copy instruction has a fromOffset that is before the fromOffset of an
  32. * earlier copy instruction.
  33. * </p>
  34. * <p>
  35. * This makes it impossible to stream the base.
  36. * </p>
  37. * <p>
  38. * JGit is limited to 2GB maximum size for the base since array indices are
  39. * signed 32bit values.
  40. *
  41. * @since 5.12
  42. */
  43. public class BinaryDeltaInputStream extends InputStream {
  44. private final byte[] base;
  45. private final InputStream delta;
  46. private long resultLength;
  47. private long toDeliver = -1;
  48. private int fromBase;
  49. private int fromDelta;
  50. private int baseOffset = -1;
  51. /**
  52. * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
  53. * to {@code base}.
  54. *
  55. * @param base
  56. * data to apply the delta to
  57. * @param delta
  58. * {@link InputStream} delivering the delta to apply
  59. */
  60. public BinaryDeltaInputStream(byte[] base, InputStream delta) {
  61. this.base = base;
  62. this.delta = delta;
  63. }
  64. @Override
  65. public int read() throws IOException {
  66. int b = readNext();
  67. if (b >= 0) {
  68. toDeliver--;
  69. }
  70. return b;
  71. }
  72. @Override
  73. public int read(byte[] b, int off, int len) throws IOException {
  74. return super.read(b, off, len);
  75. }
  76. private void initialize() throws IOException {
  77. long baseSize = readVarInt(delta);
  78. if (baseSize > Integer.MAX_VALUE || baseSize < 0
  79. || (int) baseSize != base.length) {
  80. throw new IOException(MessageFormat.format(
  81. JGitText.get().binaryDeltaBaseLengthMismatch,
  82. Integer.valueOf(base.length), Long.valueOf(baseSize)));
  83. }
  84. resultLength = readVarInt(delta);
  85. if (resultLength < 0) {
  86. throw new StreamCorruptedException(
  87. JGitText.get().binaryDeltaInvalidResultLength);
  88. }
  89. toDeliver = resultLength;
  90. baseOffset = 0;
  91. }
  92. private int readNext() throws IOException {
  93. if (baseOffset < 0) {
  94. initialize();
  95. }
  96. if (fromBase > 0) {
  97. fromBase--;
  98. return base[baseOffset++] & 0xFF;
  99. } else if (fromDelta > 0) {
  100. fromDelta--;
  101. return delta.read();
  102. }
  103. int command = delta.read();
  104. if (command < 0) {
  105. return -1;
  106. }
  107. if ((command & 0x80) != 0) {
  108. // Decode offset and length to read from base
  109. long copyOffset = 0;
  110. for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
  111. if ((command & i) != 0) {
  112. copyOffset |= ((long) next(delta)) << shift;
  113. }
  114. }
  115. int copySize = 0;
  116. for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
  117. if ((command & i) != 0) {
  118. copySize |= next(delta) << shift;
  119. }
  120. }
  121. if (copySize == 0) {
  122. copySize = 0x10000;
  123. }
  124. if (copyOffset > base.length - copySize) {
  125. throw new StreamCorruptedException(MessageFormat.format(
  126. JGitText.get().binaryDeltaInvalidOffset,
  127. Long.valueOf(copyOffset), Integer.valueOf(copySize)));
  128. }
  129. baseOffset = (int) copyOffset;
  130. fromBase = copySize;
  131. return readNext();
  132. } else if (command != 0) {
  133. // The next 'command' bytes come from the delta
  134. fromDelta = command - 1;
  135. return delta.read();
  136. } else {
  137. // Zero is reserved
  138. throw new StreamCorruptedException(
  139. JGitText.get().unsupportedCommand0);
  140. }
  141. }
  142. private int next(InputStream in) throws IOException {
  143. int b = in.read();
  144. if (b < 0) {
  145. throw new EOFException();
  146. }
  147. return b;
  148. }
  149. private long readVarInt(InputStream in) throws IOException {
  150. long val = 0;
  151. int shift = 0;
  152. int b;
  153. do {
  154. b = next(in);
  155. val |= ((long) (b & 0x7f)) << shift;
  156. shift += 7;
  157. } while ((b & 0x80) != 0);
  158. return val;
  159. }
  160. /**
  161. * Tells the expected size of the final result.
  162. *
  163. * @return the size
  164. * @throws IOException
  165. * if the size cannot be determined from {@code delta}
  166. */
  167. public long getExpectedResultSize() throws IOException {
  168. if (baseOffset < 0) {
  169. initialize();
  170. }
  171. return resultLength;
  172. }
  173. /**
  174. * Tells whether the delta has been fully consumed, and the expected number
  175. * of bytes for the combined result have been read from this
  176. * {@link BinaryDeltaInputStream}.
  177. *
  178. * @return whether delta application was successful
  179. */
  180. public boolean isFullyConsumed() {
  181. try {
  182. return toDeliver == 0 && delta.read() < 0;
  183. } catch (IOException e) {
  184. return toDeliver == 0;
  185. }
  186. }
  187. @Override
  188. public void close() throws IOException {
  189. delta.close();
  190. }
  191. }