You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

BinaryDeltaInputStream.java 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. /*
  2. * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.util.io;
  11. import java.io.EOFException;
  12. import java.io.IOException;
  13. import java.io.InputStream;
  14. import java.io.StreamCorruptedException;
  15. import java.text.MessageFormat;
  16. import org.eclipse.jgit.internal.JGitText;
  17. /**
  18. * An {@link InputStream} that applies a binary delta to a base on the fly.
  19. * <p>
  20. * Delta application to a base needs random access to the base data. The delta
  21. * is expressed as a sequence of copy and insert instructions. A copy
  22. * instruction has the form "COPY fromOffset length" and says "copy length bytes
  23. * from the base, starting at offset fromOffset, to the result". An insert
  24. * instruction has the form "INSERT length" followed by length bytes and says
  25. * "copy the next length bytes from the delta to the result".
  26. * </p>
  27. * <p>
  28. * These instructions are generated using a content-defined chunking algorithm
  29. * (currently C git uses the standard Rabin variant; but there are others that
  30. * could be used) that identifies equal chunks. It is entirely possible that a
  31. * later copy instruction has a fromOffset that is before the fromOffset of an
  32. * earlier copy instruction.
  33. * </p>
  34. * <p>
  35. * This makes it impossible to stream the base.
  36. * </p>
  37. * <p>
  38. * JGit is limited to 2GB maximum size for the base since array indices are
  39. * signed 32bit values.
  40. *
  41. * @since 5.12
  42. */
  43. public class BinaryDeltaInputStream extends InputStream {
  44. private final byte[] base;
  45. private final InputStream delta;
  46. private long resultLength;
  47. private long toDeliver = -1;
  48. private int fromBase;
  49. private int fromDelta;
  50. private int baseOffset = -1;
  51. /**
  52. * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
  53. * to {@code base}.
  54. *
  55. * @param base
  56. * data to apply the delta to
  57. * @param delta
  58. * {@link InputStream} delivering the delta to apply
  59. */
  60. public BinaryDeltaInputStream(byte[] base, InputStream delta) {
  61. this.base = base;
  62. this.delta = delta;
  63. }
  64. @Override
  65. public int read() throws IOException {
  66. int b = readNext();
  67. if (b >= 0) {
  68. toDeliver--;
  69. }
  70. return b;
  71. }
  72. private void initialize() throws IOException {
  73. long baseSize = readVarInt(delta);
  74. if (baseSize > Integer.MAX_VALUE || baseSize < 0
  75. || (int) baseSize != base.length) {
  76. throw new IOException(MessageFormat.format(
  77. JGitText.get().binaryDeltaBaseLengthMismatch,
  78. Integer.valueOf(base.length), Long.valueOf(baseSize)));
  79. }
  80. resultLength = readVarInt(delta);
  81. if (resultLength < 0) {
  82. throw new StreamCorruptedException(
  83. JGitText.get().binaryDeltaInvalidResultLength);
  84. }
  85. toDeliver = resultLength;
  86. baseOffset = 0;
  87. }
  88. private int readNext() throws IOException {
  89. if (baseOffset < 0) {
  90. initialize();
  91. }
  92. if (fromBase > 0) {
  93. fromBase--;
  94. return base[baseOffset++] & 0xFF;
  95. } else if (fromDelta > 0) {
  96. fromDelta--;
  97. return delta.read();
  98. }
  99. int command = delta.read();
  100. if (command < 0) {
  101. return -1;
  102. }
  103. if ((command & 0x80) != 0) {
  104. // Decode offset and length to read from base
  105. long copyOffset = 0;
  106. for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
  107. if ((command & i) != 0) {
  108. copyOffset |= ((long) next(delta)) << shift;
  109. }
  110. }
  111. int copySize = 0;
  112. for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
  113. if ((command & i) != 0) {
  114. copySize |= next(delta) << shift;
  115. }
  116. }
  117. if (copySize == 0) {
  118. copySize = 0x10000;
  119. }
  120. if (copyOffset > base.length - copySize) {
  121. throw new StreamCorruptedException(MessageFormat.format(
  122. JGitText.get().binaryDeltaInvalidOffset,
  123. Long.valueOf(copyOffset), Integer.valueOf(copySize)));
  124. }
  125. baseOffset = (int) copyOffset;
  126. fromBase = copySize;
  127. return readNext();
  128. } else if (command != 0) {
  129. // The next 'command' bytes come from the delta
  130. fromDelta = command - 1;
  131. return delta.read();
  132. } else {
  133. // Zero is reserved
  134. throw new StreamCorruptedException(
  135. JGitText.get().unsupportedCommand0);
  136. }
  137. }
  138. private int next(InputStream in) throws IOException {
  139. int b = in.read();
  140. if (b < 0) {
  141. throw new EOFException();
  142. }
  143. return b;
  144. }
  145. private long readVarInt(InputStream in) throws IOException {
  146. long val = 0;
  147. int shift = 0;
  148. int b;
  149. do {
  150. b = next(in);
  151. val |= ((long) (b & 0x7f)) << shift;
  152. shift += 7;
  153. } while ((b & 0x80) != 0);
  154. return val;
  155. }
  156. /**
  157. * Tells the expected size of the final result.
  158. *
  159. * @return the size
  160. * @throws IOException
  161. * if the size cannot be determined from {@code delta}
  162. */
  163. public long getExpectedResultSize() throws IOException {
  164. if (baseOffset < 0) {
  165. initialize();
  166. }
  167. return resultLength;
  168. }
  169. /**
  170. * Tells whether the delta has been fully consumed, and the expected number
  171. * of bytes for the combined result have been read from this
  172. * {@link BinaryDeltaInputStream}.
  173. *
  174. * @return whether delta application was successful
  175. */
  176. public boolean isFullyConsumed() {
  177. try {
  178. return toDeliver == 0 && delta.read() < 0;
  179. } catch (IOException e) {
  180. return toDeliver == 0;
  181. }
  182. }
  183. @Override
  184. public void close() throws IOException {
  185. delta.close();
  186. }
  187. }