aboutsummaryrefslogtreecommitdiffstats
path: root/3rdparty/granite/git/object/packed.php
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/granite/git/object/packed.php')
-rw-r--r--3rdparty/granite/git/object/packed.php304
1 files changed, 304 insertions, 0 deletions
diff --git a/3rdparty/granite/git/object/packed.php b/3rdparty/granite/git/object/packed.php
new file mode 100644
index 00000000000..7e8d663b32e
--- /dev/null
+++ b/3rdparty/granite/git/object/packed.php
@@ -0,0 +1,304 @@
+<?php
+/**
+ * Packed - provides a 'packed object' object
+ *
+ * PHP version 5.3
+ *
+ * @category Git
+ * @package Granite
+ * @author Craig Roberts <craig0990@googlemail.com>
+ * @license http://www.opensource.org/licenses/mit-license.php MIT Expat License
+ * @link http://craig0990.github.com/Granite/
+ */
+
+namespace Granite\Git\Object;
+use \UnexpectedValueException as UnexpectedValueException;
+
+/**
+ * Packed represents a packed object in the Git repository
+ *
+ * @category Git
+ * @package Granite
+ * @author Craig Roberts <craig0990@googlemail.com>
+ * @license http://www.opensource.org/licenses/mit-license.php MIT Expat License
+ * @link http://craig0990.github.com/Granite/
+ */
+class Packed extends Raw
+{
+
+ /**
+ * The name of the packfile being read
+ */
+ private $_packfile;
+
+ /**
+ * Added to the object size to make a 'best-guess' effort at how much compressed
+ * data to read - should be reimplemented, ideally with streams.
+ */
+ const OBJ_PADDING = 512;
+
+ /**
+ * Reads the object data from the compressed data at $offset in $packfile
+ *
+ * @param string $packfile The path to the packfile
+ * @param int $offset The offset of the object data
+ */
+ public function __construct($packfile, $offset)
+ {
+ $this->_packfile = $packfile;
+
+ list($this->type, $this->size, $this->content)
+ = $this->_readPackedObject($offset);
+ }
+
+ /**
+ * Reads the object data at $this->_offset
+ *
+ * @param int $offset Offset of the object header
+ *
+ * @return array Containing the type, size and object data
+ */
+ private function _readPackedObject($offset)
+ {
+ $file = fopen($this->_packfile, 'rb');
+ fseek($file, $offset);
+ // Read the type and uncompressed size from the object header
+ list($type, $size) = $this->_readHeader($file, $offset);
+ $object_offset = ftell($file);
+
+ if ($type == self::OBJ_OFS_DELTA || $type == self::OBJ_REF_DELTA) {
+ return $this->_unpackDeltified(
+ $file, $offset, $object_offset, $type, $size
+ );
+ }
+
+ $content = gzuncompress(fread($file, $size + self::OBJ_PADDING), $size);
+
+ return array($type, $size, $content);
+ }
+
+ /**
+ * Reads a packed object header, returning the type and the size. For more
+ * detailed information, refer to the @see tag.
+ *
+ * From the @see tag: "Each byte is really 7 bits of data, with the first bit
+ * being used to say if that hunk is the last one or not before the data starts.
+ * If the first bit is a 1, you will read another byte, otherwise the data starts
+ * next. The first 3 bits in the first byte specifies the type of data..."
+ *
+ * @param handle $file File handle to read
+ * @param int $offset Offset of the object header
+ *
+ * @return array Containing the type and the size
+ * @see http://book.git-scm.com/7_the_packfile.html
+ */
+ private function _readHeader($file, $offset)
+ {
+ // Read the object header byte-by-byte
+ fseek($file, $offset);
+ $byte = ord(fgetc($file));
+ /**
+ * Bit-shift right by four, then ignore the first bit with a bitwise AND
+ * This gives us the object type in binary:
+ * 001 commit self::OBJ_COMMIT
+ * 010 tree self::OBJ_TREE
+ * 011 blob self::OBJ_BLOB
+ * 100 tag self::OBJ_TAG
+ * 110 offset delta self::OBJ_OFS_DELTA
+ * 111 ref delta self::OBJ_REF_DELTA
+ *
+ * (000 is undefined, 101 is not currently in use)
+ * See http://book.git-scm.com/7_the_packfile.html for details
+ */
+ $type = ($byte >> 4) & 0x07;
+
+ // Read the last four bits of the first byte, used to find the size
+ $size = $byte & 0x0F;
+
+ /**
+ * $shift initially set to four, since we use the last four bits of the first
+ * byte
+ *
+ * $byte & 0x80 checks the initial bit is set to 1 (i.e. keep reading data)
+ *
+ * Finally, $shift is incremented by seven for each consecutive byte (because
+ * we ignore the initial bit)
+ */
+ for ($shift = 4; $byte & 0x80; $shift += 7) {
+ $byte = ord(fgetc($file));
+ /**
+ * The size is ANDed against 0x7F to strip the initial bit, then
+ * bitshifted by left $shift (4 or 7, depending on whether it's the
+ * initial byte) and ORed against the existing binary $size. This
+ * continuously increments the $size variable.
+ */
+ $size |= (($byte & 0x7F) << $shift);
+ }
+
+ return array($type, $size);
+ }
+
+ /**
+ * Unpacks a deltified object located at $offset in $file
+ *
+ * @param handle $file File handle to read
+ * @param int $offset Offset of the object data
+ * @param int $object_offset Offset of the object data, past the header
+ * @param int $type The object type, either OBJ_REF_DELTA
+ or OBJ_OFS_DELTA
+ * @param int $size The expected size of the uncompressed data
+ *
+ * @return array Containing the type, size and object data
+ */
+ private function _unpackDeltified($file, $offset, $object_offset, $type, $size)
+ {
+ fseek($file, $object_offset);
+
+ if ($type == self::OBJ_REF_DELTA) {
+
+ $base_sha = bin2hex(fread($file, 20));
+
+ $path = substr($this->_packfile, 0, strpos($this->_packfile, '.git')+5);
+ $base = Raw::factory($path, $base_sha);
+ $type = $base->type();
+ $base = $base->content();
+
+ $delta = gzuncompress(
+ fread($file, $size + self::OBJ_PADDING), $size
+ );
+
+ $content = $this->_applyDelta($base, $delta);
+
+ } elseif ($type == self::OBJ_OFS_DELTA) {
+
+ // 20 = maximum varint size according to Glip
+ $data = fread($file, $size + self::OBJ_PADDING + 20);
+
+ list($base_offset, $length) = $this->_bigEndianNumber($data);
+
+ $delta = gzuncompress(substr($data, $length), $size);
+ unset($data);
+
+ $base_offset = $offset - $base_offset;
+ list($type, $size, $base) = $this->_readPackedObject($base_offset);
+
+ $content = $this->_applyDelta($base, $delta);
+
+ } else {
+ throw new UnexpectedValueException(
+ "Unknown type $type for deltified object"
+ );
+ }
+
+ return array($type, strlen($content), $content);
+ }
+
+ /**
+ * Applies the $delta byte-sequence to $base and returns the
+ * resultant binary string.
+ *
+ * This code is modified from Grit (see below), the Ruby
+ * implementation used for GitHub under an MIT license.
+ *
+ * @param string $base The base string for the delta to be applied to
+ * @param string $delta The delta string to apply
+ *
+ * @return string The patched binary string
+ * @see
+ * https://github.com/mojombo/grit/blob/master/lib/grit/git-ruby/internal/pack.rb
+ */
+ private function _applyDelta($base, $delta)
+ {
+ $pos = 0;
+ $src_size = $this->_varint($delta, $pos);
+ $dst_size = $this->_varint($delta, $pos);
+
+ if ($src_size !== strlen($base)) {
+ throw new UnexpectedValueException(
+ 'Expected base delta size ' . strlen($base) . ' does not match the expected '
+ . "value $src_size"
+ );
+ }
+
+ $dest = "";
+ while ($pos < strlen($delta)) {
+ $byte = ord($delta{$pos++});
+
+ if ($byte & 0x80) {
+ /* copy a part of $base */
+ $offset = 0;
+ if ($byte & 0x01) $offset = ord($delta{$pos++});
+ if ($byte & 0x02) $offset |= ord($delta{$pos++}) << 8;
+ if ($byte & 0x04) $offset |= ord($delta{$pos++}) << 16;
+ if ($byte & 0x08) $offset |= ord($delta{$pos++}) << 24;
+ $length = 0;
+ if ($byte & 0x10) $length = ord($delta{$pos++});
+ if ($byte & 0x20) $length |= ord($delta{$pos++}) << 8;
+ if ($byte & 0x40) $length |= ord($delta{$pos++}) << 16;
+ if ($length == 0) $length = 0x10000;
+ $dest .= substr($base, $offset, $length);
+ } else {
+ /* take the next $byte bytes as they are */
+ $dest .= substr($delta, $pos, $byte);
+ $pos += $byte;
+ }
+ }
+
+ if (strlen($dest) !== $dst_size) {
+ throw new UnexpectedValueException(
+ "Deltified string expected to be $dst_size bytes, but actually "
+ . strlen($dest) . ' bytes'
+ );
+ }
+
+ return $dest;
+ }
+
+ /**
+ * Parse a Git varint (variable-length integer). Used in the `_applyDelta()`
+ * method to read the delta header.
+ *
+ * @param string $string The string to parse
+ * @param int &$pos The position in the string to read from
+ *
+ * @return int The integer value
+ */
+ private function _varint($string, &$pos = 0)
+ {
+ $varint = 0;
+ $bitmask = 0x80;
+ for ($i = 0; $bitmask & 0x80; $i += 7) {
+ $bitmask = ord($string{$pos++});
+ $varint |= (($bitmask & 0x7F) << $i);
+ }
+ return $varint;
+ }
+
+ /**
+ * Decodes a big endian modified base 128 number (refer to @see tag); this only
+ * appears to be used in one place, the offset delta in packfiles. The offset
+ * is the number of bytes to seek back from the start of the delta object to find
+ * the base object.
+ *
+ * This code has been implemented using the C code given in the @see tag below.
+ *
+ * @param string &$data The data to read from and decode the number
+ *
+ * @return Array Containing the base offset (number of bytes to seek back) and
+ * the length to use when reading the delta
+ * @see http://git.rsbx.net/Documents/Git_Data_Formats.txt
+ */
+ private function _bigEndianNumber(&$data)
+ {
+ $i = 0;
+ $byte = ord($data{$i++});
+ $number = $byte & 0x7F;
+ while ($byte & 0x80) {
+ $byte = ord($data{$i++});
+ $number = (($number + 1) << 7) | ($byte & 0x7F);
+ }
+
+ return array($number, $i);
+ }
+
+}