diff options
Diffstat (limited to '3rdparty/granite/git/object/packed.php')
-rw-r--r-- | 3rdparty/granite/git/object/packed.php | 304 |
1 files changed, 304 insertions, 0 deletions
diff --git a/3rdparty/granite/git/object/packed.php b/3rdparty/granite/git/object/packed.php new file mode 100644 index 00000000000..7e8d663b32e --- /dev/null +++ b/3rdparty/granite/git/object/packed.php @@ -0,0 +1,304 @@ +<?php +/** + * Packed - provides a 'packed object' object + * + * PHP version 5.3 + * + * @category Git + * @package Granite + * @author Craig Roberts <craig0990@googlemail.com> + * @license http://www.opensource.org/licenses/mit-license.php MIT Expat License + * @link http://craig0990.github.com/Granite/ + */ + +namespace Granite\Git\Object; +use \UnexpectedValueException as UnexpectedValueException; + +/** + * Packed represents a packed object in the Git repository + * + * @category Git + * @package Granite + * @author Craig Roberts <craig0990@googlemail.com> + * @license http://www.opensource.org/licenses/mit-license.php MIT Expat License + * @link http://craig0990.github.com/Granite/ + */ +class Packed extends Raw +{ + + /** + * The name of the packfile being read + */ + private $_packfile; + + /** + * Added to the object size to make a 'best-guess' effort at how much compressed + * data to read - should be reimplemented, ideally with streams. + */ + const OBJ_PADDING = 512; + + /** + * Reads the object data from the compressed data at $offset in $packfile + * + * @param string $packfile The path to the packfile + * @param int $offset The offset of the object data + */ + public function __construct($packfile, $offset) + { + $this->_packfile = $packfile; + + list($this->type, $this->size, $this->content) + = $this->_readPackedObject($offset); + } + + /** + * Reads the object data at $this->_offset + * + * @param int $offset Offset of the object header + * + * @return array Containing the type, size and object data + */ + private function _readPackedObject($offset) + { + $file = fopen($this->_packfile, 'rb'); + fseek($file, $offset); + // Read the type and uncompressed size from the object header + list($type, $size) = $this->_readHeader($file, $offset); + $object_offset = ftell($file); + + if ($type == self::OBJ_OFS_DELTA || $type == self::OBJ_REF_DELTA) { + return $this->_unpackDeltified( + $file, $offset, $object_offset, $type, $size + ); + } + + $content = gzuncompress(fread($file, $size + self::OBJ_PADDING), $size); + + return array($type, $size, $content); + } + + /** + * Reads a packed object header, returning the type and the size. For more + * detailed information, refer to the @see tag. + * + * From the @see tag: "Each byte is really 7 bits of data, with the first bit + * being used to say if that hunk is the last one or not before the data starts. + * If the first bit is a 1, you will read another byte, otherwise the data starts + * next. The first 3 bits in the first byte specifies the type of data..." + * + * @param handle $file File handle to read + * @param int $offset Offset of the object header + * + * @return array Containing the type and the size + * @see http://book.git-scm.com/7_the_packfile.html + */ + private function _readHeader($file, $offset) + { + // Read the object header byte-by-byte + fseek($file, $offset); + $byte = ord(fgetc($file)); + /** + * Bit-shift right by four, then ignore the first bit with a bitwise AND + * This gives us the object type in binary: + * 001 commit self::OBJ_COMMIT + * 010 tree self::OBJ_TREE + * 011 blob self::OBJ_BLOB + * 100 tag self::OBJ_TAG + * 110 offset delta self::OBJ_OFS_DELTA + * 111 ref delta self::OBJ_REF_DELTA + * + * (000 is undefined, 101 is not currently in use) + * See http://book.git-scm.com/7_the_packfile.html for details + */ + $type = ($byte >> 4) & 0x07; + + // Read the last four bits of the first byte, used to find the size + $size = $byte & 0x0F; + + /** + * $shift initially set to four, since we use the last four bits of the first + * byte + * + * $byte & 0x80 checks the initial bit is set to 1 (i.e. keep reading data) + * + * Finally, $shift is incremented by seven for each consecutive byte (because + * we ignore the initial bit) + */ + for ($shift = 4; $byte & 0x80; $shift += 7) { + $byte = ord(fgetc($file)); + /** + * The size is ANDed against 0x7F to strip the initial bit, then + * bitshifted by left $shift (4 or 7, depending on whether it's the + * initial byte) and ORed against the existing binary $size. This + * continuously increments the $size variable. + */ + $size |= (($byte & 0x7F) << $shift); + } + + return array($type, $size); + } + + /** + * Unpacks a deltified object located at $offset in $file + * + * @param handle $file File handle to read + * @param int $offset Offset of the object data + * @param int $object_offset Offset of the object data, past the header + * @param int $type The object type, either OBJ_REF_DELTA + or OBJ_OFS_DELTA + * @param int $size The expected size of the uncompressed data + * + * @return array Containing the type, size and object data + */ + private function _unpackDeltified($file, $offset, $object_offset, $type, $size) + { + fseek($file, $object_offset); + + if ($type == self::OBJ_REF_DELTA) { + + $base_sha = bin2hex(fread($file, 20)); + + $path = substr($this->_packfile, 0, strpos($this->_packfile, '.git')+5); + $base = Raw::factory($path, $base_sha); + $type = $base->type(); + $base = $base->content(); + + $delta = gzuncompress( + fread($file, $size + self::OBJ_PADDING), $size + ); + + $content = $this->_applyDelta($base, $delta); + + } elseif ($type == self::OBJ_OFS_DELTA) { + + // 20 = maximum varint size according to Glip + $data = fread($file, $size + self::OBJ_PADDING + 20); + + list($base_offset, $length) = $this->_bigEndianNumber($data); + + $delta = gzuncompress(substr($data, $length), $size); + unset($data); + + $base_offset = $offset - $base_offset; + list($type, $size, $base) = $this->_readPackedObject($base_offset); + + $content = $this->_applyDelta($base, $delta); + + } else { + throw new UnexpectedValueException( + "Unknown type $type for deltified object" + ); + } + + return array($type, strlen($content), $content); + } + + /** + * Applies the $delta byte-sequence to $base and returns the + * resultant binary string. + * + * This code is modified from Grit (see below), the Ruby + * implementation used for GitHub under an MIT license. + * + * @param string $base The base string for the delta to be applied to + * @param string $delta The delta string to apply + * + * @return string The patched binary string + * @see + * https://github.com/mojombo/grit/blob/master/lib/grit/git-ruby/internal/pack.rb + */ + private function _applyDelta($base, $delta) + { + $pos = 0; + $src_size = $this->_varint($delta, $pos); + $dst_size = $this->_varint($delta, $pos); + + if ($src_size !== strlen($base)) { + throw new UnexpectedValueException( + 'Expected base delta size ' . strlen($base) . ' does not match the expected ' + . "value $src_size" + ); + } + + $dest = ""; + while ($pos < strlen($delta)) { + $byte = ord($delta{$pos++}); + + if ($byte & 0x80) { + /* copy a part of $base */ + $offset = 0; + if ($byte & 0x01) $offset = ord($delta{$pos++}); + if ($byte & 0x02) $offset |= ord($delta{$pos++}) << 8; + if ($byte & 0x04) $offset |= ord($delta{$pos++}) << 16; + if ($byte & 0x08) $offset |= ord($delta{$pos++}) << 24; + $length = 0; + if ($byte & 0x10) $length = ord($delta{$pos++}); + if ($byte & 0x20) $length |= ord($delta{$pos++}) << 8; + if ($byte & 0x40) $length |= ord($delta{$pos++}) << 16; + if ($length == 0) $length = 0x10000; + $dest .= substr($base, $offset, $length); + } else { + /* take the next $byte bytes as they are */ + $dest .= substr($delta, $pos, $byte); + $pos += $byte; + } + } + + if (strlen($dest) !== $dst_size) { + throw new UnexpectedValueException( + "Deltified string expected to be $dst_size bytes, but actually " + . strlen($dest) . ' bytes' + ); + } + + return $dest; + } + + /** + * Parse a Git varint (variable-length integer). Used in the `_applyDelta()` + * method to read the delta header. + * + * @param string $string The string to parse + * @param int &$pos The position in the string to read from + * + * @return int The integer value + */ + private function _varint($string, &$pos = 0) + { + $varint = 0; + $bitmask = 0x80; + for ($i = 0; $bitmask & 0x80; $i += 7) { + $bitmask = ord($string{$pos++}); + $varint |= (($bitmask & 0x7F) << $i); + } + return $varint; + } + + /** + * Decodes a big endian modified base 128 number (refer to @see tag); this only + * appears to be used in one place, the offset delta in packfiles. The offset + * is the number of bytes to seek back from the start of the delta object to find + * the base object. + * + * This code has been implemented using the C code given in the @see tag below. + * + * @param string &$data The data to read from and decode the number + * + * @return Array Containing the base offset (number of bytes to seek back) and + * the length to use when reading the delta + * @see http://git.rsbx.net/Documents/Git_Data_Formats.txt + */ + private function _bigEndianNumber(&$data) + { + $i = 0; + $byte = ord($data{$i++}); + $number = $byte & 0x7F; + while ($byte & 0x80) { + $byte = ord($data{$i++}); + $number = (($number + 1) << 7) | ($byte & 0x7F); + } + + return array($number, $i); + } + +} |