* @license  http://www.opensource.org/licenses/mit-license.php MIT Expat License
 * @link     http://craig0990.github.com/Granite/
 */

namespace Granite\Git\Object;

use \UnexpectedValueException as UnexpectedValueException;

/**
 * Packed represents a packed object in the Git repository
 *
 * The object is located by its byte offset inside a packfile. Plain objects are
 * inflated directly; deltified objects (offset or reference deltas) are
 * reconstructed by resolving their base object and applying the delta to it.
 *
 * @category Git
 * @package  Granite
 * @author   Craig Roberts
 * @license  http://www.opensource.org/licenses/mit-license.php MIT Expat License
 * @link     http://craig0990.github.com/Granite/
 */
class Packed extends Raw
{

    /**
     * The name of the packfile being read
     */
    private $_packfile;

    /**
     * Added to the object size to make a 'best-guess' effort at how much compressed
     * data to read - should be reimplemented, ideally with streams.
     */
    const OBJ_PADDING = 512;

    /**
     * Reads the object data from the compressed data at $offset in $packfile
     *
     * @param string $packfile The path to the packfile
     * @param int    $offset   The offset of the object data
     *
     * @throws \RuntimeException        If the packfile cannot be opened
     * @throws UnexpectedValueException If the object data cannot be decoded
     */
    public function __construct($packfile, $offset)
    {
        $this->_packfile = $packfile;

        list($this->type, $this->size, $this->content)
            = $this->_readPackedObject($offset);
    }

    /**
     * Reads the object whose header starts at $offset in the packfile
     *
     * A fresh file handle is opened for every call (offset deltas recurse into
     * this method for their base object) and is always closed again before the
     * method returns or throws.
     *
     * @param int $offset Offset of the object header
     *
     * @return array Containing the type, size and object data
     *
     * @throws \RuntimeException        If the packfile cannot be opened
     * @throws UnexpectedValueException If the object data cannot be decoded
     */
    private function _readPackedObject($offset)
    {
        $file = fopen($this->_packfile, 'rb');
        if ($file === false) {
            throw new \RuntimeException(
                "Unable to open packfile {$this->_packfile}"
            );
        }

        try {
            // _readHeader() seeks to $offset itself and leaves the handle
            // positioned on the first byte after the header
            list($type, $size) = $this->_readHeader($file, $offset);
            $object_offset = ftell($file);

            if ($type == self::OBJ_OFS_DELTA || $type == self::OBJ_REF_DELTA) {
                return $this->_unpackDeltified(
                    $file, $offset, $object_offset, $type, $size
                );
            }

            $content = $this->_inflate(
                fread($file, $size + self::OBJ_PADDING), $size
            );

            return array($type, $size, $content);
        } finally {
            fclose($file);
        }
    }

    /**
     * Inflates zlib-compressed $data, failing loudly instead of returning false
     *
     * @param string|false $data Compressed bytes (false if the read failed)
     * @param int          $size Maximum number of bytes to decode, passed
     *                           straight to gzuncompress()
     *
     * @return string The uncompressed data
     *
     * @throws UnexpectedValueException If there is nothing to inflate or the
     *                                  data cannot be decompressed
     */
    private function _inflate($data, $size)
    {
        $inflated = ($data === false || $data === '')
            ? false
            : gzuncompress($data, $size);

        if ($inflated === false) {
            throw new UnexpectedValueException(
                "Unable to decompress object data in {$this->_packfile}"
            );
        }

        return $inflated;
    }

    /**
     * Reads a packed object header, returning the type and the size. For more
     * detailed information, refer to the @see tag.
     *
     * From the @see tag: "Each byte is really 7 bits of data, with the first bit
     * being used to say if that hunk is the last one or not before the data starts.
     * If the first bit is a 1, you will read another byte, otherwise the data starts
     * next. The first 3 bits in the first byte specifies the type of data..."
     *
     * @param handle $file   File handle to read
     * @param int    $offset Offset of the object header
     *
     * @return array Containing the type and the size
     * @see    http://book.git-scm.com/7_the_packfile.html
     */
    private function _readHeader($file, $offset)
    {
        // Read the object header byte-by-byte
        fseek($file, $offset);
        $byte = ord(fgetc($file));

        /**
         * Bit-shift right by four, then ignore the first bit with a bitwise AND
         * This gives us the object type in binary:
         *  001 commit          self::OBJ_COMMIT
         *  010 tree            self::OBJ_TREE
         *  011 blob            self::OBJ_BLOB
         *  100 tag             self::OBJ_TAG
         *  110 offset delta    self::OBJ_OFS_DELTA
         *  111 ref delta       self::OBJ_REF_DELTA
         *
         * (000 is undefined, 101 is not currently in use)
         * See http://book.git-scm.com/7_the_packfile.html for details
         */
        $type = ($byte >> 4) & 0x07;

        // The low four bits of the first byte are the least significant bits
        // of the size
        $size = $byte & 0x0F;

        /**
         * While the high bit (0x80) of the byte just read is set, another size
         * byte follows. Each continuation byte contributes its low seven bits,
         * placed above the bits collected so far: the shift starts at 4 (the
         * bits taken from the first byte) and grows by 7 per byte (4, 11, 18...)
         */
        for ($shift = 4; $byte & 0x80; $shift += 7) {
            $byte  = ord(fgetc($file));
            $size |= (($byte & 0x7F) << $shift);
        }

        return array($type, $size);
    }

    /**
     * Unpacks a deltified object located at $offset in $file
     *
     * For a reference delta the base object is named by a 20-byte SHA that
     * precedes the compressed delta and is loaded through Raw::factory(). For an
     * offset delta the base lives earlier in the same packfile, at a distance
     * encoded in front of the compressed delta.
     *
     * @param handle $file          File handle to read
     * @param int    $offset        Offset of the object data
     * @param int    $object_offset Offset of the object data, past the header
     * @param int    $type          The object type, either OBJ_REF_DELTA or
     *                              OBJ_OFS_DELTA
     * @param int    $size          The expected size of the uncompressed data
     *
     * @return array Containing the type (of the base object), size and object
     *               data of the reconstructed object
     *
     * @throws UnexpectedValueException If $type is not a delta type or the
     *                                  delta cannot be decoded or applied
     */
    private function _unpackDeltified($file, $offset, $object_offset, $type, $size)
    {
        fseek($file, $object_offset);

        if ($type == self::OBJ_REF_DELTA) {
            $base_sha = bin2hex(fread($file, 20));

            $base = Raw::factory($this->_repositoryPath(), $base_sha);
            $type = $base->type();
            $base = $base->content();

            $delta = $this->_inflate(
                fread($file, $size + self::OBJ_PADDING), $size
            );
        } elseif ($type == self::OBJ_OFS_DELTA) {
            // 20 = maximum varint size according to Glip
            $data = fread($file, $size + self::OBJ_PADDING + 20);
            if ($data === false || $data === '') {
                throw new UnexpectedValueException(
                    "Unable to read offset delta data in {$this->_packfile}"
                );
            }

            // $length is the number of bytes the encoded distance occupies;
            // the compressed delta starts right after it
            list($base_offset, $length) = $this->_bigEndianNumber($data);
            $delta = $this->_inflate(substr($data, $length), $size);
            unset($data);

            // The encoded number is the distance to seek back from this
            // object's header to reach the base object's header
            $base_offset = $offset - $base_offset;
            list($type, $size, $base) = $this->_readPackedObject($base_offset);
        } else {
            throw new UnexpectedValueException(
                "Unknown type $type for deltified object"
            );
        }

        $content = $this->_applyDelta($base, $delta);

        return array($type, strlen($content), $content);
    }

    /**
     * Derives the repository path handed to Raw::factory() from the packfile path
     *
     * The path is cut just after the first '.git' plus the character following
     * it (normally the directory separator). When the packfile path contains no
     * '.git' at all, the standard "<git-dir>/objects/pack/<file>" layout is
     * assumed and the directory three levels up is used instead.
     *
     * NOTE(review): the fallback assumes Raw::factory() wants a trailing
     * separator, as the '.git'-based path provides - confirm against Raw.
     *
     * @return string Path of the Git directory, including a trailing separator
     */
    private function _repositoryPath()
    {
        $git_pos = strpos($this->_packfile, '.git');

        if ($git_pos === false) {
            return dirname(dirname(dirname($this->_packfile))) . '/';
        }

        return substr($this->_packfile, 0, $git_pos + 5);
    }

    /**
     * Applies the $delta byte-sequence to $base and returns the
     * resultant binary string.
     *
     * The delta starts with two varints (expected base size and expected result
     * size) followed by a stream of instructions: a byte with the high bit set
     * copies a range out of $base, any other byte inserts that many literal
     * bytes taken from the delta itself.
     *
     * This code is modified from Grit (see below), the Ruby
     * implementation used for GitHub under an MIT license.
     *
     * @param string $base  The base string for the delta to be applied to
     * @param string $delta The delta string to apply
     *
     * @return string The patched binary string
     *
     * @throws UnexpectedValueException If the base or the result does not have
     *                                  the size announced in the delta header
     * @see
     * https://github.com/mojombo/grit/blob/master/lib/grit/git-ruby/internal/pack.rb
     */
    private function _applyDelta($base, $delta)
    {
        $pos      = 0;
        $src_size = $this->_varint($delta, $pos);
        $dst_size = $this->_varint($delta, $pos);

        $base_length = strlen($base);
        if ($src_size !== $base_length) {
            throw new UnexpectedValueException(
                'Expected base delta size ' . $base_length
                . ' does not match the expected '
                . "value $src_size"
            );
        }

        $dest         = "";
        $delta_length = strlen($delta);

        while ($pos < $delta_length) {
            $byte = ord($delta[$pos++]);

            if ($byte & 0x80) {
                /* copy a part of $base */

                // Bits 0-3 flag which of the four little-endian offset bytes
                // are present in the delta
                $offset = 0;
                for ($bit = 0; $bit < 4; $bit++) {
                    if ($byte & (0x01 << $bit)) {
                        $offset |= ord($delta[$pos++]) << (8 * $bit);
                    }
                }

                // Bits 4-6 flag which of the three little-endian length bytes
                // are present in the delta
                $length = 0;
                for ($bit = 0; $bit < 3; $bit++) {
                    if ($byte & (0x10 << $bit)) {
                        $length |= ord($delta[$pos++]) << (8 * $bit);
                    }
                }

                // A length of zero stands for 0x10000 bytes
                if ($length == 0) {
                    $length = 0x10000;
                }

                $dest .= substr($base, $offset, $length);
            } else {
                /* take the next $byte bytes as they are */
                $dest .= substr($delta, $pos, $byte);
                $pos  += $byte;
            }
        }

        $dest_length = strlen($dest);
        if ($dest_length !== $dst_size) {
            throw new UnexpectedValueException(
                "Deltified string expected to be $dst_size bytes, but actually "
                . $dest_length . ' bytes'
            );
        }

        return $dest;
    }

    /**
     * Parse a Git varint (variable-length integer). Used in the `_applyDelta()`
     * method to read the delta header.
     *
     * Each byte carries seven bits of the value, least significant group first;
     * a set high bit means another byte follows.
     *
     * @param string $string The string to parse
     * @param int    &$pos   The position in the string to read from; advanced
     *                       past the bytes that were consumed
     *
     * @return int The integer value
     */
    private function _varint($string, &$pos = 0)
    {
        $varint = 0;
        $shift  = 0;

        do {
            $byte    = ord($string[$pos++]);
            $varint |= (($byte & 0x7F) << $shift);
            $shift  += 7;
        } while ($byte & 0x80);

        return $varint;
    }

    /**
     * Decodes a big endian modified base 128 number (refer to @see tag); this only
     * appears to be used in one place, the offset delta in packfiles. The offset
     * is the number of bytes to seek back from the start of the delta object to find
     * the base object.
     *
     * This code has been implemented using the C code given in the @see tag below.
     *
     * @param string &$data The data to read from and decode the number
     *
     * @return Array Containing the base offset (number of bytes to seek back) and
     *               the length to use when reading the delta
     * @see    http://git.rsbx.net/Documents/Git_Data_Formats.txt
     */
    private function _bigEndianNumber(&$data)
    {
        $i      = 0;
        $byte   = ord($data[$i++]);
        $number = $byte & 0x7F;

        while ($byte & 0x80) {
            $byte = ord($data[$i++]);
            // The "+ 1" is part of the encoding: every continuation byte
            // implies an extra one in the more significant group
            $number = (($number + 1) << 7) | ($byte & 0x7F);
        }

        return array($number, $i);
    }
}