1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
<?php
/**
* Packed - provides a 'packed object' object
*
* PHP version 5.3
*
* @category Git
* @package Granite
* @author Craig Roberts <craig0990@googlemail.com>
* @license http://www.opensource.org/licenses/mit-license.php MIT Expat License
* @link http://craig0990.github.com/Granite/
*/
namespace Granite\Git\Object;
use \UnexpectedValueException as UnexpectedValueException;
/**
* Packed represents a packed object in the Git repository
*
* @category Git
* @package Granite
* @author Craig Roberts <craig0990@googlemail.com>
* @license http://www.opensource.org/licenses/mit-license.php MIT Expat License
* @link http://craig0990.github.com/Granite/
*/
class Packed extends Raw
{
    /**
     * The name of the packfile being read
     */
    private $_packfile;

    /**
     * Added to the object size to make a 'best-guess' effort at how much compressed
     * data to read - should be reimplemented, ideally with streams.
     */
    const OBJ_PADDING = 512;

    /**
     * Reads the object data from the compressed data at $offset in $packfile
     * and stores the resulting type, size and content on this instance.
     *
     * @param string $packfile The path to the packfile
     * @param int    $offset   The offset of the object data
     *
     * @throws UnexpectedValueException If the packfile cannot be opened or the
     *                                  object cannot be decoded
     */
    public function __construct($packfile, $offset)
    {
        $this->_packfile = $packfile;

        list($this->type, $this->size, $this->content)
            = $this->_readPackedObject($offset);
    }

    /**
     * Opens the packfile, reads the object whose header starts at $offset and
     * closes the packfile again, whether or not reading succeeded.
     *
     * @param int $offset Offset of the object header
     *
     * @return array Containing the type, size and object data
     * @throws UnexpectedValueException If the packfile cannot be opened
     */
    private function _readPackedObject($offset)
    {
        $file = fopen($this->_packfile, 'rb');
        if ($file === false) {
            throw new UnexpectedValueException(
                "Unable to open packfile {$this->_packfile}"
            );
        }

        // No `finally` here: the file targets PHP 5.3, so the handle is
        // released explicitly on both the failure and the success path.
        try {
            $object = $this->_readObjectAt($file, $offset);
        } catch (\Exception $e) {
            fclose($file);
            throw $e;
        }

        fclose($file);

        return $object;
    }

    /**
     * Reads the object whose header starts at $offset from an already opened
     * packfile handle. Deltified objects are resolved against their base.
     *
     * @param handle $file   File handle to read
     * @param int    $offset Offset of the object header
     *
     * @return array Containing the type, size and object data
     */
    private function _readObjectAt($file, $offset)
    {
        // Read the type and uncompressed size from the object header;
        // _readHeader() seeks to $offset itself
        list($type, $size) = $this->_readHeader($file, $offset);
        $object_offset = ftell($file);

        if ($type == self::OBJ_OFS_DELTA || $type == self::OBJ_REF_DELTA) {
            return $this->_unpackDeltified(
                $file, $offset, $object_offset, $type, $size
            );
        }

        $content = $this->_inflate(
            fread($file, $size + self::OBJ_PADDING), $size
        );

        return array($type, $size, $content);
    }

    /**
     * Decompresses zlib data, failing loudly instead of handing `false` on to
     * the caller as if it were object content.
     *
     * @param string $data The compressed data (trailing bytes may follow it)
     * @param int    $size The expected size of the uncompressed data
     *
     * @return string The uncompressed data
     * @throws UnexpectedValueException If the data cannot be decompressed
     */
    private function _inflate($data, $size)
    {
        $inflated = gzuncompress($data, $size);
        if ($inflated === false) {
            throw new UnexpectedValueException(
                "Unable to decompress object data in packfile {$this->_packfile}"
            );
        }

        return $inflated;
    }

    /**
     * Reads a single byte from $file and returns its ordinal value.
     *
     * @param handle $file File handle to read
     *
     * @return int The byte value (0-255)
     * @throws UnexpectedValueException If the end of the file has been reached
     */
    private function _readByte($file)
    {
        $char = fgetc($file);
        if ($char === false) {
            throw new UnexpectedValueException(
                "Unexpected end of packfile {$this->_packfile} while reading "
                . 'an object header'
            );
        }

        return ord($char);
    }

    /**
     * Reads a packed object header, returning the type and the size. For more
     * detailed information, refer to the @see tag.
     *
     * From the @see tag: "Each byte is really 7 bits of data, with the first bit
     * being used to say if that hunk is the last one or not before the data starts.
     * If the first bit is a 1, you will read another byte, otherwise the data starts
     * next. The first 3 bits in the first byte specifies the type of data..."
     *
     * On return the file position is just past the last header byte.
     *
     * @param handle $file   File handle to read
     * @param int    $offset Offset of the object header
     *
     * @return array Containing the type and the size
     * @throws UnexpectedValueException If the file ends inside the header
     * @see http://book.git-scm.com/7_the_packfile.html
     */
    private function _readHeader($file, $offset)
    {
        // Read the object header byte-by-byte
        fseek($file, $offset);
        $byte = $this->_readByte($file);

        /**
         * Bit-shift right by four, then ignore the first bit with a bitwise AND
         * This gives us the object type in binary:
         * 001 commit       self::OBJ_COMMIT
         * 010 tree         self::OBJ_TREE
         * 011 blob         self::OBJ_BLOB
         * 100 tag          self::OBJ_TAG
         * 110 offset delta self::OBJ_OFS_DELTA
         * 111 ref delta    self::OBJ_REF_DELTA
         *
         * (000 is undefined, 101 is not currently in use)
         * See http://book.git-scm.com/7_the_packfile.html for details
         */
        $type = ($byte >> 4) & 0x07;

        // Read the last four bits of the first byte, used to find the size
        $size = $byte & 0x0F;

        /**
         * $shift initially set to four, since we use the last four bits of the
         * first byte
         *
         * $byte & 0x80 checks the initial bit is set to 1 (i.e. keep reading data)
         *
         * Finally, $shift is incremented by seven for each consecutive byte
         * (because we ignore the initial bit)
         */
        for ($shift = 4; $byte & 0x80; $shift += 7) {
            $byte = $this->_readByte($file);
            /**
             * The byte is ANDed against 0x7F to strip the initial bit, then
             * bitshifted left by $shift and ORed against the existing $size,
             * so each byte contributes the next seven more-significant bits.
             */
            $size |= (($byte & 0x7F) << $shift);
        }

        return array($type, $size);
    }

    /**
     * Derives the repository path handed to Raw::factory() from the packfile
     * path.
     *
     * @return string The path up to and including the separator after '.git',
     *                or the directory three levels above the packfile
     */
    private function _repositoryPath()
    {
        $marker = strpos($this->_packfile, '.git');
        if ($marker !== false) {
            // 5 = strlen('.git') plus the directory separator that follows it
            return substr($this->_packfile, 0, $marker + 5);
        }

        // No '.git' in the path (strpos() returned false, which would have
        // made the substr() above return the first five characters).
        // NOTE(review): this fallback assumes the standard layout
        // <gitdir>/objects/pack/<packfile> - confirm against Raw::factory().
        return dirname(dirname(dirname($this->_packfile))) . '/';
    }

    /**
     * Unpacks a deltified object located at $offset in $file
     *
     * @param handle $file          File handle to read
     * @param int    $offset        Offset of the object header
     * @param int    $object_offset Offset of the object data, past the header
     * @param int    $type          The object type, either OBJ_REF_DELTA
     *                              or OBJ_OFS_DELTA
     * @param int    $size          The expected size of the uncompressed delta
     *
     * @return array Containing the type of the base object, the size of the
     *               patched content and the patched content itself
     * @throws UnexpectedValueException If $type is not a delta type, or the
     *                                  delta cannot be decompressed or applied
     */
    private function _unpackDeltified($file, $offset, $object_offset, $type, $size)
    {
        fseek($file, $object_offset);

        if ($type == self::OBJ_REF_DELTA) {
            // The base object is named by its 20-byte binary SHA-1
            $base_sha = bin2hex(fread($file, 20));

            // Raw::factory() is defined outside this file; its result is
            // expected to expose type() and content()
            $base = Raw::factory($this->_repositoryPath(), $base_sha);
            $type = $base->type();
            $base = $base->content();

            $delta = $this->_inflate(
                fread($file, $size + self::OBJ_PADDING), $size
            );

            $content = $this->_applyDelta($base, $delta);
        } elseif ($type == self::OBJ_OFS_DELTA) {
            // 20 = maximum varint size according to Glip
            $data = fread($file, $size + self::OBJ_PADDING + 20);

            // $length is the number of bytes the encoded offset occupied;
            // the compressed delta starts right after it
            list($base_offset, $length) = $this->_bigEndianNumber($data);
            $delta = $this->_inflate(substr($data, $length), $size);
            unset($data);

            // The encoded number is a distance back from this object's header
            $base_offset = $offset - $base_offset;

            // Reuse the open handle for the base object rather than opening
            // the packfile again for every link in the delta chain
            list($type, $size, $base) = $this->_readObjectAt($file, $base_offset);

            $content = $this->_applyDelta($base, $delta);
        } else {
            throw new UnexpectedValueException(
                "Unknown type $type for deltified object"
            );
        }

        return array($type, strlen($content), $content);
    }

    /**
     * Applies the $delta byte-sequence to $base and returns the
     * resultant binary string.
     *
     * This code is modified from Grit (see below), the Ruby
     * implementation used for GitHub under an MIT license.
     *
     * @param string $base  The base string for the delta to be applied to
     * @param string $delta The delta string to apply
     *
     * @return string The patched binary string
     * @throws UnexpectedValueException If the base or the result does not have
     *                                  the size recorded in the delta header
     * @see
     * https://github.com/mojombo/grit/blob/master/lib/grit/git-ruby/internal/pack.rb
     */
    private function _applyDelta($base, $delta)
    {
        // The delta starts with two varints: source size, then target size
        $pos = 0;
        $src_size = $this->_varint($delta, $pos);
        $dst_size = $this->_varint($delta, $pos);

        if ($src_size !== strlen($base)) {
            throw new UnexpectedValueException(
                'Base object size ' . strlen($base) . ' does not match the '
                . "source size $src_size recorded in the delta header"
            );
        }

        $dest = "";
        $delta_size = strlen($delta);

        while ($pos < $delta_size) {
            $byte = ord($delta[$pos++]);

            if ($byte & 0x80) {
                /* copy a part of $base; the low seven bits of $byte flag
                   which offset/length bytes are present in the delta */
                $offset = 0;
                if ($byte & 0x01) $offset = ord($delta[$pos++]);
                if ($byte & 0x02) $offset |= ord($delta[$pos++]) << 8;
                if ($byte & 0x04) $offset |= ord($delta[$pos++]) << 16;
                if ($byte & 0x08) $offset |= ord($delta[$pos++]) << 24;

                $length = 0;
                if ($byte & 0x10) $length = ord($delta[$pos++]);
                if ($byte & 0x20) $length |= ord($delta[$pos++]) << 8;
                if ($byte & 0x40) $length |= ord($delta[$pos++]) << 16;

                // An encoded length of zero stands for 0x10000 bytes
                if ($length == 0) $length = 0x10000;

                $dest .= substr($base, $offset, $length);
            } else {
                /* take the next $byte bytes as they are */
                $dest .= substr($delta, $pos, $byte);
                $pos += $byte;
            }
        }

        if (strlen($dest) !== $dst_size) {
            throw new UnexpectedValueException(
                "Deltified string expected to be $dst_size bytes, but actually "
                . strlen($dest) . ' bytes'
            );
        }

        return $dest;
    }

    /**
     * Parse a Git varint (variable-length integer). Used in the `_applyDelta()`
     * method to read the delta header.
     *
     * Seven bits are taken from each byte, least-significant group first; a set
     * high bit means another byte follows. $pos is advanced past the bytes read.
     *
     * @param string $string The string to parse
     * @param int    &$pos   The position in the string to read from
     *
     * @return int The integer value
     */
    private function _varint($string, &$pos = 0)
    {
        $varint = 0;
        $bitmask = 0x80;

        for ($i = 0; $bitmask & 0x80; $i += 7) {
            $bitmask = ord($string[$pos++]);
            $varint |= (($bitmask & 0x7F) << $i);
        }

        return $varint;
    }

    /**
     * Decodes a big endian modified base 128 number (refer to @see tag); this only
     * appears to be used in one place, the offset delta in packfiles. The offset
     * is the number of bytes to seek back from the start of the delta object to find
     * the base object.
     *
     * This code has been implemented using the C code given in the @see tag below.
     *
     * @param string &$data The data to read from and decode the number
     *
     * @return Array Containing the base offset (number of bytes to seek back) and
     *               the number of bytes of $data the encoded number occupied
     * @see http://git.rsbx.net/Documents/Git_Data_Formats.txt
     */
    private function _bigEndianNumber(&$data)
    {
        $i = 0;
        $byte = ord($data[$i++]);
        $number = $byte & 0x7F;

        while ($byte & 0x80) {
            $byte = ord($data[$i++]);
            // The "+ 1" is part of the encoding (the "modified" in modified
            // base 128), mirroring the C code referenced above
            $number = (($number + 1) << 7) | ($byte & 0x7F);
        }

        return array($number, $i);
    }
}
|