Skip to content

Commit d9d4eaa

Browse files
authored
Merge pull request #37 from zozlak/master
A bunch of enhancements for dealing with large tar archives
2 parents 460c205 + 7b1936c commit d9d4eaa

File tree

2 files changed

+172
-14
lines changed

2 files changed

+172
-14
lines changed

src/Tar.php

Lines changed: 121 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
class Tar extends Archive
1717
{
18+
const READ_CHUNK_SIZE = 1048576; // 1MB
1819

1920
protected $file = '';
2021
protected $comptype = Archive::COMPRESS_AUTO;
@@ -23,6 +24,9 @@ class Tar extends Archive
2324
protected $memory = '';
2425
protected $closed = true;
2526
protected $writeaccess = false;
27+
protected $position = 0;
28+
protected $contentUntil = 0;
29+
protected $skipUntil = 0;
2630

2731
/**
2832
* Sets the compression to use
@@ -72,6 +76,7 @@ public function open($file)
7276
throw new ArchiveIOException('Could not open file for reading: '.$this->file);
7377
}
7478
$this->closed = false;
79+
$this->position = 0;
7580
}
7681

7782
/**
@@ -118,12 +123,37 @@ public function yieldContents()
118123
continue;
119124
}
120125

121-
$this->skipbytes(ceil($header['size'] / 512) * 512);
126+
$this->contentUntil = $this->position + $header['size'];
127+
$this->skipUntil = $this->position + ceil($header['size'] / 512) * 512;
128+
122129
yield $this->header2fileinfo($header);
130+
131+
$skip = $this->skipUntil - $this->position;
132+
if ($skip > 0) {
133+
$this->skipbytes($skip);
134+
}
123135
}
124136

125137
$this->close();
138+
}
126139

140+
/**
 * Reads content of the current archive entry.
 *
 * Works only while iterating through the archive using the generator returned
 * by yieldContents(). The amount read is capped at the number of content bytes
 * remaining in the current entry, so repeated calls return consecutive chunks
 * and finally an empty string once the entry is exhausted.
 *
 * An empty string is also returned when there is nothing (left) to read, e.g.
 * when a non-positive $length is requested or the read position is already
 * past the end of the current entry's content.
 *
 * @param int $length maximum number of bytes to read
 *
 * @return string
 */
public function readCurrentEntry($length = PHP_INT_MAX)
{
    // never read beyond the end of the entry's content (excludes block padding)
    $remaining = $this->contentUntil - $this->position;
    $length = (int) min($length, $remaining);

    // "<= 0" rather than "=== 0": a negative length must not reach readbytes()
    if ($length <= 0) {
        return '';
    }

    return $this->readbytes($length);
}
128158

129159
/**
@@ -290,16 +320,27 @@ public function addFile($file, $fileinfo = '')
290320
throw new ArchiveIOException('Could not open file for reading: ' . $file);
291321
}
292322
while (!feof($fp)) {
293-
$data = fread($fp, 512);
294-
$read += strlen($data);
323+
// for performance reasons read bigger chunks at once
324+
$data = fread($fp, self::READ_CHUNK_SIZE);
295325
if ($data === false) {
296326
break;
297327
}
298328
if ($data === '') {
299329
break;
300330
}
301-
$packed = pack("a512", $data);
302-
$this->writebytes($packed);
331+
$dataLen = strlen($data);
332+
$read += $dataLen;
333+
// how much of data read fully fills 512-byte blocks?
334+
$passLen = ($dataLen >> 9) << 9;
335+
if ($passLen === $dataLen) {
336+
// all - just write the data
337+
$this->writebytes($data);
338+
} else {
339+
// directly write what fills 512-byte blocks fully
340+
$this->writebytes(substr($data, 0, $passLen));
341+
// pad the remainder to 512 bytes
342+
$this->writebytes(pack("a512", substr($data, $passLen)));
343+
}
303344
}
304345
fclose($fp);
305346

@@ -335,8 +376,11 @@ public function addData($fileinfo, $data)
335376
$fileinfo->setSize($len);
336377
$this->writeFileHeader($fileinfo);
337378

338-
for ($s = 0; $s < $len; $s += 512) {
339-
$this->writebytes(pack("a512", substr($data, $s, 512)));
379+
// write directly everything but the last block which needs padding
380+
$passLen = ($len >> 9) << 9;
381+
$this->writebytes(substr($data, 0, $passLen));
382+
if ($passLen < $len) {
383+
$this->writebytes(pack("a512", substr($data, $passLen, 512)));
340384
}
341385

342386
if (is_callable($this->callback)) {
@@ -439,12 +483,14 @@ public function save($file)
439483
protected function readbytes($length)
{
    $type = $this->comptype;

    // pick the reader matching the archive's compression type
    if ($type !== Archive::COMPRESS_GZIP && $type !== Archive::COMPRESS_BZIP) {
        $chunk = @fread($this->fh, $length);
    } elseif ($type === Archive::COMPRESS_GZIP) {
        $chunk = @gzread($this->fh, $length);
    } else {
        $chunk = @bzread($this->fh, $length);
    }

    // keep track of how far into the stream we have read
    $this->position += strlen($chunk);

    return $chunk;
}
449495

450496
/**
@@ -494,6 +540,7 @@ protected function skipbytes($bytes)
494540
} else {
495541
@fseek($this->fh, $bytes, SEEK_CUR);
496542
}
543+
$this->position += $bytes;
497544
}
498545

499546
/**
@@ -553,8 +600,8 @@ protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $
553600
$uid = sprintf("%6s ", decoct($uid));
554601
$gid = sprintf("%6s ", decoct($gid));
555602
$perm = sprintf("%6s ", decoct($perm));
556-
$size = sprintf("%11s ", decoct($size));
557-
$mtime = sprintf("%11s", decoct($mtime));
603+
$size = self::numberEncode($size, 12);
// encode the modification time itself - $size already holds an encoded string here
$mtime = self::numberEncode($mtime, 12);
558605

559606
$data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
560607
$data_last = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
@@ -614,8 +661,8 @@ protected function parseHeader($block)
614661
$return['perm'] = OctDec(trim($header['perm']));
615662
$return['uid'] = OctDec(trim($header['uid']));
616663
$return['gid'] = OctDec(trim($header['gid']));
617-
$return['size'] = OctDec(trim($header['size']));
618-
$return['mtime'] = OctDec(trim($header['mtime']));
664+
$return['size'] = self::numberDecode($header['size']);
665+
$return['mtime'] = self::numberDecode($header['mtime']);
619666
$return['typeflag'] = $header['typeflag'];
620667
$return['link'] = trim($header['link']);
621668
$return['uname'] = trim($header['uname']);
@@ -713,4 +760,64 @@ public function filetype($file)
713760
return Archive::COMPRESS_NONE;
714761
}
715762

763+
/**
 * Decodes a numeric tar header field according to
 * https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions
 * (basically with support for big numbers).
 *
 * The first byte of the field selects the representation:
 *  - 255: the whole field is read as a big-endian two's complement negative number
 *  - 128: the bytes after the first one are read as a big-endian positive number
 *  - anything else: the trimmed field is read as an octal number
 *
 * @param string $field raw header field
 * @return int
 */
public static function numberDecode($field)
{
    $marker = ord(substr($field, 0, 1));

    // classic representation: octal digits
    if ($marker !== 255 && $marker !== 128) {
        return octdec(trim($field));
    }

    $size = strlen($field);
    if ($marker === 255) {
        // negative: start below zero and add up all bytes, marker byte included
        $result = -1 << (8 * $size);
        $lastIndex = 0;
    } else {
        // positive: the marker byte carries no value bits and is skipped
        $result = 0;
        $lastIndex = 1;
    }

    // walk from the least significant (last) byte towards the front
    for ($pos = $size - 1, $bits = 0; $pos >= $lastIndex; $pos--, $bits += 8) {
        $result += ord(substr($field, $pos, 1)) << $bits;
    }

    return $result;
}
793+
794+
/**
 * Encodes a numeric value for a tar header field according to
 * https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions
 * (basically with support for big numbers).
 *
 * Non-negative values that fit are written as right-aligned octal digits
 * followed by a single space. Negative values and values too large for the
 * octal form are written in binary, prefixed with byte 255 (negative) or
 * byte 128 (positive).
 *
 * @param int $value
 * @param int $length field length
 * @return string
 */
public static function numberEncode($value, $length)
{
    // old implementations leave the last byte empty and
    // octal encoding stores three bits per byte
    $octalLimit = 1 << (($length - 1) * 3);
    $isNegative = $value < 0;

    if (!$isNegative && !($value >= $octalLimit)) {
        return sprintf("%" . ($length - 1) . "s ", decoct($value));
    }

    // PHP already stores integers as 2's complement, so the packed
    // big-endian bytes can be used as they are
    $packed = pack(PHP_INT_SIZE === 8 ? 'J' : 'N', (int) $value);
    $tail = substr($packed, max(0, PHP_INT_SIZE - $length + 1));

    if ($isNegative) {
        $head = str_repeat(chr(255), max(1, $length - PHP_INT_SIZE));
    } else {
        $head = chr(128) . str_repeat(chr(0), max(0, $length - PHP_INT_SIZE - 1));
    }

    return $head . $tail;
}
716822
}
823+

tests/TarTestCase.php

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,57 @@ public function testSaveWithInvalidDestinationFile()
778778
$this->assertTrue(true); // succeed if no exception, yet
779779
}
780780

781+
public function testNumberEncodeDecode()
{
    // 2^34 + 17 = 2^2 * 2^32 + 17
    $big = (1 << 34) + 17;
    $negative = -1234;

    // each case: value, field length, expected binary representation
    $cases = array(
        array($big, 12, pack('CCnNN', 128, 0, 0, 1 << 2, 17)),
        array($big, 7, pack('CnN', 128, 1 << 2, 17)),
        array($negative, 12, pack('CCnNN', 0xFF, 0xFF, 0xFFFF, 0xFFFFFFFF, -1234)),
        array($negative, 3, pack('Cn', 0xFF, -1234)),
    );

    foreach ($cases as $case) {
        list($value, $fieldLength, $expected) = $case;

        $encoded = Tar::numberEncode($value, $fieldLength);
        $this->assertEquals($expected, $encoded);

        // decoding must give back the original value
        $this->assertEquals($value, Tar::numberDecode($encoded));
    }
}
806+
807+
public function testReadCurrentEntry()
{
    $archive = __DIR__ . '/tar/test.tar';

    // extract the archive first to get reference copies of all entries
    $extractor = new Tar();
    $extractor->open($archive);
    $out = sys_get_temp_dir() . '/dwtartest' . md5(time());
    $extractor->extract($out);

    // now iterate the same archive and compare each entry's content
    $tar = new Tar();
    $tar->open($archive);
    $seen = array();
    foreach ($tar->yieldContents() as $entry) {
        $path = $entry->getPath();
        $extracted = $out . '/' . $path;

        $this->assertFileExists($extracted);
        if (!$entry->getIsdir()) {
            $this->assertStringEqualsFile($extracted, $tar->readCurrentEntry());
        } else {
            // directories carry no content
            $this->assertEquals('', $tar->readCurrentEntry());
        }
        $seen[] = $path;
    }

    $this->assertEquals(
        array('tar', 'tar/testdata1.txt', 'tar/foobar', 'tar/foobar/testdata2.txt'),
        $seen
    );

    self::RDelete($out);
}
831+
781832
/**
782833
* recursive rmdir()/unlink()
783834
*

0 commit comments

Comments
 (0)