From 7508f8be7ab52690cb9f94f4d21490a7c00b36df Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 8 Jun 2025 13:26:23 +0200 Subject: [PATCH 1/2] Improve performance of unpack() with nameless repetitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can avoid creating temporary strings, and then reparsing them into numbers with zend_symtable_update() by using zend_hash_index_update() directly. For the following benchmark on an i7-4790: ```php $file = str_repeat('A', 100000); for ($i=0;$i<100;$i++) unpack('C*',$file); ``` I get: ``` Benchmark 1: ./sapi/cli/php y.php Time (mean ± σ): 85.8 ms ± 1.8 ms [User: 74.5 ms, System: 10.4 ms] Range (min … max): 83.8 ms … 92.4 ms 33 runs Benchmark 2: ./sapi/cli/php_old y.php Time (mean ± σ): 318.3 ms ± 2.7 ms [User: 306.7 ms, System: 9.9 ms] Range (min … max): 314.9 ms … 321.6 ms 10 runs Summary ./sapi/cli/php y.php ran 3.71 ± 0.08 times faster than ./sapi/cli/php_old y.php ``` On an i7-1185G7 I get: ``` Benchmark 1: ./sapi/cli/php test.php Time (mean ± σ): 60.1 ms ± 0.7 ms [User: 47.8 ms, System: 12.0 ms] Range (min … max): 59.2 ms … 63.8 ms 48 runs Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet system without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options. Benchmark 2: ./sapi/cli/php_old test.php Time (mean ± σ): 220.8 ms ± 2.2 ms [User: 209.6 ms, System: 10.7 ms] Range (min … max): 218.5 ms … 224.5 ms 13 runs Summary ./sapi/cli/php test.php ran 3.67 ± 0.06 times faster than ./sapi/cli/php_old test.php ``` --- UPGRADING | 2 ++ ext/standard/pack.c | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/UPGRADING b/UPGRADING index 0a639508ef696..44450084c5bf4 100644 --- a/UPGRADING +++ b/UPGRADING @@ -567,6 +567,8 @@ MBString: . Improved performance of array functions with callbacks (array_find, array_filter, array_map, usort, ...). . Improved performance of urlencode() and rawurlencode(). + . Improved unpack() performance with nameless repetitions by avoiding + creating temporary strings and reparsing them. - XMLReader: . Improved property access performance. diff --git a/ext/standard/pack.c b/ext/standard/pack.c index ec30be436741d..06d936fc3a9a4 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -885,12 +885,15 @@ PHP_FUNCTION(unpack) if ((inputpos + size) <= inputlen) { zend_string* real_name; + zend_long long_key = 0; zval val; if (repetitions == 1 && namelen > 0) { /* Use a part of the formatarg argument directly as the name. */ real_name = zend_string_init_fast(name, namelen); - + } else if (namelen == 0) { + real_name = NULL; + long_key = i + 1; } else { /* Need to add the 1-based element number to the name */ char buf[MAX_LENGTH_OF_LONG + 1]; @@ -912,7 +915,6 @@ PHP_FUNCTION(unpack) size = len; ZVAL_STRINGL(&val, &input[inputpos], len); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } case 'A': { @@ -939,7 +941,6 @@ PHP_FUNCTION(unpack) } ZVAL_STRINGL(&val, &input[inputpos], len + 1); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } /* New option added for Z to remain in-line with the Perl implementation */ @@ -964,7 +965,6 @@ PHP_FUNCTION(unpack) len = s; ZVAL_STRINGL(&val, &input[inputpos], len); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1016,7 +1016,6 @@ PHP_FUNCTION(unpack) ZSTR_VAL(buf)[len] = '\0'; ZVAL_STR(&val, buf); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1026,7 +1025,6 @@ PHP_FUNCTION(unpack) zend_long v = (type == 'c') ? (int8_t) x : x; ZVAL_LONG(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1046,7 +1044,6 @@ PHP_FUNCTION(unpack) } ZVAL_LONG(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1062,7 +1059,6 @@ PHP_FUNCTION(unpack) } ZVAL_LONG(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1082,8 +1078,6 @@ PHP_FUNCTION(unpack) } ZVAL_LONG(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); - break; } @@ -1104,7 +1098,6 @@ PHP_FUNCTION(unpack) } ZVAL_LONG(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } #endif @@ -1124,7 +1117,6 @@ PHP_FUNCTION(unpack) } ZVAL_DOUBLE(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1143,13 +1135,12 @@ PHP_FUNCTION(unpack) } ZVAL_DOUBLE(&val, v); - zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } case 'x': /* Do nothing with input, just skip it */ - break; + goto no_output; case 'X': if (inputpos < size) { @@ -1160,7 +1151,7 @@ PHP_FUNCTION(unpack) php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type); } } - break; + goto no_output; case '@': if (repetitions <= inputlen) { @@ -1170,10 +1161,19 @@ PHP_FUNCTION(unpack) } i = repetitions - 1; /* Done, break out of for loop */ - break; + goto no_output; } - zend_string_release(real_name); + if (real_name) { + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); + } else { + zend_hash_index_update(Z_ARRVAL_P(return_value), long_key, &val); + } + +no_output: + if (real_name) { + zend_string_release_ex(real_name, false); + } inputpos += size; if (inputpos < 0) { From 366e527e3402d36e030c77d153c7e2929c033d7c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:21:41 +0200 Subject: [PATCH 2/2] fix null deref --- ext/standard/pack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index 06d936fc3a9a4..f37804622d826 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -979,7 +979,9 @@ PHP_FUNCTION(unpack) if (size > INT_MAX / 2) { - zend_string_release(real_name); + if (real_name) { + zend_string_release_ex(real_name, false); + } zend_argument_value_error(1, "repeater must be less than or equal to %d", INT_MAX / 2); RETURN_THROWS(); }