Skip to content

[WIP] ext/bcmath: optimized bcsqrt() #18771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions ext/bcmath/libbcmath/src/div.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,3 +429,20 @@ bool bc_divide(bc_num numerator, bc_num divisor, bc_num *quot, size_t scale)
*quot = bc_copy_num(BCG(_zero_));
return true;
}

bool bc_divide_vector(
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
BC_VECTOR *quot_vectors, size_t quot_arr_size
) {
ZEND_ASSERT(divisor_vectors[divisor_arr_size - 1] != 0);
ZEND_ASSERT(quot_arr_size == numerator_arr_size - divisor_arr_size + 1);

/* Do the division */
if (divisor_arr_size == 1) {
bc_fast_div(numerator_vectors, numerator_arr_size, divisor_vectors[0], quot_vectors, quot_arr_size);
} else {
bc_standard_div(numerator_vectors, numerator_arr_size, divisor_vectors, divisor_arr_size, divisor_size, quot_vectors, quot_arr_size);
}
return true;
}
4 changes: 4 additions & 0 deletions ext/bcmath/libbcmath/src/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ bc_num _bc_do_sub (bc_num n1, bc_num n2);
void bc_multiply_vector(
const BC_VECTOR *n1_vector, size_t n1_arr_size, const BC_VECTOR *n2_vector, size_t n2_arr_size,
BC_VECTOR *prod_vector, size_t prod_arr_size);
bool bc_divide_vector(
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
BC_VECTOR *quot_vectors, size_t quot_arr_size);
void _bc_rm_leading_zeros (bc_num num);

#endif
318 changes: 264 additions & 54 deletions ext/bcmath/libbcmath/src/sqrt.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,282 @@
*************************************************************************/

#include "bcmath.h"
#include "convert.h"
#include <stdbool.h>
#include <stddef.h>
#include "private.h"

/* Take the square root NUM and return it in NUM with SCALE digits
after the decimal place. */

static inline BC_VECTOR bc_sqrt_get_pow_10(size_t exponent)
{
BC_VECTOR value = 1;
while (exponent >= 8) {
value *= BC_POW_10_LUT[8];
exponent -= 8;
}
value *= BC_POW_10_LUT[exponent];
return value;
}

static BC_VECTOR bc_fast_sqrt_vector(BC_VECTOR n_vector)
{
/* Use a bitwise method for approximating the square root
* as the initial guess for Newton's method. */
union {
uint64_t i;
double d;
} u;
u.d = (double) n_vector;
u.i = (1ULL << 61) + (u.i >> 1) - (1ULL << 50);
BC_VECTOR guess_vector = (BC_VECTOR) u.d;

/* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2` */
BC_VECTOR guess1_vector;
size_t diff;
do {
guess1_vector = guess_vector;
guess_vector = (guess1_vector + n_vector / guess1_vector) / 2; /* Iterative expression */
diff = guess1_vector > guess_vector ? guess1_vector - guess_vector : guess_vector - guess1_vector;
} while (diff > 1);
return guess_vector;
}

static inline void bc_fast_sqrt(bc_num *num, size_t rscale)
{
BC_VECTOR n_vector = 0;
size_t i = 0;
for (; i < (*num)->n_len + (*num)->n_scale; i++) {
n_vector = n_vector * BASE + (*num)->n_value[i];
}
/* When calculating the square root of a number using only integer operations,
* need to adjust the digit scale accordingly.
* Considering that the original number is the square of the result,
* if the desired scale of the result is 5, the input number should be scaled
* by twice that, i.e., scale 10. */
n_vector *= bc_sqrt_get_pow_10((rscale + 1) * 2 - (*num)->n_scale);

/* Get sqrt */
BC_VECTOR guess_vector = bc_fast_sqrt_vector(n_vector);

size_t full_len = 0;
BC_VECTOR tmp_guess_vector = guess_vector;
do {
tmp_guess_vector /= BASE;
full_len++;
} while (tmp_guess_vector > 0);

size_t ret_ren = full_len > rscale + 1 ? full_len - (rscale + 1) : 1; /* for int zero */
bc_num ret = bc_new_num_nonzeroed(ret_ren, rscale);
char *rptr = ret->n_value;
char *rend = rptr + ret_ren + rscale - 1;

guess_vector /= BASE; /* Since the scale of guess_vector is rscale + 1, reduce the scale by 1. */
while (rend >= rptr) {
*rend-- = guess_vector % BASE;
guess_vector /= BASE;
}
bc_free_num(num);
*num = ret;
}

static inline void bc_standard_sqrt(bc_num *num, size_t rscale, size_t num_calc_full_len)
{
/* allocate memory */
size_t n_arr_size = BC_ARR_SIZE_FROM_LEN(num_calc_full_len);

size_t guess_len = ((*num)->n_len + 1) / 2;
size_t guess_scale = rscale + 1;
size_t guess_full_len = guess_len + guess_scale;
/* Since add the old guess and the new guess together during the calculation,
* there is a chance of overflow, so allocate an extra size. */
size_t guess_arr_size = BC_ARR_SIZE_FROM_LEN(guess_full_len) + 1;

size_t allocate_size = n_arr_size * 2 + guess_arr_size * 3;
BC_VECTOR *buf = safe_emalloc(allocate_size, sizeof(BC_VECTOR), 0);

BC_VECTOR *n_vector = buf;
/* In division by successive approximation, the numerator is modified during the computation,
* so it must be copied each time. */
BC_VECTOR *n_vector_copy = n_vector + n_arr_size;
BC_VECTOR *guess_vector = n_vector_copy + n_arr_size;
BC_VECTOR *guess1_vector = guess_vector + guess_arr_size;
BC_VECTOR *tmp_div_ret_vector = guess1_vector + guess_arr_size;

/* convert num to n_vector */
size_t n_full_len = (*num)->n_len + (*num)->n_scale;
const char *nend = (*num)->n_value + n_full_len - 1;
size_t n_extend_zeros = num_calc_full_len - n_full_len;

bc_convert_to_vector_with_zero_pad(n_vector, nend, n_full_len, n_extend_zeros);

/* Prepare guess_vector. Use `bc_fast_sqrt_vector()` to quickly obtain a highly accurate initial value. */
size_t n_top_len_for_initial_guess = SIZEOF_SIZE_T == 8 ? 18 : 10;

/* Set the number of digits of num to be used as the initial value for Newton's method.
* Just as the square roots of 1000 and 100 differ significantly, the number of digits
* to "ignore" here must be even. */
if (num_calc_full_len & 1) {
n_top_len_for_initial_guess--;
}
BC_VECTOR n_top = n_vector[n_arr_size - 1];
size_t n_top_index = n_arr_size - 2;
size_t n_top_vector_len = num_calc_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : num_calc_full_len % BC_VECTOR_SIZE;
size_t count = n_top_len_for_initial_guess - n_top_vector_len;
while (count >= BC_VECTOR_SIZE) {
n_top *= BC_VECTOR_BOUNDARY_NUM;
n_top += n_vector[n_top_index--];
count -= BC_VECTOR_SIZE;
}
if (count > 0) {
n_top *= BC_POW_10_LUT[count];
n_top += n_vector[n_top_index] / BC_POW_10_LUT[BC_VECTOR_SIZE - count];
}

/* Calculate the initial guess. */
BC_VECTOR initial_guess = bc_fast_sqrt_vector(n_top);

/* Set the obtained initial guess to guess_vector. */
size_t initial_guess_len = SIZEOF_SIZE_T == 8 ? 9 : 5;
size_t guess_top_vector_len = guess_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : guess_full_len % BC_VECTOR_SIZE;
size_t guess_len_diff = initial_guess_len - guess_top_vector_len;
guess_vector[guess_arr_size - 2] = initial_guess / BC_POW_10_LUT[guess_len_diff];
initial_guess %= BC_POW_10_LUT[guess_len_diff];
guess_vector[guess_arr_size - 3] = initial_guess * BC_POW_10_LUT[BC_VECTOR_SIZE - guess_len_diff];

/* Initialize the uninitialized vector with zeros. */
for (size_t i = 0; i < guess_arr_size - 3; i++) {
guess_vector[i] = 0;
guess1_vector[i] = 0;
}
guess_vector[guess_arr_size - 1] = 0;

BC_VECTOR two[1] = { 2 };

/* The precision (number of vectors) used for the calculation.
* Since the initial value uses two vectors, the initial precision is set to 2. */
size_t guess_precision = 2;
size_t guess_offset = guess_arr_size - 1 - guess_precision;
size_t n_offset = guess_offset * 2;
size_t n_precision = n_arr_size - n_offset;
size_t quot_size = n_precision - (guess_precision) + 1;
size_t guess_use_len = guess_top_vector_len + BC_VECTOR_SIZE;
bool updated_precision = false;

/**
* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2`
* If break down the calculation into detailed steps, it looks like this:
* 1. quot = a / x_n
* 2. add = x_n + quot1
* 3. x_{n+1} = add / 2
* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1.
*/
bool done = false;
do {
if (updated_precision) {
guess_offset = guess_arr_size - 1 - guess_precision;
n_offset = guess_offset * 2;
n_precision = n_arr_size - n_offset;
quot_size = n_precision - (guess_precision) + 1;
guess_use_len = guess_top_vector_len + (guess_precision - 1) * BC_VECTOR_SIZE;
updated_precision = false;
}

/* Since the value changes during division by successive approximation, use a copied version of it. */
for (size_t i = n_offset; i < n_arr_size; i++) {
n_vector_copy[i] = n_vector[i];
}

/* 1. quot = a / x_n */
bool div_ret = bc_divide_vector(
n_vector_copy + n_offset, n_precision,
guess_vector + guess_offset, guess_precision, guess_use_len,
tmp_div_ret_vector + guess_offset, quot_size
);
ZEND_ASSERT(div_ret);

BC_VECTOR *tmp_vptr = guess1_vector;
guess1_vector = guess_vector;
guess_vector = tmp_vptr;

/* 2. add = x_n + quot1 */
int carry = 0;
for (size_t i = guess_offset; i < guess_arr_size; i++) {
guess_vector[i] = guess1_vector[i] + tmp_div_ret_vector[i] + carry;
if (guess_vector[i] >= BC_VECTOR_BOUNDARY_NUM) {
guess_vector[i] -= BC_VECTOR_BOUNDARY_NUM;
carry = 1;
} else {
carry = 0;
}
}
guess_vector[guess_arr_size - 1] = carry;

/* 3. x_{n+1} = add / 2 */
div_ret = bc_divide_vector(
guess_vector + guess_offset, guess_precision + 1,
two, 1, 1,
tmp_div_ret_vector + guess_offset, guess_precision + 1
);
ZEND_ASSERT(div_ret);

for (size_t i = guess_offset; i < guess_arr_size; i++) {
guess_vector[i] = tmp_div_ret_vector[i];
}

/* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1. */
size_t diff = guess_vector[guess_offset] > guess1_vector[guess_offset]
? guess_vector[guess_offset] - guess1_vector[guess_offset]
: guess1_vector[guess_offset] - guess_vector[guess_offset];
if (diff <= 1) {
bool is_same = true;
for (size_t i = guess_offset + 1; i < guess_arr_size - 1; i++) {
if (guess_vector[i] != guess1_vector[i]) {
is_same = false;
break;
}
}
if (is_same) {
if (guess_precision < guess_arr_size - 1) {
/* If the precision has not yet reached the maximum number of digits, it will be increased. */
guess_precision = MIN(guess_precision * 3, guess_arr_size - 1);
updated_precision = true;
} else {
done = is_same;
}
}
}
} while (!done);

bc_num ret = bc_new_num_nonzeroed(guess_len, guess_scale);
char *rptr = ret->n_value;
char *rend = rptr + guess_full_len - 1;

bc_convert_vector_to_char(guess_vector, rptr, rend, guess_arr_size - 1);
ret->n_scale = rscale;

bc_free_num(num);
*num = ret;

efree(buf);
}

bool bc_sqrt(bc_num *num, size_t scale)
{
const bc_num local_num = *num;
/* Initial checks. */
if (bc_is_neg(local_num)) {
if (bc_is_neg(*num)) {
/* Cannot take the square root of a negative number */
return false;
}
/* Square root of 0 is 0 */
if (bc_is_zero(local_num)) {
if (bc_is_zero(*num)) {
bc_free_num (num);
*num = bc_copy_num(BCG(_zero_));
return true;
}

bcmath_compare_result num_cmp_one = bc_compare(local_num, BCG(_one_), local_num->n_scale);
bcmath_compare_result num_cmp_one = bc_compare(*num, BCG(_one_), (*num)->n_scale);
/* Square root of 1 is 1 */
if (num_cmp_one == BCMATH_EQUAL) {
bc_free_num (num);
Expand All @@ -60,58 +314,14 @@ bool bc_sqrt(bc_num *num, size_t scale)
}

/* Initialize the variables. */
size_t cscale;
bc_num guess, guess1, point5, diff;
size_t rscale = MAX(scale, local_num->n_scale);

bc_init_num(&guess1);
bc_init_num(&diff);
point5 = bc_new_num (1, 1);
point5->n_value[1] = 5;


/* Calculate the initial guess. */
if (num_cmp_one == BCMATH_RIGHT_GREATER) {
/* The number is between 0 and 1. Guess should start at 1. */
guess = bc_copy_num(BCG(_one_));
cscale = local_num->n_scale;
} else {
/* The number is greater than 1. Guess should start at 10^(exp/2). */
bc_init_num(&guess);
bc_int2num(&guess, 10);

bc_int2num(&guess1, local_num->n_len);
bc_multiply_ex(guess1, point5, &guess1, 0);
guess1->n_scale = 0;
bc_raise_bc_exponent(guess, guess1, &guess, 0);
bc_free_num (&guess1);
cscale = 3;
}
size_t rscale = MAX(scale, (*num)->n_scale);
size_t num_calc_full_len = (*num)->n_len + (rscale + 1) * 2;

/* Find the square root using Newton's algorithm. */
bool done = false;
while (!done) {
bc_free_num (&guess1);
guess1 = bc_copy_num(guess);
bc_divide(*num, guess, &guess, cscale);
bc_add_ex(guess, guess1, &guess, 0);
bc_multiply_ex(guess, point5, &guess, cscale);
bc_sub_ex(guess, guess1, &diff, cscale + 1);
if (bc_is_near_zero(diff, cscale)) {
if (cscale < rscale + 1) {
cscale = MIN (cscale * 3, rscale + 1);
} else {
done = true;
}
}
if (num_calc_full_len < MAX_LENGTH_OF_LONG) {
bc_fast_sqrt(num, rscale);
} else {
bc_standard_sqrt(num, rscale, num_calc_full_len);
}

/* Assign the number and clean up. */
bc_free_num (num);
bc_divide(guess, BCG(_one_), num, rscale);
bc_free_num (&guess);
bc_free_num (&guess1);
bc_free_num (&point5);
bc_free_num (&diff);
return true;
}