php · SakiTakamachi · Jun 5, 2025 · Jun 5, 2025 · Jun 7, 2025 · Jun 8, 2025
@@ -429,3 +429,20 @@ bool bc_divide(bc_num numerator, bc_num divisor, bc_num *quot, size_t scale)
 	*quot = bc_copy_num(BCG(_zero_));
 	return true;
 }
+
+/*
+ * Divides the vector array `numerator_vectors` by the vector array `divisor_vectors`
+ * and stores the quotient in `quot_vectors`.
+ *
+ * The last divisor vector must be non-zero, and `quot_arr_size` must equal
+ * `numerator_arr_size - divisor_arr_size + 1`; both conditions are only asserted.
+ * The numerator buffer is passed as non-const to the division helpers.
+ *
+ * Always returns true.
+ */
+bool bc_divide_vector(
+	BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
+	const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
+	BC_VECTOR *quot_vectors, size_t quot_arr_size
+) {
+	ZEND_ASSERT(divisor_vectors[divisor_arr_size - 1] != 0);
+	ZEND_ASSERT(quot_arr_size == numerator_arr_size - divisor_arr_size + 1);
+
+	/* A divisor made of more than one vector needs the general routine. */
+	if (divisor_arr_size != 1) {
+		bc_standard_div(numerator_vectors, numerator_arr_size, divisor_vectors, divisor_arr_size, divisor_size, quot_vectors, quot_arr_size);
+		return true;
+	}
+
+	/* A single-vector divisor can be handled by the fast routine. */
+	bc_fast_div(numerator_vectors, numerator_arr_size, divisor_vectors[0], quot_vectors, quot_arr_size);
+	return true;
+}
@@ -87,6 +87,10 @@ bc_num _bc_do_sub (bc_num n1, bc_num n2);
 void bc_multiply_vector(
 	const BC_VECTOR *n1_vector, size_t n1_arr_size, const BC_VECTOR *n2_vector, size_t n2_arr_size,
 	BC_VECTOR *prod_vector, size_t prod_arr_size);
+bool bc_divide_vector(
+	BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
+	const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
+	BC_VECTOR *quot_vectors, size_t quot_arr_size);
 void _bc_rm_leading_zeros (bc_num num);
 
 #endif
@@ -30,28 +30,282 @@
 *************************************************************************/
 
 #include "bcmath.h"
+#include "convert.h"
 #include <stdbool.h>
 #include <stddef.h>
+#include "private.h"
 
 /* Take the square root NUM and return it in NUM with SCALE digits
    after the decimal place. */
 
+/*
+ * Returns 10 raised to `exponent` as a BC_VECTOR.
+ * The value is assembled from BC_POW_10_LUT: one factor of 10^8 for every full
+ * group of 8 in the exponent, times the table entry for the remainder.
+ */
+static inline BC_VECTOR bc_sqrt_get_pow_10(size_t exponent)
+{
+	BC_VECTOR result = BC_POW_10_LUT[exponent % 8];
+	for (size_t groups = exponent / 8; groups > 0; groups--) {
+		result *= BC_POW_10_LUT[8];
+	}
+	return result;
+}
+
+/*
+ * Returns an integer approximation of the square root of `n_vector`.
+ *
+ * An initial guess is derived from the bit pattern of the value converted to
+ * double, then refined with Newton's method until two successive guesses
+ * differ by at most 1. Zero is returned for a zero input.
+ */
+static BC_VECTOR bc_fast_sqrt_vector(BC_VECTOR n_vector)
+{
+	/* The square root of 0 is 0. Returning early is required: for a zero input the
+	 * initial guess below truncates to 0 and the Newton step would divide by it. */
+	if (n_vector == 0) {
+		return 0;
+	}
+
+	/* Use a bitwise method for approximating the square root
+	 * as the initial guess for Newton's method.
+	 * Halving the bit pattern of the double roughly halves its exponent; the added
+	 * and subtracted constants compensate for the exponent bias.
+	 * (Assumes double is a 64-bit IEEE 754 value.) */
+	union {
+		uint64_t i;
+		double d;
+	} u;
+	u.d = (double) n_vector;
+	u.i = (1ULL << 61) + (u.i >> 1) - (1ULL << 50);
+	BC_VECTOR guess_vector = (BC_VECTOR) u.d;
+	if (guess_vector == 0) {
+		/* The guess is used as a divisor, so it must never be zero. */
+		guess_vector = 1;
+	}
+
+	/* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2` */
+	BC_VECTOR guess1_vector;
+	BC_VECTOR diff;
+	do {
+		guess1_vector = guess_vector;
+		guess_vector = (guess1_vector + n_vector / guess1_vector) / 2; /* Iterative expression */
+		diff = guess1_vector > guess_vector ? guess1_vector - guess_vector : guess_vector - guess1_vector;
+	} while (diff > 1);
+	return guess_vector;
+}
+
+/*
+ * Replaces *num with its square root, calculated entirely within a single BC_VECTOR.
+ * The result has `rscale` digits after the decimal point.
+ *
+ * NOTE(review): this relies on the caller only passing numbers whose scaled value
+ * fits into a BC_VECTOR - confirm the limit used by the caller on every platform.
+ */
+static inline void bc_fast_sqrt(bc_num *num, size_t rscale)
+{
+	bc_num src = *num;
+	size_t src_full_len = src->n_len + src->n_scale;
+
+	/* Pack every digit of the number (integer and fractional part) into one integer. */
+	BC_VECTOR n_vector = 0;
+	for (size_t pos = 0; pos < src_full_len; pos++) {
+		n_vector = n_vector * BASE + src->n_value[pos];
+	}
+
+	/* The root is calculated with integer arithmetic only, and the input is the square
+	 * of the result, so it has to carry twice as many fractional digits as the root.
+	 * The root is calculated with `rscale + 1` fractional digits, hence the input is
+	 * brought to a scale of `(rscale + 1) * 2`. */
+	n_vector *= bc_sqrt_get_pow_10((rscale + 1) * 2 - src->n_scale);
+
+	/* Get sqrt */
+	BC_VECTOR root = bc_fast_sqrt_vector(n_vector);
+
+	/* Count the decimal digits of the root; a root of zero counts as one digit. */
+	size_t root_digits = 1;
+	for (BC_VECTOR rest = root / BASE; rest > 0; rest /= BASE) {
+		root_digits++;
+	}
+
+	/* Keep at least one integer digit so that values below 1 get a leading zero. */
+	size_t ret_len = 1;
+	if (root_digits > rscale + 1) {
+		ret_len = root_digits - (rscale + 1);
+	}
+	bc_num ret = bc_new_num_nonzeroed(ret_len, rscale);
+
+	/* The root has a scale of `rscale + 1`; drop the surplus digit. */
+	root /= BASE;
+
+	/* Write the digits from the least significant one backwards. */
+	char *out = ret->n_value + ret_len + rscale;
+	while (out > ret->n_value) {
+		*--out = root % BASE;
+		root /= BASE;
+	}
+
+	bc_free_num(num);
+	*num = ret;
+}
+
+static inline void bc_standard_sqrt(bc_num *num, size_t rscale, size_t num_calc_full_len)
+{
+	/* Replaces *num with its square root, calculated with Newton's method on arrays of BC_VECTOR.
+	 * The root is calculated with `rscale + 1` fractional digits and returned with a scale of `rscale`.
+	 * `num_calc_full_len` is the number of digits *num is zero-extended to before it is converted.
+	 *
+	 * First, allocate memory: a single buffer holds the number, a working copy of it,
+	 * both guesses and the result of the divisions. */
+	size_t n_arr_size = BC_ARR_SIZE_FROM_LEN(num_calc_full_len);
+
+	size_t guess_len = ((*num)->n_len + 1) / 2; /* half of the integer digits of num, rounded up */
+	size_t guess_scale = rscale + 1; /* one more fractional digit than the result will have */
+	size_t guess_full_len = guess_len + guess_scale;
+	/* Since the old guess and the quotient are added together during the calculation,
+	 * there is a chance of overflow, so allocate one extra vector (it receives the carry). */
+	size_t guess_arr_size = BC_ARR_SIZE_FROM_LEN(guess_full_len) + 1;
+
+	size_t allocate_size = n_arr_size * 2 + guess_arr_size * 3;
+	BC_VECTOR *buf = safe_emalloc(allocate_size, sizeof(BC_VECTOR), 0);
+
+	BC_VECTOR *n_vector = buf;
+	/* In division by successive approximation, the numerator is modified during the computation,
+	 * so it must be copied each time. */
+	BC_VECTOR *n_vector_copy = n_vector + n_arr_size;
+	BC_VECTOR *guess_vector = n_vector_copy + n_arr_size;
+	BC_VECTOR *guess1_vector = guess_vector + guess_arr_size;
+	BC_VECTOR *tmp_div_ret_vector = guess1_vector + guess_arr_size;
+
+	/* convert num to n_vector, padded with zeros up to num_calc_full_len digits */
+	size_t n_full_len = (*num)->n_len + (*num)->n_scale;
+	const char *nend = (*num)->n_value + n_full_len - 1;
+	size_t n_extend_zeros = num_calc_full_len - n_full_len;
+
+	bc_convert_to_vector_with_zero_pad(n_vector, nend, n_full_len, n_extend_zeros);
+
+	/* Prepare guess_vector. Use `bc_fast_sqrt_vector()` to quickly obtain a highly accurate initial value.
+	 * The value below is the number of leading digits of num that are passed to it. */
+	size_t n_top_len_for_initial_guess = SIZEOF_SIZE_T == 8 ? 18 : 10;
+
+	/* Set the number of digits of num to be used as the initial value for Newton's method.
+	 * Just as the square roots of 1000 and 100 differ significantly, the number of digits
+	 * to "ignore" here must be even. */
+	if (num_calc_full_len & 1) {
+		n_top_len_for_initial_guess--;
+	}
+	BC_VECTOR n_top = n_vector[n_arr_size - 1]; /* start with the last (top) vector */
+	size_t n_top_index = n_arr_size - 2;
+	size_t n_top_vector_len = num_calc_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : num_calc_full_len % BC_VECTOR_SIZE;
+	size_t count = n_top_len_for_initial_guess - n_top_vector_len; /* digits still to be appended to n_top */
+	while (count >= BC_VECTOR_SIZE) {
+		n_top *= BC_VECTOR_BOUNDARY_NUM;
+		n_top += n_vector[n_top_index--];
+		count -= BC_VECTOR_SIZE;
+	}
+	if (count > 0) {
+		n_top *= BC_POW_10_LUT[count];
+		n_top += n_vector[n_top_index] / BC_POW_10_LUT[BC_VECTOR_SIZE - count];
+	}
+
+	/* Calculate the initial guess.
+	 * NOTE(review): for a num with many leading zeros n_top presumably ends up as 0 here;
+	 * confirm that bc_fast_sqrt_vector() and the divisions below cope with that. */
+	BC_VECTOR initial_guess = bc_fast_sqrt_vector(n_top);
+
+	/* Set the obtained initial guess to guess_vector.
+	 * It is split over the two vectors below the spare top vector; initial_guess_len is
+	 * presumably the number of digits of initial_guess - TODO confirm. */
+	size_t initial_guess_len = SIZEOF_SIZE_T == 8 ? 9 : 5;
+	size_t guess_top_vector_len = guess_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : guess_full_len % BC_VECTOR_SIZE;
+	size_t guess_len_diff = initial_guess_len - guess_top_vector_len;
+	guess_vector[guess_arr_size - 2] = initial_guess / BC_POW_10_LUT[guess_len_diff];
+	initial_guess %= BC_POW_10_LUT[guess_len_diff];
+	guess_vector[guess_arr_size - 3] = initial_guess * BC_POW_10_LUT[BC_VECTOR_SIZE - guess_len_diff];
+
+	/* Initialize the uninitialized vector with zeros.
+	 * Only the elements below the two vectors holding the initial guess are cleared,
+	 * in both guess buffers. */
+	for (size_t i = 0; i < guess_arr_size - 3; i++) {
+		guess_vector[i] = 0;
+		guess1_vector[i] = 0;
+	}
+	guess_vector[guess_arr_size - 1] = 0; /* the spare top vector starts out empty */
+
+	BC_VECTOR two[1] = { 2 }; /* divisor for step 3 of the iteration */
+
+	/* The precision (number of vectors) used for the calculation.
+	 * Since the initial value uses two vectors, the initial precision is set to 2. */
+	size_t guess_precision = 2;
+	size_t guess_offset = guess_arr_size - 1 - guess_precision;
+	size_t n_offset = guess_offset * 2;
+	size_t n_precision = n_arr_size - n_offset;
+	size_t quot_size = n_precision - (guess_precision) + 1;
+	size_t guess_use_len = guess_top_vector_len + BC_VECTOR_SIZE;
+	bool updated_precision = false;
+
+	/**
+	 * Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2`
+	 * Broken down into detailed steps, the calculation looks like this:
+	 * 1. quot = a / x_n
+	 * 2. add = x_n + quot
+	 * 3. x_{n+1} = add / 2
+	 * 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1.
+	 *
+	 * The iteration starts with a precision of two vectors. Each time the guess has settled
+	 * at the current precision, the precision is tripled (capped at `guess_arr_size - 1`)
+	 * until the full width is reached.
+	 */
+	bool done = false;
+	do {
+		if (updated_precision) {
+			/* The precision was raised at the end of the previous round: recompute the window sizes. */
+			guess_offset = guess_arr_size - 1 - guess_precision;
+			n_offset = guess_offset * 2;
+			n_precision = n_arr_size - n_offset;
+			quot_size = n_precision - (guess_precision) + 1;
+			guess_use_len = guess_top_vector_len + (guess_precision - 1) * BC_VECTOR_SIZE;
+			updated_precision = false;
+		}
+
+		/* Since the value changes during division by successive approximation, use a copied version of it. */
+		for (size_t i = n_offset; i < n_arr_size; i++) {
+			n_vector_copy[i] = n_vector[i];
+		}
+
+		/* 1. quot = a / x_n */
+		bool div_ret = bc_divide_vector(
+			n_vector_copy + n_offset, n_precision,
+			guess_vector + guess_offset, guess_precision, guess_use_len,
+			tmp_div_ret_vector + guess_offset, quot_size
+		);
+		ZEND_ASSERT(div_ret);
+
+		/* Swap the buffers: the current guess becomes the old guess (x_n),
+		 * and the other buffer receives the new one. */
+		BC_VECTOR *tmp_vptr = guess1_vector;
+		guess1_vector = guess_vector;
+		guess_vector = tmp_vptr;
+
+		/* 2. add = x_n + quot
+		 * NOTE(review): the loop also processes index `guess_arr_size - 1`, and that element is
+		 * overwritten with the carry right after the loop. Confirm this is intended, and that
+		 * step 1 (which wrote `quot_size` elements) always sets `tmp_div_ret_vector[guess_arr_size - 1]`. */
+		int carry = 0;
+		for (size_t i = guess_offset; i < guess_arr_size; i++) {
+			guess_vector[i] = guess1_vector[i] + tmp_div_ret_vector[i] + carry;
+			if (guess_vector[i] >= BC_VECTOR_BOUNDARY_NUM) {
+				guess_vector[i] -= BC_VECTOR_BOUNDARY_NUM;
+				carry = 1;
+			} else {
+				carry = 0;
+			}
+		}
+		guess_vector[guess_arr_size - 1] = carry;
+
+		/* 3. x_{n+1} = add / 2 */
+		div_ret = bc_divide_vector(
+			guess_vector + guess_offset, guess_precision + 1,
+			two, 1, 1,
+			tmp_div_ret_vector + guess_offset, guess_precision + 1
+		);
+		ZEND_ASSERT(div_ret);
+
+		/* Copy the halved sum back: it is the new guess x_{n+1}. */
+		for (size_t i = guess_offset; i < guess_arr_size; i++) {
+			guess_vector[i] = tmp_div_ret_vector[i];
+		}
+
+		/* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1.
+		 * The lowest vector in use may differ by at most 1; every vector above it must be equal. */
+		size_t diff = guess_vector[guess_offset] > guess1_vector[guess_offset]
+			? guess_vector[guess_offset] - guess1_vector[guess_offset]
+			: guess1_vector[guess_offset] - guess_vector[guess_offset];
+		if (diff <= 1) {
+			bool is_same = true;
+			for (size_t i = guess_offset + 1; i < guess_arr_size - 1; i++) {
+				if (guess_vector[i] != guess1_vector[i]) {
+					is_same = false;
+					break;
+				}
+			}
+			if (is_same) {
+				if (guess_precision < guess_arr_size - 1) {
+					/* If the precision has not yet reached the maximum number of digits, it will be increased. */
+					guess_precision = MIN(guess_precision * 3, guess_arr_size - 1);
+					updated_precision = true;
+				} else {
+					done = is_same;
+				}
+			}
+		}
+	} while (!done);
+
+	bc_num ret = bc_new_num_nonzeroed(guess_len, guess_scale);
+	char *rptr = ret->n_value;
+	char *rend = rptr + guess_full_len - 1;
+
+	bc_convert_vector_to_char(guess_vector, rptr, rend, guess_arr_size - 1);
+	ret->n_scale = rscale; /* the guess was converted with scale rscale + 1; lowering n_scale drops the extra digit */
+
+	bc_free_num(num);
+	*num = ret;
+
+	efree(buf);
+}
+
 bool bc_sqrt(bc_num *num, size_t scale)
 {
-	const bc_num local_num = *num;
 	/* Initial checks. */
-	if (bc_is_neg(local_num)) {
+	if (bc_is_neg(*num)) {
 		/* Cannot take the square root of a negative number */
 		return false;
 	}
 	/* Square root of 0 is 0 */
-	if (bc_is_zero(local_num)) {
+	if (bc_is_zero(*num)) {
 		bc_free_num (num);
 		*num = bc_copy_num(BCG(_zero_));
 		return true;
 	}
 
-	bcmath_compare_result num_cmp_one = bc_compare(local_num, BCG(_one_), local_num->n_scale);
+	bcmath_compare_result num_cmp_one = bc_compare(*num, BCG(_one_), (*num)->n_scale);
 	/* Square root of 1 is 1 */
 	if (num_cmp_one == BCMATH_EQUAL) {
 		bc_free_num (num);
@@ -60,58 +314,14 @@ bool bc_sqrt(bc_num *num, size_t scale)
 	}
 
 	/* Initialize the variables. */
-	size_t cscale;
-	bc_num guess, guess1, point5, diff;
-	size_t rscale = MAX(scale, local_num->n_scale);
-
-	bc_init_num(&guess1);
-	bc_init_num(&diff);
-	point5 = bc_new_num (1, 1);
-	point5->n_value[1] = 5;
-
-
-	/* Calculate the initial guess. */
-	if (num_cmp_one == BCMATH_RIGHT_GREATER) {
-		/* The number is between 0 and 1.  Guess should start at 1. */
-		guess = bc_copy_num(BCG(_one_));
-		cscale = local_num->n_scale;
-	} else {
-		/* The number is greater than 1.  Guess should start at 10^(exp/2). */
-		bc_init_num(&guess);
-		bc_int2num(&guess, 10);
-
-		bc_int2num(&guess1, local_num->n_len);
-		bc_multiply_ex(guess1, point5, &guess1, 0);
-		guess1->n_scale = 0;
-		bc_raise_bc_exponent(guess, guess1, &guess, 0);
-		bc_free_num (&guess1);
-		cscale = 3;
-	}
+	size_t rscale = MAX(scale, (*num)->n_scale);
+	size_t num_calc_full_len = (*num)->n_len + (rscale + 1) * 2;
 
 	/* Find the square root using Newton's algorithm. */
-	bool done = false;
-	while (!done) {
-		bc_free_num (&guess1);
-		guess1 = bc_copy_num(guess);
-		bc_divide(*num, guess, &guess, cscale);
-		bc_add_ex(guess, guess1, &guess, 0);
-		bc_multiply_ex(guess, point5, &guess, cscale);
-		bc_sub_ex(guess, guess1, &diff, cscale + 1);
-		if (bc_is_near_zero(diff, cscale)) {
-			if (cscale < rscale + 1) {
-				cscale = MIN (cscale * 3, rscale + 1);
-			} else {
-				done = true;
-			}
-		}
+	if (num_calc_full_len < MAX_LENGTH_OF_LONG) {
+		bc_fast_sqrt(num, rscale);
+	} else {
+		bc_standard_sqrt(num, rscale, num_calc_full_len);
 	}
-
-	/* Assign the number and clean up. */
-	bc_free_num (num);
-	bc_divide(guess, BCG(_one_), num, rscale);
-	bc_free_num (&guess);
-	bc_free_num (&guess1);
-	bc_free_num (&point5);
-	bc_free_num (&diff);
 	return true;
 }