Context Navigation

source: mainline/uspace/lib/math/generic/sqrt.c@ ca113cf

Visit:

Last change on this file since ca113cf was ca113cf, checked in by Maurizio Lombardi <mlombard@…>, 4 years ago
math: sync sqrt() to FreeBSD 11.2
Property mode set to `100644`
File size: 14.0 KB

Rev	Line
[048a6e9]	1	/*
	2	* ====================================================
	3	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	4	*
	5	* Developed at SunSoft, a Sun Microsystems, Inc. business.
	6	* Permission to use, copy, modify, and distribute this
	7	* software is freely granted, provided that this notice
	8	* is preserved.
	9	* ====================================================
	10	*/
	11
	12	/** @addtogroup libmath
	13	* @{
	14	*/
	15	/** @file sqrt mathematical function
	16	*/
	17
	18
	19	/* __ieee754_sqrt(x)
	20	* Return correctly rounded sqrt.
	21	* ------------------------------------------
	22	* | Use the hardware sqrt if you have one |
	23	* ------------------------------------------
	24	* Method:
	25	* Bit by bit method using integer arithmetic. (Slow, but portable)
	26	* 1. Normalization
	27	* Scale x to y in [1,4) with even powers of 2:
	28	* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
	29	* sqrt(x) = 2^k * sqrt(y)
	30	* 2. Bit by bit computation
	31	* Let q_i = sqrt(y) truncated to i bits after the binary point
	32	* (so that q_0 = 1), and define
	33	*
	34	*     s_i = 2 q_i ,
	35	*     y_i = 2^{i+1} (y - q_i^2).                                (1)
	36	*
	37	* To compute q_{i+1} from q_i, one checks whether
	38	*
	39	*     (q_i + 2^{-(i+1)})^2 <= y.                                (2)
	40	*
	41	* If (2) is false, then
	42	*     q_{i+1} = q_i ;
	43	* otherwise
	44	*     q_{i+1} = q_i + 2^{-(i+1)} .
	45	*
	46	*
	47	* With some algebraic manipulation, it is not difficult to see
	48	* that (2) is equivalent to
	49	*
	50	*     s_i + 2^{-(i+1)} <= y_i .                                 (3)
	51	*
	52	*
	53	* The advantage of (3) is that s_i and y_i can be computed by
	54	* the following recurrence formula:
	55	*
	56	*   if (3) is false,
	57	*
	58	*     s_{i+1} = s_i ,  y_{i+1} = y_i ;                          (4)
	59	*
	60	*   otherwise,
	61	*
	62	*     s_{i+1} = s_i + 2^{-i} ,
	63	*     y_{i+1} = y_i - s_i - 2^{-(i+1)} .                        (5)
	64	*
	65	*
	66	* One may easily use induction to prove (4) and (5).
	67	* Note. Since the left hand side of (3) contains only i+2 bits,
	68	* it is not necessary to do a full (53-bit) comparison
	69	* in (3).
	70	* 3. Final rounding
	71	* After generating the 53 bits result, we compute one more bit.
	72	* Together with the remainder, we can decide whether the
	73	* result is exact, bigger than 1/2ulp, or less than 1/2ulp
	74	* (it will never be equal to 1/2ulp).
	75	* The rounding mode can be detected by checking whether
	76	* huge + tiny is equal to huge, and whether huge - tiny is
	77	* equal to huge for some floating point number "huge" and "tiny".
	78	*
	79	* Special cases:
	80	* sqrt(+-0) = +-0 ... exact
	81	* sqrt(inf) = inf
	82	* sqrt(-ve) = NaN ... with invalid signal
	83	* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
	84	*
	85	* Other methods : see the appended file at the end of the program below.
	86	*---------------
	87	*/
	88
	89	#include <math.h>
	90	#include <stdint.h>
	91
	92	#include "internal.h"
	93
/*
 * Constants used only to probe the current rounding mode (and to raise the
 * inexact flag) with a floating-point add/subtract in the final rounding step.
 */
static const double one = 1.0, tiny = 1.0e-300;

/**
 * Compute the square root of x using the bit-by-bit integer method
 * described in the comment at the top of this file.
 *
 * The 53-bit result is generated one bit at a time in [q,q1]; one extra
 * bit together with the remainder is then used to round the result.
 *
 * @param x Argument.
 *
 * @return sqrt(x). +-0 is returned unchanged, sqrt(+Inf) is +Inf,
 *         NaN yields NaN and any negative argument (including -Inf)
 *         yields NaN.
 */
double sqrt(double x)
{
	double z;
	int32_t sign = (int)0x80000000;
	int32_t ix0, s0, q, m, t, i;
	uint32_t r, t1, s1, ix1, q1;

	/*
	 * NOTE(review): EXTRACT_WORDS comes from internal.h; from its use
	 * below, ix0 receives the high word (sign, exponent, top of the
	 * mantissa) and ix1 the low word of x.
	 */
	EXTRACT_WORDS(ix0, ix1, x);

	/* take care of Inf and NaN */
	if ((ix0 & 0x7ff00000) == 0x7ff00000) {
		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
		return x * x + x;
	}

	/* take care of zero */
	if (ix0 <= 0) {
		if (((ix0 & (~sign)) | ix1) == 0)
			return x;		/* sqrt(+-0) = +-0 */
		else if (ix0 < 0)
			return (x - x) / (x - x);	/* sqrt(-ve) = sNaN */
	}

	/* normalize x */
	m = (ix0 >> 20);
	if (m == 0) {				/* subnormal x */
		/* high word empty: pull mantissa bits up from the low word */
		while (ix0 == 0) {
			m -= 21;
			ix0 |= (ix1 >> 11);
			ix1 <<= 21;
		}
		/* shift left until the implicit-one position (bit 20) is set */
		for (i = 0; (ix0 & 0x00100000) == 0; i++)
			ix0 <<= 1;
		m -= i - 1;
		/*
		 * i is 0 when the while loop above already placed a bit at
		 * position 20 (low word had bit 31 set). Shifting a 32-bit
		 * value by 32 is undefined, and there is nothing to carry
		 * over in that case, so skip the transfer.
		 */
		if (i > 0) {
			ix0 |= (ix1 >> (32 - i));
			ix1 <<= i;
		}
	}
	m -= 1023;	/* unbias exponent */
	ix0 = (ix0 & 0x000fffff) | 0x00100000;
	if (m & 1) {	/* odd m, double x to make it even */
		ix0 += ix0 + ((ix1 & sign) >> 31);
		ix1 += ix1;
	}
	m >>= 1;	/* m = [m/2] */

	/* generate sqrt(x) bit by bit */
	ix0 += ix0 + ((ix1 & sign) >> 31);
	ix1 += ix1;
	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
	r = 0x00200000;		/* r = moving bit from right to left */

	/* bits of the result that land in the high word q */
	while (r != 0) {
		t = s0 + r;
		if (t <= ix0) {
			s0 = t + r;
			ix0 -= t;
			q += r;
		}
		/* double the remainder [ix0,ix1] */
		ix0 += ix0 + ((ix1 & sign) >> 31);
		ix1 += ix1;
		r >>= 1;
	}

	/* bits of the result that land in the low word q1 */
	r = sign;
	while (r != 0) {
		t1 = s1 + r;
		t = s0;
		if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
			s1 = t1 + r;
			/* carry from the low word of s into the high word */
			if (((t1 & sign) == sign) && (s1 & sign) == 0)
				s0 += 1;
			ix0 -= t;
			/* borrow for the two-word subtraction */
			if (ix1 < t1)
				ix0 -= 1;
			ix1 -= t1;
			q1 += r;
		}
		/* double the remainder [ix0,ix1] */
		ix0 += ix0 + ((ix1 & sign) >> 31);
		ix1 += ix1;
		r >>= 1;
	}

	/* use floating add to find out rounding direction */
	if ((ix0 | ix1) != 0) {
		z = one - tiny;	/* trigger inexact flag */
		if (z >= one) {
			/* one-tiny did not round down */
			z = one + tiny;
			if (q1 == (uint32_t)0xffffffff) {
				q1 = 0;
				q += 1;
			} else if (z > one) {
				/* one+tiny rounded up: bump the result */
				if (q1 == (uint32_t)0xfffffffe)
					q += 1;
				q1 += 2;
			} else {
				/* round up only if the extra bit is set */
				q1 += (q1 & 1);
			}
		}
	}

	/* drop the extra bit and assemble the result words */
	ix0 = (q >> 1) + 0x3fe00000;
	ix1 = q1 >> 1;
	if ((q & 1) == 1)
		ix1 |= sign;
	/*
	 * Add the halved exponent at bit 20. m is negative for small
	 * arguments and left-shifting a negative value is undefined, so
	 * multiply by 2^20 instead; the result is the same.
	 */
	ix0 += m * 0x00100000;
	INSERT_WORDS(z, ix0, ix1);
	return z;
}
	191
	192	/*
	193	Other methods (use floating-point arithmetic)
	194	-------------
	195	(This is a copy of a drafted paper by Prof W. Kahan
	196	and K.C. Ng, written in May, 1986)
	197	Two algorithms are given here to implement sqrt(x)
	198	(IEEE double precision arithmetic) in software.
	199	Both supply sqrt(x) correctly rounded. The first algorithm (in
	200	Section A) uses newton iterations and involves four divisions.
	201	The second one uses reciproot iterations to avoid division, but
	202	requires more multiplications. Both algorithms need the ability
	203	to chop results of arithmetic operations instead of round them,
	204	and the INEXACT flag to indicate when an arithmetic operation
	205	is executed exactly with no roundoff error, all part of the
	206	standard (IEEE 754-1985). The ability to perform shift, add,
	207	subtract and logical AND operations upon 32-bit words is needed
	208	too, though not part of the standard.
	209	A. sqrt(x) by Newton Iteration
	210	(1) Initial approximation
	211	Let x0 and x1 be the leading and the trailing 32-bit words of
	212	a floating point number x (in IEEE double format) respectively
	213	    1     11                        52                     ...widths
	214	   ------------------------------------------------------
	215	x: |s|    e     |                   f                   |
	216	   ------------------------------------------------------
	217	      msb    lsb msb                                 lsb   ...order
	218
	219	     ------------------------        ------------------------
	220	x0:  |s|   e    |    f1     |    x1: |          f2          |
	221	     ------------------------        ------------------------
	222	By performing shifts and subtracts on x0 and x1 (both regarded
	223	as integers), we obtain an 8-bit approximation of sqrt(x) as
	224	follows.
	225	k := (x0>>1) + 0x1ff80000;
	226	y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
	227	Here k is a 32-bit integer and T1[] is an integer array containing
	228	correction terms. Now magically the floating value of y (y's
	229	leading 32-bit word is y0, the value of its trailing word is 0)
	230	approximates sqrt(x) to almost 8-bit.
	231	Value of T1:
	232	static int T1[32]= {
	233	0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
	234	29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
	235	83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
	236	16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
	237	(2) Iterative refinement
	238	Applying Heron's rule three times to y, we obtain a y that
	239	approximates sqrt(x) to within 1 ulp (Unit in the Last Place):
	240	y := (y+x/y)/2 ... almost 17 sig. bits
	241	y := (y+x/y)/2 ... almost 35 sig. bits
	242	y := y-(y-x/y)/2 ... within 1 ulp
	243	Remark 1.
	244	Another way to improve y to within 1 ulp is:
	245	y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
	246	y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
	247	                     2
	248	                 (x-y )*y
	249	    y := y + 2* ----------    ...within 1 ulp
	250	                   2
	251	                 3y  + x
	252	This formula has one division fewer than the one above; however,
	253	it requires more multiplications and additions. Also x must be
	254	scaled in advance to avoid spurious overflow in evaluating the
	255	expression 3*y*y+x. Hence it is not recommended unless division
	256	is slow. If division is very slow, then one should use the
	257	reciproot algorithm given in section B.
	258	(3) Final adjustment
	259	By twiddling y's last bit it is possible to force y to be
	260	correctly rounded according to the prevailing rounding mode
	261	as follows. Let r and i be copies of the rounding mode and
	262	inexact flag before entering the square root program. Also we
	263	use the expression y+-ulp for the next representable floating
	264	numbers (up and down) of y. Note that y+-ulp = either fixed
	265	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
	266	mode.
	267	I := FALSE; ... reset INEXACT flag I
	268	R := RZ; ... set rounding mode to round-toward-zero
	269	z := x/y; ... chopped quotient, possibly inexact
	270	If(not I) then { ... if the quotient is exact
	271	if(z=y) {
	272	I := i; ... restore inexact flag
	273	R := r; ... restore rounded mode
	274	return sqrt(x):=y.
	275	} else {
	276	z := z - ulp; ... special rounding
	277	}
	278	}
	279	i := TRUE; ... sqrt(x) is inexact
	280	If (r=RN) then z=z+ulp ... rounded-to-nearest
	281	If (r=RP) then { ... round-toward-+inf
	282	y = y+ulp; z=z+ulp;
	283	}
	284	y := y+z; ... chopped sum
	285	y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
	286	I := i; ... restore inexact flag
	287	R := r; ... restore rounded mode
	288	return sqrt(x):=y.
	289
	290	(4) Special cases
	291	Square root of +inf, +-0, or NaN is itself;
	292	Square root of a negative number is NaN with invalid signal.
	293	B. sqrt(x) by Reciproot Iteration
	294	(1) Initial approximation
	295	Let x0 and x1 be the leading and the trailing 32-bit words of
	296	a floating point number x (in IEEE double format) respectively
	297	(see section A). By performing shifts and subtracts on x0 and y0,
	298	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
	299	k := 0x5fe80000 - (x0>>1);
	300	y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
	301	Here k is a 32-bit integer and T2[] is an integer array
	302	containing correction terms. Now magically the floating
	303	value of y (y's leading 32-bit word is y0, the value of
	304	its trailing word y1 is set to zero) approximates 1/sqrt(x)
	305	to almost 7.8-bit.
	306	Value of T2:
	307	static int T2[64]= {
	308	0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
	309	0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
	310	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
	311	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
	312	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
	313	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
	314	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
	315	0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
	316	(2) Iterative refinement
	317	Apply Reciproot iteration three times to y and multiply the
	318	result by x to get an approximation z that matches sqrt(x)
	319	to about 1 ulp. To be exact, we will have
	320	-1ulp < sqrt(x)-z<1.0625ulp.
	321
	322	... set rounding mode to Round-to-nearest
	323	y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
	324	y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
	325	... special arrangement for better accuracy
	326	z := x*y ... 29 bits to sqrt(x), with z*y<1
	327	z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
	328	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
	329	(a) the term z*y in the final iteration is always less than 1;
	330	(b) the error in the final result is biased upward so that
	331	-1 ulp < sqrt(x) - z < 1.0625 ulp
	332	instead of |sqrt(x)-z|<1.03125ulp.
	333	(3) Final adjustment
	334	By twiddling y's last bit it is possible to force y to be
	335	correctly rounded according to the prevailing rounding mode
	336	as follows. Let r and i be copies of the rounding mode and
	337	inexact flag before entering the square root program. Also we
	338	use the expression y+-ulp for the next representable floating
	339	numbers (up and down) of y. Note that y+-ulp = either fixed
	340	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
	341	mode.
	342	R := RZ; ... set rounding mode to round-toward-zero
	343	switch(r) {
	344	case RN: ... round-to-nearest
	345	if(x<= z*(z-ulp)...chopped) z = z - ulp; else
	346	if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
	347	break;
	348	case RZ:case RM: ... round-to-zero or round-to--inf
	349	R:=RP; ... reset rounding mode to round-to-+inf
	350	if(x<z*z ... rounded up) z = z - ulp; else
	351	if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
	352	break;
	353	case RP: ... round-to-+inf
	354	if(x>(z+ulp)*(z+ulp) ...chopped) z = z+2*ulp; else
	355	if(x>z*z ...chopped) z = z+ulp;
	356	break;
	357	}
	358	Remark 3. The above comparisons can be done in fixed point. For
	359	example, to compare x and w=z*z chopped, it suffices to compare
	360	x1 and w1 (the trailing parts of x and w), regarding them as
	361	two's complement integers.
	362	...Is z an exact square root?
	363	To determine whether z is an exact square root of x, let z1 be the
	364	trailing part of z, and also let x0 and x1 be the leading and
	365	trailing parts of x.
	366	If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
	367	I := 1; ... Raise Inexact flag: z is not exact
	368	else {
	369	j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
	370	k := z1 >> 26; ... get z's 25-th and 26-th
	371	fraction bits
	372	I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
	373	}
	374	R:= r ... restore rounded mode
	375	return sqrt(x):=z.
	376	If multiplication is cheaper than the foregoing red tape, the
	377	Inexact flag can be evaluated by
	378	I := i;
	379	I := (z*z!=x) or I.
	380	Note that z*z can overwrite I; this value must be sensed if it is
	381	True.
	382	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
	383	zero.
	384	    --------------------
	385	z1: |        f2        |
	386	    --------------------
	387	    bit 31         bit 0
	388	Furthermore, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
	389	or even of logb(x) have the following relations:
	390	-------------------------------------------------
	391	bit 27,26 of z1 bit 1,0 of x1 logb(x)
	392	-------------------------------------------------
	393	00 00 odd and even
	394	01 01 even
	395	10 10 odd
	396	10 00 even
	397	11 01 even
	398	-------------------------------------------------
	399	(4) Special cases (see (4) of Section A).
	400	*/
	401
	402	/** @}
	403	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: