Context Navigation

sub.c@ a69cb9a

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since a69cb9a was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend the softfloat library consistently, quadruple precision was added in the same spirit as the existing single- and double-precision code written by Josef Cejka in 2006 - that is, with relaxed standard compliance for corner cases, while the mission-critical code sections are heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just double-precision code adapted to work on 128-bit variables. That means that if there is a TODO, a FIXME or some defect in the single- or double-precision code, it is most likely also present in the quadruple-precision code. Please note that the quadruple-precision functions are currently not tested - testing them is a challenging task in itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or use an existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing, the only code that depends on the quadruple-precision functions (on mips64 and sparc64) is the libposix strtold() function (and its callers, most notably the scanf backend).

Property mode set to 100644

File size: 10.5 KB

Rev	Line
[12c6f2d]	1	/*
[df4ed85]	2	* Copyright (c) 2005 Josef Cejka
[c67aff2]	3	* Copyright (c) 2011 Petr Koupy
[12c6f2d]	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	*
	10	* - Redistributions of source code must retain the above copyright
	11	* notice, this list of conditions and the following disclaimer.
	12	* - Redistributions in binary form must reproduce the above copyright
	13	* notice, this list of conditions and the following disclaimer in the
	14	* documentation and/or other materials provided with the distribution.
	15	* - The name of the author may not be used to endorse or promote products
	16	* derived from this software without specific prior written permission.
	17	*
	18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	28	*/
	29
[750636a]	30	/** @addtogroup softfloat
[846848a6]	31	* @{
	32	*/
[c67aff2]	33	/** @file Subtraction functions.
[846848a6]	34	*/
	35
[750636a]	36	#include <sftypes.h>
	37	#include <sub.h>
	38	#include <comparison.h>
[c67aff2]	39	#include <common.h>
[12c6f2d]	40
[c67aff2]	41	/**
	42	* Subtract two single-precision floats with the same signs.
	43	*
	44	* @param a First input operand.
	45	* @param b Second input operand.
	46	* @return Result of substraction.
[12c6f2d]	47	*/
	48	float32 subFloat32(float32 a, float32 b)
	49	{
	50	int expdiff;
[aa59fa0]	51	uint32_t exp1, exp2, frac1, frac2;
[12c6f2d]	52	float32 result;
	53
	54	result.f = 0;
	55
[a96c570]	56	expdiff = a.parts.exp - b.parts.exp;
[1266543]	57	if ((expdiff < 0 ) \|\| ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
[12c6f2d]	58	if (isFloat32NaN(b)) {
[1266543]	59	/* TODO: fix SigNaN */
[12c6f2d]	60	if (isFloat32SigNaN(b)) {
[c67aff2]	61	}
[12c6f2d]	62	return b;
[c67aff2]	63	}
[12c6f2d]	64
[a96c570]	65	if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
	66	b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
[12c6f2d]	67	return b;
	68	}
	69
	70	result.parts.sign = !a.parts.sign;
	71
[1266543]	72	frac1 = b.parts.fraction;
[a96c570]	73	exp1 = b.parts.exp;
[1266543]	74	frac2 = a.parts.fraction;
[a96c570]	75	exp2 = a.parts.exp;
	76	expdiff *= -1;
[12c6f2d]	77	} else {
	78	if (isFloat32NaN(a)) {
[1266543]	79	/* TODO: fix SigNaN */
[a96c570]	80	if (isFloat32SigNaN(a) \|\| isFloat32SigNaN(b)) {
[c67aff2]	81	}
[12c6f2d]	82	return a;
[c67aff2]	83	}
[12c6f2d]	84
[a96c570]	85	if (a.parts.exp == FLOAT32_MAX_EXPONENT) {
	86	if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
[12c6f2d]	87	/* inf - inf => nan */
[1266543]	88	/* TODO: fix exception */
[12c6f2d]	89	result.binary = FLOAT32_NAN;
	90	return result;
[c67aff2]	91	}
[12c6f2d]	92	return a;
	93	}
	94
	95	result.parts.sign = a.parts.sign;
	96
[1266543]	97	frac1 = a.parts.fraction;
[a96c570]	98	exp1 = a.parts.exp;
[1266543]	99	frac2 = b.parts.fraction;
[a96c570]	100	exp2 = b.parts.exp;
[c67aff2]	101	}
[12c6f2d]	102
[a96c570]	103	if (exp1 == 0) {
[1266543]	104	/* both are denormalized */
[c67aff2]	105	result.parts.fraction = frac1 - frac2;
[1266543]	106	if (result.parts.fraction > frac1) {
	107	/* TODO: underflow exception */
[12c6f2d]	108	return result;
[c67aff2]	109	}
[a96c570]	110	result.parts.exp = 0;
[12c6f2d]	111	return result;
[c67aff2]	112	}
[a96c570]	113
	114	/* add hidden bit */
[1266543]	115	frac1 \|= FLOAT32_HIDDEN_BIT_MASK;
[a96c570]	116
	117	if (exp2 == 0) {
	118	/* denormalized */
	119	--expdiff;
	120	} else {
	121	/* normalized */
[1266543]	122	frac2 \|= FLOAT32_HIDDEN_BIT_MASK;
[c67aff2]	123	}
[a96c570]	124
	125	/* create some space for rounding */
[1266543]	126	frac1 <<= 6;
	127	frac2 <<= 6;
[a96c570]	128
[1266543]	129	if (expdiff > FLOAT32_FRACTION_SIZE + 1) {
[c67aff2]	130	goto done;
	131	}
[12c6f2d]	132
[1266543]	133	frac1 = frac1 - (frac2 >> expdiff);
[c67aff2]	134
[a96c570]	135	done:
[1266543]	136	/* TODO: find first nonzero digit and shift result and detect possibly underflow */
	137	while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) {
[a96c570]	138	--exp1;
[1266543]	139	frac1 <<= 1;
[c67aff2]	140	/* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
	141	}
[12c6f2d]	142
[1266543]	143	/* rounding - if first bit after fraction is set then round up */
	144	frac1 += 0x20;
[a96c570]	145
[1266543]	146	if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
[a96c570]	147	++exp1;
[1266543]	148	frac1 >>= 1;
[c67aff2]	149	}
[a96c570]	150
[c67aff2]	151	/* Clear hidden bit and shift */
[1266543]	152	result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
[a96c570]	153	result.parts.exp = exp1;
	154
	155	return result;
	156	}
	157
[c67aff2]	158	/**
	159	* Subtract two double-precision floats with the same signs.
	160	*
	161	* @param a First input operand.
	162	* @param b Second input operand.
	163	* @return Result of substraction.
[a96c570]	164	*/
	165	float64 subFloat64(float64 a, float64 b)
	166	{
	167	int expdiff;
[aa59fa0]	168	uint32_t exp1, exp2;
	169	uint64_t frac1, frac2;
[a96c570]	170	float64 result;
	171
	172	result.d = 0;
	173
	174	expdiff = a.parts.exp - b.parts.exp;
[1266543]	175	if ((expdiff < 0 ) \|\| ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
[a96c570]	176	if (isFloat64NaN(b)) {
[1266543]	177	/* TODO: fix SigNaN */
[a96c570]	178	if (isFloat64SigNaN(b)) {
[c67aff2]	179	}
[a96c570]	180	return b;
[c67aff2]	181	}
[a96c570]	182
	183	if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
	184	b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
	185	return b;
	186	}
	187
	188	result.parts.sign = !a.parts.sign;
	189
[1266543]	190	frac1 = b.parts.fraction;
[a96c570]	191	exp1 = b.parts.exp;
[1266543]	192	frac2 = a.parts.fraction;
[a96c570]	193	exp2 = a.parts.exp;
	194	expdiff *= -1;
	195	} else {
	196	if (isFloat64NaN(a)) {
[1266543]	197	/* TODO: fix SigNaN */
[a96c570]	198	if (isFloat64SigNaN(a) \|\| isFloat64SigNaN(b)) {
[c67aff2]	199	}
[a96c570]	200	return a;
[c67aff2]	201	}
[a96c570]	202
	203	if (a.parts.exp == FLOAT64_MAX_EXPONENT) {
	204	if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
	205	/* inf - inf => nan */
[1266543]	206	/* TODO: fix exception */
[a96c570]	207	result.binary = FLOAT64_NAN;
	208	return result;
[c67aff2]	209	}
[a96c570]	210	return a;
	211	}
	212
	213	result.parts.sign = a.parts.sign;
	214
[1266543]	215	frac1 = a.parts.fraction;
[a96c570]	216	exp1 = a.parts.exp;
[1266543]	217	frac2 = b.parts.fraction;
[a96c570]	218	exp2 = b.parts.exp;
[c67aff2]	219	}
[12c6f2d]	220
[a96c570]	221	if (exp1 == 0) {
[1266543]	222	/* both are denormalized */
	223	result.parts.fraction = frac1 - frac2;
	224	if (result.parts.fraction > frac1) {
	225	/* TODO: underflow exception */
[a96c570]	226	return result;
[c67aff2]	227	}
[a96c570]	228	result.parts.exp = 0;
	229	return result;
[c67aff2]	230	}
[a96c570]	231
	232	/* add hidden bit */
[1266543]	233	frac1 \|= FLOAT64_HIDDEN_BIT_MASK;
[12c6f2d]	234
[a96c570]	235	if (exp2 == 0) {
	236	/* denormalized */
[12c6f2d]	237	--expdiff;
	238	} else {
[a96c570]	239	/* normalized */
[1266543]	240	frac2 \|= FLOAT64_HIDDEN_BIT_MASK;
[c67aff2]	241	}
[12c6f2d]	242
[a96c570]	243	/* create some space for rounding */
[1266543]	244	frac1 <<= 6;
	245	frac2 <<= 6;
[a96c570]	246
[1266543]	247	if (expdiff > FLOAT64_FRACTION_SIZE + 1) {
[c67aff2]	248	goto done;
	249	}
[12c6f2d]	250
[1266543]	251	frac1 = frac1 - (frac2 >> expdiff);
[c67aff2]	252
[12c6f2d]	253	done:
[1266543]	254	/* TODO: find first nonzero digit and shift result and detect possibly underflow */
	255	while ((exp1 > 0) && (!(frac1 & (FLOAT64_HIDDEN_BIT_MASK << 6 )))) {
[a96c570]	256	--exp1;
[1266543]	257	frac1 <<= 1;
[c67aff2]	258	/* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
	259	}
[12c6f2d]	260
[1266543]	261	/* rounding - if first bit after fraction is set then round up */
	262	frac1 += 0x20;
[12c6f2d]	263
[1266543]	264	if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
[12c6f2d]	265	++exp1;
[1266543]	266	frac1 >>= 1;
[c67aff2]	267	}
[12c6f2d]	268
[c67aff2]	269	/* Clear hidden bit and shift */
[1266543]	270	result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK));
[12c6f2d]	271	result.parts.exp = exp1;
	272
	273	return result;
[a96c570]	274	}
[12c6f2d]	275
[c67aff2]	276	/**
	277	* Subtract two quadruple-precision floats with the same signs.
	278	*
	279	* @param a First input operand.
	280	* @param b Second input operand.
	281	* @return Result of substraction.
	282	*/
	283	float128 subFloat128(float128 a, float128 b)
	284	{
	285	int expdiff;
	286	uint32_t exp1, exp2;
	287	uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
	288	float128 result;
	289
	290	result.binary.hi = 0;
	291	result.binary.lo = 0;
	292
	293	expdiff = a.parts.exp - b.parts.exp;
	294	if ((expdiff < 0 ) \|\| ((expdiff == 0) &&
	295	lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) {
	296	if (isFloat128NaN(b)) {
	297	/* TODO: fix SigNaN */
	298	if (isFloat128SigNaN(b)) {
	299	}
	300	return b;
	301	}
	302
	303	if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
	304	b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
	305	return b;
	306	}
	307
	308	result.parts.sign = !a.parts.sign;
	309
	310	frac1_hi = b.parts.frac_hi;
	311	frac1_lo = b.parts.frac_lo;
	312	exp1 = b.parts.exp;
	313	frac2_hi = a.parts.frac_hi;
	314	frac2_lo = a.parts.frac_lo;
	315	exp2 = a.parts.exp;
	316	expdiff *= -1;
	317	} else {
	318	if (isFloat128NaN(a)) {
	319	/* TODO: fix SigNaN */
	320	if (isFloat128SigNaN(a) \|\| isFloat128SigNaN(b)) {
	321	}
	322	return a;
	323	}
	324
	325	if (a.parts.exp == FLOAT128_MAX_EXPONENT) {
	326	if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
	327	/* inf - inf => nan */
	328	/* TODO: fix exception */
	329	result.binary.hi = FLOAT128_NAN_HI;
	330	result.binary.lo = FLOAT128_NAN_LO;
	331	return result;
	332	}
	333	return a;
	334	}
	335
	336	result.parts.sign = a.parts.sign;
	337
	338	frac1_hi = a.parts.frac_hi;
	339	frac1_lo = a.parts.frac_lo;
	340	exp1 = a.parts.exp;
	341	frac2_hi = b.parts.frac_hi;
	342	frac2_lo = b.parts.frac_lo;
	343	exp2 = b.parts.exp;
	344	}
	345
	346	if (exp1 == 0) {
	347	/* both are denormalized */
	348	sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo);
	349	result.parts.frac_hi = tmp_hi;
	350	result.parts.frac_lo = tmp_lo;
	351	if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) {
	352	/* TODO: underflow exception */
	353	return result;
	354	}
	355	result.parts.exp = 0;
	356	return result;
	357	}
	358
	359	/* add hidden bit */
	360	or128(frac1_hi, frac1_lo,
	361	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	362	&frac1_hi, &frac1_lo);
	363
	364	if (exp2 == 0) {
	365	/* denormalized */
	366	--expdiff;
	367	} else {
	368	/* normalized */
	369	or128(frac2_hi, frac2_lo,
	370	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	371	&frac2_hi, &frac2_lo);
	372	}
	373
	374	/* create some space for rounding */
	375	lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
	376	lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);
	377
	378	if (expdiff > FLOAT128_FRACTION_SIZE + 1) {
	379	goto done;
	380	}
	381
	382	rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo);
	383	sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo);
	384
	385	done:
	386	/* TODO: find first nonzero digit and shift result and detect possibly underflow */
	387	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
	388	&tmp_hi, &tmp_lo);
	389	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	390	while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
	391	--exp1;
	392	lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
	393	/* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
	394
	395	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
	396	&tmp_hi, &tmp_lo);
	397	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	398	}
	399
	400	/* rounding - if first bit after fraction is set then round up */
	401	add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo);
	402
	403	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
	404	&tmp_hi, &tmp_lo);
	405	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	406	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
	407	++exp1;
	408	rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
	409	}
	410
	411	/* Clear hidden bit and shift */
	412	rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
	413	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	414	&tmp_hi, &tmp_lo);
	415	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	416	result.parts.frac_hi = tmp_hi;
	417	result.parts.frac_lo = tmp_lo;
	418
	419	result.parts.exp = exp1;
	420
	421	return result;
	422	}
	423
[750636a]	424	/** @}
[846848a6]	425	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/sub.c@ a69cb9a

Download in other formats: