Context Navigation

sub.c@ 8bcd727

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 8bcd727 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

Property mode set to 100644

File size: 10.5 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Substraction functions.
34	*/
35
36	#include <sftypes.h>
37	#include <sub.h>
38	#include <comparison.h>
39	#include <common.h>
40
41	/**
42	* Subtract two single-precision floats with the same signs.
43	*
44	* @param a First input operand.
45	* @param b Second input operand.
46	* @return Result of substraction.
47	*/
48	float32 subFloat32(float32 a, float32 b)
49	{
50	int expdiff;
51	uint32_t exp1, exp2, frac1, frac2;
52	float32 result;
53
54	result.f = 0;
55
56	expdiff = a.parts.exp - b.parts.exp;
57	if ((expdiff < 0 ) \|\| ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
58	if (isFloat32NaN(b)) {
59	/* TODO: fix SigNaN */
60	if (isFloat32SigNaN(b)) {
61	}
62	return b;
63	}
64
65	if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
66	b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
67	return b;
68	}
69
70	result.parts.sign = !a.parts.sign;
71
72	frac1 = b.parts.fraction;
73	exp1 = b.parts.exp;
74	frac2 = a.parts.fraction;
75	exp2 = a.parts.exp;
76	expdiff *= -1;
77	} else {
78	if (isFloat32NaN(a)) {
79	/* TODO: fix SigNaN */
80	if (isFloat32SigNaN(a) \|\| isFloat32SigNaN(b)) {
81	}
82	return a;
83	}
84
85	if (a.parts.exp == FLOAT32_MAX_EXPONENT) {
86	if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
87	/* inf - inf => nan */
88	/* TODO: fix exception */
89	result.binary = FLOAT32_NAN;
90	return result;
91	}
92	return a;
93	}
94
95	result.parts.sign = a.parts.sign;
96
97	frac1 = a.parts.fraction;
98	exp1 = a.parts.exp;
99	frac2 = b.parts.fraction;
100	exp2 = b.parts.exp;
101	}
102
103	if (exp1 == 0) {
104	/* both are denormalized */
105	result.parts.fraction = frac1 - frac2;
106	if (result.parts.fraction > frac1) {
107	/* TODO: underflow exception */
108	return result;
109	}
110	result.parts.exp = 0;
111	return result;
112	}
113
114	/* add hidden bit */
115	frac1 \|= FLOAT32_HIDDEN_BIT_MASK;
116
117	if (exp2 == 0) {
118	/* denormalized */
119	--expdiff;
120	} else {
121	/* normalized */
122	frac2 \|= FLOAT32_HIDDEN_BIT_MASK;
123	}
124
125	/* create some space for rounding */
126	frac1 <<= 6;
127	frac2 <<= 6;
128
129	if (expdiff > FLOAT32_FRACTION_SIZE + 1) {
130	goto done;
131	}
132
133	frac1 = frac1 - (frac2 >> expdiff);
134
135	done:
136	/* TODO: find first nonzero digit and shift result and detect possibly underflow */
137	while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) {
138	--exp1;
139	frac1 <<= 1;
140	/* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
141	}
142
143	/* rounding - if first bit after fraction is set then round up */
144	frac1 += 0x20;
145
146	if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
147	++exp1;
148	frac1 >>= 1;
149	}
150
151	/* Clear hidden bit and shift */
152	result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
153	result.parts.exp = exp1;
154
155	return result;
156	}
157
158	/**
159	* Subtract two double-precision floats with the same signs.
160	*
161	* @param a First input operand.
162	* @param b Second input operand.
163	* @return Result of substraction.
164	*/
165	float64 subFloat64(float64 a, float64 b)
166	{
167	int expdiff;
168	uint32_t exp1, exp2;
169	uint64_t frac1, frac2;
170	float64 result;
171
172	result.d = 0;
173
174	expdiff = a.parts.exp - b.parts.exp;
175	if ((expdiff < 0 ) \|\| ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
176	if (isFloat64NaN(b)) {
177	/* TODO: fix SigNaN */
178	if (isFloat64SigNaN(b)) {
179	}
180	return b;
181	}
182
183	if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
184	b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
185	return b;
186	}
187
188	result.parts.sign = !a.parts.sign;
189
190	frac1 = b.parts.fraction;
191	exp1 = b.parts.exp;
192	frac2 = a.parts.fraction;
193	exp2 = a.parts.exp;
194	expdiff *= -1;
195	} else {
196	if (isFloat64NaN(a)) {
197	/* TODO: fix SigNaN */
198	if (isFloat64SigNaN(a) \|\| isFloat64SigNaN(b)) {
199	}
200	return a;
201	}
202
203	if (a.parts.exp == FLOAT64_MAX_EXPONENT) {
204	if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
205	/* inf - inf => nan */
206	/* TODO: fix exception */
207	result.binary = FLOAT64_NAN;
208	return result;
209	}
210	return a;
211	}
212
213	result.parts.sign = a.parts.sign;
214
215	frac1 = a.parts.fraction;
216	exp1 = a.parts.exp;
217	frac2 = b.parts.fraction;
218	exp2 = b.parts.exp;
219	}
220
221	if (exp1 == 0) {
222	/* both are denormalized */
223	result.parts.fraction = frac1 - frac2;
224	if (result.parts.fraction > frac1) {
225	/* TODO: underflow exception */
226	return result;
227	}
228	result.parts.exp = 0;
229	return result;
230	}
231
232	/* add hidden bit */
233	frac1 \|= FLOAT64_HIDDEN_BIT_MASK;
234
235	if (exp2 == 0) {
236	/* denormalized */
237	--expdiff;
238	} else {
239	/* normalized */
240	frac2 \|= FLOAT64_HIDDEN_BIT_MASK;
241	}
242
243	/* create some space for rounding */
244	frac1 <<= 6;
245	frac2 <<= 6;
246
247	if (expdiff > FLOAT64_FRACTION_SIZE + 1) {
248	goto done;
249	}
250
251	frac1 = frac1 - (frac2 >> expdiff);
252
253	done:
254	/* TODO: find first nonzero digit and shift result and detect possibly underflow */
255	while ((exp1 > 0) && (!(frac1 & (FLOAT64_HIDDEN_BIT_MASK << 6 )))) {
256	--exp1;
257	frac1 <<= 1;
258	/* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
259	}
260
261	/* rounding - if first bit after fraction is set then round up */
262	frac1 += 0x20;
263
264	if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
265	++exp1;
266	frac1 >>= 1;
267	}
268
269	/* Clear hidden bit and shift */
270	result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK));
271	result.parts.exp = exp1;
272
273	return result;
274	}
275
276	/**
277	* Subtract two quadruple-precision floats with the same signs.
278	*
279	* @param a First input operand.
280	* @param b Second input operand.
281	* @return Result of substraction.
282	*/
283	float128 subFloat128(float128 a, float128 b)
284	{
285	int expdiff;
286	uint32_t exp1, exp2;
287	uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
288	float128 result;
289
290	result.binary.hi = 0;
291	result.binary.lo = 0;
292
293	expdiff = a.parts.exp - b.parts.exp;
294	if ((expdiff < 0 ) \|\| ((expdiff == 0) &&
295	lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) {
296	if (isFloat128NaN(b)) {
297	/* TODO: fix SigNaN */
298	if (isFloat128SigNaN(b)) {
299	}
300	return b;
301	}
302
303	if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
304	b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
305	return b;
306	}
307
308	result.parts.sign = !a.parts.sign;
309
310	frac1_hi = b.parts.frac_hi;
311	frac1_lo = b.parts.frac_lo;
312	exp1 = b.parts.exp;
313	frac2_hi = a.parts.frac_hi;
314	frac2_lo = a.parts.frac_lo;
315	exp2 = a.parts.exp;
316	expdiff *= -1;
317	} else {
318	if (isFloat128NaN(a)) {
319	/* TODO: fix SigNaN */
320	if (isFloat128SigNaN(a) \|\| isFloat128SigNaN(b)) {
321	}
322	return a;
323	}
324
325	if (a.parts.exp == FLOAT128_MAX_EXPONENT) {
326	if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
327	/* inf - inf => nan */
328	/* TODO: fix exception */
329	result.binary.hi = FLOAT128_NAN_HI;
330	result.binary.lo = FLOAT128_NAN_LO;
331	return result;
332	}
333	return a;
334	}
335
336	result.parts.sign = a.parts.sign;
337
338	frac1_hi = a.parts.frac_hi;
339	frac1_lo = a.parts.frac_lo;
340	exp1 = a.parts.exp;
341	frac2_hi = b.parts.frac_hi;
342	frac2_lo = b.parts.frac_lo;
343	exp2 = b.parts.exp;
344	}
345
346	if (exp1 == 0) {
347	/* both are denormalized */
348	sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo);
349	result.parts.frac_hi = tmp_hi;
350	result.parts.frac_lo = tmp_lo;
351	if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) {
352	/* TODO: underflow exception */
353	return result;
354	}
355	result.parts.exp = 0;
356	return result;
357	}
358
359	/* add hidden bit */
360	or128(frac1_hi, frac1_lo,
361	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
362	&frac1_hi, &frac1_lo);
363
364	if (exp2 == 0) {
365	/* denormalized */
366	--expdiff;
367	} else {
368	/* normalized */
369	or128(frac2_hi, frac2_lo,
370	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
371	&frac2_hi, &frac2_lo);
372	}
373
374	/* create some space for rounding */
375	lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
376	lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);
377
378	if (expdiff > FLOAT128_FRACTION_SIZE + 1) {
379	goto done;
380	}
381
382	rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo);
383	sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo);
384
385	done:
386	/* TODO: find first nonzero digit and shift result and detect possibly underflow */
387	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
388	&tmp_hi, &tmp_lo);
389	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
390	while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
391	--exp1;
392	lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
393	/* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
394
395	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
396	&tmp_hi, &tmp_lo);
397	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
398	}
399
400	/* rounding - if first bit after fraction is set then round up */
401	add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo);
402
403	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
404	&tmp_hi, &tmp_lo);
405	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
406	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
407	++exp1;
408	rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
409	}
410
411	/* Clear hidden bit and shift */
412	rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
413	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
414	&tmp_hi, &tmp_lo);
415	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
416	result.parts.frac_hi = tmp_hi;
417	result.parts.frac_lo = tmp_lo;
418
419	result.parts.exp = exp1;
420
421	return result;
422	}
423
424	/** @}
425	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/sub.c@ 8bcd727

Download in other formats: