Context Navigation

add.c@ c67aff2

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since c67aff2 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

Property mode set to 100644

File size: 9.9 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Addition functions.
34	*/
35
36	#include <sftypes.h>
37	#include <add.h>
38	#include <comparison.h>
39	#include <common.h>
40
41	/**
42	* Add two single-precision floats with the same signs.
43	*
44	* @param a First input operand.
45	* @param b Second input operand.
46	* @return Result of addition.
47	*/
48	float32 addFloat32(float32 a, float32 b)
49	{
50	int expdiff;
51	uint32_t exp1, exp2, frac1, frac2;
52
53	expdiff = a.parts.exp - b.parts.exp;
54	if (expdiff < 0) {
55	if (isFloat32NaN(b)) {
56	/* TODO: fix SigNaN */
57	if (isFloat32SigNaN(b)) {
58	}
59
60	return b;
61	}
62
63	if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
64	return b;
65	}
66
67	frac1 = b.parts.fraction;
68	exp1 = b.parts.exp;
69	frac2 = a.parts.fraction;
70	exp2 = a.parts.exp;
71	expdiff *= -1;
72	} else {
73	if ((isFloat32NaN(a)) \|\| (isFloat32NaN(b))) {
74	/* TODO: fix SigNaN */
75	if (isFloat32SigNaN(a) \|\| isFloat32SigNaN(b)) {
76	}
77	return (isFloat32NaN(a) ? a : b);
78	}
79
80	if (a.parts.exp == FLOAT32_MAX_EXPONENT) {
81	return a;
82	}
83
84	frac1 = a.parts.fraction;
85	exp1 = a.parts.exp;
86	frac2 = b.parts.fraction;
87	exp2 = b.parts.exp;
88	}
89
90	if (exp1 == 0) {
91	/* both are denormalized */
92	frac1 += frac2;
93	if (frac1 & FLOAT32_HIDDEN_BIT_MASK ) {
94	/* result is not denormalized */
95	a.parts.exp = 1;
96	}
97	a.parts.fraction = frac1;
98	return a;
99	}
100
101	frac1 \|= FLOAT32_HIDDEN_BIT_MASK; /* add hidden bit */
102
103	if (exp2 == 0) {
104	/* second operand is denormalized */
105	--expdiff;
106	} else {
107	/* add hidden bit to second operand */
108	frac2 \|= FLOAT32_HIDDEN_BIT_MASK;
109	}
110
111	/* create some space for rounding */
112	frac1 <<= 6;
113	frac2 <<= 6;
114
115	if (expdiff < (FLOAT32_FRACTION_SIZE + 2) ) {
116	frac2 >>= expdiff;
117	frac1 += frac2;
118	} else {
119	a.parts.exp = exp1;
120	a.parts.fraction = (frac1 >> 6) & (~(FLOAT32_HIDDEN_BIT_MASK));
121	return a;
122	}
123
124	if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7) ) {
125	++exp1;
126	frac1 >>= 1;
127	}
128
129	/* rounding - if first bit after fraction is set then round up */
130	frac1 += (0x1 << 5);
131
132	if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
133	/* rounding overflow */
134	++exp1;
135	frac1 >>= 1;
136	}
137
138	if ((exp1 == FLOAT32_MAX_EXPONENT ) \|\| (exp2 > exp1)) {
139	/* overflow - set infinity as result */
140	a.parts.exp = FLOAT32_MAX_EXPONENT;
141	a.parts.fraction = 0;
142	return a;
143	}
144
145	a.parts.exp = exp1;
146
147	/* Clear hidden bit and shift */
148	a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
149	return a;
150	}
151
152	/**
153	* Add two double-precision floats with the same signs.
154	*
155	* @param a First input operand.
156	* @param b Second input operand.
157	* @return Result of addition.
158	*/
159	float64 addFloat64(float64 a, float64 b)
160	{
161	int expdiff;
162	uint32_t exp1, exp2;
163	uint64_t frac1, frac2;
164
165	expdiff = ((int) a.parts.exp) - b.parts.exp;
166	if (expdiff < 0) {
167	if (isFloat64NaN(b)) {
168	/* TODO: fix SigNaN */
169	if (isFloat64SigNaN(b)) {
170	}
171
172	return b;
173	}
174
175	/* b is infinity and a not */
176	if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
177	return b;
178	}
179
180	frac1 = b.parts.fraction;
181	exp1 = b.parts.exp;
182	frac2 = a.parts.fraction;
183	exp2 = a.parts.exp;
184	expdiff *= -1;
185	} else {
186	if (isFloat64NaN(a)) {
187	/* TODO: fix SigNaN */
188	if (isFloat64SigNaN(a) \|\| isFloat64SigNaN(b)) {
189	}
190	return a;
191	}
192
193	/* a is infinity and b not */
194	if (a.parts.exp == FLOAT64_MAX_EXPONENT) {
195	return a;
196	}
197
198	frac1 = a.parts.fraction;
199	exp1 = a.parts.exp;
200	frac2 = b.parts.fraction;
201	exp2 = b.parts.exp;
202	}
203
204	if (exp1 == 0) {
205	/* both are denormalized */
206	frac1 += frac2;
207	if (frac1 & FLOAT64_HIDDEN_BIT_MASK) {
208	/* result is not denormalized */
209	a.parts.exp = 1;
210	}
211	a.parts.fraction = frac1;
212	return a;
213	}
214
215	/* add hidden bit - frac1 is sure not denormalized */
216	frac1 \|= FLOAT64_HIDDEN_BIT_MASK;
217
218	/* second operand ... */
219	if (exp2 == 0) {
220	/* ... is denormalized */
221	--expdiff;
222	} else {
223	/* is not denormalized */
224	frac2 \|= FLOAT64_HIDDEN_BIT_MASK;
225	}
226
227	/* create some space for rounding */
228	frac1 <<= 6;
229	frac2 <<= 6;
230
231	if (expdiff < (FLOAT64_FRACTION_SIZE + 2)) {
232	frac2 >>= expdiff;
233	frac1 += frac2;
234	} else {
235	a.parts.exp = exp1;
236	a.parts.fraction = (frac1 >> 6) & (~(FLOAT64_HIDDEN_BIT_MASK));
237	return a;
238	}
239
240	if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
241	++exp1;
242	frac1 >>= 1;
243	}
244
245	/* rounding - if first bit after fraction is set then round up */
246	frac1 += (0x1 << 5);
247
248	if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
249	/* rounding overflow */
250	++exp1;
251	frac1 >>= 1;
252	}
253
254	if ((exp1 == FLOAT64_MAX_EXPONENT ) \|\| (exp2 > exp1)) {
255	/* overflow - set infinity as result */
256	a.parts.exp = FLOAT64_MAX_EXPONENT;
257	a.parts.fraction = 0;
258	return a;
259	}
260
261	a.parts.exp = exp1;
262	/* Clear hidden bit and shift */
263	a.parts.fraction = ((frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK));
264	return a;
265	}
266
267	/**
268	* Add two quadruple-precision floats with the same signs.
269	*
270	* @param a First input operand.
271	* @param b Second input operand.
272	* @return Result of addition.
273	*/
274	float128 addFloat128(float128 a, float128 b)
275	{
276	int expdiff;
277	uint32_t exp1, exp2;
278	uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
279
280	expdiff = ((int) a.parts.exp) - b.parts.exp;
281	if (expdiff < 0) {
282	if (isFloat128NaN(b)) {
283	/* TODO: fix SigNaN */
284	if (isFloat128SigNaN(b)) {
285	}
286
287	return b;
288	}
289
290	/* b is infinity and a not */
291	if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
292	return b;
293	}
294
295	frac1_hi = b.parts.frac_hi;
296	frac1_lo = b.parts.frac_lo;
297	exp1 = b.parts.exp;
298	frac2_hi = a.parts.frac_hi;
299	frac2_lo = a.parts.frac_lo;
300	exp2 = a.parts.exp;
301	expdiff *= -1;
302	} else {
303	if (isFloat128NaN(a)) {
304	/* TODO: fix SigNaN */
305	if (isFloat128SigNaN(a) \|\| isFloat128SigNaN(b)) {
306	}
307	return a;
308	}
309
310	/* a is infinity and b not */
311	if (a.parts.exp == FLOAT128_MAX_EXPONENT) {
312	return a;
313	}
314
315	frac1_hi = a.parts.frac_hi;
316	frac1_lo = a.parts.frac_lo;
317	exp1 = a.parts.exp;
318	frac2_hi = b.parts.frac_hi;
319	frac2_lo = b.parts.frac_lo;
320	exp2 = b.parts.exp;
321	}
322
323	if (exp1 == 0) {
324	/* both are denormalized */
325	add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo);
326
327	and128(frac1_hi, frac1_lo,
328	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
329	&tmp_hi, &tmp_lo);
330	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
331	/* result is not denormalized */
332	a.parts.exp = 1;
333	}
334
335	a.parts.frac_hi = frac1_hi;
336	a.parts.frac_lo = frac1_lo;
337	return a;
338	}
339
340	/* add hidden bit - frac1 is sure not denormalized */
341	or128(frac1_hi, frac1_lo,
342	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
343	&frac1_hi, &frac1_lo);
344
345	/* second operand ... */
346	if (exp2 == 0) {
347	/* ... is denormalized */
348	--expdiff;
349	} else {
350	/* is not denormalized */
351	or128(frac2_hi, frac2_lo,
352	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
353	&frac2_hi, &frac2_lo);
354	}
355
356	/* create some space for rounding */
357	lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
358	lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);
359
360	if (expdiff < (FLOAT128_FRACTION_SIZE + 2)) {
361	rshift128(frac2_hi, frac2_lo, expdiff, &frac2_hi, &frac2_lo);
362	add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo);
363	} else {
364	a.parts.exp = exp1;
365
366	rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
367	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
368	&tmp_hi, &tmp_lo);
369	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
370
371	a.parts.frac_hi = tmp_hi;
372	a.parts.frac_lo = tmp_lo;
373	return a;
374	}
375
376	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
377	&tmp_hi, &tmp_lo);
378	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
379	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
380	++exp1;
381	rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
382	}
383
384	/* rounding - if first bit after fraction is set then round up */
385	add128(frac1_hi, frac1_lo, 0x0ll, 0x1ll << 5, &frac1_hi, &frac1_lo);
386
387	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
388	&tmp_hi, &tmp_lo);
389	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
390	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
391	/* rounding overflow */
392	++exp1;
393	rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
394	}
395
396	if ((exp1 == FLOAT128_MAX_EXPONENT ) \|\| (exp2 > exp1)) {
397	/* overflow - set infinity as result */
398	a.parts.exp = FLOAT64_MAX_EXPONENT;
399	a.parts.frac_hi = 0;
400	a.parts.frac_lo = 0;
401	return a;
402	}
403
404	a.parts.exp = exp1;
405
406	/* Clear hidden bit and shift */
407	rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
408	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
409	&tmp_hi, &tmp_lo);
410	and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
411
412	a.parts.frac_hi = tmp_hi;
413	a.parts.frac_lo = tmp_lo;
414
415	return a;
416	}
417
418	/** @}
419	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/add.c@ c67aff2

Download in other formats: