source: mainline/uspace/lib/softfloat/generic/add.c@ c67aff2

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since c67aff2 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

  • Property mode set to 100644
File size: 9.9 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Addition functions.
34 */
35
36#include <sftypes.h>
37#include <add.h>
38#include <comparison.h>
39#include <common.h>
40
41/**
42 * Add two single-precision floats with the same signs.
43 *
44 * @param a First input operand.
45 * @param b Second input operand.
46 * @return Result of addition.
47 */
48float32 addFloat32(float32 a, float32 b)
49{
50 int expdiff;
51 uint32_t exp1, exp2, frac1, frac2;
52
53 expdiff = a.parts.exp - b.parts.exp;
54 if (expdiff < 0) {
55 if (isFloat32NaN(b)) {
56 /* TODO: fix SigNaN */
57 if (isFloat32SigNaN(b)) {
58 }
59
60 return b;
61 }
62
63 if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
64 return b;
65 }
66
67 frac1 = b.parts.fraction;
68 exp1 = b.parts.exp;
69 frac2 = a.parts.fraction;
70 exp2 = a.parts.exp;
71 expdiff *= -1;
72 } else {
73 if ((isFloat32NaN(a)) || (isFloat32NaN(b))) {
74 /* TODO: fix SigNaN */
75 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) {
76 }
77 return (isFloat32NaN(a) ? a : b);
78 }
79
80 if (a.parts.exp == FLOAT32_MAX_EXPONENT) {
81 return a;
82 }
83
84 frac1 = a.parts.fraction;
85 exp1 = a.parts.exp;
86 frac2 = b.parts.fraction;
87 exp2 = b.parts.exp;
88 }
89
90 if (exp1 == 0) {
91 /* both are denormalized */
92 frac1 += frac2;
93 if (frac1 & FLOAT32_HIDDEN_BIT_MASK ) {
94 /* result is not denormalized */
95 a.parts.exp = 1;
96 }
97 a.parts.fraction = frac1;
98 return a;
99 }
100
101 frac1 |= FLOAT32_HIDDEN_BIT_MASK; /* add hidden bit */
102
103 if (exp2 == 0) {
104 /* second operand is denormalized */
105 --expdiff;
106 } else {
107 /* add hidden bit to second operand */
108 frac2 |= FLOAT32_HIDDEN_BIT_MASK;
109 }
110
111 /* create some space for rounding */
112 frac1 <<= 6;
113 frac2 <<= 6;
114
115 if (expdiff < (FLOAT32_FRACTION_SIZE + 2) ) {
116 frac2 >>= expdiff;
117 frac1 += frac2;
118 } else {
119 a.parts.exp = exp1;
120 a.parts.fraction = (frac1 >> 6) & (~(FLOAT32_HIDDEN_BIT_MASK));
121 return a;
122 }
123
124 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7) ) {
125 ++exp1;
126 frac1 >>= 1;
127 }
128
129 /* rounding - if first bit after fraction is set then round up */
130 frac1 += (0x1 << 5);
131
132 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
133 /* rounding overflow */
134 ++exp1;
135 frac1 >>= 1;
136 }
137
138 if ((exp1 == FLOAT32_MAX_EXPONENT ) || (exp2 > exp1)) {
139 /* overflow - set infinity as result */
140 a.parts.exp = FLOAT32_MAX_EXPONENT;
141 a.parts.fraction = 0;
142 return a;
143 }
144
145 a.parts.exp = exp1;
146
147 /* Clear hidden bit and shift */
148 a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
149 return a;
150}
151
152/**
153 * Add two double-precision floats with the same signs.
154 *
155 * @param a First input operand.
156 * @param b Second input operand.
157 * @return Result of addition.
158 */
159float64 addFloat64(float64 a, float64 b)
160{
161 int expdiff;
162 uint32_t exp1, exp2;
163 uint64_t frac1, frac2;
164
165 expdiff = ((int) a.parts.exp) - b.parts.exp;
166 if (expdiff < 0) {
167 if (isFloat64NaN(b)) {
168 /* TODO: fix SigNaN */
169 if (isFloat64SigNaN(b)) {
170 }
171
172 return b;
173 }
174
175 /* b is infinity and a not */
176 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
177 return b;
178 }
179
180 frac1 = b.parts.fraction;
181 exp1 = b.parts.exp;
182 frac2 = a.parts.fraction;
183 exp2 = a.parts.exp;
184 expdiff *= -1;
185 } else {
186 if (isFloat64NaN(a)) {
187 /* TODO: fix SigNaN */
188 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) {
189 }
190 return a;
191 }
192
193 /* a is infinity and b not */
194 if (a.parts.exp == FLOAT64_MAX_EXPONENT) {
195 return a;
196 }
197
198 frac1 = a.parts.fraction;
199 exp1 = a.parts.exp;
200 frac2 = b.parts.fraction;
201 exp2 = b.parts.exp;
202 }
203
204 if (exp1 == 0) {
205 /* both are denormalized */
206 frac1 += frac2;
207 if (frac1 & FLOAT64_HIDDEN_BIT_MASK) {
208 /* result is not denormalized */
209 a.parts.exp = 1;
210 }
211 a.parts.fraction = frac1;
212 return a;
213 }
214
215 /* add hidden bit - frac1 is sure not denormalized */
216 frac1 |= FLOAT64_HIDDEN_BIT_MASK;
217
218 /* second operand ... */
219 if (exp2 == 0) {
220 /* ... is denormalized */
221 --expdiff;
222 } else {
223 /* is not denormalized */
224 frac2 |= FLOAT64_HIDDEN_BIT_MASK;
225 }
226
227 /* create some space for rounding */
228 frac1 <<= 6;
229 frac2 <<= 6;
230
231 if (expdiff < (FLOAT64_FRACTION_SIZE + 2)) {
232 frac2 >>= expdiff;
233 frac1 += frac2;
234 } else {
235 a.parts.exp = exp1;
236 a.parts.fraction = (frac1 >> 6) & (~(FLOAT64_HIDDEN_BIT_MASK));
237 return a;
238 }
239
240 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
241 ++exp1;
242 frac1 >>= 1;
243 }
244
245 /* rounding - if first bit after fraction is set then round up */
246 frac1 += (0x1 << 5);
247
248 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
249 /* rounding overflow */
250 ++exp1;
251 frac1 >>= 1;
252 }
253
254 if ((exp1 == FLOAT64_MAX_EXPONENT ) || (exp2 > exp1)) {
255 /* overflow - set infinity as result */
256 a.parts.exp = FLOAT64_MAX_EXPONENT;
257 a.parts.fraction = 0;
258 return a;
259 }
260
261 a.parts.exp = exp1;
262 /* Clear hidden bit and shift */
263 a.parts.fraction = ((frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK));
264 return a;
265}
266
267/**
268 * Add two quadruple-precision floats with the same signs.
269 *
270 * @param a First input operand.
271 * @param b Second input operand.
272 * @return Result of addition.
273 */
274float128 addFloat128(float128 a, float128 b)
275{
276 int expdiff;
277 uint32_t exp1, exp2;
278 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
279
280 expdiff = ((int) a.parts.exp) - b.parts.exp;
281 if (expdiff < 0) {
282 if (isFloat128NaN(b)) {
283 /* TODO: fix SigNaN */
284 if (isFloat128SigNaN(b)) {
285 }
286
287 return b;
288 }
289
290 /* b is infinity and a not */
291 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
292 return b;
293 }
294
295 frac1_hi = b.parts.frac_hi;
296 frac1_lo = b.parts.frac_lo;
297 exp1 = b.parts.exp;
298 frac2_hi = a.parts.frac_hi;
299 frac2_lo = a.parts.frac_lo;
300 exp2 = a.parts.exp;
301 expdiff *= -1;
302 } else {
303 if (isFloat128NaN(a)) {
304 /* TODO: fix SigNaN */
305 if (isFloat128SigNaN(a) || isFloat128SigNaN(b)) {
306 }
307 return a;
308 }
309
310 /* a is infinity and b not */
311 if (a.parts.exp == FLOAT128_MAX_EXPONENT) {
312 return a;
313 }
314
315 frac1_hi = a.parts.frac_hi;
316 frac1_lo = a.parts.frac_lo;
317 exp1 = a.parts.exp;
318 frac2_hi = b.parts.frac_hi;
319 frac2_lo = b.parts.frac_lo;
320 exp2 = b.parts.exp;
321 }
322
323 if (exp1 == 0) {
324 /* both are denormalized */
325 add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo);
326
327 and128(frac1_hi, frac1_lo,
328 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
329 &tmp_hi, &tmp_lo);
330 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
331 /* result is not denormalized */
332 a.parts.exp = 1;
333 }
334
335 a.parts.frac_hi = frac1_hi;
336 a.parts.frac_lo = frac1_lo;
337 return a;
338 }
339
340 /* add hidden bit - frac1 is sure not denormalized */
341 or128(frac1_hi, frac1_lo,
342 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
343 &frac1_hi, &frac1_lo);
344
345 /* second operand ... */
346 if (exp2 == 0) {
347 /* ... is denormalized */
348 --expdiff;
349 } else {
350 /* is not denormalized */
351 or128(frac2_hi, frac2_lo,
352 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
353 &frac2_hi, &frac2_lo);
354 }
355
356 /* create some space for rounding */
357 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
358 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);
359
360 if (expdiff < (FLOAT128_FRACTION_SIZE + 2)) {
361 rshift128(frac2_hi, frac2_lo, expdiff, &frac2_hi, &frac2_lo);
362 add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo);
363 } else {
364 a.parts.exp = exp1;
365
366 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
367 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
368 &tmp_hi, &tmp_lo);
369 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
370
371 a.parts.frac_hi = tmp_hi;
372 a.parts.frac_lo = tmp_lo;
373 return a;
374 }
375
376 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
377 &tmp_hi, &tmp_lo);
378 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
379 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
380 ++exp1;
381 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
382 }
383
384 /* rounding - if first bit after fraction is set then round up */
385 add128(frac1_hi, frac1_lo, 0x0ll, 0x1ll << 5, &frac1_hi, &frac1_lo);
386
387 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
388 &tmp_hi, &tmp_lo);
389 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
390 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
391 /* rounding overflow */
392 ++exp1;
393 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
394 }
395
396 if ((exp1 == FLOAT128_MAX_EXPONENT ) || (exp2 > exp1)) {
397 /* overflow - set infinity as result */
398 a.parts.exp = FLOAT64_MAX_EXPONENT;
399 a.parts.frac_hi = 0;
400 a.parts.frac_lo = 0;
401 return a;
402 }
403
404 a.parts.exp = exp1;
405
406 /* Clear hidden bit and shift */
407 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
408 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
409 &tmp_hi, &tmp_lo);
410 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
411
412 a.parts.frac_hi = tmp_hi;
413 a.parts.frac_lo = tmp_lo;
414
415 return a;
416}
417
418/** @}
419 */
Note: See TracBrowser for help on using the repository browser.