source: mainline/uspace/lib/softfloat/generic/sub.c@ 8bcd727

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 8bcd727 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

  • Property mode set to 100644
File size: 10.5 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Substraction functions.
34 */
35
36#include <sftypes.h>
37#include <sub.h>
38#include <comparison.h>
39#include <common.h>
40
41/**
42 * Subtract two single-precision floats with the same signs.
43 *
44 * @param a First input operand.
45 * @param b Second input operand.
46 * @return Result of substraction.
47 */
48float32 subFloat32(float32 a, float32 b)
49{
50 int expdiff;
51 uint32_t exp1, exp2, frac1, frac2;
52 float32 result;
53
54 result.f = 0;
55
56 expdiff = a.parts.exp - b.parts.exp;
57 if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
58 if (isFloat32NaN(b)) {
59 /* TODO: fix SigNaN */
60 if (isFloat32SigNaN(b)) {
61 }
62 return b;
63 }
64
65 if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
66 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
67 return b;
68 }
69
70 result.parts.sign = !a.parts.sign;
71
72 frac1 = b.parts.fraction;
73 exp1 = b.parts.exp;
74 frac2 = a.parts.fraction;
75 exp2 = a.parts.exp;
76 expdiff *= -1;
77 } else {
78 if (isFloat32NaN(a)) {
79 /* TODO: fix SigNaN */
80 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) {
81 }
82 return a;
83 }
84
85 if (a.parts.exp == FLOAT32_MAX_EXPONENT) {
86 if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
87 /* inf - inf => nan */
88 /* TODO: fix exception */
89 result.binary = FLOAT32_NAN;
90 return result;
91 }
92 return a;
93 }
94
95 result.parts.sign = a.parts.sign;
96
97 frac1 = a.parts.fraction;
98 exp1 = a.parts.exp;
99 frac2 = b.parts.fraction;
100 exp2 = b.parts.exp;
101 }
102
103 if (exp1 == 0) {
104 /* both are denormalized */
105 result.parts.fraction = frac1 - frac2;
106 if (result.parts.fraction > frac1) {
107 /* TODO: underflow exception */
108 return result;
109 }
110 result.parts.exp = 0;
111 return result;
112 }
113
114 /* add hidden bit */
115 frac1 |= FLOAT32_HIDDEN_BIT_MASK;
116
117 if (exp2 == 0) {
118 /* denormalized */
119 --expdiff;
120 } else {
121 /* normalized */
122 frac2 |= FLOAT32_HIDDEN_BIT_MASK;
123 }
124
125 /* create some space for rounding */
126 frac1 <<= 6;
127 frac2 <<= 6;
128
129 if (expdiff > FLOAT32_FRACTION_SIZE + 1) {
130 goto done;
131 }
132
133 frac1 = frac1 - (frac2 >> expdiff);
134
135done:
136 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
137 while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) {
138 --exp1;
139 frac1 <<= 1;
140 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
141 }
142
143 /* rounding - if first bit after fraction is set then round up */
144 frac1 += 0x20;
145
146 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
147 ++exp1;
148 frac1 >>= 1;
149 }
150
151 /* Clear hidden bit and shift */
152 result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
153 result.parts.exp = exp1;
154
155 return result;
156}
157
158/**
159 * Subtract two double-precision floats with the same signs.
160 *
161 * @param a First input operand.
162 * @param b Second input operand.
163 * @return Result of substraction.
164 */
165float64 subFloat64(float64 a, float64 b)
166{
167 int expdiff;
168 uint32_t exp1, exp2;
169 uint64_t frac1, frac2;
170 float64 result;
171
172 result.d = 0;
173
174 expdiff = a.parts.exp - b.parts.exp;
175 if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
176 if (isFloat64NaN(b)) {
177 /* TODO: fix SigNaN */
178 if (isFloat64SigNaN(b)) {
179 }
180 return b;
181 }
182
183 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
184 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
185 return b;
186 }
187
188 result.parts.sign = !a.parts.sign;
189
190 frac1 = b.parts.fraction;
191 exp1 = b.parts.exp;
192 frac2 = a.parts.fraction;
193 exp2 = a.parts.exp;
194 expdiff *= -1;
195 } else {
196 if (isFloat64NaN(a)) {
197 /* TODO: fix SigNaN */
198 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) {
199 }
200 return a;
201 }
202
203 if (a.parts.exp == FLOAT64_MAX_EXPONENT) {
204 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
205 /* inf - inf => nan */
206 /* TODO: fix exception */
207 result.binary = FLOAT64_NAN;
208 return result;
209 }
210 return a;
211 }
212
213 result.parts.sign = a.parts.sign;
214
215 frac1 = a.parts.fraction;
216 exp1 = a.parts.exp;
217 frac2 = b.parts.fraction;
218 exp2 = b.parts.exp;
219 }
220
221 if (exp1 == 0) {
222 /* both are denormalized */
223 result.parts.fraction = frac1 - frac2;
224 if (result.parts.fraction > frac1) {
225 /* TODO: underflow exception */
226 return result;
227 }
228 result.parts.exp = 0;
229 return result;
230 }
231
232 /* add hidden bit */
233 frac1 |= FLOAT64_HIDDEN_BIT_MASK;
234
235 if (exp2 == 0) {
236 /* denormalized */
237 --expdiff;
238 } else {
239 /* normalized */
240 frac2 |= FLOAT64_HIDDEN_BIT_MASK;
241 }
242
243 /* create some space for rounding */
244 frac1 <<= 6;
245 frac2 <<= 6;
246
247 if (expdiff > FLOAT64_FRACTION_SIZE + 1) {
248 goto done;
249 }
250
251 frac1 = frac1 - (frac2 >> expdiff);
252
253done:
254 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
255 while ((exp1 > 0) && (!(frac1 & (FLOAT64_HIDDEN_BIT_MASK << 6 )))) {
256 --exp1;
257 frac1 <<= 1;
258 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
259 }
260
261 /* rounding - if first bit after fraction is set then round up */
262 frac1 += 0x20;
263
264 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
265 ++exp1;
266 frac1 >>= 1;
267 }
268
269 /* Clear hidden bit and shift */
270 result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK));
271 result.parts.exp = exp1;
272
273 return result;
274}
275
276/**
277 * Subtract two quadruple-precision floats with the same signs.
278 *
279 * @param a First input operand.
280 * @param b Second input operand.
281 * @return Result of substraction.
282 */
283float128 subFloat128(float128 a, float128 b)
284{
285 int expdiff;
286 uint32_t exp1, exp2;
287 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
288 float128 result;
289
290 result.binary.hi = 0;
291 result.binary.lo = 0;
292
293 expdiff = a.parts.exp - b.parts.exp;
294 if ((expdiff < 0 ) || ((expdiff == 0) &&
295 lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) {
296 if (isFloat128NaN(b)) {
297 /* TODO: fix SigNaN */
298 if (isFloat128SigNaN(b)) {
299 }
300 return b;
301 }
302
303 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
304 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */
305 return b;
306 }
307
308 result.parts.sign = !a.parts.sign;
309
310 frac1_hi = b.parts.frac_hi;
311 frac1_lo = b.parts.frac_lo;
312 exp1 = b.parts.exp;
313 frac2_hi = a.parts.frac_hi;
314 frac2_lo = a.parts.frac_lo;
315 exp2 = a.parts.exp;
316 expdiff *= -1;
317 } else {
318 if (isFloat128NaN(a)) {
319 /* TODO: fix SigNaN */
320 if (isFloat128SigNaN(a) || isFloat128SigNaN(b)) {
321 }
322 return a;
323 }
324
325 if (a.parts.exp == FLOAT128_MAX_EXPONENT) {
326 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
327 /* inf - inf => nan */
328 /* TODO: fix exception */
329 result.binary.hi = FLOAT128_NAN_HI;
330 result.binary.lo = FLOAT128_NAN_LO;
331 return result;
332 }
333 return a;
334 }
335
336 result.parts.sign = a.parts.sign;
337
338 frac1_hi = a.parts.frac_hi;
339 frac1_lo = a.parts.frac_lo;
340 exp1 = a.parts.exp;
341 frac2_hi = b.parts.frac_hi;
342 frac2_lo = b.parts.frac_lo;
343 exp2 = b.parts.exp;
344 }
345
346 if (exp1 == 0) {
347 /* both are denormalized */
348 sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo);
349 result.parts.frac_hi = tmp_hi;
350 result.parts.frac_lo = tmp_lo;
351 if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) {
352 /* TODO: underflow exception */
353 return result;
354 }
355 result.parts.exp = 0;
356 return result;
357 }
358
359 /* add hidden bit */
360 or128(frac1_hi, frac1_lo,
361 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
362 &frac1_hi, &frac1_lo);
363
364 if (exp2 == 0) {
365 /* denormalized */
366 --expdiff;
367 } else {
368 /* normalized */
369 or128(frac2_hi, frac2_lo,
370 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
371 &frac2_hi, &frac2_lo);
372 }
373
374 /* create some space for rounding */
375 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
376 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);
377
378 if (expdiff > FLOAT128_FRACTION_SIZE + 1) {
379 goto done;
380 }
381
382 rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo);
383 sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo);
384
385done:
386 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
387 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
388 &tmp_hi, &tmp_lo);
389 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
390 while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
391 --exp1;
392 lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
393 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
394
395 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
396 &tmp_hi, &tmp_lo);
397 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
398 }
399
400 /* rounding - if first bit after fraction is set then round up */
401 add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo);
402
403 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
404 &tmp_hi, &tmp_lo);
405 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
406 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
407 ++exp1;
408 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
409 }
410
411 /* Clear hidden bit and shift */
412 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
413 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
414 &tmp_hi, &tmp_lo);
415 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
416 result.parts.frac_hi = tmp_hi;
417 result.parts.frac_lo = tmp_lo;
418
419 result.parts.exp = exp1;
420
421 return result;
422}
423
424/** @}
425 */
Note: See TracBrowser for help on using the repository browser.