source: mainline/uspace/lib/softfloat/sub.c@ c0c38c7c

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since c0c38c7c was c0c38c7c, checked in by Martin Decky <martin@…>, 10 years ago

software floating point overhaul
use proper type mapping
fix cosine calculation

  • Property mode set to 100644
File size: 12.3 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Substraction functions.
34 */
35
36#include "sub.h"
37#include "comparison.h"
38#include "common.h"
39#include "add.h"
40
41/** Subtract two single-precision floats with the same sign.
42 *
43 * @param a First input operand.
44 * @param b Second input operand.
45 *
46 * @return Result of substraction.
47 *
48 */
49float32 sub_float32(float32 a, float32 b)
50{
51 int expdiff;
52 uint32_t exp1, exp2, frac1, frac2;
53 float32 result;
54
55 result.bin = 0;
56
57 expdiff = a.parts.exp - b.parts.exp;
58 if ((expdiff < 0 ) || ((expdiff == 0) &&
59 (a.parts.fraction < b.parts.fraction))) {
60 if (is_float32_nan(b)) {
61 if (is_float32_signan(b)) {
62 // TODO: fix SigNaN
63 }
64
65 return b;
66 }
67
68 if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
69 /* num -(+-inf) = -+inf */
70 b.parts.sign = !b.parts.sign;
71 return b;
72 }
73
74 result.parts.sign = !a.parts.sign;
75
76 frac1 = b.parts.fraction;
77 exp1 = b.parts.exp;
78 frac2 = a.parts.fraction;
79 exp2 = a.parts.exp;
80 expdiff *= -1;
81 } else {
82 if (is_float32_nan(a)) {
83 if ((is_float32_signan(a)) || (is_float32_signan(b))) {
84 // TODO: fix SigNaN
85 }
86
87 return a;
88 }
89
90 if (a.parts.exp == FLOAT32_MAX_EXPONENT) {
91 if (b.parts.exp == FLOAT32_MAX_EXPONENT) {
92 /* inf - inf => nan */
93 // TODO: fix exception
94 result.bin = FLOAT32_NAN;
95 return result;
96 }
97
98 return a;
99 }
100
101 result.parts.sign = a.parts.sign;
102
103 frac1 = a.parts.fraction;
104 exp1 = a.parts.exp;
105 frac2 = b.parts.fraction;
106 exp2 = b.parts.exp;
107 }
108
109 if (exp1 == 0) {
110 /* both are denormalized */
111 result.parts.fraction = frac1 - frac2;
112 if (result.parts.fraction > frac1) {
113 // TODO: underflow exception
114 return result;
115 }
116
117 result.parts.exp = 0;
118 return result;
119 }
120
121 /* add hidden bit */
122 frac1 |= FLOAT32_HIDDEN_BIT_MASK;
123
124 if (exp2 == 0) {
125 /* denormalized */
126 --expdiff;
127 } else {
128 /* normalized */
129 frac2 |= FLOAT32_HIDDEN_BIT_MASK;
130 }
131
132 /* create some space for rounding */
133 frac1 <<= 6;
134 frac2 <<= 6;
135
136 if (expdiff > FLOAT32_FRACTION_SIZE + 1)
137 goto done;
138
139 frac1 = frac1 - (frac2 >> expdiff);
140
141done:
142 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
143 while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) {
144 --exp1;
145 frac1 <<= 1;
146 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
147 }
148
149 /* rounding - if first bit after fraction is set then round up */
150 frac1 += 0x20;
151
152 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
153 ++exp1;
154 frac1 >>= 1;
155 }
156
157 /* Clear hidden bit and shift */
158 result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
159 result.parts.exp = exp1;
160
161 return result;
162}
163
164/** Subtract two double-precision floats with the same sign.
165 *
166 * @param a First input operand.
167 * @param b Second input operand.
168 *
169 * @return Result of substraction.
170 *
171 */
172float64 sub_float64(float64 a, float64 b)
173{
174 int expdiff;
175 uint32_t exp1, exp2;
176 uint64_t frac1, frac2;
177 float64 result;
178
179 result.bin = 0;
180
181 expdiff = a.parts.exp - b.parts.exp;
182 if ((expdiff < 0 ) ||
183 ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {
184 if (is_float64_nan(b)) {
185 if (is_float64_signan(b)) {
186 // TODO: fix SigNaN
187 }
188
189 return b;
190 }
191
192 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
193 /* num -(+-inf) = -+inf */
194 b.parts.sign = !b.parts.sign;
195 return b;
196 }
197
198 result.parts.sign = !a.parts.sign;
199
200 frac1 = b.parts.fraction;
201 exp1 = b.parts.exp;
202 frac2 = a.parts.fraction;
203 exp2 = a.parts.exp;
204 expdiff *= -1;
205 } else {
206 if (is_float64_nan(a)) {
207 if (is_float64_signan(a) || is_float64_signan(b)) {
208 // TODO: fix SigNaN
209 }
210
211 return a;
212 }
213
214 if (a.parts.exp == FLOAT64_MAX_EXPONENT) {
215 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {
216 /* inf - inf => nan */
217 // TODO: fix exception
218 result.bin = FLOAT64_NAN;
219 return result;
220 }
221
222 return a;
223 }
224
225 result.parts.sign = a.parts.sign;
226
227 frac1 = a.parts.fraction;
228 exp1 = a.parts.exp;
229 frac2 = b.parts.fraction;
230 exp2 = b.parts.exp;
231 }
232
233 if (exp1 == 0) {
234 /* both are denormalized */
235 result.parts.fraction = frac1 - frac2;
236 if (result.parts.fraction > frac1) {
237 // TODO: underflow exception
238 return result;
239 }
240
241 result.parts.exp = 0;
242 return result;
243 }
244
245 /* add hidden bit */
246 frac1 |= FLOAT64_HIDDEN_BIT_MASK;
247
248 if (exp2 == 0) {
249 /* denormalized */
250 --expdiff;
251 } else {
252 /* normalized */
253 frac2 |= FLOAT64_HIDDEN_BIT_MASK;
254 }
255
256 /* create some space for rounding */
257 frac1 <<= 6;
258 frac2 <<= 6;
259
260 if (expdiff > FLOAT64_FRACTION_SIZE + 1)
261 goto done;
262
263 frac1 = frac1 - (frac2 >> expdiff);
264
265done:
266 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
267 while ((exp1 > 0) && (!(frac1 & (FLOAT64_HIDDEN_BIT_MASK << 6 )))) {
268 --exp1;
269 frac1 <<= 1;
270 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
271 }
272
273 /* rounding - if first bit after fraction is set then round up */
274 frac1 += 0x20;
275
276 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) {
277 ++exp1;
278 frac1 >>= 1;
279 }
280
281 /* Clear hidden bit and shift */
282 result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK));
283 result.parts.exp = exp1;
284
285 return result;
286}
287
288/** Subtract two quadruple-precision floats with the same sign.
289 *
290 * @param a First input operand.
291 * @param b Second input operand.
292 *
293 * @return Result of substraction.
294 *
295 */
296float128 sub_float128(float128 a, float128 b)
297{
298 int expdiff;
299 uint32_t exp1, exp2;
300 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
301 float128 result;
302
303 result.bin.hi = 0;
304 result.bin.lo = 0;
305
306 expdiff = a.parts.exp - b.parts.exp;
307 if ((expdiff < 0 ) || ((expdiff == 0) &&
308 lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) {
309 if (is_float128_nan(b)) {
310 if (is_float128_signan(b)) {
311 // TODO: fix SigNaN
312 }
313
314 return b;
315 }
316
317 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
318 /* num -(+-inf) = -+inf */
319 b.parts.sign = !b.parts.sign;
320 return b;
321 }
322
323 result.parts.sign = !a.parts.sign;
324
325 frac1_hi = b.parts.frac_hi;
326 frac1_lo = b.parts.frac_lo;
327 exp1 = b.parts.exp;
328 frac2_hi = a.parts.frac_hi;
329 frac2_lo = a.parts.frac_lo;
330 exp2 = a.parts.exp;
331 expdiff *= -1;
332 } else {
333 if (is_float128_nan(a)) {
334 if (is_float128_signan(a) || is_float128_signan(b)) {
335 // TODO: fix SigNaN
336 }
337
338 return a;
339 }
340
341 if (a.parts.exp == FLOAT128_MAX_EXPONENT) {
342 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {
343 /* inf - inf => nan */
344 // TODO: fix exception
345 result.bin.hi = FLOAT128_NAN_HI;
346 result.bin.lo = FLOAT128_NAN_LO;
347 return result;
348 }
349 return a;
350 }
351
352 result.parts.sign = a.parts.sign;
353
354 frac1_hi = a.parts.frac_hi;
355 frac1_lo = a.parts.frac_lo;
356 exp1 = a.parts.exp;
357 frac2_hi = b.parts.frac_hi;
358 frac2_lo = b.parts.frac_lo;
359 exp2 = b.parts.exp;
360 }
361
362 if (exp1 == 0) {
363 /* both are denormalized */
364 sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo);
365 result.parts.frac_hi = tmp_hi;
366 result.parts.frac_lo = tmp_lo;
367 if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) {
368 // TODO: underflow exception
369 return result;
370 }
371
372 result.parts.exp = 0;
373 return result;
374 }
375
376 /* add hidden bit */
377 or128(frac1_hi, frac1_lo,
378 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
379 &frac1_hi, &frac1_lo);
380
381 if (exp2 == 0) {
382 /* denormalized */
383 --expdiff;
384 } else {
385 /* normalized */
386 or128(frac2_hi, frac2_lo,
387 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
388 &frac2_hi, &frac2_lo);
389 }
390
391 /* create some space for rounding */
392 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
393 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);
394
395 if (expdiff > FLOAT128_FRACTION_SIZE + 1)
396 goto done;
397
398 rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo);
399 sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo);
400
401done:
402 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
403 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
404 &tmp_hi, &tmp_lo);
405 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
406 while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
407 --exp1;
408 lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
409 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
410
411 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,
412 &tmp_hi, &tmp_lo);
413 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
414 }
415
416 /* rounding - if first bit after fraction is set then round up */
417 add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo);
418
419 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,
420 &tmp_hi, &tmp_lo);
421 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
422 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
423 ++exp1;
424 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
425 }
426
427 /* Clear hidden bit and shift */
428 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);
429 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
430 &tmp_hi, &tmp_lo);
431 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
432 result.parts.frac_hi = tmp_hi;
433 result.parts.frac_lo = tmp_lo;
434
435 result.parts.exp = exp1;
436
437 return result;
438}
439
440#ifdef float32_t
441
442float32_t __subsf3(float32_t a, float32_t b)
443{
444 float32_u ua;
445 ua.val = a;
446
447 float32_u ub;
448 ub.val = b;
449
450 float32_u res;
451
452 if (ua.data.parts.sign != ub.data.parts.sign) {
453 ub.data.parts.sign = !ub.data.parts.sign;
454 res.data = add_float32(ua.data, ub.data);
455 } else
456 res.data = sub_float32(ua.data, ub.data);
457
458 return res.val;
459}
460
461float32_t __aeabi_fsub(float32_t a, float32_t b)
462{
463 float32_u ua;
464 ua.val = a;
465
466 float32_u ub;
467 ub.val = b;
468
469 float32_u res;
470
471 if (ua.data.parts.sign != ub.data.parts.sign) {
472 ub.data.parts.sign = !ub.data.parts.sign;
473 res.data = add_float32(ua.data, ub.data);
474 } else
475 res.data = sub_float32(ua.data, ub.data);
476
477 return res.val;
478}
479
480#endif
481
482#ifdef float64_t
483
484float64_t __subdf3(float64_t a, float64_t b)
485{
486 float64_u ua;
487 ua.val = a;
488
489 float64_u ub;
490 ub.val = b;
491
492 float64_u res;
493
494 if (ua.data.parts.sign != ub.data.parts.sign) {
495 ub.data.parts.sign = !ub.data.parts.sign;
496 res.data = add_float64(ua.data, ub.data);
497 } else
498 res.data = sub_float64(ua.data, ub.data);
499
500 return res.val;
501}
502
503float64_t __aeabi_dsub(float64_t a, float64_t b)
504{
505 float64_u ua;
506 ua.val = a;
507
508 float64_u ub;
509 ub.val = b;
510
511 float64_u res;
512
513 if (ua.data.parts.sign != ub.data.parts.sign) {
514 ub.data.parts.sign = !ub.data.parts.sign;
515 res.data = add_float64(ua.data, ub.data);
516 } else
517 res.data = sub_float64(ua.data, ub.data);
518
519 return res.val;
520}
521
522#endif
523
524#ifdef float128_t
525
526float128_t __subtf3(float128_t a, float128_t b)
527{
528 float128_u ua;
529 ua.val = a;
530
531 float128_u ub;
532 ub.val = b;
533
534 float128_u res;
535
536 if (ua.data.parts.sign != ub.data.parts.sign) {
537 ub.data.parts.sign = !ub.data.parts.sign;
538 res.data = add_float128(ua.data, ub.data);
539 } else
540 res.data = sub_float128(ua.data, ub.data);
541
542 return res.val;
543}
544
545void _Qp_sub(float128_t *c, float128_t *a, float128_t *b)
546{
547 *c = __subtf3(*a, *b);
548}
549
550#endif
551
552/** @}
553 */
Note: See TracBrowser for help on using the repository browser.