source: mainline/uspace/lib/softfloat/generic/div.c@ 8bcd727

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 8bcd727 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Division functions.
34 */
35
36#include <sftypes.h>
37#include <add.h>
38#include <div.h>
39#include <comparison.h>
40#include <mul.h>
41#include <common.h>
42
43/**
44 * Divide two single-precision floats.
45 *
46 * @param a Nominator.
47 * @param b Denominator.
48 * @return Result of division.
49 */
50float32 divFloat32(float32 a, float32 b)
51{
52 float32 result;
53 int32_t aexp, bexp, cexp;
54 uint64_t afrac, bfrac, cfrac;
55
56 result.parts.sign = a.parts.sign ^ b.parts.sign;
57
58 if (isFloat32NaN(a)) {
59 if (isFloat32SigNaN(a)) {
60 /*FIXME: SigNaN*/
61 }
62 /*NaN*/
63 return a;
64 }
65
66 if (isFloat32NaN(b)) {
67 if (isFloat32SigNaN(b)) {
68 /*FIXME: SigNaN*/
69 }
70 /*NaN*/
71 return b;
72 }
73
74 if (isFloat32Infinity(a)) {
75 if (isFloat32Infinity(b)) {
76 /*FIXME: inf / inf */
77 result.binary = FLOAT32_NAN;
78 return result;
79 }
80 /* inf / num */
81 result.parts.exp = a.parts.exp;
82 result.parts.fraction = a.parts.fraction;
83 return result;
84 }
85
86 if (isFloat32Infinity(b)) {
87 if (isFloat32Zero(a)) {
88 /* FIXME 0 / inf */
89 result.parts.exp = 0;
90 result.parts.fraction = 0;
91 return result;
92 }
93 /* FIXME: num / inf*/
94 result.parts.exp = 0;
95 result.parts.fraction = 0;
96 return result;
97 }
98
99 if (isFloat32Zero(b)) {
100 if (isFloat32Zero(a)) {
101 /*FIXME: 0 / 0*/
102 result.binary = FLOAT32_NAN;
103 return result;
104 }
105 /* FIXME: division by zero */
106 result.parts.exp = 0;
107 result.parts.fraction = 0;
108 return result;
109 }
110
111 afrac = a.parts.fraction;
112 aexp = a.parts.exp;
113 bfrac = b.parts.fraction;
114 bexp = b.parts.exp;
115
116 /* denormalized numbers */
117 if (aexp == 0) {
118 if (afrac == 0) {
119 result.parts.exp = 0;
120 result.parts.fraction = 0;
121 return result;
122 }
123
124 /* normalize it*/
125 afrac <<= 1;
126 /* afrac is nonzero => it must stop */
127 while (!(afrac & FLOAT32_HIDDEN_BIT_MASK)) {
128 afrac <<= 1;
129 aexp--;
130 }
131 }
132
133 if (bexp == 0) {
134 bfrac <<= 1;
135 /* bfrac is nonzero => it must stop */
136 while (!(bfrac & FLOAT32_HIDDEN_BIT_MASK)) {
137 bfrac <<= 1;
138 bexp--;
139 }
140 }
141
142 afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE - 1);
143 bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE);
144
145 if (bfrac <= (afrac << 1)) {
146 afrac >>= 1;
147 aexp++;
148 }
149
150 cexp = aexp - bexp + FLOAT32_BIAS - 2;
151
152 cfrac = (afrac << 32) / bfrac;
153 if ((cfrac & 0x3F) == 0) {
154 cfrac |= (bfrac * cfrac != afrac << 32);
155 }
156
157 /* pack and round */
158
159 /* find first nonzero digit and shift result and detect possibly underflow */
160 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)))) {
161 cexp--;
162 cfrac <<= 1;
163 /* TODO: fix underflow */
164 }
165
166 cfrac += (0x1 << 6); /* FIXME: 7 is not sure*/
167
168 if (cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
169 ++cexp;
170 cfrac >>= 1;
171 }
172
173 /* check overflow */
174 if (cexp >= FLOAT32_MAX_EXPONENT) {
175 /* FIXME: overflow, return infinity */
176 result.parts.exp = FLOAT32_MAX_EXPONENT;
177 result.parts.fraction = 0;
178 return result;
179 }
180
181 if (cexp < 0) {
182 /* FIXME: underflow */
183 result.parts.exp = 0;
184 if ((cexp + FLOAT32_FRACTION_SIZE) < 0) {
185 result.parts.fraction = 0;
186 return result;
187 }
188 cfrac >>= 1;
189 while (cexp < 0) {
190 cexp++;
191 cfrac >>= 1;
192 }
193 } else {
194 result.parts.exp = (uint32_t) cexp;
195 }
196
197 result.parts.fraction = ((cfrac >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
198
199 return result;
200}
201
202/**
203 * Divide two double-precision floats.
204 *
205 * @param a Nominator.
206 * @param b Denominator.
207 * @return Result of division.
208 */
209float64 divFloat64(float64 a, float64 b)
210{
211 float64 result;
212 int64_t aexp, bexp, cexp;
213 uint64_t afrac, bfrac, cfrac;
214 uint64_t remlo, remhi;
215 uint64_t tmplo, tmphi;
216
217 result.parts.sign = a.parts.sign ^ b.parts.sign;
218
219 if (isFloat64NaN(a)) {
220 if (isFloat64SigNaN(b)) {
221 /*FIXME: SigNaN*/
222 return b;
223 }
224
225 if (isFloat64SigNaN(a)) {
226 /*FIXME: SigNaN*/
227 }
228 /*NaN*/
229 return a;
230 }
231
232 if (isFloat64NaN(b)) {
233 if (isFloat64SigNaN(b)) {
234 /*FIXME: SigNaN*/
235 }
236 /*NaN*/
237 return b;
238 }
239
240 if (isFloat64Infinity(a)) {
241 if (isFloat64Infinity(b) || isFloat64Zero(b)) {
242 /*FIXME: inf / inf */
243 result.binary = FLOAT64_NAN;
244 return result;
245 }
246 /* inf / num */
247 result.parts.exp = a.parts.exp;
248 result.parts.fraction = a.parts.fraction;
249 return result;
250 }
251
252 if (isFloat64Infinity(b)) {
253 if (isFloat64Zero(a)) {
254 /* FIXME 0 / inf */
255 result.parts.exp = 0;
256 result.parts.fraction = 0;
257 return result;
258 }
259 /* FIXME: num / inf*/
260 result.parts.exp = 0;
261 result.parts.fraction = 0;
262 return result;
263 }
264
265 if (isFloat64Zero(b)) {
266 if (isFloat64Zero(a)) {
267 /*FIXME: 0 / 0*/
268 result.binary = FLOAT64_NAN;
269 return result;
270 }
271 /* FIXME: division by zero */
272 result.parts.exp = 0;
273 result.parts.fraction = 0;
274 return result;
275 }
276
277 afrac = a.parts.fraction;
278 aexp = a.parts.exp;
279 bfrac = b.parts.fraction;
280 bexp = b.parts.exp;
281
282 /* denormalized numbers */
283 if (aexp == 0) {
284 if (afrac == 0) {
285 result.parts.exp = 0;
286 result.parts.fraction = 0;
287 return result;
288 }
289
290 /* normalize it*/
291 aexp++;
292 /* afrac is nonzero => it must stop */
293 while (!(afrac & FLOAT64_HIDDEN_BIT_MASK)) {
294 afrac <<= 1;
295 aexp--;
296 }
297 }
298
299 if (bexp == 0) {
300 bexp++;
301 /* bfrac is nonzero => it must stop */
302 while (!(bfrac & FLOAT64_HIDDEN_BIT_MASK)) {
303 bfrac <<= 1;
304 bexp--;
305 }
306 }
307
308 afrac = (afrac | FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 2);
309 bfrac = (bfrac | FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 1);
310
311 if (bfrac <= (afrac << 1)) {
312 afrac >>= 1;
313 aexp++;
314 }
315
316 cexp = aexp - bexp + FLOAT64_BIAS - 2;
317
318 cfrac = div128est(afrac, 0x0ll, bfrac);
319
320 if ((cfrac & 0x1FF) <= 2) {
321 mul64(bfrac, cfrac, &tmphi, &tmplo);
322 sub128(afrac, 0x0ll, tmphi, tmplo, &remhi, &remlo);
323
324 while ((int64_t) remhi < 0) {
325 cfrac--;
326 add128(remhi, remlo, 0x0ll, bfrac, &remhi, &remlo);
327 }
328 cfrac |= (remlo != 0);
329 }
330
331 /* round and shift */
332 result = finishFloat64(cexp, cfrac, result.parts.sign);
333 return result;
334}
335
336/**
337 * Divide two quadruple-precision floats.
338 *
339 * @param a Nominator.
340 * @param b Denominator.
341 * @return Result of division.
342 */
343float128 divFloat128(float128 a, float128 b)
344{
345 float128 result;
346 int64_t aexp, bexp, cexp;
347 uint64_t afrac_hi, afrac_lo, bfrac_hi, bfrac_lo, cfrac_hi, cfrac_lo;
348 uint64_t shift_out;
349 uint64_t rem_hihi, rem_hilo, rem_lohi, rem_lolo;
350 uint64_t tmp_hihi, tmp_hilo, tmp_lohi, tmp_lolo;
351
352 result.parts.sign = a.parts.sign ^ b.parts.sign;
353
354 if (isFloat128NaN(a)) {
355 if (isFloat128SigNaN(b)) {
356 /*FIXME: SigNaN*/
357 return b;
358 }
359
360 if (isFloat128SigNaN(a)) {
361 /*FIXME: SigNaN*/
362 }
363 /*NaN*/
364 return a;
365 }
366
367 if (isFloat128NaN(b)) {
368 if (isFloat128SigNaN(b)) {
369 /*FIXME: SigNaN*/
370 }
371 /*NaN*/
372 return b;
373 }
374
375 if (isFloat128Infinity(a)) {
376 if (isFloat128Infinity(b) || isFloat128Zero(b)) {
377 /*FIXME: inf / inf */
378 result.binary.hi = FLOAT128_NAN_HI;
379 result.binary.lo = FLOAT128_NAN_LO;
380 return result;
381 }
382 /* inf / num */
383 result.parts.exp = a.parts.exp;
384 result.parts.frac_hi = a.parts.frac_hi;
385 result.parts.frac_lo = a.parts.frac_lo;
386 return result;
387 }
388
389 if (isFloat128Infinity(b)) {
390 if (isFloat128Zero(a)) {
391 /* FIXME 0 / inf */
392 result.parts.exp = 0;
393 result.parts.frac_hi = 0;
394 result.parts.frac_lo = 0;
395 return result;
396 }
397 /* FIXME: num / inf*/
398 result.parts.exp = 0;
399 result.parts.frac_hi = 0;
400 result.parts.frac_lo = 0;
401 return result;
402 }
403
404 if (isFloat128Zero(b)) {
405 if (isFloat128Zero(a)) {
406 /*FIXME: 0 / 0*/
407 result.binary.hi = FLOAT128_NAN_HI;
408 result.binary.lo = FLOAT128_NAN_LO;
409 return result;
410 }
411 /* FIXME: division by zero */
412 result.parts.exp = 0;
413 result.parts.frac_hi = 0;
414 result.parts.frac_lo = 0;
415 return result;
416 }
417
418 afrac_hi = a.parts.frac_hi;
419 afrac_lo = a.parts.frac_lo;
420 aexp = a.parts.exp;
421 bfrac_hi = b.parts.frac_hi;
422 bfrac_lo = b.parts.frac_lo;
423 bexp = b.parts.exp;
424
425 /* denormalized numbers */
426 if (aexp == 0) {
427 if (eq128(afrac_hi, afrac_lo, 0x0ll, 0x0ll)) {
428 result.parts.exp = 0;
429 result.parts.frac_hi = 0;
430 result.parts.frac_lo = 0;
431 return result;
432 }
433
434 /* normalize it*/
435 aexp++;
436 /* afrac is nonzero => it must stop */
437 and128(afrac_hi, afrac_lo,
438 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
439 &tmp_hihi, &tmp_lolo);
440 while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) {
441 lshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo);
442 aexp--;
443 }
444 }
445
446 if (bexp == 0) {
447 bexp++;
448 /* bfrac is nonzero => it must stop */
449 and128(bfrac_hi, bfrac_lo,
450 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
451 &tmp_hihi, &tmp_lolo);
452 while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) {
453 lshift128(bfrac_hi, bfrac_lo, 1, &bfrac_hi, &bfrac_lo);
454 bexp--;
455 }
456 }
457
458 or128(afrac_hi, afrac_lo,
459 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
460 &afrac_hi, &afrac_lo);
461 lshift128(afrac_hi, afrac_lo,
462 (128 - FLOAT128_FRACTION_SIZE - 1), &afrac_hi, &afrac_lo);
463 or128(bfrac_hi, bfrac_lo,
464 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
465 &bfrac_hi, &bfrac_lo);
466 lshift128(bfrac_hi, bfrac_lo,
467 (128 - FLOAT128_FRACTION_SIZE - 1), &bfrac_hi, &bfrac_lo);
468
469 if (le128(bfrac_hi, bfrac_lo, afrac_hi, afrac_lo)) {
470 rshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo);
471 aexp++;
472 }
473
474 cexp = aexp - bexp + FLOAT128_BIAS - 2;
475
476 cfrac_hi = div128est(afrac_hi, afrac_lo, bfrac_hi);
477
478 mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_hi,
479 &tmp_lolo /* dummy */, &tmp_hihi, &tmp_hilo, &tmp_lohi);
480
481 /* sub192(afrac_hi, afrac_lo, 0,
482 * tmp_hihi, tmp_hilo, tmp_lohi
483 * &rem_hihi, &rem_hilo, &rem_lohi); */
484 sub128(afrac_hi, afrac_lo, tmp_hihi, tmp_hilo, &rem_hihi, &rem_hilo);
485 if (tmp_lohi > 0) {
486 sub128(rem_hihi, rem_hilo, 0x0ll, 0x1ll, &rem_hihi, &rem_hilo);
487 }
488 rem_lohi = -tmp_lohi;
489
490 while ((int64_t) rem_hihi < 0) {
491 --cfrac_hi;
492 /* add192(rem_hihi, rem_hilo, rem_lohi,
493 * 0, bfrac_hi, bfrac_lo,
494 * &rem_hihi, &rem_hilo, &rem_lohi); */
495 add128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo, &rem_hilo, &rem_lohi);
496 if (lt128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo)) {
497 ++rem_hihi;
498 }
499 }
500
501 cfrac_lo = div128est(rem_hilo, rem_lohi, bfrac_lo);
502
503 if ((cfrac_lo & 0x3FFF) <= 4) {
504 mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_lo,
505 &tmp_hihi /* dummy */, &tmp_hilo, &tmp_lohi, &tmp_lolo);
506
507 /* sub192(rem_hilo, rem_lohi, 0,
508 * tmp_hilo, tmp_lohi, tmp_lolo,
509 * &rem_hilo, &rem_lohi, &rem_lolo); */
510 sub128(rem_hilo, rem_lohi, tmp_hilo, tmp_lohi, &rem_hilo, &rem_lohi);
511 if (tmp_lolo > 0) {
512 sub128(rem_hilo, rem_lohi, 0x0ll, 0x1ll, &rem_hilo, &rem_lohi);
513 }
514 rem_lolo = -tmp_lolo;
515
516 while ((int64_t) rem_hilo < 0) {
517 --cfrac_lo;
518 /* add192(rem_hilo, rem_lohi, rem_lolo,
519 * 0, bfrac_hi, bfrac_lo,
520 * &rem_hilo, &rem_lohi, &rem_lolo); */
521 add128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo, &rem_lohi, &rem_lolo);
522 if (lt128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo)) {
523 ++rem_hilo;
524 }
525 }
526
527 cfrac_lo |= ((rem_hilo | rem_lohi | rem_lolo) != 0 );
528 }
529
530 shift_out = cfrac_lo << (64 - (128 - FLOAT128_FRACTION_SIZE - 1));
531 rshift128(cfrac_hi, cfrac_lo, (128 - FLOAT128_FRACTION_SIZE - 1),
532 &cfrac_hi, &cfrac_lo);
533
534 result = finishFloat128(cexp, cfrac_hi, cfrac_lo, result.parts.sign, shift_out);
535 return result;
536}
537
538/** @}
539 */
Note: See TracBrowser for help on using the repository browser.