source: mainline/uspace/lib/softfloat/generic/conversion.c@ 8bcd727

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 8bcd727 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend the softfloat library consistently, the addition of quadruple precision was done in the same spirit as the already existing single- and double-precision code written by Josef Cejka in 2006 - that is, relaxed standard-compliance for corner cases, while mission-critical code sections are heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just the double-precision code adapted to work on 128-bit variables. That means that if there is a TODO, FIXME or some defect in the single- or double-precision code, it is most likely also present in the quadruple-precision code. Please note that the quadruple-precision functions are currently not tested - that is a challenging task in itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or use an existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing, the only code dependent on the quadruple-precision functions (on mips64 and sparc64) is the libposix strtold() function (and its callers, most notably the scanf backend).

  • Property mode set to 100644
File size: 21.8 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Conversion of precision and conversion between integers and floats.
34 */
35
36#include <sftypes.h>
37#include <conversion.h>
38#include <comparison.h>
39#include <common.h>
40
41float64 convertFloat32ToFloat64(float32 a)
42{
43 float64 result;
44 uint64_t frac;
45
46 result.parts.sign = a.parts.sign;
47 result.parts.fraction = a.parts.fraction;
48 result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
49
50 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) {
51 result.parts.exp = FLOAT64_MAX_EXPONENT;
52 /* TODO; check if its correct for SigNaNs*/
53 return result;
54 }
55
56 result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS);
57 if (a.parts.exp == 0) {
58 /* normalize denormalized numbers */
59
60 if (result.parts.fraction == 0) { /* fix zero */
61 result.parts.exp = 0;
62 return result;
63 }
64
65 frac = result.parts.fraction;
66
67 while (!(frac & FLOAT64_HIDDEN_BIT_MASK)) {
68 frac <<= 1;
69 --result.parts.exp;
70 }
71
72 ++result.parts.exp;
73 result.parts.fraction = frac;
74 }
75
76 return result;
77}
78
79float128 convertFloat32ToFloat128(float32 a)
80{
81 float128 result;
82 uint64_t frac_hi, frac_lo;
83 uint64_t tmp_hi, tmp_lo;
84
85 result.parts.sign = a.parts.sign;
86 result.parts.frac_hi = 0;
87 result.parts.frac_lo = a.parts.fraction;
88 lshift128(result.parts.frac_hi, result.parts.frac_lo,
89 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
90 &frac_hi, &frac_lo);
91 result.parts.frac_hi = frac_hi;
92 result.parts.frac_lo = frac_lo;
93
94 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) {
95 result.parts.exp = FLOAT128_MAX_EXPONENT;
96 /* TODO; check if its correct for SigNaNs*/
97 return result;
98 }
99
100 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
101 if (a.parts.exp == 0) {
102 /* normalize denormalized numbers */
103
104 if (eq128(result.parts.frac_hi,
105 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
106 result.parts.exp = 0;
107 return result;
108 }
109
110 frac_hi = result.parts.frac_hi;
111 frac_lo = result.parts.frac_lo;
112
113 and128(frac_hi, frac_lo,
114 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
115 &tmp_hi, &tmp_lo);
116 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
117 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
118 --result.parts.exp;
119 }
120
121 ++result.parts.exp;
122 result.parts.frac_hi = frac_hi;
123 result.parts.frac_lo = frac_lo;
124 }
125
126 return result;
127}
128
129float128 convertFloat64ToFloat128(float64 a)
130{
131 float128 result;
132 uint64_t frac_hi, frac_lo;
133 uint64_t tmp_hi, tmp_lo;
134
135 result.parts.sign = a.parts.sign;
136 result.parts.frac_hi = 0;
137 result.parts.frac_lo = a.parts.fraction;
138 lshift128(result.parts.frac_hi, result.parts.frac_lo,
139 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE),
140 &frac_hi, &frac_lo);
141 result.parts.frac_hi = frac_hi;
142 result.parts.frac_lo = frac_lo;
143
144 if ((isFloat64Infinity(a)) || (isFloat64NaN(a))) {
145 result.parts.exp = FLOAT128_MAX_EXPONENT;
146 /* TODO; check if its correct for SigNaNs*/
147 return result;
148 }
149
150 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT64_BIAS);
151 if (a.parts.exp == 0) {
152 /* normalize denormalized numbers */
153
154 if (eq128(result.parts.frac_hi,
155 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
156 result.parts.exp = 0;
157 return result;
158 }
159
160 frac_hi = result.parts.frac_hi;
161 frac_lo = result.parts.frac_lo;
162
163 and128(frac_hi, frac_lo,
164 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
165 &tmp_hi, &tmp_lo);
166 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
167 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
168 --result.parts.exp;
169 }
170
171 ++result.parts.exp;
172 result.parts.frac_hi = frac_hi;
173 result.parts.frac_lo = frac_lo;
174 }
175
176 return result;
177}
178
179float32 convertFloat64ToFloat32(float64 a)
180{
181 float32 result;
182 int32_t exp;
183 uint64_t frac;
184
185 result.parts.sign = a.parts.sign;
186
187 if (isFloat64NaN(a)) {
188 result.parts.exp = FLOAT32_MAX_EXPONENT;
189
190 if (isFloat64SigNaN(a)) {
191 /* set first bit of fraction nonzero */
192 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
193 return result;
194 }
195
196 /* fraction nonzero but its first bit is zero */
197 result.parts.fraction = 0x1;
198 return result;
199 }
200
201 if (isFloat64Infinity(a)) {
202 result.parts.fraction = 0;
203 result.parts.exp = FLOAT32_MAX_EXPONENT;
204 return result;
205 }
206
207 exp = (int) a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
208
209 if (exp >= FLOAT32_MAX_EXPONENT) {
210 /* FIXME: overflow */
211 result.parts.fraction = 0;
212 result.parts.exp = FLOAT32_MAX_EXPONENT;
213 return result;
214 } else if (exp <= 0) {
215 /* underflow or denormalized */
216
217 result.parts.exp = 0;
218
219 exp *= -1;
220 if (exp > FLOAT32_FRACTION_SIZE) {
221 /* FIXME: underflow */
222 result.parts.fraction = 0;
223 return result;
224 }
225
226 /* denormalized */
227
228 frac = a.parts.fraction;
229 frac |= FLOAT64_HIDDEN_BIT_MASK; /* denormalize and set hidden bit */
230
231 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
232
233 while (exp > 0) {
234 --exp;
235 frac >>= 1;
236 }
237 result.parts.fraction = frac;
238
239 return result;
240 }
241
242 result.parts.exp = exp;
243 result.parts.fraction =
244 a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
245 return result;
246}
247
248float32 convertFloat128ToFloat32(float128 a)
249{
250 float32 result;
251 int32_t exp;
252 uint64_t frac_hi, frac_lo;
253
254 result.parts.sign = a.parts.sign;
255
256 if (isFloat128NaN(a)) {
257 result.parts.exp = FLOAT32_MAX_EXPONENT;
258
259 if (isFloat128SigNaN(a)) {
260 /* set first bit of fraction nonzero */
261 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
262 return result;
263 }
264
265 /* fraction nonzero but its first bit is zero */
266 result.parts.fraction = 0x1;
267 return result;
268 }
269
270 if (isFloat128Infinity(a)) {
271 result.parts.fraction = 0;
272 result.parts.exp = FLOAT32_MAX_EXPONENT;
273 return result;
274 }
275
276 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT32_BIAS;
277
278 if (exp >= FLOAT32_MAX_EXPONENT) {
279 /* FIXME: overflow */
280 result.parts.fraction = 0;
281 result.parts.exp = FLOAT32_MAX_EXPONENT;
282 return result;
283 } else if (exp <= 0) {
284 /* underflow or denormalized */
285
286 result.parts.exp = 0;
287
288 exp *= -1;
289 if (exp > FLOAT32_FRACTION_SIZE) {
290 /* FIXME: underflow */
291 result.parts.fraction = 0;
292 return result;
293 }
294
295 /* denormalized */
296
297 frac_hi = a.parts.frac_hi;
298 frac_lo = a.parts.frac_lo;
299
300 /* denormalize and set hidden bit */
301 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI;
302
303 rshift128(frac_hi, frac_lo,
304 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
305 &frac_hi, &frac_lo);
306
307 while (exp > 0) {
308 --exp;
309 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
310 }
311 result.parts.fraction = frac_lo;
312
313 return result;
314 }
315
316 result.parts.exp = exp;
317 frac_hi = a.parts.frac_hi;
318 frac_lo = a.parts.frac_lo;
319 rshift128(frac_hi, frac_lo,
320 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
321 &frac_hi, &frac_lo);
322 result.parts.fraction = frac_lo;
323 return result;
324}
325
326float64 convertFloat128ToFloat64(float128 a)
327{
328 float64 result;
329 int32_t exp;
330 uint64_t frac_hi, frac_lo;
331
332 result.parts.sign = a.parts.sign;
333
334 if (isFloat128NaN(a)) {
335 result.parts.exp = FLOAT64_MAX_EXPONENT;
336
337 if (isFloat128SigNaN(a)) {
338 /* set first bit of fraction nonzero */
339 result.parts.fraction = FLOAT64_HIDDEN_BIT_MASK >> 1;
340 return result;
341 }
342
343 /* fraction nonzero but its first bit is zero */
344 result.parts.fraction = 0x1;
345 return result;
346 }
347
348 if (isFloat128Infinity(a)) {
349 result.parts.fraction = 0;
350 result.parts.exp = FLOAT64_MAX_EXPONENT;
351 return result;
352 }
353
354 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT64_BIAS;
355
356 if (exp >= FLOAT64_MAX_EXPONENT) {
357 /* FIXME: overflow */
358 result.parts.fraction = 0;
359 result.parts.exp = FLOAT64_MAX_EXPONENT;
360 return result;
361 } else if (exp <= 0) {
362 /* underflow or denormalized */
363
364 result.parts.exp = 0;
365
366 exp *= -1;
367 if (exp > FLOAT64_FRACTION_SIZE) {
368 /* FIXME: underflow */
369 result.parts.fraction = 0;
370 return result;
371 }
372
373 /* denormalized */
374
375 frac_hi = a.parts.frac_hi;
376 frac_lo = a.parts.frac_lo;
377
378 /* denormalize and set hidden bit */
379 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI;
380
381 rshift128(frac_hi, frac_lo,
382 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
383 &frac_hi, &frac_lo);
384
385 while (exp > 0) {
386 --exp;
387 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
388 }
389 result.parts.fraction = frac_lo;
390
391 return result;
392 }
393
394 result.parts.exp = exp;
395 frac_hi = a.parts.frac_hi;
396 frac_lo = a.parts.frac_lo;
397 rshift128(frac_hi, frac_lo,
398 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
399 &frac_hi, &frac_lo);
400 result.parts.fraction = frac_lo;
401 return result;
402}
403
404
405/**
406 * Helping procedure for converting float32 to uint32.
407 *
408 * @param a Floating point number in normalized form
409 * (NaNs or Inf are not checked).
410 * @return Converted unsigned integer.
411 */
412static uint32_t _float32_to_uint32_helper(float32 a)
413{
414 uint32_t frac;
415
416 if (a.parts.exp < FLOAT32_BIAS) {
417 /* TODO: rounding */
418 return 0;
419 }
420
421 frac = a.parts.fraction;
422
423 frac |= FLOAT32_HIDDEN_BIT_MASK;
424 /* shift fraction to left so hidden bit will be the most significant bit */
425 frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
426
427 frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
428 if ((a.parts.sign == 1) && (frac != 0)) {
429 frac = ~frac;
430 ++frac;
431 }
432
433 return frac;
434}
435
436/*
437 * FIXME: Im not sure what to return if overflow/underflow happens
438 * - now its the biggest or the smallest int
439 */
440uint32_t float32_to_uint32(float32 a)
441{
442 if (isFloat32NaN(a))
443 return UINT32_MAX;
444
445 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
446 if (a.parts.sign)
447 return UINT32_MIN;
448
449 return UINT32_MAX;
450 }
451
452 return _float32_to_uint32_helper(a);
453}
454
455/*
456 * FIXME: Im not sure what to return if overflow/underflow happens
457 * - now its the biggest or the smallest int
458 */
459int32_t float32_to_int32(float32 a)
460{
461 if (isFloat32NaN(a))
462 return INT32_MAX;
463
464 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
465 if (a.parts.sign)
466 return INT32_MIN;
467
468 return INT32_MAX;
469 }
470
471 return _float32_to_uint32_helper(a);
472}
473
474
475/**
476 * Helping procedure for converting float32 to uint64.
477 *
478 * @param a Floating point number in normalized form
479 * (NaNs or Inf are not checked).
480 * @return Converted unsigned integer.
481 */
482static uint64_t _float32_to_uint64_helper(float32 a)
483{
484 uint64_t frac;
485
486 if (a.parts.exp < FLOAT32_BIAS) {
487 /*TODO: rounding*/
488 return 0;
489 }
490
491 frac = a.parts.fraction;
492
493 frac |= FLOAT32_HIDDEN_BIT_MASK;
494 /* shift fraction to left so hidden bit will be the most significant bit */
495 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
496
497 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
498 if ((a.parts.sign == 1) && (frac != 0)) {
499 frac = ~frac;
500 ++frac;
501 }
502
503 return frac;
504}
505
506/*
507 * FIXME: Im not sure what to return if overflow/underflow happens
508 * - now its the biggest or the smallest int
509 */
510uint64_t float32_to_uint64(float32 a)
511{
512 if (isFloat32NaN(a))
513 return UINT64_MAX;
514
515
516 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
517 if (a.parts.sign)
518 return UINT64_MIN;
519
520 return UINT64_MAX;
521 }
522
523 return _float32_to_uint64_helper(a);
524}
525
526/*
527 * FIXME: Im not sure what to return if overflow/underflow happens
528 * - now its the biggest or the smallest int
529 */
530int64_t float32_to_int64(float32 a)
531{
532 if (isFloat32NaN(a))
533 return INT64_MAX;
534
535 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
536 if (a.parts.sign)
537 return INT64_MIN;
538
539 return INT64_MAX;
540 }
541
542 return _float32_to_uint64_helper(a);
543}
544
545
546/**
547 * Helping procedure for converting float64 to uint64.
548 *
549 * @param a Floating point number in normalized form
550 * (NaNs or Inf are not checked).
551 * @return Converted unsigned integer.
552 */
553static uint64_t _float64_to_uint64_helper(float64 a)
554{
555 uint64_t frac;
556
557 if (a.parts.exp < FLOAT64_BIAS) {
558 /*TODO: rounding*/
559 return 0;
560 }
561
562 frac = a.parts.fraction;
563
564 frac |= FLOAT64_HIDDEN_BIT_MASK;
565 /* shift fraction to left so hidden bit will be the most significant bit */
566 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
567
568 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
569 if ((a.parts.sign == 1) && (frac != 0)) {
570 frac = ~frac;
571 ++frac;
572 }
573
574 return frac;
575}
576
577/*
578 * FIXME: Im not sure what to return if overflow/underflow happens
579 * - now its the biggest or the smallest int
580 */
581uint32_t float64_to_uint32(float64 a)
582{
583 if (isFloat64NaN(a))
584 return UINT32_MAX;
585
586 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
587 if (a.parts.sign)
588 return UINT32_MIN;
589
590 return UINT32_MAX;
591 }
592
593 return (uint32_t) _float64_to_uint64_helper(a);
594}
595
596/*
597 * FIXME: Im not sure what to return if overflow/underflow happens
598 * - now its the biggest or the smallest int
599 */
600int32_t float64_to_int32(float64 a)
601{
602 if (isFloat64NaN(a))
603 return INT32_MAX;
604
605 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
606 if (a.parts.sign)
607 return INT32_MIN;
608
609 return INT32_MAX;
610 }
611
612 return (int32_t) _float64_to_uint64_helper(a);
613}
614
615
616/*
617 * FIXME: Im not sure what to return if overflow/underflow happens
618 * - now its the biggest or the smallest int
619 */
620uint64_t float64_to_uint64(float64 a)
621{
622 if (isFloat64NaN(a))
623 return UINT64_MAX;
624
625 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
626 if (a.parts.sign)
627 return UINT64_MIN;
628
629 return UINT64_MAX;
630 }
631
632 return _float64_to_uint64_helper(a);
633}
634
635/*
636 * FIXME: Im not sure what to return if overflow/underflow happens
637 * - now its the biggest or the smallest int
638 */
639int64_t float64_to_int64(float64 a)
640{
641 if (isFloat64NaN(a))
642 return INT64_MAX;
643
644 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
645 if (a.parts.sign)
646 return INT64_MIN;
647
648 return INT64_MAX;
649 }
650
651 return _float64_to_uint64_helper(a);
652}
653
654
655/**
656 * Helping procedure for converting float128 to uint64.
657 *
658 * @param a Floating point number in normalized form
659 * (NaNs or Inf are not checked).
660 * @return Converted unsigned integer.
661 */
662static uint64_t _float128_to_uint64_helper(float128 a)
663{
664 uint64_t frac_hi, frac_lo;
665
666 if (a.parts.exp < FLOAT128_BIAS) {
667 /*TODO: rounding*/
668 return 0;
669 }
670
671 frac_hi = a.parts.frac_hi;
672 frac_lo = a.parts.frac_lo;
673
674 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI;
675 /* shift fraction to left so hidden bit will be the most significant bit */
676 lshift128(frac_hi, frac_lo,
677 (128 - FLOAT128_FRACTION_SIZE - 1), &frac_hi, &frac_lo);
678
679 rshift128(frac_hi, frac_lo,
680 (128 - (a.parts.exp - FLOAT128_BIAS) - 1), &frac_hi, &frac_lo);
681 if ((a.parts.sign == 1) && !eq128(frac_hi, frac_lo, 0x0ll, 0x0ll)) {
682 not128(frac_hi, frac_lo, &frac_hi, &frac_lo);
683 add128(frac_hi, frac_lo, 0x0ll, 0x1ll, &frac_hi, &frac_lo);
684 }
685
686 return frac_lo;
687}
688
689/*
690 * FIXME: Im not sure what to return if overflow/underflow happens
691 * - now its the biggest or the smallest int
692 */
693uint32_t float128_to_uint32(float128 a)
694{
695 if (isFloat128NaN(a))
696 return UINT32_MAX;
697
698 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) {
699 if (a.parts.sign)
700 return UINT32_MIN;
701
702 return UINT32_MAX;
703 }
704
705 return (uint32_t) _float128_to_uint64_helper(a);
706}
707
708/*
709 * FIXME: Im not sure what to return if overflow/underflow happens
710 * - now its the biggest or the smallest int
711 */
712int32_t float128_to_int32(float128 a)
713{
714 if (isFloat128NaN(a))
715 return INT32_MAX;
716
717 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) {
718 if (a.parts.sign)
719 return INT32_MIN;
720
721 return INT32_MAX;
722 }
723
724 return (int32_t) _float128_to_uint64_helper(a);
725}
726
727
728/*
729 * FIXME: Im not sure what to return if overflow/underflow happens
730 * - now its the biggest or the smallest int
731 */
732uint64_t float128_to_uint64(float128 a)
733{
734 if (isFloat128NaN(a))
735 return UINT64_MAX;
736
737 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) {
738 if (a.parts.sign)
739 return UINT64_MIN;
740
741 return UINT64_MAX;
742 }
743
744 return _float128_to_uint64_helper(a);
745}
746
747/*
748 * FIXME: Im not sure what to return if overflow/underflow happens
749 * - now its the biggest or the smallest int
750 */
751int64_t float128_to_int64(float128 a)
752{
753 if (isFloat128NaN(a))
754 return INT64_MAX;
755
756 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) {
757 if (a.parts.sign)
758 return INT64_MIN;
759
760 return INT64_MAX;
761 }
762
763 return _float128_to_uint64_helper(a);
764}
765
766
767float32 uint32_to_float32(uint32_t i)
768{
769 int counter;
770 int32_t exp;
771 float32 result;
772
773 result.parts.sign = 0;
774 result.parts.fraction = 0;
775
776 counter = countZeroes32(i);
777
778 exp = FLOAT32_BIAS + 32 - counter - 1;
779
780 if (counter == 32) {
781 result.binary = 0;
782 return result;
783 }
784
785 if (counter > 0) {
786 i <<= counter - 1;
787 } else {
788 i >>= 1;
789 }
790
791 roundFloat32(&exp, &i);
792
793 result.parts.fraction = i >> (32 - FLOAT32_FRACTION_SIZE - 2);
794 result.parts.exp = exp;
795
796 return result;
797}
798
799float32 int32_to_float32(int32_t i)
800{
801 float32 result;
802
803 if (i < 0) {
804 result = uint32_to_float32((uint32_t) (-i));
805 } else {
806 result = uint32_to_float32((uint32_t) i);
807 }
808
809 result.parts.sign = i < 0;
810
811 return result;
812}
813
814
815float32 uint64_to_float32(uint64_t i)
816{
817 int counter;
818 int32_t exp;
819 uint32_t j;
820 float32 result;
821
822 result.parts.sign = 0;
823 result.parts.fraction = 0;
824
825 counter = countZeroes64(i);
826
827 exp = FLOAT32_BIAS + 64 - counter - 1;
828
829 if (counter == 64) {
830 result.binary = 0;
831 return result;
832 }
833
834 /* Shift all to the first 31 bits (31st will be hidden 1) */
835 if (counter > 33) {
836 i <<= counter - 1 - 32;
837 } else {
838 i >>= 1 + 32 - counter;
839 }
840
841 j = (uint32_t) i;
842 roundFloat32(&exp, &j);
843
844 result.parts.fraction = j >> (32 - FLOAT32_FRACTION_SIZE - 2);
845 result.parts.exp = exp;
846 return result;
847}
848
849float32 int64_to_float32(int64_t i)
850{
851 float32 result;
852
853 if (i < 0) {
854 result = uint64_to_float32((uint64_t) (-i));
855 } else {
856 result = uint64_to_float32((uint64_t) i);
857 }
858
859 result.parts.sign = i < 0;
860
861 return result;
862}
863
864float64 uint32_to_float64(uint32_t i)
865{
866 int counter;
867 int32_t exp;
868 float64 result;
869 uint64_t frac;
870
871 result.parts.sign = 0;
872 result.parts.fraction = 0;
873
874 counter = countZeroes32(i);
875
876 exp = FLOAT64_BIAS + 32 - counter - 1;
877
878 if (counter == 32) {
879 result.binary = 0;
880 return result;
881 }
882
883 frac = i;
884 frac <<= counter + 32 - 1;
885
886 roundFloat64(&exp, &frac);
887
888 result.parts.fraction = frac >> (64 - FLOAT64_FRACTION_SIZE - 2);
889 result.parts.exp = exp;
890
891 return result;
892}
893
894float64 int32_to_float64(int32_t i)
895{
896 float64 result;
897
898 if (i < 0) {
899 result = uint32_to_float64((uint32_t) (-i));
900 } else {
901 result = uint32_to_float64((uint32_t) i);
902 }
903
904 result.parts.sign = i < 0;
905
906 return result;
907}
908
909
910float64 uint64_to_float64(uint64_t i)
911{
912 int counter;
913 int32_t exp;
914 float64 result;
915
916 result.parts.sign = 0;
917 result.parts.fraction = 0;
918
919 counter = countZeroes64(i);
920
921 exp = FLOAT64_BIAS + 64 - counter - 1;
922
923 if (counter == 64) {
924 result.binary = 0;
925 return result;
926 }
927
928 if (counter > 0) {
929 i <<= counter - 1;
930 } else {
931 i >>= 1;
932 }
933
934 roundFloat64(&exp, &i);
935
936 result.parts.fraction = i >> (64 - FLOAT64_FRACTION_SIZE - 2);
937 result.parts.exp = exp;
938 return result;
939}
940
941float64 int64_to_float64(int64_t i)
942{
943 float64 result;
944
945 if (i < 0) {
946 result = uint64_to_float64((uint64_t) (-i));
947 } else {
948 result = uint64_to_float64((uint64_t) i);
949 }
950
951 result.parts.sign = i < 0;
952
953 return result;
954}
955
956
957float128 uint32_to_float128(uint32_t i)
958{
959 int counter;
960 int32_t exp;
961 float128 result;
962 uint64_t frac_hi, frac_lo;
963
964 result.parts.sign = 0;
965 result.parts.frac_hi = 0;
966 result.parts.frac_lo = 0;
967
968 counter = countZeroes32(i);
969
970 exp = FLOAT128_BIAS + 32 - counter - 1;
971
972 if (counter == 32) {
973 result.binary.hi = 0;
974 result.binary.lo = 0;
975 return result;
976 }
977
978 frac_hi = 0;
979 frac_lo = i;
980 lshift128(frac_hi, frac_lo, (counter + 96 - 1), &frac_hi, &frac_lo);
981
982 roundFloat128(&exp, &frac_hi, &frac_lo);
983
984 rshift128(frac_hi, frac_lo,
985 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
986 result.parts.frac_hi = frac_hi;
987 result.parts.frac_lo = frac_lo;
988 result.parts.exp = exp;
989
990 return result;
991}
992
993float128 int32_to_float128(int32_t i)
994{
995 float128 result;
996
997 if (i < 0) {
998 result = uint32_to_float128((uint32_t) (-i));
999 } else {
1000 result = uint32_to_float128((uint32_t) i);
1001 }
1002
1003 result.parts.sign = i < 0;
1004
1005 return result;
1006}
1007
1008
1009float128 uint64_to_float128(uint64_t i)
1010{
1011 int counter;
1012 int32_t exp;
1013 float128 result;
1014 uint64_t frac_hi, frac_lo;
1015
1016 result.parts.sign = 0;
1017 result.parts.frac_hi = 0;
1018 result.parts.frac_lo = 0;
1019
1020 counter = countZeroes64(i);
1021
1022 exp = FLOAT128_BIAS + 64 - counter - 1;
1023
1024 if (counter == 64) {
1025 result.binary.hi = 0;
1026 result.binary.lo = 0;
1027 return result;
1028 }
1029
1030 frac_hi = 0;
1031 frac_lo = i;
1032 lshift128(frac_hi, frac_lo, (counter + 64 - 1), &frac_hi, &frac_lo);
1033
1034 roundFloat128(&exp, &frac_hi, &frac_lo);
1035
1036 rshift128(frac_hi, frac_lo,
1037 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
1038 result.parts.frac_hi = frac_hi;
1039 result.parts.frac_lo = frac_lo;
1040 result.parts.exp = exp;
1041
1042 return result;
1043}
1044
1045float128 int64_to_float128(int64_t i)
1046{
1047 float128 result;
1048
1049 if (i < 0) {
1050 result = uint64_to_float128((uint64_t) (-i));
1051 } else {
1052 result = uint64_to_float128((uint64_t) i);
1053 }
1054
1055 result.parts.sign = i < 0;
1056
1057 return result;
1058}
1059
1060/** @}
1061 */
Note: See TracBrowser for help on using the repository browser.