source: mainline/uspace/lib/softfloat/generic/conversion.c@ 087c27f6

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 087c27f6 was 711e7fe5, checked in by Prutkov Alex <prutkov.alex@…>, 14 years ago

Added printf module

  • Property mode set to 100755
File size: 21.8 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Conversion of precision and conversion between integers and floats.
34 */
35
36#include <sftypes.h>
37#include <conversion.h>
38#include <comparison.h>
39#include <common.h>
40
41float64 convertFloat32ToFloat64(float32 a)
42{
43 float64 result;
44 uint64_t frac;
45
46 result.parts.sign = a.parts.sign;
47 result.parts.fraction = a.parts.fraction;
48 result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
49
50 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) {
51 result.parts.exp = FLOAT64_MAX_EXPONENT;
52 /* TODO; check if its correct for SigNaNs*/
53 return result;
54 }
55
56 result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS);
57 if (a.parts.exp == 0) {
58 /* normalize denormalized numbers */
59
60 if (result.parts.fraction == 0) { /* fix zero */
61 result.parts.exp = 0;
62 return result;
63 }
64
65 frac = result.parts.fraction;
66
67 while (!(frac & FLOAT64_HIDDEN_BIT_MASK)) {
68 frac <<= 1;
69 --result.parts.exp;
70 }
71
72 ++result.parts.exp;
73 result.parts.fraction = frac;
74 }
75
76 return result;
77}
78
79float128 convertFloat32ToFloat128(float32 a)
80{
81 float128 result;
82 uint64_t frac_hi, frac_lo;
83 uint64_t tmp_hi, tmp_lo;
84
85 result.parts.sign = a.parts.sign;
86 result.parts.frac_hi = 0;
87 result.parts.frac_lo = a.parts.fraction;
88 lshift128(result.parts.frac_hi, result.parts.frac_lo,
89 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
90 &frac_hi, &frac_lo);
91 result.parts.frac_hi = frac_hi;
92 result.parts.frac_lo = frac_lo;
93
94 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) {
95 result.parts.exp = FLOAT128_MAX_EXPONENT;
96 /* TODO; check if its correct for SigNaNs*/
97 return result;
98 }
99
100 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
101 if (a.parts.exp == 0) {
102 /* normalize denormalized numbers */
103
104 if (eq128(result.parts.frac_hi,
105 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
106 result.parts.exp = 0;
107 return result;
108 }
109
110 frac_hi = result.parts.frac_hi;
111 frac_lo = result.parts.frac_lo;
112
113 and128(frac_hi, frac_lo,
114 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
115 &tmp_hi, &tmp_lo);
116 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
117 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
118 --result.parts.exp;
119 }
120
121 ++result.parts.exp;
122 result.parts.frac_hi = frac_hi;
123 result.parts.frac_lo = frac_lo;
124 }
125
126 return result;
127}
128
129float128 convertFloat64ToFloat128(float64 a)
130{
131 float128 result;
132 uint64_t frac_hi, frac_lo;
133 uint64_t tmp_hi, tmp_lo;
134
135 result.parts.sign = a.parts.sign;
136 result.parts.frac_hi = 0;
137 result.parts.frac_lo = a.parts.fraction;
138 lshift128(result.parts.frac_hi, result.parts.frac_lo,
139 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE),
140 &frac_hi, &frac_lo);
141 result.parts.frac_hi = frac_hi;
142 result.parts.frac_lo = frac_lo;
143
144 if ((isFloat64Infinity(a)) || (isFloat64NaN(a))) {
145 result.parts.exp = FLOAT128_MAX_EXPONENT;
146 /* TODO; check if its correct for SigNaNs*/
147 return result;
148 }
149
150 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT64_BIAS);
151 if (a.parts.exp == 0) {
152 /* normalize denormalized numbers */
153
154 if (eq128(result.parts.frac_hi,
155 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
156 result.parts.exp = 0;
157 return result;
158 }
159
160 frac_hi = result.parts.frac_hi;
161 frac_lo = result.parts.frac_lo;
162
163 and128(frac_hi, frac_lo,
164 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
165 &tmp_hi, &tmp_lo);
166 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
167 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
168 --result.parts.exp;
169 }
170
171 ++result.parts.exp;
172 result.parts.frac_hi = frac_hi;
173 result.parts.frac_lo = frac_lo;
174 }
175
176 return result;
177}
178
179float32 convertFloat64ToFloat32(float64 a)
180{
181 float32 result;
182 int32_t exp;
183 uint64_t frac;
184
185 result.parts.sign = a.parts.sign;
186
187 if (isFloat64NaN(a)) {
188 result.parts.exp = FLOAT32_MAX_EXPONENT;
189
190 if (isFloat64SigNaN(a)) {
191 /* set first bit of fraction nonzero */
192 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
193 return result;
194 }
195
196 /* fraction nonzero but its first bit is zero */
197 result.parts.fraction = 0x1;
198 return result;
199 }
200
201 if (isFloat64Infinity(a)) {
202 result.parts.fraction = 0;
203 result.parts.exp = FLOAT32_MAX_EXPONENT;
204 return result;
205 }
206
207 exp = (int) a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
208
209 if (exp >= FLOAT32_MAX_EXPONENT) {
210 /* FIXME: overflow */
211 result.parts.fraction = 0;
212 result.parts.exp = FLOAT32_MAX_EXPONENT;
213 return result;
214 } else if (exp <= 0) {
215 /* underflow or denormalized */
216
217 result.parts.exp = 0;
218
219 exp *= -1;
220 if (exp > FLOAT32_FRACTION_SIZE) {
221 /* FIXME: underflow */
222 result.parts.fraction = 0;
223 return result;
224 }
225
226 /* denormalized */
227
228 frac = a.parts.fraction;
229 frac |= FLOAT64_HIDDEN_BIT_MASK; /* denormalize and set hidden bit */
230
231 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
232
233 while (exp > 0) {
234 --exp;
235 frac >>= 1;
236 }
237 result.parts.fraction = frac;
238
239 return result;
240 }
241
242 result.parts.exp = exp;
243 result.parts.fraction =
244 a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
245 return result;
246}
247
248float32 convertFloat128ToFloat32(float128 a)
249{
250 float32 result;
251 int32_t exp;
252 uint64_t frac_hi, frac_lo;
253
254 result.parts.sign = a.parts.sign;
255
256 if (isFloat128NaN(a)) {
257 result.parts.exp = FLOAT32_MAX_EXPONENT;
258
259 if (isFloat128SigNaN(a)) {
260 /* set first bit of fraction nonzero */
261 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
262 return result;
263 }
264
265 /* fraction nonzero but its first bit is zero */
266 result.parts.fraction = 0x1;
267 return result;
268 }
269
270 if (isFloat128Infinity(a)) {
271 result.parts.fraction = 0;
272 result.parts.exp = FLOAT32_MAX_EXPONENT;
273 return result;
274 }
275
276 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT32_BIAS;
277
278 if (exp >= FLOAT32_MAX_EXPONENT) {
279 /* FIXME: overflow */
280 result.parts.fraction = 0;
281 result.parts.exp = FLOAT32_MAX_EXPONENT;
282 return result;
283 } else if (exp <= 0) {
284 /* underflow or denormalized */
285
286 result.parts.exp = 0;
287
288 exp *= -1;
289 if (exp > FLOAT32_FRACTION_SIZE) {
290 /* FIXME: underflow */
291 result.parts.fraction = 0;
292 return result;
293 }
294
295 /* denormalized */
296
297 frac_hi = a.parts.frac_hi;
298 frac_lo = a.parts.frac_lo;
299
300 /* denormalize and set hidden bit */
301 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI;
302
303 rshift128(frac_hi, frac_lo,
304 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
305 &frac_hi, &frac_lo);
306
307 while (exp > 0) {
308 --exp;
309 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
310 }
311 result.parts.fraction = frac_lo;
312
313 return result;
314 }
315
316 result.parts.exp = exp;
317 frac_hi = a.parts.frac_hi;
318 frac_lo = a.parts.frac_lo;
319 rshift128(frac_hi, frac_lo,
320 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
321 &frac_hi, &frac_lo);
322 result.parts.fraction = frac_lo;
323 return result;
324}
325
326float64 convertFloat128ToFloat64(float128 a)
327{
328 float64 result;
329 int32_t exp;
330 uint64_t frac_hi, frac_lo;
331
332 result.parts.sign = a.parts.sign;
333
334 if (isFloat128NaN(a)) {
335 result.parts.exp = FLOAT64_MAX_EXPONENT;
336
337 if (isFloat128SigNaN(a)) {
338 /* set first bit of fraction nonzero */
339 result.parts.fraction = FLOAT64_HIDDEN_BIT_MASK >> 1;
340 return result;
341 }
342
343 /* fraction nonzero but its first bit is zero */
344 result.parts.fraction = 0x1;
345 return result;
346 }
347
348 if (isFloat128Infinity(a)) {
349 result.parts.fraction = 0;
350 result.parts.exp = FLOAT64_MAX_EXPONENT;
351 return result;
352 }
353
354 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT64_BIAS;
355
356 if (exp >= FLOAT64_MAX_EXPONENT) {
357 /* FIXME: overflow */
358 result.parts.fraction = 0;
359 result.parts.exp = FLOAT64_MAX_EXPONENT;
360 return result;
361 } else if (exp <= 0) {
362 /* underflow or denormalized */
363
364 result.parts.exp = 0;
365
366 exp *= -1;
367 if (exp > FLOAT64_FRACTION_SIZE) {
368 /* FIXME: underflow */
369 result.parts.fraction = 0;
370 return result;
371 }
372
373 /* denormalized */
374
375 frac_hi = a.parts.frac_hi;
376 frac_lo = a.parts.frac_lo;
377
378 /* denormalize and set hidden bit */
379 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI;
380
381 rshift128(frac_hi, frac_lo,
382 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
383 &frac_hi, &frac_lo);
384
385 while (exp > 0) {
386 --exp;
387 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
388 }
389 result.parts.fraction = frac_lo;
390
391 return result;
392 }
393
394 result.parts.exp = exp;
395 frac_hi = a.parts.frac_hi;
396 frac_lo = a.parts.frac_lo;
397 rshift128(frac_hi, frac_lo,
398 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
399 &frac_hi, &frac_lo);
400 result.parts.fraction = frac_lo;
401 return result;
402}
403
404
405/**
406 * Helping procedure for converting float32 to uint32.
407 *
408 * @param a Floating point number in normalized form
409 * (NaNs or Inf are not checked).
410 * @return Converted unsigned integer.
411 */
412static uint32_t _float32_to_uint32_helper(float32 a)
413{
414 uint32_t frac;
415
416 if (a.parts.exp < FLOAT32_BIAS) {
417 /* TODO: rounding */
418 return 0;
419 }
420
421 frac = a.parts.fraction;
422
423 frac |= FLOAT32_HIDDEN_BIT_MASK;
424 /* shift fraction to left so hidden bit will be the most significant bit */
425 frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
426
427 frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
428 if ((a.parts.sign == 1) && (frac != 0)) {
429 frac = ~frac;
430 ++frac;
431 }
432
433 return frac;
434}
435
436/*
437 * FIXME: Im not sure what to return if overflow/underflow happens
438 * - now its the biggest or the smallest int
439 */
440uint32_t float32_to_uint32(float32 a)
441{
442 if (isFloat32NaN(a))
443 return UINT32_MAX;
444
445 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
446 if (a.parts.sign)
447 return UINT32_MIN;
448
449 return UINT32_MAX;
450 }
451
452 return _float32_to_uint32_helper(a);
453}
454
455/*
456 * FIXME: Im not sure what to return if overflow/underflow happens
457 * - now its the biggest or the smallest int
458 */
459int32_t float32_to_int32(float32 a)
460{
461 if (isFloat32NaN(a))
462 return INT32_MAX;
463
464 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
465 if (a.parts.sign)
466 return INT32_MIN;
467
468 return INT32_MAX;
469 }
470
471 return _float32_to_uint32_helper(a);
472}
473
474
475/**
476 * Helping procedure for converting float32 to uint64.
477 *
478 * @param a Floating point number in normalized form
479 * (NaNs or Inf are not checked).
480 * @return Converted unsigned integer.
481 */
482static uint64_t _float32_to_uint64_helper(float32 a)
483{
484 uint64_t frac;
485
486 if (a.parts.exp < FLOAT32_BIAS) {
487 /*TODO: rounding*/
488 return 0;
489 }
490
491 frac = a.parts.fraction;
492
493 frac |= FLOAT32_HIDDEN_BIT_MASK;
494 /* shift fraction to left so hidden bit will be the most significant bit */
495 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
496
497 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
498 if ((a.parts.sign == 1) && (frac != 0)) {
499 frac = ~frac;
500 ++frac;
501 }
502
503 return frac;
504}
505
506/*
507 * FIXME: Im not sure what to return if overflow/underflow happens
508 * - now its the biggest or the smallest int
509 */
510uint64_t float32_to_uint64(float32 a)
511{
512 if (isFloat32NaN(a))
513 return UINT64_MAX;
514
515
516 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
517 if (a.parts.sign)
518 return UINT64_MIN;
519
520 return UINT64_MAX;
521 }
522
523 return _float32_to_uint64_helper(a);
524}
525
526/*
527 * FIXME: Im not sure what to return if overflow/underflow happens
528 * - now its the biggest or the smallest int
529 */
530int64_t float32_to_int64(float32 a)
531{
532 if (isFloat32NaN(a))
533 return INT64_MAX;
534
535 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
536 if (a.parts.sign)
537 return INT64_MIN;
538
539 return INT64_MAX;
540 }
541
542 return _float32_to_uint64_helper(a);
543}
544
545
546/**
547 * Helping procedure for converting float64 to uint64.
548 *
549 * @param a Floating point number in normalized form
550 * (NaNs or Inf are not checked).
551 * @return Converted unsigned integer.
552 */
553static uint64_t _float64_to_uint64_helper(float64 a)
554{
555 uint64_t frac;
556
557 if (a.parts.exp < FLOAT64_BIAS) {
558 /*TODO: rounding*/
559 return 0;
560 }
561
562 frac = a.parts.fraction;
563
564 frac |= FLOAT64_HIDDEN_BIT_MASK;
565 /* shift fraction to left so hidden bit will be the most significant bit */
566 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
567
568 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
569 if ((a.parts.sign == 1) && (frac != 0)) {
570 frac = ~frac;
571 ++frac;
572 }
573
574 return frac;
575}
576
577/*
578 * FIXME: Im not sure what to return if overflow/underflow happens
579 * - now its the biggest or the smallest int
580 */
581uint32_t float64_to_uint32(float64 a)
582{
583 if (isFloat64NaN(a))
584 return UINT32_MAX;
585
586 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
587 if (a.parts.sign)
588 return UINT32_MIN;
589
590 return UINT32_MAX;
591 }
592
593 return (uint32_t) _float64_to_uint64_helper(a);
594}
595
596/*
597 * FIXME: Im not sure what to return if overflow/underflow happens
598 * - now its the biggest or the smallest int
599 */
600int32_t float64_to_int32(float64 a)
601{
602 if (isFloat64NaN(a))
603 return INT32_MAX;
604
605 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
606 if (a.parts.sign)
607 return INT32_MIN;
608
609 return INT32_MAX;
610 }
611
612 return (int32_t) _float64_to_uint64_helper(a);
613}
614
615
616/*
617 * FIXME: Im not sure what to return if overflow/underflow happens
618 * - now its the biggest or the smallest int
619 */
620uint64_t float64_to_uint64(float64 a)
621{
622 if (isFloat64NaN(a))
623 return UINT64_MAX;
624
625 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
626 if (a.parts.sign)
627 return UINT64_MIN;
628
629 return UINT64_MAX;
630 }
631
632 return _float64_to_uint64_helper(a);
633}
634
635/*
636 * FIXME: Im not sure what to return if overflow/underflow happens
637 * - now its the biggest or the smallest int
638 */
639int64_t float64_to_int64(float64 a)
640{
641 if (isFloat64NaN(a))
642 return INT64_MAX;
643
644 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
645 if (a.parts.sign)
646 return INT64_MIN;
647
648 return INT64_MAX;
649 }
650
651 return _float64_to_uint64_helper(a);
652}
653
654
655/**
656 * Helping procedure for converting float128 to uint64.
657 *
658 * @param a Floating point number in normalized form
659 * (NaNs or Inf are not checked).
660 * @return Converted unsigned integer.
661 */
662static uint64_t _float128_to_uint64_helper(float128 a)
663{
664 uint64_t frac_hi, frac_lo;
665
666 if (a.parts.exp < FLOAT128_BIAS) {
667 /*TODO: rounding*/
668 return 0;
669 }
670
671 frac_hi = a.parts.frac_hi;
672 frac_lo = a.parts.frac_lo;
673
674 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI;
675 /* shift fraction to left so hidden bit will be the most significant bit */
676 lshift128(frac_hi, frac_lo,
677 (128 - FLOAT128_FRACTION_SIZE - 1), &frac_hi, &frac_lo);
678
679 rshift128(frac_hi, frac_lo,
680 (128 - (a.parts.exp - FLOAT128_BIAS) - 1), &frac_hi, &frac_lo);
681 if ((a.parts.sign == 1) && !eq128(frac_hi, frac_lo, 0x0ll, 0x0ll)) {
682 not128(frac_hi, frac_lo, &frac_hi, &frac_lo);
683 add128(frac_hi, frac_lo, 0x0ll, 0x1ll, &frac_hi, &frac_lo);
684 }
685
686 return frac_lo;
687}
688
689/*
690 * FIXME: Im not sure what to return if overflow/underflow happens
691 * - now its the biggest or the smallest int
692 */
693uint32_t float128_to_uint32(float128 a)
694{
695 if (isFloat128NaN(a))
696 return UINT32_MAX;
697
698 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) {
699 if (a.parts.sign)
700 return UINT32_MIN;
701
702 return UINT32_MAX;
703 }
704
705 return (uint32_t) _float128_to_uint64_helper(a);
706}
707
708/*
709 * FIXME: Im not sure what to return if overflow/underflow happens
710 * - now its the biggest or the smallest int
711 */
712int32_t float128_to_int32(float128 a)
713{
714 if (isFloat128NaN(a))
715 return INT32_MAX;
716
717 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) {
718 if (a.parts.sign)
719 return INT32_MIN;
720
721 return INT32_MAX;
722 }
723
724 return (int32_t) _float128_to_uint64_helper(a);
725}
726
727
728/*
729 * FIXME: Im not sure what to return if overflow/underflow happens
730 * - now its the biggest or the smallest int
731 */
732uint64_t float128_to_uint64(float128 a)
733{
734 if (isFloat128NaN(a))
735 return UINT64_MAX;
736
737 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) {
738 if (a.parts.sign)
739 return UINT64_MIN;
740
741 return UINT64_MAX;
742 }
743
744 return _float128_to_uint64_helper(a);
745}
746
747/*
748 * FIXME: Im not sure what to return if overflow/underflow happens
749 * - now its the biggest or the smallest int
750 */
751int64_t float128_to_int64(float128 a)
752{
753 if (isFloat128NaN(a))
754 return INT64_MAX;
755
756 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) {
757 if (a.parts.sign)
758 return INT64_MIN;
759
760 return INT64_MAX;
761 }
762
763 return _float128_to_uint64_helper(a);
764}
765
766
767float32 uint32_to_float32(uint32_t i)
768{
769 int counter;
770 int32_t exp;
771 float32 result;
772
773 result.parts.sign = 0;
774 result.parts.fraction = 0;
775
776 counter = countZeroes32(i);
777
778 exp = FLOAT32_BIAS + 32 - counter - 1;
779
780 if (counter == 32) {
781 result.binary = 0;
782 return result;
783 }
784
785 if (counter > 0) {
786 i <<= counter - 1;
787 } else {
788 i >>= 1;
789 }
790
791 roundFloat32(&exp, &i);
792
793 result.parts.fraction = i >> (32 - FLOAT32_FRACTION_SIZE - 2);
794 result.parts.exp = exp;
795
796 return result;
797}
798
799float32 int32_to_float32(int32_t i)
800{
801 float32 result;
802
803 if (i < 0) {
804 result = uint32_to_float32((uint32_t) (-i));
805 } else {
806 result = uint32_to_float32((uint32_t) i);
807 }
808
809 result.parts.sign = i < 0;
810
811 return result;
812}
813
814
815float32 uint64_to_float32(uint64_t i)
816{
817 int counter;
818 int32_t exp;
819 uint32_t j;
820 float32 result;
821
822 result.parts.sign = 0;
823 result.parts.fraction = 0;
824
825 counter = countZeroes64(i);
826
827 exp = FLOAT32_BIAS + 64 - counter - 1;
828
829 if (counter == 64) {
830 result.binary = 0;
831 return result;
832 }
833
834 /* Shift all to the first 31 bits (31st will be hidden 1) */
835 if (counter > 33) {
836 i <<= counter - 1 - 32;
837 } else {
838 i >>= 1 + 32 - counter;
839 }
840
841 j = (uint32_t) i;
842 roundFloat32(&exp, &j);
843
844 result.parts.fraction = j >> (32 - FLOAT32_FRACTION_SIZE - 2);
845 result.parts.exp = exp;
846 return result;
847}
848
849float32 int64_to_float32(int64_t i)
850{
851 float32 result;
852
853 if (i < 0) {
854 result = uint64_to_float32((uint64_t) (-i));
855 } else {
856 result = uint64_to_float32((uint64_t) i);
857 }
858
859 result.parts.sign = i < 0;
860
861 return result;
862}
863
864float64 uint32_to_float64(uint32_t i)
865{
866 int counter;
867 int32_t exp;
868 float64 result;
869 uint64_t frac;
870
871 result.parts.sign = 0;
872 result.parts.fraction = 0;
873
874 counter = countZeroes32(i);
875
876 exp = FLOAT64_BIAS + 32 - counter - 1;
877
878 if (counter == 32) {
879 result.binary = 0;
880 return result;
881 }
882
883 frac = i;
884 frac <<= counter + 32 - 1;
885
886 roundFloat64(&exp, &frac);
887
888 result.parts.fraction = frac >> (64 - FLOAT64_FRACTION_SIZE - 2);
889 result.parts.exp = exp;
890
891 return result;
892}
893
894float64 int32_to_float64(int32_t i)
895{
896 float64 result;
897
898 if (i < 0) {
899 result = uint32_to_float64((uint32_t) (-i));
900 } else {
901 result = uint32_to_float64((uint32_t) i);
902 }
903
904 result.parts.sign = i < 0;
905
906 return result;
907}
908
909
910float64 uint64_to_float64(uint64_t i)
911{
912 int counter;
913 int32_t exp;
914 float64 result;
915
916 result.parts.sign = 0;
917 result.parts.fraction = 0;
918
919 counter = countZeroes64(i);
920
921 exp = FLOAT64_BIAS + 64 - counter - 1;
922
923 if (counter == 64) {
924 result.binary = 0;
925 return result;
926 }
927
928 if (counter > 0) {
929 i <<= counter - 1;
930 } else {
931 i >>= 1;
932 }
933
934 roundFloat64(&exp, &i);
935
936 result.parts.fraction = i >> (64 - FLOAT64_FRACTION_SIZE - 2);
937 result.parts.exp = exp;
938 return result;
939}
940
941float64 int64_to_float64(int64_t i)
942{
943 float64 result;
944
945 if (i < 0) {
946 result = uint64_to_float64((uint64_t) (-i));
947 } else {
948 result = uint64_to_float64((uint64_t) i);
949 }
950
951 result.parts.sign = i < 0;
952
953 return result;
954}
955
956
957float128 uint32_to_float128(uint32_t i)
958{
959 int counter;
960 int32_t exp;
961 float128 result;
962 uint64_t frac_hi, frac_lo;
963
964 result.parts.sign = 0;
965 result.parts.frac_hi = 0;
966 result.parts.frac_lo = 0;
967
968 counter = countZeroes32(i);
969
970 exp = FLOAT128_BIAS + 32 - counter - 1;
971
972 if (counter == 32) {
973 result.binary.hi = 0;
974 result.binary.lo = 0;
975 return result;
976 }
977
978 frac_hi = 0;
979 frac_lo = i;
980 lshift128(frac_hi, frac_lo, (counter + 96 - 1), &frac_hi, &frac_lo);
981
982 roundFloat128(&exp, &frac_hi, &frac_lo);
983
984 rshift128(frac_hi, frac_lo,
985 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
986 result.parts.frac_hi = frac_hi;
987 result.parts.frac_lo = frac_lo;
988 result.parts.exp = exp;
989
990 return result;
991}
992
993float128 int32_to_float128(int32_t i)
994{
995 float128 result;
996
997 if (i < 0) {
998 result = uint32_to_float128((uint32_t) (-i));
999 } else {
1000 result = uint32_to_float128((uint32_t) i);
1001 }
1002
1003 result.parts.sign = i < 0;
1004
1005 return result;
1006}
1007
1008
1009float128 uint64_to_float128(uint64_t i)
1010{
1011 int counter;
1012 int32_t exp;
1013 float128 result;
1014 uint64_t frac_hi, frac_lo;
1015
1016 result.parts.sign = 0;
1017 result.parts.frac_hi = 0;
1018 result.parts.frac_lo = 0;
1019
1020 counter = countZeroes64(i);
1021
1022 exp = FLOAT128_BIAS + 64 - counter - 1;
1023
1024 if (counter == 64) {
1025 result.binary.hi = 0;
1026 result.binary.lo = 0;
1027 return result;
1028 }
1029
1030 frac_hi = 0;
1031 frac_lo = i;
1032 lshift128(frac_hi, frac_lo, (counter + 64 - 1), &frac_hi, &frac_lo);
1033
1034 roundFloat128(&exp, &frac_hi, &frac_lo);
1035
1036 rshift128(frac_hi, frac_lo,
1037 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
1038 result.parts.frac_hi = frac_hi;
1039 result.parts.frac_lo = frac_lo;
1040 result.parts.exp = exp;
1041
1042 return result;
1043}
1044
1045float128 int64_to_float128(int64_t i)
1046{
1047 float128 result;
1048
1049 if (i < 0) {
1050 result = uint64_to_float128((uint64_t) (-i));
1051 } else {
1052 result = uint64_to_float128((uint64_t) i);
1053 }
1054
1055 result.parts.sign = i < 0;
1056
1057 return result;
1058}
1059
1060/** @}
1061 */
Note: See TracBrowser for help on using the repository browser.