source: mainline/uspace/lib/softfloat/generic/common.c @ 41455a22

Last change on this file since 41455a22 was 88d5c1e, checked in by Martin Decky <martin@…>, 13 years ago

softfloat redesign

  • avoid hardwired type sizes; actual sizes are determined at compile time
  • add basic support for x87 extended-precision data types (stored as a 96-bit long double)
  • a lot of coding style changes (removal of CamelCase, etc.)
/*
 * Copyright (c) 2005 Josef Cejka
 * Copyright (c) 2011 Petr Koupy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup softfloat
 * @{
 */
/** @file Common helper operations.
 */

#include <sftypes.h>
#include <common.h>

/* Table for fast leading zeroes counting. */
char zeroTable[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

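/*
 * Example: zeroTable[0x1f] == 3, because 0x1f == 0b00011111 has three
 * leading zero bits in an 8-bit value. The table trades 256 bytes of
 * data for a constant-time lookup in count_zeroes8() below.
 */
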
/**
 * Take a fraction shifted 10 bits to the left, round it, normalize it
 * and detect exceptions.
 *
 * @param cexp Exponent with bias.
 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
 * @param sign Resulting sign.
 * @return Finished double-precision float.
 */
float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
{
	float64 result;

	result.parts.sign = sign;

	/* find the first nonzero digit, shift the result and detect a possible underflow */
	while ((cexp > 0) && (cfrac) &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
		cexp--;
		cfrac <<= 1;
		/* TODO: fix underflow */
	}

	if ((cexp < 0) || (cexp == 0 &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
			result.parts.fraction = 0;
			return result;
		}

		while (cexp < 0) {
			cexp++;
			cfrac >>= 1;
		}

		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));

		if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
			result.parts.fraction =
			    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
			return result;
		}
	} else {
		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
	}

	++cexp;

	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
		++cexp;
		cfrac >>= 1;
	}

	/* check overflow */
	if (cexp >= FLOAT64_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT64_MAX_EXPONENT;
		result.parts.fraction = 0;
		return result;
	}

	result.parts.exp = (uint32_t) cexp;

	result.parts.fraction =
	    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));

	return result;
}

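/*
 * A note on the shift amounts above (FLOAT64_FRACTION_SIZE == 52): with
 * the fraction shifted 10 bits to the left, the hidden bit sits at
 * bit 62, so FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)
 * tests bit 63 (a carry past the hidden bit), 0x1 << 9 is the rounding
 * increment for the first bit below the fraction, and the final right
 * shift by 10 moves the fraction back into its field.
 */
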
/**
 * Take a fraction, round it, normalize it and detect exceptions.
 *
 * @param cexp Exponent with bias.
 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
 *     with added hidden bit.
 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
 *     with added hidden bit.
 * @param sign Resulting sign.
 * @param shift_out Bits right-shifted out from the fraction by the caller.
 * @return Finished quadruple-precision float.
 */
float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
    char sign, uint64_t shift_out)
{
	float128 result;
	uint64_t tmp_hi, tmp_lo;

	result.parts.sign = sign;

	/* find the first nonzero digit, shift the result and detect a possible underflow */
	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    1, &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
	    (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
		cexp--;
		lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
		/* TODO: fix underflow */

		lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    1, &tmp_hi, &tmp_lo);
		and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	}

	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    1, &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	if ((cexp < 0) || (cexp == 0 &&
	    (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
			result.parts.frac_hi = 0x0ll;
			result.parts.frac_lo = 0x0ll;
			return result;
		}

		while (cexp < 0) {
			cexp++;
			rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
		}
		/* round up if the most significant bit shifted out was set */
		if (shift_out & (0x1ull << 63)) {
			add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
		}

		lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    1, &tmp_hi, &tmp_lo);
		and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
		if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
			not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
			    &tmp_hi, &tmp_lo);
			and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
			result.parts.frac_hi = tmp_hi;
			result.parts.frac_lo = tmp_lo;
			return result;
		}
	} else {
		/* round up if the most significant bit shifted out was set */
		if (shift_out & (0x1ull << 63)) {
			add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
		}
	}

	++cexp;

	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    1, &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
		++cexp;
		rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
	}

	/* check overflow */
	if (cexp >= FLOAT128_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT128_MAX_EXPONENT;
		result.parts.frac_hi = 0x0ll;
		result.parts.frac_lo = 0x0ll;
		return result;
	}

	result.parts.exp = (uint32_t) cexp;

	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	result.parts.frac_hi = tmp_hi;
	result.parts.frac_lo = tmp_lo;

	return result;
}

/**
 * Counts leading zeroes in a byte.
 *
 * @param i Byte for which to count leading zeroes.
 * @return Number of detected leading zeroes.
 */
int count_zeroes8(uint8_t i)
{
	return zeroTable[i];
}

/**
 * Counts leading zeroes in a 32-bit unsigned integer.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes.
 */
int count_zeroes32(uint32_t i)
{
	int j;
	for (j = 0; j < 32; j += 8) {
		if (i & (0xFFu << (24 - j))) {
			return (j + count_zeroes8(i >> (24 - j)));
		}
	}

	return 32;
}

/**
 * Counts leading zeroes in a 64-bit unsigned integer.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes.
 */
int count_zeroes64(uint64_t i)
{
	int j;
	for (j = 0; j < 64; j += 8) {
		if (i & (0xFFull << (56 - j))) {
			return (j + count_zeroes8(i >> (56 - j)));
		}
	}

	return 64;
}

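/*
 * Usage sketch (illustrative only, not lifted from a specific caller):
 * to normalize a non-zero 64-bit fraction so that its most significant
 * set bit lands in the hidden-bit slot at bit 62, one would shift by
 * the leading-zero count minus one:
 *
 *	int shift = count_zeroes64(cfrac) - 1;
 *	if (shift > 0)
 *		cfrac <<= shift;
 */
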
/**
 * Round and normalize a number expressed by an exponent and a fraction
 * with the hidden bit at bit 30.
 *
 * @param exp Exponent part.
 * @param fraction Fraction with hidden bit shifted to bit 30.
 */
void round_float32(int32_t *exp, uint32_t *fraction)
{
	/* rounding - if the first bit after the fraction is set then round up */
	(*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));

	if ((*fraction) &
	    (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
		/* rounding overflow */
		++(*exp);
		(*fraction) >>= 1;
	}

	if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
		/* overflow - set infinity as result */
		(*exp) = FLOAT32_MAX_EXPONENT;
		(*fraction) = 0;
	}
}

/**
 * Round and normalize a number expressed by an exponent and a fraction
 * with the hidden bit at bit 62.
 *
 * @param exp Exponent part.
 * @param fraction Fraction with hidden bit shifted to bit 62.
 */
void round_float64(int32_t *exp, uint64_t *fraction)
{
	/* rounding - if the first bit after the fraction is set then round up */
	(*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));

	if ((*fraction) &
	    (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
		/* rounding overflow */
		++(*exp);
		(*fraction) >>= 1;
	}

	if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
		/* overflow - set infinity as result */
		(*exp) = FLOAT64_MAX_EXPONENT;
		(*fraction) = 0;
	}
}

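/*
 * Worked example for round_float32 (FLOAT32_FRACTION_SIZE == 23): the
 * hidden bit sits at bit 30 and the fraction occupies bits 29..7, so
 * 0x1 << 6 adds one to the first dropped bit; if the carry propagates
 * all the way to bit 31 (the hidden-bit mask shifted by 8), the
 * fraction has overflowed and the exponent is bumped. round_float64
 * and round_float128 follow the same pattern at bits 62 and 126.
 */
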
/**
 * Round and normalize a number expressed by an exponent and a fraction
 * with the hidden bit at bit 126.
 *
 * @param exp Exponent part.
 * @param frac_hi High part of the fraction with hidden bit shifted to bit 126.
 * @param frac_lo Low part of the fraction with hidden bit shifted to bit 126.
 */
void round_float128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
{
	uint64_t tmp_hi, tmp_lo;

	/* rounding - if the first bit after the fraction is set then round up */
	lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
	add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);

	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    (128 - FLOAT128_FRACTION_SIZE - 1), &tmp_hi, &tmp_lo);
	and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
		/* rounding overflow */
		++(*exp);
		rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
	}

	if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
		/* overflow - set infinity as result */
		(*exp) = FLOAT128_MAX_EXPONENT;
		(*frac_hi) = 0;
		(*frac_lo) = 0;
	}
}

/**
 * Logical shift left on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void lshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	if (shift <= 0) {
		/* do nothing */
	} else if (shift >= 128) {
		a_hi = 0;
		a_lo = 0;
	} else if (shift >= 64) {
		a_hi = a_lo << (shift - 64);
		a_lo = 0;
	} else {
		a_hi <<= shift;
		a_hi |= a_lo >> (64 - shift);
		a_lo <<= shift;
	}

	*r_hi = a_hi;
	*r_lo = a_lo;
}

/**
 * Logical shift right on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void rshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	if (shift <= 0) {
		/* do nothing */
	} else if (shift >= 128) {
		a_hi = 0;
		a_lo = 0;
	} else if (shift >= 64) {
		a_lo = a_hi >> (shift - 64);
		a_hi = 0;
	} else {
		a_lo >>= shift;
		a_lo |= a_hi << (64 - shift);
		a_hi >>= shift;
	}

	*r_hi = a_hi;
	*r_lo = a_lo;
}

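/*
 * The case split above is not just cosmetic: shifting a uint64_t by 64
 * or more bits is undefined behavior in C, so whole-word moves are
 * handled separately. For example:
 *
 *	lshift128(0x0ll, 0x1ll, 64, &hi, &lo);	// hi == 0x1, lo == 0x0
 */
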
/**
 * Bitwise AND on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void and128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi & b_hi;
	*r_lo = a_lo & b_lo;
}

/**
 * Bitwise inclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void or128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi | b_hi;
	*r_lo = a_lo | b_lo;
}

/**
 * Bitwise exclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void xor128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi ^ b_hi;
	*r_lo = a_lo ^ b_lo;
}

/**
 * Bitwise NOT on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void not128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = ~a_hi;
	*r_lo = ~a_lo;
}

/**
 * Equality comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if operands are equal, 0 otherwise.
 */
int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi == b_hi) && (a_lo == b_lo);
}

/**
 * Lower-or-equal comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than or equal to b, 0 otherwise.
 */
int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
}

/**
 * Lower-than comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than b, 0 otherwise.
 */
int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
}

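/*
 * A recurring idiom elsewhere in this file is lt128(0x0ll, 0x0ll, hi, lo),
 * which simply tests whether the 128-bit value (hi:lo) is non-zero.
 */
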
/**
 * Addition of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void add128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t low = a_lo + b_lo;
	*r_lo = low;
	/* detect overflow of the low words to add a carry */
	*r_hi = a_hi + b_hi + (low < a_lo);
}

/**
 * Subtraction of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void sub128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_lo = a_lo - b_lo;
	/* detect underflow of the low words to subtract a borrow */
	*r_hi = a_hi - b_hi - (a_lo < b_lo);
}

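/*
 * Both helpers rely on unsigned wrap-around: the low-word addition
 * overflowed iff the truncated sum is smaller than either addend, and
 * the low-word subtraction borrowed iff a_lo < b_lo. For example:
 *
 *	add128(0x0ll, 0xFFFFFFFFFFFFFFFFull, 0x0ll, 0x1ll, &hi, &lo);
 *	// hi == 0x1, lo == 0x0
 *	sub128(0x1ll, 0x0ll, 0x0ll, 0x1ll, &hi, &lo);
 *	// hi == 0x0, lo == 0xFFFFFFFFFFFFFFFFull
 */
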
/**
 * Multiplication of two 64-bit unsigned integers.
 *
 * @param a First input operand.
 * @param b Second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t low, high, middle1, middle2;
	uint32_t alow, blow;

	alow = a & 0xFFFFFFFF;
	blow = b & 0xFFFFFFFF;

	a >>= 32;
	b >>= 32;

	low = ((uint64_t) alow) * blow;
	middle1 = a * blow;
	middle2 = alow * b;
	high = a * b;

	middle1 += middle2;
	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
	middle1 <<= 32;
	low += middle1;
	high += (low < middle1);
	*r_lo = low;
	*r_hi = high;
}

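/*
 * The schoolbook identity behind mul64(): with a = ah * 2^32 + al and
 * b = bh * 2^32 + bl,
 *
 *	a * b = ah*bh * 2^64 + (ah*bl + al*bh) * 2^32 + al*bl
 *
 * The two middle products are summed first, and any carry out of that
 * sum (and out of the final low-word addition) is folded into the
 * high word.
 */
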
/**
 * Multiplication of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hihi Address to store first (highest) quarter of the result.
 * @param r_hilo Address to store second quarter of the result.
 * @param r_lohi Address to store third quarter of the result.
 * @param r_lolo Address to store fourth (lowest) quarter of the result.
 */
void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
{
	uint64_t hihi, hilo, lohi, lolo;
	uint64_t tmp1, tmp2;

	mul64(a_lo, b_lo, &lohi, &lolo);
	mul64(a_lo, b_hi, &hilo, &tmp2);
	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
	mul64(a_hi, b_hi, &hihi, &tmp1);
	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
	mul64(a_hi, b_lo, &tmp1, &tmp2);
	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);

	*r_hihi = hihi;
	*r_hilo = hilo;
	*r_lohi = lohi;
	*r_lolo = lolo;
}

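/*
 * The 256-bit product is assembled from four 64x64-bit partial
 * products: a_lo*b_lo fills the low 128 bits, a_hi*b_hi the high
 * 128 bits, and the cross terms a_lo*b_hi and a_hi*b_lo are each
 * weighted by 2^64; the add128() calls propagate the carries between
 * adjacent quarters of the result.
 */
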
/**
 * Estimate the quotient of a 128-bit unsigned dividend and a 64-bit
 * unsigned divisor.
 *
 * @param a_hi High part of the dividend.
 * @param a_lo Low part of the dividend.
 * @param b Divisor.
 * @return Quotient approximation.
 */
uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
{
	uint64_t b_hi, b_lo;
	uint64_t rem_hi, rem_lo;
	uint64_t tmp_hi, tmp_lo;
	uint64_t result;

	if (b <= a_hi) {
		return 0xFFFFFFFFFFFFFFFFull;
	}

	b_hi = b >> 32;
	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
	mul64(b, result, &tmp_hi, &tmp_lo);
	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);

	while ((int64_t) rem_hi < 0) {
		result -= 0x1ll << 32;
		b_lo = b << 32;
		add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
	}

	rem_hi = (rem_hi << 32) | (rem_lo >> 32);
	if ((b_hi << 32) <= rem_hi) {
		result |= 0xFFFFFFFF;
	} else {
		result |= rem_hi / b_hi;
	}

	return result;
}

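/*
 * The quotient is built 32 bits at a time from the high halves of the
 * operands: the upper half is corrected downward while the partial
 * remainder stays negative, but the lower half is left uncorrected, so
 * the returned value may still sit slightly above the true quotient;
 * callers are expected to tolerate (or fix up) that overestimate.
 */
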
/** @}
 */