source: mainline/uspace/lib/softfloat/common.c@ 6283bf15

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 6283bf15 was 1c9ae08, checked in by Jiří Zárevúcky <jiri.zarevucky@…>, 7 years ago

Fix undefined behavior.

  • Property mode set to 100644
File size: 19.5 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Common helper operations.
34 */
35
36#include "common.h"
37
38/* Table for fast leading zeroes counting. */
39char zeroTable[256] = {
40 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
41 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
42 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
43 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
56};
57
58/**
59 * Take fraction shifted by 10 bits to the left, round it, normalize it
60 * and detect exceptions
61 *
62 * @param cexp Exponent with bias.
63 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
64 * @param sign Resulting sign.
65 * @return Finished double-precision float.
66 */
67float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
68{
69 float64 result;
70
71 result.parts.sign = sign;
72
73 /* find first nonzero digit and shift result and detect possibly underflow */
74 while ((cexp > 0) && (cfrac) &&
75 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
76 cexp--;
77 cfrac <<= 1;
78 /* TODO: fix underflow */
79 }
80
81 if ((cexp < 0) || (cexp == 0 &&
82 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
83 /* FIXME: underflow */
84 result.parts.exp = 0;
85 if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
86 result.parts.fraction = 0;
87 return result;
88 }
89
90 while (cexp < 0) {
91 cexp++;
92 cfrac >>= 1;
93 }
94
95 cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
96
97 if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
98 result.parts.fraction =
99 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
100 return result;
101 }
102 } else {
103 cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
104 }
105
106 ++cexp;
107
108 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
109 ++cexp;
110 cfrac >>= 1;
111 }
112
113 /* check overflow */
114 if (cexp >= FLOAT64_MAX_EXPONENT) {
115 /* FIXME: overflow, return infinity */
116 result.parts.exp = FLOAT64_MAX_EXPONENT;
117 result.parts.fraction = 0;
118 return result;
119 }
120
121 result.parts.exp = (uint32_t) cexp;
122
123 result.parts.fraction =
124 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
125
126 return result;
127}
128
129/**
130 * Take fraction, round it, normalize it and detect exceptions
131 *
132 * @param cexp Exponent with bias.
133 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
134 * with added hidden bit.
135 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
136 * with added hidden bit.
137 * @param sign Resulting sign.
138 * @param shift_out Bits right-shifted out from fraction by the caller.
139 * @return Finished quadruple-precision float.
140 */
141float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
142 char sign, uint64_t shift_out)
143{
144 float128 result;
145 uint64_t tmp_hi, tmp_lo;
146
147 result.parts.sign = sign;
148
149 /* find first nonzero digit and shift result and detect possibly underflow */
150 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
151 1, &tmp_hi, &tmp_lo);
152 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
153 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
154 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
155 cexp--;
156 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
157 /* TODO: fix underflow */
158
159 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
160 1, &tmp_hi, &tmp_lo);
161 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
162 }
163
164 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
165 1, &tmp_hi, &tmp_lo);
166 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
167 if ((cexp < 0) || (cexp == 0 &&
168 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
169 /* FIXME: underflow */
170 result.parts.exp = 0;
171 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
172 result.parts.frac_hi = 0x0ll;
173 result.parts.frac_lo = 0x0ll;
174 return result;
175 }
176
177 while (cexp < 0) {
178 cexp++;
179 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
180 }
181
182 if (shift_out & (0x1ull < 64)) {
183 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
184 }
185
186 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
187 1, &tmp_hi, &tmp_lo);
188 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
189 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
190 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
191 &tmp_hi, &tmp_lo);
192 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
193 result.parts.frac_hi = tmp_hi;
194 result.parts.frac_lo = tmp_lo;
195 return result;
196 }
197 } else {
198 if (shift_out & (0x1ull < 64)) {
199 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
200 }
201 }
202
203 ++cexp;
204
205 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
206 1, &tmp_hi, &tmp_lo);
207 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
208 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
209 ++cexp;
210 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
211 }
212
213 /* check overflow */
214 if (cexp >= FLOAT128_MAX_EXPONENT) {
215 /* FIXME: overflow, return infinity */
216 result.parts.exp = FLOAT128_MAX_EXPONENT;
217 result.parts.frac_hi = 0x0ll;
218 result.parts.frac_lo = 0x0ll;
219 return result;
220 }
221
222 result.parts.exp = (uint32_t) cexp;
223
224 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
225 &tmp_hi, &tmp_lo);
226 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
227 result.parts.frac_hi = tmp_hi;
228 result.parts.frac_lo = tmp_lo;
229
230 return result;
231}
232
233/**
234 * Counts leading zeroes in byte.
235 *
236 * @param i Byte for which to count leading zeroes.
237 * @return Number of detected leading zeroes.
238 */
239int count_zeroes8(uint8_t i)
240{
241 return zeroTable[i];
242}
243
244/**
245 * Counts leading zeroes in 32bit unsigned integer.
246 *
247 * @param i Integer for which to count leading zeroes.
248 * @return Number of detected leading zeroes.
249 */
250int count_zeroes32(uint32_t i)
251{
252 int j;
253 for (j = 0; j < 32; j += 8) {
254 if (i & (0xFFu << (24 - j))) {
255 return (j + count_zeroes8(i >> (24 - j)));
256 }
257 }
258
259 return 32;
260}
261
262/**
263 * Counts leading zeroes in 64bit unsigned integer.
264 *
265 * @param i Integer for which to count leading zeroes.
266 * @return Number of detected leading zeroes.
267 */
268int count_zeroes64(uint64_t i)
269{
270 int j;
271 for (j = 0; j < 64; j += 8) {
272 if (i & (0xFFll << (56 - j))) {
273 return (j + count_zeroes8(i >> (56 - j)));
274 }
275 }
276
277 return 64;
278}
279
280/**
281 * Round and normalize number expressed by exponent and fraction with
282 * first bit (equal to hidden bit) at 30th bit.
283 *
284 * @param exp Exponent part.
285 * @param fraction Fraction with hidden bit shifted to 30th bit.
286 */
287void round_float32(int32_t *exp, uint32_t *fraction)
288{
289 /* rounding - if first bit after fraction is set then round up */
290 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
291
292 if ((*fraction) &
293 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
294 /* rounding overflow */
295 ++(*exp);
296 (*fraction) >>= 1;
297 }
298
299 if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
300 /* overflow - set infinity as result */
301 (*exp) = FLOAT32_MAX_EXPONENT;
302 (*fraction) = 0;
303 }
304}
305
306/**
307 * Round and normalize number expressed by exponent and fraction with
308 * first bit (equal to hidden bit) at bit 62.
309 *
310 * @param exp Exponent part.
311 * @param fraction Fraction with hidden bit shifted to bit 62.
312 */
313void round_float64(int32_t *exp, uint64_t *fraction)
314{
315 /*
316 * Rounding - if first bit after fraction is set then round up.
317 */
318
319 /*
320 * Add 1 to the least significant bit of the fraction respecting the
321 * current shift to bit 62 and see if there will be a carry to bit 63.
322 */
323 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
324
325 /* See if there was a carry to bit 63. */
326 if ((*fraction) &
327 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
328 /* rounding overflow */
329 ++(*exp);
330 (*fraction) >>= 1;
331 }
332
333 if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
334 /* overflow - set infinity as result */
335 (*exp) = FLOAT64_MAX_EXPONENT;
336 (*fraction) = 0;
337 }
338}
339
340/**
341 * Round and normalize number expressed by exponent and fraction with
342 * first bit (equal to hidden bit) at 126th bit.
343 *
344 * @param exp Exponent part.
345 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
346 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
347 */
348void round_float128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
349{
350 uint64_t tmp_hi, tmp_lo;
351
352 /* rounding - if first bit after fraction is set then round up */
353 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
354 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
355
356 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
357 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
358 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
359 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
360 /* rounding overflow */
361 ++(*exp);
362 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
363 }
364
365 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
366 /* overflow - set infinity as result */
367 (*exp) = FLOAT128_MAX_EXPONENT;
368 (*frac_hi) = 0;
369 (*frac_lo) = 0;
370 }
371}
372
373/**
374 * Logical shift left on the 128-bit operand.
375 *
376 * @param a_hi High part of the input operand.
377 * @param a_lo Low part of the input operand.
378 * @param shift Number of bits by witch to shift.
379 * @param r_hi Address to store high part of the result.
380 * @param r_lo Address to store low part of the result.
381 */
382void lshift128(
383 uint64_t a_hi, uint64_t a_lo, int shift,
384 uint64_t *r_hi, uint64_t *r_lo)
385{
386 if (shift <= 0) {
387 /* do nothing */
388 } else if (shift >= 128) {
389 a_hi = 0;
390 a_lo = 0;
391 } else if (shift >= 64) {
392 a_hi = a_lo << (shift - 64);
393 a_lo = 0;
394 } else {
395 a_hi <<= shift;
396 a_hi |= a_lo >> (64 - shift);
397 a_lo <<= shift;
398 }
399
400 *r_hi = a_hi;
401 *r_lo = a_lo;
402}
403
404/**
405 * Logical shift right on the 128-bit operand.
406 *
407 * @param a_hi High part of the input operand.
408 * @param a_lo Low part of the input operand.
409 * @param shift Number of bits by witch to shift.
410 * @param r_hi Address to store high part of the result.
411 * @param r_lo Address to store low part of the result.
412 */
413void rshift128(
414 uint64_t a_hi, uint64_t a_lo, int shift,
415 uint64_t *r_hi, uint64_t *r_lo)
416{
417 if (shift <= 0) {
418 /* do nothing */
419 } else if (shift >= 128) {
420 a_hi = 0;
421 a_lo = 0;
422 } else if (shift >= 64) {
423 a_lo = a_hi >> (shift - 64);
424 a_hi = 0;
425 } else {
426 a_lo >>= shift;
427 a_lo |= a_hi << (64 - shift);
428 a_hi >>= shift;
429 }
430
431 *r_hi = a_hi;
432 *r_lo = a_lo;
433}
434
435/**
436 * Bitwise AND on 128-bit operands.
437 *
438 * @param a_hi High part of the first input operand.
439 * @param a_lo Low part of the first input operand.
440 * @param b_hi High part of the second input operand.
441 * @param b_lo Low part of the second input operand.
442 * @param r_hi Address to store high part of the result.
443 * @param r_lo Address to store low part of the result.
444 */
445void and128(
446 uint64_t a_hi, uint64_t a_lo,
447 uint64_t b_hi, uint64_t b_lo,
448 uint64_t *r_hi, uint64_t *r_lo)
449{
450 *r_hi = a_hi & b_hi;
451 *r_lo = a_lo & b_lo;
452}
453
454/**
455 * Bitwise inclusive OR on 128-bit operands.
456 *
457 * @param a_hi High part of the first input operand.
458 * @param a_lo Low part of the first input operand.
459 * @param b_hi High part of the second input operand.
460 * @param b_lo Low part of the second input operand.
461 * @param r_hi Address to store high part of the result.
462 * @param r_lo Address to store low part of the result.
463 */
464void or128(
465 uint64_t a_hi, uint64_t a_lo,
466 uint64_t b_hi, uint64_t b_lo,
467 uint64_t *r_hi, uint64_t *r_lo)
468{
469 *r_hi = a_hi | b_hi;
470 *r_lo = a_lo | b_lo;
471}
472
473/**
474 * Bitwise exclusive OR on 128-bit operands.
475 *
476 * @param a_hi High part of the first input operand.
477 * @param a_lo Low part of the first input operand.
478 * @param b_hi High part of the second input operand.
479 * @param b_lo Low part of the second input operand.
480 * @param r_hi Address to store high part of the result.
481 * @param r_lo Address to store low part of the result.
482 */
483void xor128(
484 uint64_t a_hi, uint64_t a_lo,
485 uint64_t b_hi, uint64_t b_lo,
486 uint64_t *r_hi, uint64_t *r_lo)
487{
488 *r_hi = a_hi ^ b_hi;
489 *r_lo = a_lo ^ b_lo;
490}
491
492/**
493 * Bitwise NOT on the 128-bit operand.
494 *
495 * @param a_hi High part of the input operand.
496 * @param a_lo Low part of the input operand.
497 * @param r_hi Address to store high part of the result.
498 * @param r_lo Address to store low part of the result.
499 */
500void not128(
501 uint64_t a_hi, uint64_t a_lo,
502 uint64_t *r_hi, uint64_t *r_lo)
503{
504 *r_hi = ~a_hi;
505 *r_lo = ~a_lo;
506}
507
508/**
509 * Equality comparison of 128-bit operands.
510 *
511 * @param a_hi High part of the first input operand.
512 * @param a_lo Low part of the first input operand.
513 * @param b_hi High part of the second input operand.
514 * @param b_lo Low part of the second input operand.
515 * @return 1 if operands are equal, 0 otherwise.
516 */
517int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
518{
519 return (a_hi == b_hi) && (a_lo == b_lo);
520}
521
522/**
523 * Lower-or-equal comparison of 128-bit operands.
524 *
525 * @param a_hi High part of the first input operand.
526 * @param a_lo Low part of the first input operand.
527 * @param b_hi High part of the second input operand.
528 * @param b_lo Low part of the second input operand.
529 * @return 1 if a is lower or equal to b, 0 otherwise.
530 */
531int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
532{
533 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
534}
535
536/**
537 * Lower-than comparison of 128-bit operands.
538 *
539 * @param a_hi High part of the first input operand.
540 * @param a_lo Low part of the first input operand.
541 * @param b_hi High part of the second input operand.
542 * @param b_lo Low part of the second input operand.
543 * @return 1 if a is lower than b, 0 otherwise.
544 */
545int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
546{
547 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
548}
549
550/**
551 * Addition of two 128-bit unsigned integers.
552 *
553 * @param a_hi High part of the first input operand.
554 * @param a_lo Low part of the first input operand.
555 * @param b_hi High part of the second input operand.
556 * @param b_lo Low part of the second input operand.
557 * @param r_hi Address to store high part of the result.
558 * @param r_lo Address to store low part of the result.
559 */
560void add128(uint64_t a_hi, uint64_t a_lo,
561 uint64_t b_hi, uint64_t b_lo,
562 uint64_t *r_hi, uint64_t *r_lo)
563{
564 uint64_t low = a_lo + b_lo;
565 *r_lo = low;
566 /* detect overflow to add a carry */
567 *r_hi = a_hi + b_hi + (low < a_lo);
568}
569
570/**
571 * Substraction of two 128-bit unsigned integers.
572 *
573 * @param a_hi High part of the first input operand.
574 * @param a_lo Low part of the first input operand.
575 * @param b_hi High part of the second input operand.
576 * @param b_lo Low part of the second input operand.
577 * @param r_hi Address to store high part of the result.
578 * @param r_lo Address to store low part of the result.
579 */
580void sub128(uint64_t a_hi, uint64_t a_lo,
581 uint64_t b_hi, uint64_t b_lo,
582 uint64_t *r_hi, uint64_t *r_lo)
583{
584 *r_lo = a_lo - b_lo;
585 /* detect underflow to substract a carry */
586 *r_hi = a_hi - b_hi - (a_lo < b_lo);
587}
588
589/**
590 * Multiplication of two 64-bit unsigned integers.
591 *
592 * @param a First input operand.
593 * @param b Second input operand.
594 * @param r_hi Address to store high part of the result.
595 * @param r_lo Address to store low part of the result.
596 */
597void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
598{
599 uint64_t low, high, middle1, middle2;
600 uint32_t alow, blow;
601
602 alow = a & 0xFFFFFFFF;
603 blow = b & 0xFFFFFFFF;
604
605 a >>= 32;
606 b >>= 32;
607
608 low = ((uint64_t) alow) * blow;
609 middle1 = a * blow;
610 middle2 = alow * b;
611 high = a * b;
612
613 middle1 += middle2;
614 high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
615 middle1 <<= 32;
616 low += middle1;
617 high += (low < middle1);
618 *r_lo = low;
619 *r_hi = high;
620}
621
622/**
623 * Multiplication of two 128-bit unsigned integers.
624 *
625 * @param a_hi High part of the first input operand.
626 * @param a_lo Low part of the first input operand.
627 * @param b_hi High part of the second input operand.
628 * @param b_lo Low part of the second input operand.
629 * @param r_hihi Address to store first (highest) quarter of the result.
630 * @param r_hilo Address to store second quarter of the result.
631 * @param r_lohi Address to store third quarter of the result.
632 * @param r_lolo Address to store fourth (lowest) quarter of the result.
633 */
634void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
635 uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
636{
637 uint64_t hihi, hilo, lohi, lolo;
638 uint64_t tmp1, tmp2;
639
640 mul64(a_lo, b_lo, &lohi, &lolo);
641 mul64(a_lo, b_hi, &hilo, &tmp2);
642 add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
643 mul64(a_hi, b_hi, &hihi, &tmp1);
644 add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
645 mul64(a_hi, b_lo, &tmp1, &tmp2);
646 add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
647 add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
648
649 *r_hihi = hihi;
650 *r_hilo = hilo;
651 *r_lohi = lohi;
652 *r_lolo = lolo;
653}
654
655/**
656 * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
657 * divisor.
658 *
659 * @param a_hi High part of the divident.
660 * @param a_lo Low part of the divident.
661 * @param b Divisor.
662 * @return Quotient approximation.
663 */
664uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
665{
666 uint64_t b_hi, b_lo;
667 uint64_t rem_hi, rem_lo;
668 uint64_t tmp_hi, tmp_lo;
669 uint64_t result;
670
671 if (b <= a_hi) {
672 return 0xFFFFFFFFFFFFFFFFull;
673 }
674
675 b_hi = b >> 32;
676 result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
677 mul64(b, result, &tmp_hi, &tmp_lo);
678 sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
679
680 while ((int64_t) rem_hi < 0) {
681 result -= 0x1ll << 32;
682 b_lo = b << 32;
683 add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
684 }
685
686 rem_hi = (rem_hi << 32) | (rem_lo >> 32);
687 if ((b_hi << 32) <= rem_hi) {
688 result |= 0xFFFFFFFF;
689 } else {
690 result |= rem_hi / b_hi;
691 }
692
693 return result;
694}
695
696/** @}
697 */
Note: See TracBrowser for help on using the repository browser.