source: mainline/uspace/lib/softfloat/common.c@ 8c95dff

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 8c95dff was 7462674, checked in by Jakub Jermar <jakub@…>, 13 years ago

Fix the zeroTable once again.
(Thanks Jiri Zarevucky for noticing this.)

  • Property mode set to 100644
File size: 19.6 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Common helper operations.
34 */
35
36#include "sftypes.h"
37#include "common.h"
38
39/* Table for fast leading zeroes counting. */
40char zeroTable[256] = {
41 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
42 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
43 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
44 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
48 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
57};
58
59/**
60 * Take fraction shifted by 10 bits to the left, round it, normalize it
61 * and detect exceptions
62 *
63 * @param cexp Exponent with bias.
64 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
65 * @param sign Resulting sign.
66 * @return Finished double-precision float.
67 */
68float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
69{
70 float64 result;
71
72 result.parts.sign = sign;
73
74 /* find first nonzero digit and shift result and detect possibly underflow */
75 while ((cexp > 0) && (cfrac) &&
76 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
77 cexp--;
78 cfrac <<= 1;
79 /* TODO: fix underflow */
80 }
81
82 if ((cexp < 0) || (cexp == 0 &&
83 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
84 /* FIXME: underflow */
85 result.parts.exp = 0;
86 if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
87 result.parts.fraction = 0;
88 return result;
89 }
90
91 while (cexp < 0) {
92 cexp++;
93 cfrac >>= 1;
94 }
95
96 cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
97
98 if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
99 result.parts.fraction =
100 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
101 return result;
102 }
103 } else {
104 cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
105 }
106
107 ++cexp;
108
109 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
110 ++cexp;
111 cfrac >>= 1;
112 }
113
114 /* check overflow */
115 if (cexp >= FLOAT64_MAX_EXPONENT) {
116 /* FIXME: overflow, return infinity */
117 result.parts.exp = FLOAT64_MAX_EXPONENT;
118 result.parts.fraction = 0;
119 return result;
120 }
121
122 result.parts.exp = (uint32_t) cexp;
123
124 result.parts.fraction =
125 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
126
127 return result;
128}
129
130/**
131 * Take fraction, round it, normalize it and detect exceptions
132 *
133 * @param cexp Exponent with bias.
134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
135 * with added hidden bit.
136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
137 * with added hidden bit.
138 * @param sign Resulting sign.
139 * @param shift_out Bits right-shifted out from fraction by the caller.
140 * @return Finished quadruple-precision float.
141 */
142float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
143 char sign, uint64_t shift_out)
144{
145 float128 result;
146 uint64_t tmp_hi, tmp_lo;
147
148 result.parts.sign = sign;
149
150 /* find first nonzero digit and shift result and detect possibly underflow */
151 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
152 1, &tmp_hi, &tmp_lo);
153 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
154 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
155 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
156 cexp--;
157 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
158 /* TODO: fix underflow */
159
160 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
161 1, &tmp_hi, &tmp_lo);
162 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
163 }
164
165 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
166 1, &tmp_hi, &tmp_lo);
167 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
168 if ((cexp < 0) || (cexp == 0 &&
169 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
170 /* FIXME: underflow */
171 result.parts.exp = 0;
172 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
173 result.parts.frac_hi = 0x0ll;
174 result.parts.frac_lo = 0x0ll;
175 return result;
176 }
177
178 while (cexp < 0) {
179 cexp++;
180 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
181 }
182
183 if (shift_out & (0x1ull < 64)) {
184 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
185 }
186
187 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
188 1, &tmp_hi, &tmp_lo);
189 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
190 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
191 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
192 &tmp_hi, &tmp_lo);
193 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
194 result.parts.frac_hi = tmp_hi;
195 result.parts.frac_lo = tmp_lo;
196 return result;
197 }
198 } else {
199 if (shift_out & (0x1ull < 64)) {
200 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
201 }
202 }
203
204 ++cexp;
205
206 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
207 1, &tmp_hi, &tmp_lo);
208 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
209 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
210 ++cexp;
211 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
212 }
213
214 /* check overflow */
215 if (cexp >= FLOAT128_MAX_EXPONENT) {
216 /* FIXME: overflow, return infinity */
217 result.parts.exp = FLOAT128_MAX_EXPONENT;
218 result.parts.frac_hi = 0x0ll;
219 result.parts.frac_lo = 0x0ll;
220 return result;
221 }
222
223 result.parts.exp = (uint32_t) cexp;
224
225 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
226 &tmp_hi, &tmp_lo);
227 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
228 result.parts.frac_hi = tmp_hi;
229 result.parts.frac_lo = tmp_lo;
230
231 return result;
232}
233
234/**
235 * Counts leading zeroes in byte.
236 *
237 * @param i Byte for which to count leading zeroes.
238 * @return Number of detected leading zeroes.
239 */
240int count_zeroes8(uint8_t i)
241{
242 return zeroTable[i];
243}
244
245/**
246 * Counts leading zeroes in 32bit unsigned integer.
247 *
248 * @param i Integer for which to count leading zeroes.
249 * @return Number of detected leading zeroes.
250 */
251int count_zeroes32(uint32_t i)
252{
253 int j;
254 for (j = 0; j < 32; j += 8) {
255 if (i & (0xFF << (24 - j))) {
256 return (j + count_zeroes8(i >> (24 - j)));
257 }
258 }
259
260 return 32;
261}
262
263/**
264 * Counts leading zeroes in 64bit unsigned integer.
265 *
266 * @param i Integer for which to count leading zeroes.
267 * @return Number of detected leading zeroes.
268 */
269int count_zeroes64(uint64_t i)
270{
271 int j;
272 for (j = 0; j < 64; j += 8) {
273 if (i & (0xFFll << (56 - j))) {
274 return (j + count_zeroes8(i >> (56 - j)));
275 }
276 }
277
278 return 64;
279}
280
281/**
282 * Round and normalize number expressed by exponent and fraction with
283 * first bit (equal to hidden bit) at 30th bit.
284 *
285 * @param exp Exponent part.
286 * @param fraction Fraction with hidden bit shifted to 30th bit.
287 */
288void round_float32(int32_t *exp, uint32_t *fraction)
289{
290 /* rounding - if first bit after fraction is set then round up */
291 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
292
293 if ((*fraction) &
294 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
295 /* rounding overflow */
296 ++(*exp);
297 (*fraction) >>= 1;
298 }
299
300 if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
301 /* overflow - set infinity as result */
302 (*exp) = FLOAT32_MAX_EXPONENT;
303 (*fraction) = 0;
304 }
305}
306
307/**
308 * Round and normalize number expressed by exponent and fraction with
309 * first bit (equal to hidden bit) at bit 62.
310 *
311 * @param exp Exponent part.
312 * @param fraction Fraction with hidden bit shifted to bit 62.
313 */
314void round_float64(int32_t *exp, uint64_t *fraction)
315{
316 /*
317 * Rounding - if first bit after fraction is set then round up.
318 */
319
320 /*
321 * Add 1 to the least significant bit of the fraction respecting the
322 * current shift to bit 62 and see if there will be a carry to bit 63.
323 */
324 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
325
326 /* See if there was a carry to bit 63. */
327 if ((*fraction) &
328 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
329 /* rounding overflow */
330 ++(*exp);
331 (*fraction) >>= 1;
332 }
333
334 if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
335 /* overflow - set infinity as result */
336 (*exp) = FLOAT64_MAX_EXPONENT;
337 (*fraction) = 0;
338 }
339}
340
341/**
342 * Round and normalize number expressed by exponent and fraction with
343 * first bit (equal to hidden bit) at 126th bit.
344 *
345 * @param exp Exponent part.
346 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
347 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
348 */
349void round_float128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
350{
351 uint64_t tmp_hi, tmp_lo;
352
353 /* rounding - if first bit after fraction is set then round up */
354 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
355 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
356
357 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
358 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
359 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
360 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
361 /* rounding overflow */
362 ++(*exp);
363 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
364 }
365
366 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
367 /* overflow - set infinity as result */
368 (*exp) = FLOAT128_MAX_EXPONENT;
369 (*frac_hi) = 0;
370 (*frac_lo) = 0;
371 }
372}
373
374/**
375 * Logical shift left on the 128-bit operand.
376 *
377 * @param a_hi High part of the input operand.
378 * @param a_lo Low part of the input operand.
379 * @param shift Number of bits by witch to shift.
380 * @param r_hi Address to store high part of the result.
381 * @param r_lo Address to store low part of the result.
382 */
383void lshift128(
384 uint64_t a_hi, uint64_t a_lo, int shift,
385 uint64_t *r_hi, uint64_t *r_lo)
386{
387 if (shift <= 0) {
388 /* do nothing */
389 } else if (shift >= 128) {
390 a_hi = 0;
391 a_lo = 0;
392 } else if (shift >= 64) {
393 a_hi = a_lo << (shift - 64);
394 a_lo = 0;
395 } else {
396 a_hi <<= shift;
397 a_hi |= a_lo >> (64 - shift);
398 a_lo <<= shift;
399 }
400
401 *r_hi = a_hi;
402 *r_lo = a_lo;
403}
404
405/**
406 * Logical shift right on the 128-bit operand.
407 *
408 * @param a_hi High part of the input operand.
409 * @param a_lo Low part of the input operand.
410 * @param shift Number of bits by witch to shift.
411 * @param r_hi Address to store high part of the result.
412 * @param r_lo Address to store low part of the result.
413 */
414void rshift128(
415 uint64_t a_hi, uint64_t a_lo, int shift,
416 uint64_t *r_hi, uint64_t *r_lo)
417{
418 if (shift <= 0) {
419 /* do nothing */
420 } else if (shift >= 128) {
421 a_hi = 0;
422 a_lo = 0;
423 } else if (shift >= 64) {
424 a_lo = a_hi >> (shift - 64);
425 a_hi = 0;
426 } else {
427 a_lo >>= shift;
428 a_lo |= a_hi << (64 - shift);
429 a_hi >>= shift;
430 }
431
432 *r_hi = a_hi;
433 *r_lo = a_lo;
434}
435
436/**
437 * Bitwise AND on 128-bit operands.
438 *
439 * @param a_hi High part of the first input operand.
440 * @param a_lo Low part of the first input operand.
441 * @param b_hi High part of the second input operand.
442 * @param b_lo Low part of the second input operand.
443 * @param r_hi Address to store high part of the result.
444 * @param r_lo Address to store low part of the result.
445 */
446void and128(
447 uint64_t a_hi, uint64_t a_lo,
448 uint64_t b_hi, uint64_t b_lo,
449 uint64_t *r_hi, uint64_t *r_lo)
450{
451 *r_hi = a_hi & b_hi;
452 *r_lo = a_lo & b_lo;
453}
454
455/**
456 * Bitwise inclusive OR on 128-bit operands.
457 *
458 * @param a_hi High part of the first input operand.
459 * @param a_lo Low part of the first input operand.
460 * @param b_hi High part of the second input operand.
461 * @param b_lo Low part of the second input operand.
462 * @param r_hi Address to store high part of the result.
463 * @param r_lo Address to store low part of the result.
464 */
465void or128(
466 uint64_t a_hi, uint64_t a_lo,
467 uint64_t b_hi, uint64_t b_lo,
468 uint64_t *r_hi, uint64_t *r_lo)
469{
470 *r_hi = a_hi | b_hi;
471 *r_lo = a_lo | b_lo;
472}
473
474/**
475 * Bitwise exclusive OR on 128-bit operands.
476 *
477 * @param a_hi High part of the first input operand.
478 * @param a_lo Low part of the first input operand.
479 * @param b_hi High part of the second input operand.
480 * @param b_lo Low part of the second input operand.
481 * @param r_hi Address to store high part of the result.
482 * @param r_lo Address to store low part of the result.
483 */
484void xor128(
485 uint64_t a_hi, uint64_t a_lo,
486 uint64_t b_hi, uint64_t b_lo,
487 uint64_t *r_hi, uint64_t *r_lo)
488{
489 *r_hi = a_hi ^ b_hi;
490 *r_lo = a_lo ^ b_lo;
491}
492
493/**
494 * Bitwise NOT on the 128-bit operand.
495 *
496 * @param a_hi High part of the input operand.
497 * @param a_lo Low part of the input operand.
498 * @param r_hi Address to store high part of the result.
499 * @param r_lo Address to store low part of the result.
500 */
501void not128(
502 uint64_t a_hi, uint64_t a_lo,
503 uint64_t *r_hi, uint64_t *r_lo)
504{
505 *r_hi = ~a_hi;
506 *r_lo = ~a_lo;
507}
508
509/**
510 * Equality comparison of 128-bit operands.
511 *
512 * @param a_hi High part of the first input operand.
513 * @param a_lo Low part of the first input operand.
514 * @param b_hi High part of the second input operand.
515 * @param b_lo Low part of the second input operand.
516 * @return 1 if operands are equal, 0 otherwise.
517 */
518int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
519{
520 return (a_hi == b_hi) && (a_lo == b_lo);
521}
522
523/**
524 * Lower-or-equal comparison of 128-bit operands.
525 *
526 * @param a_hi High part of the first input operand.
527 * @param a_lo Low part of the first input operand.
528 * @param b_hi High part of the second input operand.
529 * @param b_lo Low part of the second input operand.
530 * @return 1 if a is lower or equal to b, 0 otherwise.
531 */
532int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
533{
534 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
535}
536
537/**
538 * Lower-than comparison of 128-bit operands.
539 *
540 * @param a_hi High part of the first input operand.
541 * @param a_lo Low part of the first input operand.
542 * @param b_hi High part of the second input operand.
543 * @param b_lo Low part of the second input operand.
544 * @return 1 if a is lower than b, 0 otherwise.
545 */
546int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
547{
548 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
549}
550
551/**
552 * Addition of two 128-bit unsigned integers.
553 *
554 * @param a_hi High part of the first input operand.
555 * @param a_lo Low part of the first input operand.
556 * @param b_hi High part of the second input operand.
557 * @param b_lo Low part of the second input operand.
558 * @param r_hi Address to store high part of the result.
559 * @param r_lo Address to store low part of the result.
560 */
561void add128(uint64_t a_hi, uint64_t a_lo,
562 uint64_t b_hi, uint64_t b_lo,
563 uint64_t *r_hi, uint64_t *r_lo)
564{
565 uint64_t low = a_lo + b_lo;
566 *r_lo = low;
567 /* detect overflow to add a carry */
568 *r_hi = a_hi + b_hi + (low < a_lo);
569}
570
571/**
572 * Substraction of two 128-bit unsigned integers.
573 *
574 * @param a_hi High part of the first input operand.
575 * @param a_lo Low part of the first input operand.
576 * @param b_hi High part of the second input operand.
577 * @param b_lo Low part of the second input operand.
578 * @param r_hi Address to store high part of the result.
579 * @param r_lo Address to store low part of the result.
580 */
581void sub128(uint64_t a_hi, uint64_t a_lo,
582 uint64_t b_hi, uint64_t b_lo,
583 uint64_t *r_hi, uint64_t *r_lo)
584{
585 *r_lo = a_lo - b_lo;
586 /* detect underflow to substract a carry */
587 *r_hi = a_hi - b_hi - (a_lo < b_lo);
588}
589
590/**
591 * Multiplication of two 64-bit unsigned integers.
592 *
593 * @param a First input operand.
594 * @param b Second input operand.
595 * @param r_hi Address to store high part of the result.
596 * @param r_lo Address to store low part of the result.
597 */
598void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
599{
600 uint64_t low, high, middle1, middle2;
601 uint32_t alow, blow;
602
603 alow = a & 0xFFFFFFFF;
604 blow = b & 0xFFFFFFFF;
605
606 a >>= 32;
607 b >>= 32;
608
609 low = ((uint64_t) alow) * blow;
610 middle1 = a * blow;
611 middle2 = alow * b;
612 high = a * b;
613
614 middle1 += middle2;
615 high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
616 middle1 <<= 32;
617 low += middle1;
618 high += (low < middle1);
619 *r_lo = low;
620 *r_hi = high;
621}
622
623/**
624 * Multiplication of two 128-bit unsigned integers.
625 *
626 * @param a_hi High part of the first input operand.
627 * @param a_lo Low part of the first input operand.
628 * @param b_hi High part of the second input operand.
629 * @param b_lo Low part of the second input operand.
630 * @param r_hihi Address to store first (highest) quarter of the result.
631 * @param r_hilo Address to store second quarter of the result.
632 * @param r_lohi Address to store third quarter of the result.
633 * @param r_lolo Address to store fourth (lowest) quarter of the result.
634 */
635void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
636 uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
637{
638 uint64_t hihi, hilo, lohi, lolo;
639 uint64_t tmp1, tmp2;
640
641 mul64(a_lo, b_lo, &lohi, &lolo);
642 mul64(a_lo, b_hi, &hilo, &tmp2);
643 add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
644 mul64(a_hi, b_hi, &hihi, &tmp1);
645 add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
646 mul64(a_hi, b_lo, &tmp1, &tmp2);
647 add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
648 add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
649
650 *r_hihi = hihi;
651 *r_hilo = hilo;
652 *r_lohi = lohi;
653 *r_lolo = lolo;
654}
655
656/**
657 * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
658 * divisor.
659 *
660 * @param a_hi High part of the divident.
661 * @param a_lo Low part of the divident.
662 * @param b Divisor.
663 * @return Quotient approximation.
664 */
665uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
666{
667 uint64_t b_hi, b_lo;
668 uint64_t rem_hi, rem_lo;
669 uint64_t tmp_hi, tmp_lo;
670 uint64_t result;
671
672 if (b <= a_hi) {
673 return 0xFFFFFFFFFFFFFFFFull;
674 }
675
676 b_hi = b >> 32;
677 result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
678 mul64(b, result, &tmp_hi, &tmp_lo);
679 sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
680
681 while ((int64_t) rem_hi < 0) {
682 result -= 0x1ll << 32;
683 b_lo = b << 32;
684 add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
685 }
686
687 rem_hi = (rem_hi << 32) | (rem_lo >> 32);
688 if ((b_hi << 32) <= rem_hi) {
689 result |= 0xFFFFFFFF;
690 } else {
691 result |= rem_hi / b_hi;
692 }
693
694 return result;
695}
696
697/** @}
698 */
Note: See TracBrowser for help on using the repository browser.