Index: uspace/lib/softfloat/generic/common.c
===================================================================
--- uspace/lib/softfloat/generic/common.c	(revision 750636ac4a65360c44846b7681af6ae46854dd3a)
+++ uspace/lib/softfloat/generic/common.c	(revision 4863bb52d2de7a13a21dab7be2475975a848d49f)
@@ -1,4 +1,5 @@
 /*
  * Copyright (c) 2005 Josef Cejka
+ * Copyright (c) 2011 Petr Koupy
  * All rights reserved.
  *
@@ -30,5 +31,5 @@
  * @{
  */
-/** @file
+/** @file Common helper operations.
  */
 
@@ -36,5 +37,5 @@
 #include <common.h>
 
-/* Table for fast leading zeroes counting */
+/* Table for fast leading zeroes counting. */
 char zeroTable[256] = {
 	8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \
@@ -56,11 +57,12 @@
 };
 
-
-
-/** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions
- * @param cexp exponent with bias
- * @param cfrac fraction shifted 10 places left with added hidden bit
- * @param sign
- * @return valied float64
+/** 
+ * Take fraction shifted by 10 bits to the left, round it, normalize it
+ * and detect exceptions
+ * 
+ * @param cexp Exponent with bias.
+ * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
+ * @param sign Resulting sign.
+ * @return Finished double-precision float.
  */
 float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign)
@@ -71,11 +73,13 @@
 
 	/* find first nonzero digit and shift result and detect possibly underflow */
-	while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) {
+	while ((cexp > 0) && (cfrac) &&
+	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
 		cexp--; 
 		cfrac <<= 1;
-			/* TODO: fix underflow */
-	};
-	
-	if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
+		/* TODO: fix underflow */
+	}
+	
+	if ((cexp < 0) || (cexp == 0 &&
+	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
 		/* FIXME: underflow */
 		result.parts.exp = 0;
@@ -93,6 +97,6 @@
 		
 		if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
-			
-			result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK)); 
+			result.parts.fraction =
+			    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
 			return result;
 		}	
@@ -103,5 +107,5 @@
 	++cexp;
 
-	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) {
+	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
 		++cexp;
 		cfrac >>= 1;
@@ -109,5 +113,5 @@
 
 	/* check overflow */
-	if (cexp >= FLOAT64_MAX_EXPONENT ) {
+	if (cexp >= FLOAT64_MAX_EXPONENT) {
 		/* FIXME: overflow, return infinity */
 		result.parts.exp = FLOAT64_MAX_EXPONENT;
@@ -116,19 +120,156 @@
 	}
 
-	result.parts.exp = (uint32_t)cexp;
-	
-	result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK)); 
+	result.parts.exp = (uint32_t) cexp;
+	
+	result.parts.fraction = 
+	    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
 	
 	return result;	
 }
 
-/** Counts leading zeroes in 64bit unsigned integer
- * @param i 
+/**
+ * Take fraction, round it, normalize it and detect exceptions
+ * 
+ * @param cexp Exponent with bias.
+ * @param cfrac_hi High part of the fraction shifted 14 bits to the left
+ *     with added hidden bit.
+ * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
+ *     with added hidden bit.
+ * @param sign Resulting sign.
+ * @param shift_out Bits right-shifted out from fraction by the caller.
+ * @return Finished quadruple-precision float.
+ */
+float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo, 
+    char sign, uint64_t shift_out)
+{
+	float128 result;
+	uint64_t tmp_hi, tmp_lo;
+
+	result.parts.sign = sign;
+
+	/* find first nonzero digit and shift result and detect possibly underflow */
+	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+	    1, &tmp_hi, &tmp_lo);
+	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+	while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
+	    (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
+		cexp--;
+		lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
+		/* TODO: fix underflow */
+
+		lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+		    1, &tmp_hi, &tmp_lo);
+		and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+	}
+
+	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+	    1, &tmp_hi, &tmp_lo);
+	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+	if ((cexp < 0) || (cexp == 0 &&
+	    (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
+		/* FIXME: underflow */
+		result.parts.exp = 0;
+		if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
+			result.parts.frac_hi = 0x0ll;
+			result.parts.frac_lo = 0x0ll;
+			return result;
+		}
+
+		while (cexp < 0) {
+			cexp++;
+			rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
+		}
+
+		if (shift_out & (0x1ull << 63)) {
+			add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
+		}
+
+		lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+		    1, &tmp_hi, &tmp_lo);
+		and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+		if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
+			not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+			    &tmp_hi, &tmp_lo);
+			and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+			result.parts.frac_hi = tmp_hi;
+			result.parts.frac_lo = tmp_lo;
+			return result;
+		}
+	} else {
+		if (shift_out & (0x1ull << 63)) {
+			add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
+		}
+	}
+
+	++cexp;
+
+	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+	    1, &tmp_hi, &tmp_lo);
+	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
+		++cexp;
+		rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
+	}
+
+	/* check overflow */
+	if (cexp >= FLOAT128_MAX_EXPONENT) {
+		/* FIXME: overflow, return infinity */
+		result.parts.exp = FLOAT128_MAX_EXPONENT;
+		result.parts.frac_hi = 0x0ll;
+		result.parts.frac_lo = 0x0ll;
+		return result;
+	}
+
+	result.parts.exp = (uint32_t) cexp;
+
+	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+	    &tmp_hi, &tmp_lo);
+	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+	result.parts.frac_hi = tmp_hi;
+	result.parts.frac_lo = tmp_lo;
+
+	return result;	
+}
+
+/**
+ * Counts leading zeroes in byte.
+ *
+ * @param i Byte for which to count leading zeroes.
+ * @return Number of detected leading zeroes.
+ */
+int countZeroes8(uint8_t i)
+{
+	return zeroTable[i];
+}
+
+/** 
+ * Counts leading zeroes in 32bit unsigned integer.
+ *
+ * @param i Integer for which to count leading zeroes.
+ * @return Number of detected leading zeroes.
+ */
+int countZeroes32(uint32_t i)
+{
+	int j;
+	for (j = 0; j < 32; j += 8) {
+		if (i & (0xFFu << (24 - j))) {
+			return (j + countZeroes8(i >> (24 - j)));
+		}
+	}
+
+	return 32;
+}
+
+/**
+ * Counts leading zeroes in 64bit unsigned integer.
+ *
+ * @param i Integer for which to count leading zeroes.
+ * @return Number of detected leading zeroes.
  */
 int countZeroes64(uint64_t i)
 {
 	int j;
-	for (j =0; j < 64; j += 8) {
-		if ( i & (0xFFll << (56 - j))) {
+	for (j = 0; j < 64; j += 8) {
+		if (i & (0xFFll << (56 - j))) {
 			return (j + countZeroes8(i >> (56 - j)));
 		}
@@ -138,75 +279,410 @@
 }
 
-/** Counts leading zeroes in 32bit unsigned integer
- * @param i 
- */
-int countZeroes32(uint32_t i)
-{
-	int j;
-	for (j =0; j < 32; j += 8) {
-		if ( i & (0xFF << (24 - j))) {
-			return (j + countZeroes8(i >> (24 - j)));
-		}
-	}
-
-	return 32;
-}
-
-/** Counts leading zeroes in byte
- * @param i 
- */
-int countZeroes8(uint8_t i)
-{
-	return zeroTable[i];
-}
-
-/** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit
- * @param exp exponent 
- * @param fraction part with hidden bit shifted to 30. bit
+/**
+ * Round and normalize number expressed by exponent and fraction with
+ * first bit (equal to hidden bit) at 30th bit.
+ *
+ * @param exp Exponent part.
+ * @param fraction Fraction with hidden bit shifted to 30th bit.
  */
 void roundFloat32(int32_t *exp, uint32_t *fraction)
 {
 	/* rounding - if first bit after fraction is set then round up */
-	(*fraction) += (0x1 << 6);
-	
-	if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) { 
+	(*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
+	
+	if ((*fraction) & 
+	    (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
 		/* rounding overflow */
 		++(*exp);
 		(*fraction) >>= 1;
-	};
-	
-	if (((*exp) >= FLOAT32_MAX_EXPONENT ) || ((*exp) < 0)) {
+	}
+	
+	if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
 		/* overflow - set infinity as result */
 		(*exp) = FLOAT32_MAX_EXPONENT;
 		(*fraction) = 0;
-		return;
-	}
-
-	return;
-}
-
-/** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit
- * @param exp exponent 
- * @param fraction part with hidden bit shifted to 62. bit
+	}
+}
+
+/**
+ * Round and normalize number expressed by exponent and fraction with
+ * first bit (equal to hidden bit) at 62nd bit.
+ *
+ * @param exp Exponent part.
+ * @param fraction Fraction with hidden bit shifted to 62nd bit.
  */
 void roundFloat64(int32_t *exp, uint64_t *fraction)
 {
 	/* rounding - if first bit after fraction is set then round up */
-	(*fraction) += (0x1 << 9);
-	
-	if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) { 
+	(*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
+	
+	if ((*fraction) & 
+	    (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
 		/* rounding overflow */
 		++(*exp);
 		(*fraction) >>= 1;
-	};
-	
-	if (((*exp) >= FLOAT64_MAX_EXPONENT ) || ((*exp) < 0)) {
+	}
+	
+	if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
 		/* overflow - set infinity as result */
 		(*exp) = FLOAT64_MAX_EXPONENT;
 		(*fraction) = 0;
-		return;
-	}
-
-	return;
+	}
+}
+
+/**
+ * Round and normalize number expressed by exponent and fraction with
+ * first bit (equal to hidden bit) at 126th bit.
+ *
+ * @param exp Exponent part.
+ * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
+ * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
+ */
+void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
+{
+	uint64_t tmp_hi, tmp_lo;
+
+	/* rounding - if first bit after fraction is set then round up */
+	lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
+	add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
+
+	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+	    (128 - FLOAT128_FRACTION_SIZE - 1), &tmp_hi, &tmp_lo);
+	and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
+		/* rounding overflow */
+		++(*exp);
+		rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
+	}
+
+	if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
+		/* overflow - set infinity as result */
+		(*exp) = FLOAT128_MAX_EXPONENT;
+		(*frac_hi) = 0;
+		(*frac_lo) = 0;
+	}
+}
+
+/**
+ * Logical shift left on the 128-bit operand.
+ *
+ * @param a_hi High part of the input operand.
+ * @param a_lo Low part of the input operand.
+ * @param shift Number of bits by which to shift.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void lshift128(
+    uint64_t a_hi, uint64_t a_lo, int shift,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	if (shift <= 0) {
+		/* do nothing */
+	} else if (shift >= 128) {
+		a_hi = 0;
+		a_lo = 0;
+	} else if (shift >= 64) {
+		a_hi = a_lo << (shift - 64);
+		a_lo = 0;
+	} else {
+		a_hi <<= shift;
+		a_hi |= a_lo >> (64 - shift);
+		a_lo <<= shift;
+	}
+
+	*r_hi = a_hi;
+	*r_lo = a_lo;
+}
+
+/**
+ * Logical shift right on the 128-bit operand.
+ *
+ * @param a_hi High part of the input operand.
+ * @param a_lo Low part of the input operand.
+ * @param shift Number of bits by which to shift.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void rshift128(
+    uint64_t a_hi, uint64_t a_lo, int shift,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	if (shift <= 0) {
+		/* do nothing */
+	} else if (shift >= 128) {
+		a_hi = 0;
+		a_lo = 0;
+	} else if (shift >= 64) {
+		a_lo = a_hi >> (shift - 64);
+		a_hi = 0;
+	} else {
+		a_lo >>= shift;
+		a_lo |= a_hi << (64 - shift);
+		a_hi >>= shift;
+	}
+
+	*r_hi = a_hi;
+	*r_lo = a_lo;
+}
+
+/**
+ * Bitwise AND on 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void and128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	*r_hi = a_hi & b_hi;
+	*r_lo = a_lo & b_lo;
+}
+
+/**
+ * Bitwise inclusive OR on 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void or128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	*r_hi = a_hi | b_hi;
+	*r_lo = a_lo | b_lo;
+}
+
+/**
+ * Bitwise exclusive OR on 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void xor128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	*r_hi = a_hi ^ b_hi;
+	*r_lo = a_lo ^ b_lo;
+}
+
+/**
+ * Bitwise NOT on the 128-bit operand.
+ *
+ * @param a_hi High part of the input operand.
+ * @param a_lo Low part of the input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void not128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	*r_hi = ~a_hi;
+	*r_lo = ~a_lo;
+}
+
+/**
+ * Equality comparison of 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @return 1 if operands are equal, 0 otherwise.
+ */
+int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
+{
+	return (a_hi == b_hi) && (a_lo == b_lo);
+}
+
+/**
+ * Lower-or-equal comparison of 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @return 1 if a is lower or equal to b, 0 otherwise.
+ */
+int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
+{
+	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
+}
+
+/**
+ * Lower-than comparison of 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @return 1 if a is lower than b, 0 otherwise.
+ */
+int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
+{
+	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
+}
+
+/**
+ * Addition of two 128-bit unsigned integers.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void add128(uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	uint64_t low = a_lo + b_lo;
+	*r_lo = low;
+	/* detect overflow to add a carry */
+	*r_hi = a_hi + b_hi + (low < a_lo);
+}
+
+/**
+ * Subtraction of two 128-bit unsigned integers.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void sub128(uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+	*r_lo = a_lo - b_lo;
+	/* detect underflow to subtract a borrow */
+	*r_hi = a_hi - b_hi - (a_lo < b_lo);
+}
+
+/**
+ * Multiplication of two 64-bit unsigned integers.
+ * 
+ * @param a First input operand.
+ * @param b Second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
+{
+	uint64_t low, high, middle1, middle2;
+	uint32_t alow, blow;
+
+	alow = a & 0xFFFFFFFF;
+	blow = b & 0xFFFFFFFF;
+
+	a >>= 32;
+	b >>= 32;
+
+	low = ((uint64_t) alow) * blow;
+	middle1 = a * blow;
+	middle2 = alow * b;
+	high = a * b;
+
+	middle1 += middle2;
+	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
+	middle1 <<= 32;
+	low += middle1;
+	high += (low < middle1);
+	*r_lo = low;
+	*r_hi = high;
+}
+
+/**
+ * Multiplication of two 128-bit unsigned integers.
+ * 
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hihi Address to store first (highest) quarter of the result.
+ * @param r_hilo Address to store second quarter of the result.
+ * @param r_lohi Address to store third quarter of the result.
+ * @param r_lolo Address to store fourth (lowest) quarter of the result.
+ */
+void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
+{
+	uint64_t hihi, hilo, lohi, lolo;
+	uint64_t tmp1, tmp2;
+
+	mul64(a_lo, b_lo, &lohi, &lolo);
+	mul64(a_lo, b_hi, &hilo, &tmp2);
+	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
+	mul64(a_hi, b_hi, &hihi, &tmp1);
+	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
+	mul64(a_hi, b_lo, &tmp1, &tmp2);
+	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
+	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
+
+	*r_hihi = hihi;
+	*r_hilo = hilo;
+	*r_lohi = lohi;
+	*r_lolo = lolo;
+}
+
+/**
+ * Estimate the quotient of 128-bit unsigned dividend and 64-bit unsigned
+ * divisor.
+ * 
+ * @param a_hi High part of the dividend.
+ * @param a_lo Low part of the dividend.
+ * @param b Divisor.
+ * @return Quotient approximation.
+ */
+uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
+{
+	uint64_t b_hi, b_lo;
+	uint64_t rem_hi, rem_lo;
+	uint64_t tmp_hi, tmp_lo;
+	uint64_t result;
+
+	if (b <= a_hi) {
+		return 0xFFFFFFFFFFFFFFFFull;
+	}
+
+	b_hi = b >> 32;
+	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
+	mul64(b, result, &tmp_hi, &tmp_lo);
+	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
+	
+	while ((int64_t) rem_hi < 0) {
+		result -= 0x1ll << 32;
+		b_lo = b << 32;
+		add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
+	}
+
+	rem_hi = (rem_hi << 32) | (rem_lo >> 32);
+	if ((b_hi << 32) <= rem_hi) {
+		result |= 0xFFFFFFFF;
+	} else {
+		result |= rem_hi / b_hi;
+	}
+
+	return result;
 }
 
