summaryrefslogtreecommitdiff
path: root/Userland
diff options
context:
space:
mode:
authorDexesTTP <dexes.ttp@gmail.com>2021-05-12 22:47:07 +0200
committerLinus Groh <mail@linusgroh.de>2021-05-13 19:18:07 +0100
commit485adb5e29853cb54a3dc4a314f37e7a5528f57c (patch)
tree32646a81e9541bd028bc43a62b5c6155abfe02a3 /Userland
parent5071989545a7d60a62eca2928d4859e657c7c10c (diff)
downloadserenity-485adb5e29853cb54a3dc4a314f37e7a5528f57c.zip
LibCrypto: Add the montgomery modular power algorithm
This algorithm allows for much faster computations of modular powers (around a 5x-10x speedup of the Crypto test). However, it is only valid for odd modulo values, and therefore the old algorithm must be kept for computations involving even modulo values.
Diffstat (limited to 'Userland')
-rw-r--r--Userland/Libraries/LibCrypto/BigInt/Algorithms/BitwiseOperations.cpp13
-rw-r--r--Userland/Libraries/LibCrypto/BigInt/Algorithms/ModularPower.cpp232
-rw-r--r--Userland/Libraries/LibCrypto/BigInt/Algorithms/UnsignedBigIntegerAlgorithms.h6
-rw-r--r--Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.h1
-rw-r--r--Userland/Libraries/LibCrypto/NumberTheory/ModularFunctions.cpp14
5 files changed, 264 insertions, 2 deletions
diff --git a/Userland/Libraries/LibCrypto/BigInt/Algorithms/BitwiseOperations.cpp b/Userland/Libraries/LibCrypto/BigInt/Algorithms/BitwiseOperations.cpp
index e7f6c0bff7..dd0290fce0 100644
--- a/Userland/Libraries/LibCrypto/BigInt/Algorithms/BitwiseOperations.cpp
+++ b/Userland/Libraries/LibCrypto/BigInt/Algorithms/BitwiseOperations.cpp
@@ -203,7 +203,7 @@ FLATTEN void UnsignedBigIntegerAlgorithms::shift_left_without_allocation(
}
}
-ALWAYS_INLINE void UnsignedBigIntegerAlgorithms::shift_left_by_n_words(
+void UnsignedBigIntegerAlgorithms::shift_left_by_n_words(
UnsignedBigInteger const& number,
size_t number_of_words,
UnsignedBigInteger& output)
@@ -216,6 +216,17 @@ ALWAYS_INLINE void UnsignedBigIntegerAlgorithms::shift_left_by_n_words(
__builtin_memcpy(&output.m_words.data()[number_of_words], number.m_words.data(), number.m_words.size() * sizeof(unsigned));
}
+void UnsignedBigIntegerAlgorithms::shift_right_by_n_words(
+ UnsignedBigInteger const& number,
+ size_t number_of_words,
+ UnsignedBigInteger& output)
+{
+ // shifting right by N words means just not copying the first words
+ output.set_to_0();
+ output.m_words.resize_and_keep_capacity(number.length() - number_of_words);
+ __builtin_memcpy(output.m_words.data(), &number.m_words.data()[number_of_words], (number.m_words.size() - number_of_words) * sizeof(unsigned));
+}
+
/**
* Returns the word at a requested index in the result of a shift operation
*/
diff --git a/Userland/Libraries/LibCrypto/BigInt/Algorithms/ModularPower.cpp b/Userland/Libraries/LibCrypto/BigInt/Algorithms/ModularPower.cpp
index a990313fe0..15dca5a721 100644
--- a/Userland/Libraries/LibCrypto/BigInt/Algorithms/ModularPower.cpp
+++ b/Userland/Libraries/LibCrypto/BigInt/Algorithms/ModularPower.cpp
@@ -49,4 +49,236 @@ void UnsignedBigIntegerAlgorithms::destructive_modular_power_without_allocation(
}
}
+/**
+ * Compute (1/value) % 2^32.
+ * This needs an odd input value
+ * Algorithm from: Dumas, J.G. "On Newton–Raphson Iteration for Multiplicative Inverses Modulo Prime Powers".
+ */
+ALWAYS_INLINE static u32 inverse_wrapped(u32 value)
+{
+ VERIFY(value & 1);
+
+ i64 b = static_cast<i64>(value);
+ i64 k0 = (2 - b);
+ i64 t = (b - 1);
+ size_t i = 1;
+ while (i < 32) {
+ t = t * t;
+ k0 = k0 * (t + 1);
+ i <<= 1;
+ }
+ return static_cast<u32>(-k0);
+}
+
+/**
+ * Computes z = x * y + c. z_carry contains the top bits, z contains the bottom bits.
+ */
+ALWAYS_INLINE static void linear_multiplication_with_carry(u32 x, u32 y, u32 c, u32& z_carry, u32& z)
+{
+ u64 result = static_cast<u64>(x) * static_cast<u64>(y) + static_cast<u64>(c);
+ z_carry = static_cast<u32>(result >> 32);
+ z = static_cast<u32>(result);
+}
+
+/**
+ * Computes z = a + b. z_carry contains the top bit (1 or 0), z contains the bottom bits.
+ */
+ALWAYS_INLINE static void addition_with_carry(u32 a, u32 b, u32& z_carry, u32& z)
+{
+ u64 result = static_cast<u64>(a) + static_cast<u64>(b);
+ z_carry = static_cast<u32>(result >> 32);
+ z = static_cast<u32>(result);
+}
+
+/**
+ * Computes a montgomery "fragment" for y_i. This computes "z[i] += x[i] * y_i" for all words while rippling the carry, and returns the carry.
+ * Algorithm from: Gueron, "Efficient Software Implementations of Modular Exponentiation". (https://eprint.iacr.org/2011/239.pdf)
+ */
+UnsignedBigInteger::Word UnsignedBigIntegerAlgorithms::montgomery_fragment(UnsignedBigInteger& z, size_t offset_in_z, UnsignedBigInteger const& x, UnsignedBigInteger::Word y_digit, size_t num_words)
+{
+ UnsignedBigInteger::Word carry { 0 };
+ for (size_t i = 0; i < num_words; ++i) {
+ UnsignedBigInteger::Word a_carry;
+ UnsignedBigInteger::Word a;
+ linear_multiplication_with_carry(x.m_words[i], y_digit, z.m_words[offset_in_z + i], a_carry, a);
+ UnsignedBigInteger::Word b_carry;
+ UnsignedBigInteger::Word b;
+ addition_with_carry(a, carry, b_carry, b);
+ z.m_words[offset_in_z + i] = b;
+ carry = a_carry + b_carry;
+ }
+ return carry;
+}
+
+/**
+ * Computes the "almost montgomery" product : x * y * 2 ^ (-num_words * BITS_IN_WORD) % modulo
+ * [Note : that means that the result z satisfies z * 2^(num_words * BITS_IN_WORD) % modulo = x * y % modulo]
+ * assuming :
+ * - x, y and modulo are all already padded to num_words
+ * - k = inverse_wrapped(modulo) (optimization to not recompute K each time)
+ * Algorithm from: Gueron, "Efficient Software Implementations of Modular Exponentiation". (https://eprint.iacr.org/2011/239.pdf)
+ */
+void UnsignedBigIntegerAlgorithms::almost_montgomery_multiplication_without_allocation(
+ UnsignedBigInteger const& x,
+ UnsignedBigInteger const& y,
+ UnsignedBigInteger const& modulo,
+ UnsignedBigInteger& z,
+ UnsignedBigInteger::Word k,
+ size_t num_words,
+ UnsignedBigInteger& result)
+{
+ VERIFY(x.length() >= num_words);
+ VERIFY(y.length() >= num_words);
+ VERIFY(modulo.length() >= num_words);
+
+ z.set_to(0);
+ z.resize_with_leading_zeros(num_words * 2);
+
+ UnsignedBigInteger::Word previous_double_carry { 0 };
+ for (size_t i = 0; i < num_words; ++i) {
+ // z[i->num_words+i] += x * y_i
+ UnsignedBigInteger::Word carry_1 = montgomery_fragment(z, i, x, y.m_words[i], num_words);
+ // z[i->num_words+i] += modulo * (z_i * k)
+ UnsignedBigInteger::Word t = z.m_words[i] * k;
+ UnsignedBigInteger::Word carry_2 = montgomery_fragment(z, i, modulo, t, num_words);
+
+ // Compute the carry by combining all of the carrys of the previous computations
+ // Put it "right after" the range that we computed above
+ UnsignedBigInteger::Word temp_carry = previous_double_carry + carry_1;
+ UnsignedBigInteger::Word overall_carry = temp_carry + carry_2;
+ z.m_words[num_words + i] = overall_carry;
+
+ // Detect if there was a "double carry" for this word by checking if our carry results are smaller than their components
+ previous_double_carry = (temp_carry < carry_1 || overall_carry < carry_2) ? 1 : 0;
+ }
+
+ if (previous_double_carry == 0) {
+ // Return the top num_words bytes of Z, which contains our result.
+ shift_right_by_n_words(z, num_words, result);
+ result.resize_with_leading_zeros(num_words);
+ return;
+ }
+
+ // We have a carry, so we're "one bigger" than we need to be.
+ // Subtract the modulo from the result (the top half of z), and write it to the bottom half of Z since we have space.
+ // (With carry, of course.)
+ UnsignedBigInteger::Word c { 0 };
+ for (size_t i = 0; i < num_words; ++i) {
+ UnsignedBigInteger::Word z_digit = z.m_words[num_words + i];
+ UnsignedBigInteger::Word modulo_digit = modulo.m_words[i];
+ UnsignedBigInteger::Word new_z_digit = z_digit - modulo_digit - c;
+ z.m_words[i] = new_z_digit;
+ // Detect if the subtraction underflowed - from "Hacker's Delight"
+ c = ((modulo_digit & ~z_digit) | ((modulo_digit | ~z_digit) & new_z_digit)) >> (UnsignedBigInteger::BITS_IN_WORD - 1);
+ }
+
+ // Return the bottom num_words bytes of Z (with the carry bit handled)
+ z.m_words.resize(num_words);
+ result.set_to(z);
+ result.resize_with_leading_zeros(num_words);
+}
+
+/**
+ * Complexity: still O(N^3) with N the number of words in the largest word, but less complex than the classical mod power.
+ * Note: the montgomery multiplications requires an inverse modulo over 2^32, which is only defined for odd numbers.
+ */
+void UnsignedBigIntegerAlgorithms::montgomery_modular_power_with_minimal_allocations(
+ UnsignedBigInteger const& base,
+ UnsignedBigInteger const& exponent,
+ UnsignedBigInteger const& modulo,
+ UnsignedBigInteger& temp_z,
+ UnsignedBigInteger& rr,
+ UnsignedBigInteger& one,
+ UnsignedBigInteger& z,
+ UnsignedBigInteger& zz,
+ UnsignedBigInteger& x,
+ UnsignedBigInteger& temp_extra,
+ UnsignedBigInteger& result)
+{
+ VERIFY(modulo.is_odd());
+
+ // Note: While this is a constexpr variable for clarity and could be changed in theory,
+ // various optimized parts of the algorithm rely on this value being exactly 4.
+ constexpr size_t window_size = 4;
+
+ size_t num_words = modulo.trimmed_length();
+ UnsignedBigInteger::Word k = inverse_wrapped(modulo.m_words[0]);
+
+ one.set_to(1);
+
+ // rr = ( 2 ^ (2 * modulo.length() * BITS_IN_WORD) ) % modulo
+ shift_left_by_n_words(one, 2 * num_words, x);
+ divide_without_allocation(x, modulo, temp_z, one, z, zz, temp_extra, rr);
+ rr.resize_with_leading_zeros(num_words);
+
+ // x = base [% modulo, if x doesn't already fit in modulo's words]
+ x.set_to(base);
+ if (x.trimmed_length() > num_words)
+ divide_without_allocation(base, modulo, temp_z, one, z, zz, temp_extra, x);
+ x.resize_with_leading_zeros(num_words);
+
+ one.set_to(1);
+ one.resize_with_leading_zeros(num_words);
+
+ // Compute the montgomery powers from 0 to 2^window_size. powers[i] = x^i
+ UnsignedBigInteger powers[1 << window_size];
+ almost_montgomery_multiplication_without_allocation(one, rr, modulo, temp_z, k, num_words, powers[0]);
+ almost_montgomery_multiplication_without_allocation(x, rr, modulo, temp_z, k, num_words, powers[1]);
+ for (size_t i = 2; i < (1 << window_size); ++i)
+ almost_montgomery_multiplication_without_allocation(powers[i - 1], powers[1], modulo, temp_z, k, num_words, powers[i]);
+
+ z.set_to(powers[0]);
+ z.resize_with_leading_zeros(num_words);
+ zz.set_to(0);
+ zz.resize_with_leading_zeros(num_words);
+
+ ssize_t exponent_length = exponent.trimmed_length();
+ for (ssize_t word_in_exponent = exponent_length - 1; word_in_exponent >= 0; --word_in_exponent) {
+ UnsignedBigInteger::Word exponent_word = exponent.m_words[word_in_exponent];
+ size_t bit_in_word = 0;
+ while (bit_in_word < UnsignedBigInteger::BITS_IN_WORD) {
+ if (word_in_exponent != exponent_length - 1 || bit_in_word != 0) {
+ almost_montgomery_multiplication_without_allocation(z, z, modulo, temp_z, k, num_words, zz);
+ almost_montgomery_multiplication_without_allocation(zz, zz, modulo, temp_z, k, num_words, z);
+ almost_montgomery_multiplication_without_allocation(z, z, modulo, temp_z, k, num_words, zz);
+ almost_montgomery_multiplication_without_allocation(zz, zz, modulo, temp_z, k, num_words, z);
+ }
+ auto power_index = exponent_word >> (UnsignedBigInteger::BITS_IN_WORD - window_size);
+ auto& power = powers[power_index];
+ almost_montgomery_multiplication_without_allocation(z, power, modulo, temp_z, k, num_words, zz);
+
+ swap(z, zz);
+
+ // Move to the next window
+ exponent_word <<= window_size;
+ bit_in_word += window_size;
+ }
+ }
+
+ almost_montgomery_multiplication_without_allocation(z, one, modulo, temp_z, k, num_words, zz);
+
+ if (zz < modulo) {
+ result.set_to(zz);
+ result.clamp_to_trimmed_length();
+ return;
+ }
+
+ // Note : Since we were using "almost montgomery" multiplications, we aren't guaranteed to be under the modulo already.
+ // So, if we're here, we need to respect the modulo.
+ // We can, however, start by trying to subtract the modulo, just in case we're close.
+ subtract_without_allocation(zz, modulo, result);
+
+ if (modulo < zz) {
+ // Note: This branch shouldn't happen in theory (as noted in https://github.com/rust-num/num-bigint/blob/master/src/biguint/monty.rs#L210)
+ // Let's dbgln the values we used. That way, if we hit this branch, we can contribute these values for test cases.
+ dbgln("Encountered the modulo branch during a montgomery modular power. Params : {} - {} - {}", base, exponent, modulo);
+ // We just clobber all the other temporaries that we don't need for the division.
+ // This is wasteful, but we're on the edgiest of cases already.
+ divide_without_allocation(zz, modulo, temp_z, rr, z, x, temp_extra, result);
+ }
+
+ result.clamp_to_trimmed_length();
+ return;
+}
+
}
diff --git a/Userland/Libraries/LibCrypto/BigInt/Algorithms/UnsignedBigIntegerAlgorithms.h b/Userland/Libraries/LibCrypto/BigInt/Algorithms/UnsignedBigIntegerAlgorithms.h
index 3e645bcc9a..36ef290dc5 100644
--- a/Userland/Libraries/LibCrypto/BigInt/Algorithms/UnsignedBigIntegerAlgorithms.h
+++ b/Userland/Libraries/LibCrypto/BigInt/Algorithms/UnsignedBigIntegerAlgorithms.h
@@ -27,9 +27,13 @@ public:
static void destructive_GCD_without_allocation(UnsignedBigInteger& temp_a, UnsignedBigInteger& temp_b, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_3, UnsignedBigInteger& temp_4, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_remainder, UnsignedBigInteger& output);
static void modular_inverse_without_allocation(UnsignedBigInteger const& a_, UnsignedBigInteger const& b, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_3, UnsignedBigInteger& temp_4, UnsignedBigInteger& temp_minus, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_d, UnsignedBigInteger& temp_u, UnsignedBigInteger& temp_v, UnsignedBigInteger& temp_x, UnsignedBigInteger& result);
static void destructive_modular_power_without_allocation(UnsignedBigInteger& ep, UnsignedBigInteger& base, UnsignedBigInteger const& m, UnsignedBigInteger& temp_1, UnsignedBigInteger& temp_2, UnsignedBigInteger& temp_3, UnsignedBigInteger& temp_4, UnsignedBigInteger& temp_multiply, UnsignedBigInteger& temp_quotient, UnsignedBigInteger& temp_remainder, UnsignedBigInteger& result);
+ static void montgomery_modular_power_with_minimal_allocations(UnsignedBigInteger const& base, UnsignedBigInteger const& exponent, UnsignedBigInteger const& modulo, UnsignedBigInteger& temp_z0, UnsignedBigInteger& temp_rr, UnsignedBigInteger& temp_one, UnsignedBigInteger& temp_z, UnsignedBigInteger& temp_zz, UnsignedBigInteger& temp_x, UnsignedBigInteger& temp_extra, UnsignedBigInteger& result);
private:
- ALWAYS_INLINE static void shift_left_by_n_words(UnsignedBigInteger const& number, size_t number_of_words, UnsignedBigInteger& output);
+ static UnsignedBigInteger::Word montgomery_fragment(UnsignedBigInteger& z, size_t offset_in_z, UnsignedBigInteger const& x, UnsignedBigInteger::Word y_digit, size_t num_words);
+ static void almost_montgomery_multiplication_without_allocation(UnsignedBigInteger const& x, UnsignedBigInteger const& y, UnsignedBigInteger const& modulo, UnsignedBigInteger& z, UnsignedBigInteger::Word k, size_t num_words, UnsignedBigInteger& result);
+ static void shift_left_by_n_words(UnsignedBigInteger const& number, size_t number_of_words, UnsignedBigInteger& output);
+ static void shift_right_by_n_words(UnsignedBigInteger const& number, size_t number_of_words, UnsignedBigInteger& output);
ALWAYS_INLINE static UnsignedBigInteger::Word shift_left_get_one_word(UnsignedBigInteger const& number, size_t num_bits, size_t result_word_index);
};
diff --git a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.h b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.h
index 896952a4df..44a13a85fa 100644
--- a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.h
+++ b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.h
@@ -58,6 +58,7 @@ public:
m_cached_trimmed_length = {};
}
+ bool is_odd() const { return m_words.size() && (m_words[0] & 1); }
bool is_invalid() const { return m_is_invalid; }
size_t length() const { return m_words.size(); }
diff --git a/Userland/Libraries/LibCrypto/NumberTheory/ModularFunctions.cpp b/Userland/Libraries/LibCrypto/NumberTheory/ModularFunctions.cpp
index 83212d8b84..a4beb68e45 100644
--- a/Userland/Libraries/LibCrypto/NumberTheory/ModularFunctions.cpp
+++ b/Userland/Libraries/LibCrypto/NumberTheory/ModularFunctions.cpp
@@ -37,6 +37,20 @@ UnsignedBigInteger ModularPower(const UnsignedBigInteger& b, const UnsignedBigIn
if (m == 1)
return 0;
+ if (m.is_odd()) {
+ UnsignedBigInteger temp_z0 { 0 };
+ UnsignedBigInteger temp_rr { 0 };
+ UnsignedBigInteger temp_one { 0 };
+ UnsignedBigInteger temp_z { 0 };
+ UnsignedBigInteger temp_zz { 0 };
+ UnsignedBigInteger temp_x { 0 };
+ UnsignedBigInteger temp_extra { 0 };
+
+ UnsignedBigInteger result;
+ UnsignedBigIntegerAlgorithms::montgomery_modular_power_with_minimal_allocations(b, e, m, temp_z0, temp_rr, temp_one, temp_z, temp_zz, temp_x, temp_extra, result);
+ return result;
+ }
+
UnsignedBigInteger ep { e };
UnsignedBigInteger base { b };