【日记】Montgomery Modular Inverse

本文深入探讨了模逆元算法的实现细节,包括AlmMonInv函数,用于计算特定条件下a^-1*2^k(mod m),以及MonModInverse函数,提供了一种高效求解模逆的方法。文章还涉及了Montgomery乘法原理,并通过具体算法展示了如何在计算机科学中解决模逆问题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

(r, k) = AlmMonInv(a, m) = (a^-1)(2 ^k) (mod m)

/// <summary>
/// 返回{x, k};x = <paramref name="value"/>−1 * 2^k (mod <paramref name="modulo"/>) 
/// if gcd(<paramref name="value"/>, <paramref name="modulo"/>) = 1 and <paramref name="modulo"/> is odd
/// </summary>
private static uint[] AlmMonInv(uint value, uint modulo)
{
    if (value >= modulo)
        throw new ArgumentOutOfRangeException("value should be less than modulo");

    uint u = modulo, v = value, r = 0, s = 1, k = 0;
    while (v > 0)
    {
        if (IsEven(u))
        {
            u >>= 1; s <<= 1;
        }
        else if (IsEven(v))
        {
            v >>= 1; r <<= 1;
        }
        else if (u > v)
        {
            u = (u - v) >> 1; r += s; s <<= 1;
        }
        else if (v >= u)
        {
            v = (v - u) >> 1; s += r; r <<= 1;
        }
        ++k;
    }

    if (r >= modulo) r -= modulo;

    return new uint[] { modulo - r, k };
}

设 z = AlmMonInv(a, m) = (a^-1)(2^k) (mod m)

因为MonPro(x, y, m, m') = (xy) R ^-1 (mod m) = (xy)(2^32)^-1 (mod m)

所以ModInverse(a, m) = MonPro(z, 2^32-k, m, m') = (a^-1)(2^k)(2^32-k)(2^-32) (mod m) = a^-1 (mod m)

/// <summary>
/// <paramref name = "value" />−1 (mod<paramref name="modulo"/>) 
/// </summary>
public static int MonModInverse(int value, int modulo)
{
    if (modulo <= 0)
        throw new ArgumentOutOfRangeException("modulo should be positive");

    if (modulo == 1) return 0;

    var mod = Mod(value, modulo);
    if (mod == 1) return 1;

    var gcd = MonGCD(1U << 31, (uint)modulo);
    var result = AlmMonInv((uint)mod, (uint)modulo);
    if (result[1] > 32)
    {
        result[0] = MonPro(result[0], 1U, (uint)modulo, gcd[1]);
        result[1] -= 32;
    }
    result[0] = MonPro(result[0], 1U << (32 - (int)result[1]), (uint)modulo, gcd[1]);
            
    return (int)result[0];
}


/// <summary>
/// <paramref name="value"/> (mod <paramref name="modulo"/>)
/// </summary>
public static int Mod(int value, int modulo)
{
    if (modulo <= 0)
        throw new ArgumentOutOfRangeException("modulo should be positive");

    var remainder = value % modulo;
    return remainder < 0 ? remainder + modulo : remainder;
}

/// <summary>
/// returns {R, K},R*(2left) - K*right = 1.
/// left = 2 ^ n and right is odd
/// </summary>
public static uint[] MonGCD(uint left, uint right)
{
    var result = new uint[] { 1UL, 0UL };
    var s = left;
    var t = right;

    while (left > 0)
    {
        left >>= 1;
        result[1] >>= 1;

        if (IsEven(result[0]))
        {
            result[0] >>= 1;
        }
        else
        {
            result[0] = ((result[0] ^ t) >> 1) + (result[0] & t);//(result[0] + t) / 2
            result[1] += s;
        }
    }

    return result;
}


private static bool IsEven(uint n) => (n & 1) == 0;

注意:a^-1 (mod m) 只有在gcd(a, m) == 1时存在,本方法并没有检查a和m是否互素。并且本方法不能求解modulo为偶数时的模逆。

MonModPro 参考《Montgomery Moduluar Exponentiation

我加入蒙哥马利模乘优化反而更慢了,中间频繁调用蒙哥马利域,但是正确的应该是开头结尾调用两次,我应该怎么把代码优化import random import time import math def dynamic_window(exponent, max_width=4): """动态分窗:返回从低位到高位的窗口值列表""" bin_str = bin(exponent)[2:] windows = [] i = len(bin_str) while i > 0: width = min(i, max_width) win_val = int(bin_str[max(0, i - width):i], 2) windows.append(win_val) i -= width return windows[::-1] # 调整为高位到低位顺序 def precompute_table(base, mod, window_size): """预计算基底表:base^0 到 base^(2^window_size-1) mod mod""" size = 1 << window_size table = [1] * size table[1] = base % mod for i in range(2, size): if i % 2 == 0: table[i] = (table[i // 2] ** 2) % mod # 平方复用 else: table[i] = (table[i - 1] * table[1]) % mod # 增量乘法 return table def windowed_mod_exp(base, exponent, mod, window_size=4): """动态窗口模幂运算""" start_time = time.perf_counter_ns() if mod == 1: return 0, time.perf_counter_ns() - start_time windows = dynamic_window(exponent, window_size) table = precompute_table(base, mod, window_size) result = 1 for win in windows: result = pow(result, (1 << window_size), mod) # 移位操作 result = (result * table[win]) % mod # 窗口组合 total_time = time.perf_counter_ns() - start_time return result, total_time, len(windows) + (window_size - 1) # 估算操作次数 def naive_mod_exp(base, exponent, mod): """朴素平方乘算法""" start_time = time.perf_counter_ns() result = 1 base = base % mod steps = 0 while exponent > 0: if exponent % 2 == 1: result = (result * base) % mod base = (base * base) % mod exponent //= 2 steps += 1 total_time = time.perf_counter_ns() - start_time return result, total_time, steps def generate_large_prime(bits): """生成大素数(带时间监控)""" start_time = time.perf_counter_ns() while True: p = random.getrandbits(bits) | (1 << (bits - 1)) | 1 # 确保最高位和最低位为1 if is_prime(p): total_time = time.perf_counter_ns() - start_time return p, total_time def is_prime(n, k=40): """Miller-Rabin素性测试(简化版)""" if n <= 1: return False if n <= 3: return True d = n - 1 s = 0 while d % 2 == 0: d //= 2 s += 1 for _ in range(k): a = random.randint(2, n - 2) x = pow(a, d, n) if x == 1 or x == n - 1: continue for __ in range(s - 1): x = pow(x, 2, n) if x == n - 1: break else: return False return True # 性能测试模块 def performance_test(bits=1024, window_size=4): print(f"\n===== {bits}位模幂运算性能测试 =====") p, _ = generate_large_prime(bits // 2) q, _ = generate_large_prime(bits // 2) n = p * q base = random.randint(2, n - 1) exp = random.getrandbits(bits) # 内置pow start = time.perf_counter_ns() expected = pow(base, exp, n) builtin_time = time.perf_counter_ns() - start print(f"1. 内置pow耗时: {builtin_time / 1000:.2f}μs") # 动态窗口算法 result_win, win_time, win_ops = windowed_mod_exp(base, exp, n, window_size) print(f"2. 动态窗口({window_size}位)耗时: {win_time / 1000:.2f}μs, 操作次数: {win_ops}") # 朴素算法 result_naive, naive_time, naive_ops = naive_mod_exp(base, exp, n) print(f"3. 朴素算法耗时: {naive_time / 1000:.2f}μs, 操作次数: {naive_ops}") # 验证与总结 assert result_win == expected, "动态窗口结果错误" assert result_naive == expected, "朴素算法结果错误" speedup = naive_time / win_time if win_time != 0 else 0 print(f"\n性能对比:") print(f" - 动态窗口比朴素算法减少 {naive_ops - win_ops} 次模乘") print(f" - 加速比: {speedup:.2f}x") print(f" - 窗口大小{window_size}时的操作效率: {win_ops / (bits / window_size):.1f}次/窗口") # 执行测试 if __name__ == "__main__": # 小数据验证 print("===== 小数据验证 =====") res, time_us, ops = windowed_mod_exp(5, 17, 17 * 19, 4) print(f"5^17 mod 323 = {res}, 耗时{time_us / 1000:.2f}μs, 操作次数{ops}") # 大数据性能测试 performance_test(bits=1024, window_size=4) performance_test(bits=2048, window_size=5)
最新发布
06-05
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值