浮点数底层计算方法验证
郝伟 2022/06/23

简介

# 浮点数 
# v = 0b0_01111111110_1111111111111111111111110100000111101011100001010010
#     s      e                                m
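# Note: in the literal above the leftmost bit is the most significant and the
# rightmost bit the least significant; this is the reverse of the in-memory
# byte order on little-endian machines. The fields are:
# * v[63]    width 1:  sign bit s (0 = positive, 1 = negative);
# * v[52-62] width 11: biased exponent exp(v); the effective exponent is e = exp(v) - 1023;
# * v[0-51]  width 52: fraction m, where bit i has weight 2**(i - 52).
# For normal numbers the encoded value is (-1)**s * 2**e * (1 + m).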

def int2double(v):
    """Decode a 64-bit IEEE 754 (binary64) bit pattern into a Python float.

    Args:
        v: Integer holding the pattern: bit 63 is the sign, bits 52-62 the
            biased exponent and bits 0-51 the fraction.

    Returns:
        The float encoded by ``v``. Normal numbers, zero, subnormals,
        infinities and NaN are all decoded.
    """
    sign = -1. if v >> 63 & 1 else 1.
    exponent = v >> 52 & 0b11111111111
    # Weighted sum of the fraction bits: bit i contributes 2**(i - 52).
    fraction = sum([2**(i - 52) * (v>>i&1) for i in range(51, -1, -1)])
    if exponent == 0b11111111111:
        # All-ones exponent: infinity when the fraction is zero, NaN otherwise.
        return sign * float('inf') if fraction == 0 else float('nan')
    if exponent == 0:
        # Zero and subnormals: no implicit leading 1, exponent fixed at -1022.
        return sign * 2. ** -1022 * fraction
    # Normal numbers: implicit leading 1 and an exponent bias of 1023.
    return sign * 2. ** (exponent - 1023) * (1 + fraction)

def to_double(v):
    """Compact decoder for a 64-bit IEEE 754 (binary64) bit pattern.

    Args:
        v: Integer holding the pattern: bit 63 is the sign, bits 52-62 the
            biased exponent and bits 0-51 the fraction.

    Returns:
        The float encoded by ``v``, including zero, subnormals, infinities
        and NaN.
    """
    sign = -1. if v >> 63 & 1 else 1.
    exponent = v >> 52 & 0x7FF
    # Dividing the 52 fraction bits by 2**52 is exact and yields m in [0, 1).
    fraction = (v & ((1 << 52) - 1)) / 2 ** 52
    if exponent == 0x7FF:
        # All-ones exponent: NaN when any fraction bit is set, else infinity.
        return float('nan') if fraction else sign * float('inf')
    # An exponent field of 0 (zero/subnormal) uses exponent -1022 and drops
    # the implicit leading 1; max() and the boolean term fold both cases
    # into the single normal-number formula.
    return sign * 2. ** (max(exponent, 1) - 1023) * (fraction + (exponent != 0))

# Declare the convert function in a single line! 2022/06/23
# Decodes a 64-bit IEEE 754 pattern: sign * (inf or NaN when the exponent field is
# all ones, otherwise 2**(max(exp, 1) - 1023) * (m + implicit bit)), where the implicit
# leading 1 is present only when the exponent field is non-zero (zero/subnormals drop it).
to_dou = lambda v: (-1. if v >> 63 & 1 else 1.) * ((float('nan') if v & 0xF_FFFF_FFFF_FFFF else float('inf')) if v >> 52 & 0x7FF == 0x7FF else 2. ** (max(v >> 52 & 0x7FF, 1) - 1023) * ((v & 0xF_FFFF_FFFF_FFFF) / 2 ** 52 + (v >> 52 & 0x7FF != 0)))

def int2float(v):
    """Decode a 32-bit IEEE 754 (binary32) bit pattern into a Python float.

    Args:
        v: Integer holding the pattern: bit 31 is the sign, bits 23-30 the
            biased exponent and bits 0-22 the fraction.

    Returns:
        The value encoded by ``v`` as a Python float. Normal numbers, zero,
        subnormals, infinities and NaN are all decoded.
    """
    sign = -1. if v >> 31 & 1 else 1.
    # The exponent field is exactly 8 bits wide; a wider mask would also pick
    # up the sign bit (bit 31) and corrupt every negative value.
    exponent = v >> 23 & 0b11111111
    # Weighted sum of the fraction bits: bit i contributes 2**(i - 23).
    fraction = sum([2**(i - 23) * (v>>i&1) for i in range(22, -1, -1)])
    if exponent == 0b11111111:
        # All-ones exponent: infinity when the fraction is zero, NaN otherwise.
        return sign * float('inf') if fraction == 0 else float('nan')
    if exponent == 0:
        # Zero and subnormals: no implicit leading 1, exponent fixed at -126.
        return sign * 2. ** -126 * fraction
    # Normal numbers: implicit leading 1 and an exponent bias of 127.
    return sign * 2. ** (exponent - 127) * (1 + fraction)

v1=0b0_01111111110_1111111111111111111111110100000111101011100001010010
# Test value: 0.9999999778717756 (the double that v1 encodes)
# Test value: 1.0349999666213990 (the single-precision value that v2 encodes)
v2=0b00111111_10000100_01111010_11100001
# Run every decoder on its test pattern; the three 64-bit decoders all
# receive v1, so "v1", "v3" and "v4" should print the same number.
print("v1", int2double(v1))
print("v2", int2float(v2))
print('v3', to_double(v1))
print('v4', to_dou(v1))

# The code below is earlier test code: a step-by-step version of the decoding.
# org: 0.9999999778717756
v = 0b0_01111111110_1111111111111111111111110100000111101011100001010010
sign = v >> 63 & 1  # bit 63: sign bit (0 for this v); not applied to res below
exp = v >> 52 & 0b11111111111  # bits 52-62: biased exponent
mantissa = v & 0b11111111_11111111_11111111_11111111_11111111_11111111_1111  # bits 0-51: raw fraction bits; not used below
# Accumulate the fraction m bit by bit, from the most significant fraction
# bit (i = 51, weight 2**-1) down to the least significant (i = 0, weight 2**-52).
m = 0.
for i in range(51, -1, -1):
    wi = 2**(i - 52)  # weight of fraction bit i
    vi = v>>i & 1  # value (0 or 1) of fraction bit i
    mi = wi * vi  # contribution of bit i to the fraction
    m += mi
#    print(f'{i}: vi={vi}, wi={wi}')
#print(f'm={m}, e={exp}')
# Normal-number formula with the implicit leading 1: 2**(exp - 1023) * (1 + m).
res =  (2 ** (exp - 1023)) * (1 + m)
print('res1: ',res)

def int2double(v):
    """Decode a 64-bit IEEE 754 (binary64) bit pattern into a Python float.

    Args:
        v: Integer holding the pattern: bit 63 is the sign, bits 52-62 the
            biased exponent and bits 0-51 the fraction.

    Returns:
        The float encoded by ``v``. Normal numbers, zero, subnormals,
        infinities and NaN are all decoded.
    """
    sign = -1. if v >> 63 & 1 else 1.
    exponent = v >> 52 & 0b11111111111
    # Weighted sum of the fraction bits: bit i contributes 2**(i - 52).
    fraction = sum([2**(i - 52) * (v>>i&1) for i in range(51, -1, -1)])
    if exponent == 0b11111111111:
        # All-ones exponent: infinity when the fraction is zero, NaN otherwise.
        return sign * float('inf') if fraction == 0 else float('nan')
    if exponent == 0:
        # Zero and subnormals: no implicit leading 1, exponent fixed at -1022.
        return sign * 2. ** -1022 * fraction
    # Normal numbers: implicit leading 1 and an exponent bias of 1023.
    return sign * 2. ** (exponent - 1023) * (1 + fraction)

# Decode the same bit pattern twice: first from the binary literal bound to
# `v`, then from its decimal spelling (4607182418600704082 == 0x3FEFFFFFF41EB852).
print('res2: ', int2double(v))
print(int2double(4607182418600704082))

v1 0.9999999778717756
v2 1.034999966621399
v3 0.9999999778717756
v4 0.9999999778717756
res1: 0.9999999778717756
res2: 0.9999999778717756
0.9999999778717756