-
Notifications
You must be signed in to change notification settings - Fork 61
SM4 with AESENCLAST
This is the pure golang code to study SM4 implementation with AESENCLAST instruction.
We combine various linear operations into two affine transforms (one on each side), A1 and A2. Here affine transform consists of a multiplication with a 8x8 binary matrix M and addition of a 8-bit constant C.
SM4-S(x) = A2(AES-S(A1(x))
A1(x) = M1*x + C1
A2(x) = M2*x + C2
The combinations of (M1, C1, M2, C2) or (A1, A2) are not unique.
Known (M1, C1, M2, C2), please reference sm4 with AESENCLAST and AES 和 SM4 S盒複合域實現方法python code. In AES 和 SM4 S盒複合域實現方法python code, we found 8 groups of (M1, C1, M2, C2). 其实按A very compact Rijndael S-box,这八组对应E All Possible Bases的八组正规基:1 4 19 22 37 40 55 58,另外还有73 76 91 94 109 112 127 130八组正规基,其它都是混合基或者多项式基,共432组,它们都假设trace is unity。
{(M1, C1, M2, C2) | SM4-S(x) = A2(AES-S(A1(x)), A1(x) = M1*x + C1, A2(x) = M2*x + C2}
M1= 0x96 ,0x47 ,0xe9 ,0x3d ,0xde ,0x65 ,0xac ,0xa7
C1= 0x69
M2= 0xfa ,0x64 ,0xb4 ,0x0a ,0x41 ,0xdd ,0x01 ,0xc1
C2= 0x61
//https://github.com/intel/ipp-crypto/blob/develop/sources/ippcp/pcpsms4_l9cn.h
//Intel也用了这组
M1= 0x52 ,0xbc ,0x2d ,0x02 ,0x9e ,0x25 ,0xac ,0x34
C1= 0x65
M2= 0xcb ,0x9a ,0x0a ,0xb4 ,0xc7 ,0xac ,0x87 ,0x4e
C2= 0x2f
M1= 0x5d ,0x50 ,0x22 ,0x1a ,0xb9 ,0x7d ,0x28 ,0x4c
C1= 0x3e
M2= 0xd3 ,0xba ,0x1d ,0x65 ,0x47 ,0x4c ,0x0e ,0x48
C2= 0x6c
M1= 0xe6 ,0xab ,0x99 ,0x5a ,0x86 ,0x42 ,0x28 ,0x24
C1= 0x8e
M2= 0x2d ,0x8b ,0x65 ,0x1d ,0xc8 ,0xfb ,0x81 ,0xce
C2= 0xe9
M1= 0xd1 ,0x37 ,0xae ,0xce ,0x05 ,0x45 ,0xec ,0xdd
C1= 0x86
M2= 0x50 ,0x16 ,0x5b ,0x2a ,0x53 ,0x92 ,0x62 ,0x33
C2= 0x3c
M1= 0xee ,0xb3 ,0x91 ,0x75 ,0xc1 ,0x81 ,0xec ,0x8a
C1= 0xd6
M2= 0x19 ,0x56 ,0x2a ,0x5b ,0xa4 ,0xea ,0x95 ,0x0b
C2= 0x4d
M1= 0x4d ,0x1f ,0x32 ,0xfe ,0x8e ,0xb1 ,0x17 ,0xd5
C1= 0xce
M2= 0xe8 ,0x28 ,0x74 ,0xc3 ,0xfc ,0x32 ,0x02 ,0x6b
C2= 0x81
M1= 0x0d ,0x9b ,0x72 ,0x3a ,0x35 ,0x0a ,0x17 ,0x06
C1= 0x23
M2= 0xa8 ,0x61 ,0xc3 ,0x74 ,0xc4 ,0x8c ,0x3a ,0x9c
C2= 0x3b
//以上都为计算获得
//https://github.com/mjosaarinen/sm4ni
M1= 0x14, 0x2e, 0x16, 0x8a, 0x60, 0x0d, 0x9b, 0x66
C1= 0x01
M2= 0xfe, 0x54, 0xaf, 0xdd, 0xf7, 0xf9, 0xac, 0xe2
C2= 0x34
sm4_box_aesenclast <-> sm4_box_aesbox_1 <-> sm4_box_aesbox_2 <-> sm4_box_aesbox_3 <-> sm4_box_aesbox_4
We note that each affine transform can be constructed from XOR of two 4x8-bit table lookups, which we implement with constant time byte shuffle instructions (each 16-entry table is in a single 128-bit register).
sm4_box_aesenclast
y := mm_and_si128(x, const_0f)
y = mm_shuffle_epi8(a1l, y)
x = mm_srli_epi64(x, 4)
x = mm_and_si128(x, const_0f)
x = xor(mm_shuffle_epi8(a1h, x), y)
x = mm_shuffle_epi8(x, shift_row_inv)
x = mm_aesenclast_si128(x, const_0f)
y = mm_andnot_si128(x, const_0f)
y = mm_shuffle_epi8(a2l, y)
x = mm_srli_epi64(x, 4)
x = mm_and_si128(x, const_0f)
x = xor(mm_shuffle_epi8(a2h, x), y)
sm4_box_aesbox_1
var y __m128i
for i := 0; i < 16; i++ {
y.bytes[i] = a1l.bytes[x.bytes[i]&0xf] ^ a1h.bytes[x.bytes[i]>>4]
}
x = y
for i := 0; i < 16; i++ {
x.bytes[i] = aes_sbox[x.bytes[i]] ^ 0xf
}
for i := 0; i < 16; i++ {
y.bytes[i] = a2l.bytes[(^x.bytes[i])&0xf] ^ a2h.bytes[x.bytes[i]>>4]
}
sm4_box_aesbox_2
for i := 0; i < 16; i++ {
v := x.bytes[i]
v = a1l.bytes[v&0xf] ^ a1h.bytes[v>>4] // v = A1(x)
v = aes_sbox[v] ^ 0xf // v = AES-S(A1(x)) XOR 0x0f, 相当于对低四位取反。
v = a2l.bytes[^v&0xf] ^ a2h.bytes[v>>4] // v = A2(AES-S(A1(x)))
x.bytes[i] = v
}
sm4_box_aesbox_3
for i := 0; i < 16; i++ {
v := x.bytes[i]
v = a1l.bytes[v&0xf] ^ a1h.bytes[v>>4]
v = aes_sbox[v]
v = a2l.bytes[v&0xf] ^ a2h.bytes[v>>4]
x.bytes[i] = v
}
sm4_box_aesbox_4
for i := 0; i < 16; i++ {
v := x.bytes[i]
v = a1_table[v]
v = aes_sbox[v]
v = a2_table[v]
x.bytes[i] = v
}
var shift_row_inv = set64(0x0306090C0F020508, 0x0B0E0104070A0D00)
var intelm1l = set64(0xdcf84460b3972b0f, 0xb6922e0ad9fd4165)
var intelm1h = set64(0x64ad03cae42d834a, 0x2ee74980ae67c900)
var intelm2l = set64(0x48c2a32957ddbc36, 0xad2746ccb23859d3)
var intelm2h = set64(0x134307579aca8ede, 0xcd9dd98944145000)
var intelenckey = set64(0x6363636363636363, 0x6363636363636363)
var intelmaskSrows = shift_row_inv
func sm4_box_aesenclast_intel(rk uint32, t0, t1, t2, t3, a1l, a1h, a2l, a2h __m128i) __m128i {
rk128 := mm_set_epi32(rk, rk, rk, rk)
x := xor(xor(t1, t2), t3)
x = xor(x, rk128)
y := mm_and_si128(x, const_0f)
y = mm_shuffle_epi8(a1l, y)
x = mm_srli_epi64(x, 4)
x = mm_and_si128(x, const_0f)
x = xor(mm_shuffle_epi8(a1h, x), y)
x = mm_aesenclast_si128(x, intelenckey)
x = mm_shuffle_epi8(x, intelmaskSrows)
y = mm_and_si128(x, const_0f)
y = mm_shuffle_epi8(a2l, y)
x = mm_srli_epi64(x, 4)
x = mm_and_si128(x, const_0f)
x = xor(mm_shuffle_epi8(a2h, x), y)
return x
}
其实x = mm_shuffle_epi8(x, intelmaskSrows)在mm_aesenclast_si128之前调用,结果也是一样的。
类似于:
{(M1, C1, M2, C2) | SM4-S(x) = A2(AES-S(A1(x)), A1(x) = M1*x + C1, A2(x) = M2*(x+0x63) + C2 = M2*x + (M2*0x63 + C2)}
如何生成Intel算法的外层查找表?
from pyfinite import genericmatrix
def XOR(x, y): return x ^ y
def AND(x, y): return x & y
def DIV(x, y): return x
def genCMatrix(c):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
Imatrix.SetRow(j, [(0x63 >> (7 - j)) & 1])
return Imatrix
def matrix_from_cols(cols):
m = genericmatrix.GenericMatrix(size=(8, 8), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for i in range (8):
k = 7 - i
j = 1 << k
m.SetRow(i, [(cols[0] & j) >> k, (cols[1] & j) >> k, (cols[2] & j) >> k, (cols[3] & j) >> k, (cols[4] & j) >> k, (cols[5] & j) >> k, (cols[6] & j) >> k, (cols[7] & j) >> k])
return m
def gen_matrix_based_table(table):
return matrix_from_cols([table[0x80] ^ table[0], table[0x40] ^ table[0], table[0x20] ^ table[0], table[0x10] ^ table[0], table[0x08] ^ table[0], table[0x04] ^ table[0], table[0x02] ^ table[0], table[0x01] ^ table[0]])
def gen_matrix_based_high_low(high, low):
table = []
for i in range(16):
for j in range(16):
table.append(high[i] ^ low[j])
return gen_matrix_based_table(table)
def matrix_col_byte(c):
return (c[0] << 7) ^ (c[1] << 6) ^ (c[2] << 5) ^ (c[3] << 4) ^ (c[4] << 3) ^ (c[5] << 2) ^ (c[6] << 1) ^ (c[7] << 0)
def gen_lookup(m, c):
table = []
for i in range(256):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
Imatrix.SetRow(j, [(i >> (7 - j)) & 1])
tmp = m * Imatrix
table.append(matrix_col_byte(tmp.GetColumn(0)) ^ c)
return table
def gen_lookup_low(m, c):
table = []
for i in range(256):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
if j < 4:
Imatrix.SetRow(j, [0])
else:
Imatrix.SetRow(j, [(i >> (7 - j)) & 1])
tmp = m * Imatrix
table.append(matrix_col_byte(tmp.GetColumn(0)) ^ c)
return table
def gen_lookup_high(m):
table = []
for i in range(256):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
if j < 4:
Imatrix.SetRow(j, [(i >> (7 - j)) & 1])
else:
Imatrix.SetRow(j, [0])
tmp = m * Imatrix
table.append(matrix_col_byte(tmp.GetColumn(0)))
return table
def print_table(table):
for i, s in enumerate(table):
print(f'0x%02X'%s,',', end='')
if (i+1) % 16 == 0:
print()
def to_matrix(x):
m = genericmatrix.GenericMatrix(size=(8,8), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for i in range(8):
m.SetRow(i, [(x[i] & 0x80) >> 7, (x[i] & 0x40) >> 6, (x[i] & 0x20) >> 5, (x[i] & 0x10) >> 4, (x[i] & 0x08) >> 3, (x[i] & 0x04) >> 2, (x[i] & 0x02) >> 1, (x[i] & 0x01) >> 0])
return m
def gen_intel_c(m, c):
Cmatrix = genCMatrix(0x63)
c1 = m*Cmatrix
return matrix_col_byte(c1.GetColumn(0)) ^ c
Mmatrix = to_matrix([0xcb, 0x9a, 0x0a, 0xb4, 0xc7, 0xac, 0x87, 0x4e])
c1 = gen_intel_c(Mmatrix, 0x2f)
print(f'0x%02X'%c1)
print()
print_table(gen_lookup_high(Mmatrix))
print()
print_table(gen_lookup_low(Mmatrix, c1))
这个查找表有256个元素,考虑到寄存器的使用,需要换个形式。
我们可以看到
这样,我们去除重复,只用16*2个字节就可以存储这个查找表。
// {Mi+C | i>=0 && i<256}
// Generate lookup table based on M matrix and C
func gen_lookup_table(m [8]byte, c byte) {
for i := 0; i < 16; i++ {
for j := 0; j < 16; j++ {
x := ((byte(bits.OnesCount8(byte(i*16+j)&m[0])) & 1) << 7) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[1])) & 1) << 6) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[2])) & 1) << 5) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[3])) & 1) << 4) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[4])) & 1) << 3) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[5])) & 1) << 2) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[6])) & 1) << 1) ^
((byte(bits.OnesCount8(byte(i*16+j)&m[7])) & 1) << 0) ^ c
fmt.Printf("0x%02X, ", x)
}
fmt.Println()
}
}
Below python code is more intuitive:
from pyfinite import genericmatrix
XOR = lambda x,y:x^y
AND = lambda x,y:x&y
DIV = lambda x,y:x
def to_matrix(x):
m = genericmatrix.GenericMatrix(size=(8,8), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for i in range(8):
m.SetRow(i, [(x[i] & 0x80) >> 7, (x[i] & 0x40) >> 6, (x[i] & 0x20) >> 5, (x[i] & 0x10) >> 4, (x[i] & 0x08) >> 3, (x[i] & 0x04) >> 2, (x[i] & 0x02) >> 1, (x[i] & 0x01) >> 0])
return m
def matrix_col_byte(c):
return (c[0] << 7) ^ (c[1] << 6) ^ (c[2] << 5) ^ (c[3] << 4) ^ (c[4] << 3) ^ (c[5] << 2) ^ (c[6] << 1) ^ (c[7] << 0)
def gen_lookup(m, c):
Mmatrix = to_matrix(m)
table = []
for i in range(256):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
Imatrix.SetRow(j, [(i >> (7 - j)) & 1])
tmp = Mmatrix * Imatrix
table.append(matrix_col_byte(tmp.GetColumn(0)) ^ c)
return table
def gen_lookup_low(m, c):
Mmatrix = to_matrix(m)
table = []
for i in range(256):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
if j < 4:
Imatrix.SetRow(j, [0])
else:
Imatrix.SetRow(j, [(i >> (7 - j)) & 1])
tmp = Mmatrix * Imatrix
table.append(matrix_col_byte(tmp.GetColumn(0)) ^ c)
return table
def gen_lookup_high(m, c):
Mmatrix = to_matrix(m)
table = []
for i in range(256):
Imatrix = genericmatrix.GenericMatrix(size=(8, 1), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for j in range (8):
if j < 4:
Imatrix.SetRow(j, [(i >> (7 - j)) & 1])
else:
Imatrix.SetRow(j, [0])
tmp = Mmatrix * Imatrix
table.append(matrix_col_byte(tmp.GetColumn(0)))
return table
def print_table(table):
for i, s in enumerate(table):
print(f'0x%02X'%s,',', end='')
if (i+1) % 16 == 0:
print()
print_table(gen_lookup_low([0xfe, 0x54, 0xaf, 0xdd, 0xf7, 0xf9, 0xac, 0xe2], 0x34))
print()
print_table(gen_lookup_high([0xfe, 0x54, 0xaf, 0xdd, 0xf7, 0xf9, 0xac, 0xe2], 0x34))
print()
print_table(gen_lookup([0xfe, 0x54, 0xaf, 0xdd, 0xf7, 0xf9, 0xac, 0xe2], 0x34))
print()
示例结果:
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,0xDC ,
0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,0xAF ,
0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,0x73 ,
0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,0xDD ,
0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,0x01 ,
0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,0x72 ,
0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,0xAE ,
0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,0xBF ,
0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,0x63 ,
0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,0x10 ,
0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,0xCC ,
0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,0x62 ,
0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,0xBE ,
0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,0xCD ,
0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,0x11 ,
0x34 ,0x08 ,0x9D ,0xA1 ,0xCE ,0xF2 ,0x67 ,0x5B ,0x82 ,0xBE ,0x2B ,0x17 ,0x78 ,0x44 ,0xD1 ,0xED ,
0xE8 ,0xD4 ,0x41 ,0x7D ,0x12 ,0x2E ,0xBB ,0x87 ,0x5E ,0x62 ,0xF7 ,0xCB ,0xA4 ,0x98 ,0x0D ,0x31 ,
0x9B ,0xA7 ,0x32 ,0x0E ,0x61 ,0x5D ,0xC8 ,0xF4 ,0x2D ,0x11 ,0x84 ,0xB8 ,0xD7 ,0xEB ,0x7E ,0x42 ,
0x47 ,0x7B ,0xEE ,0xD2 ,0xBD ,0x81 ,0x14 ,0x28 ,0xF1 ,0xCD ,0x58 ,0x64 ,0x0B ,0x37 ,0xA2 ,0x9E ,
0xE9 ,0xD5 ,0x40 ,0x7C ,0x13 ,0x2F ,0xBA ,0x86 ,0x5F ,0x63 ,0xF6 ,0xCA ,0xA5 ,0x99 ,0x0C ,0x30 ,
0x35 ,0x09 ,0x9C ,0xA0 ,0xCF ,0xF3 ,0x66 ,0x5A ,0x83 ,0xBF ,0x2A ,0x16 ,0x79 ,0x45 ,0xD0 ,0xEC ,
0x46 ,0x7A ,0xEF ,0xD3 ,0xBC ,0x80 ,0x15 ,0x29 ,0xF0 ,0xCC ,0x59 ,0x65 ,0x0A ,0x36 ,0xA3 ,0x9F ,
0x9A ,0xA6 ,0x33 ,0x0F ,0x60 ,0x5C ,0xC9 ,0xF5 ,0x2C ,0x10 ,0x85 ,0xB9 ,0xD6 ,0xEA ,0x7F ,0x43 ,
0x8B ,0xB7 ,0x22 ,0x1E ,0x71 ,0x4D ,0xD8 ,0xE4 ,0x3D ,0x01 ,0x94 ,0xA8 ,0xC7 ,0xFB ,0x6E ,0x52 ,
0x57 ,0x6B ,0xFE ,0xC2 ,0xAD ,0x91 ,0x04 ,0x38 ,0xE1 ,0xDD ,0x48 ,0x74 ,0x1B ,0x27 ,0xB2 ,0x8E ,
0x24 ,0x18 ,0x8D ,0xB1 ,0xDE ,0xE2 ,0x77 ,0x4B ,0x92 ,0xAE ,0x3B ,0x07 ,0x68 ,0x54 ,0xC1 ,0xFD ,
0xF8 ,0xC4 ,0x51 ,0x6D ,0x02 ,0x3E ,0xAB ,0x97 ,0x4E ,0x72 ,0xE7 ,0xDB ,0xB4 ,0x88 ,0x1D ,0x21 ,
0x56 ,0x6A ,0xFF ,0xC3 ,0xAC ,0x90 ,0x05 ,0x39 ,0xE0 ,0xDC ,0x49 ,0x75 ,0x1A ,0x26 ,0xB3 ,0x8F ,
0x8A ,0xB6 ,0x23 ,0x1F ,0x70 ,0x4C ,0xD9 ,0xE5 ,0x3C ,0x00 ,0x95 ,0xA9 ,0xC6 ,0xFA ,0x6F ,0x53 ,
0xF9 ,0xC5 ,0x50 ,0x6C ,0x03 ,0x3F ,0xAA ,0x96 ,0x4F ,0x73 ,0xE6 ,0xDA ,0xB5 ,0x89 ,0x1C ,0x20 ,
0x25 ,0x19 ,0x8C ,0xB0 ,0xDF ,0xE3 ,0x76 ,0x4A ,0x93 ,0xAF ,0x3A ,0x06 ,0x69 ,0x55 ,0xC0 ,0xFC ,
1.The first element of the table, T[0] should be the C.
2.Use T[1] XOR T[0], T[2] XOR T[0], T[4] XOR T[0], T[8] XOR T[0], T[16] XOR T[0], T[32] XOR T[0], T[64] XOR T[0], T[128] XOR T[0] to calculate matrix M.
Below is sample
1 2 3 4 5 6 7 8
00000110 01110011 01110101 11100101 11100011 10010110 10010000 01010110
0x00, 0x06, 0x73 0x75 0xE5 0xE3 0x96 0x90 0x56
1 10100010
2 01001001
3 11111011
4 00001001
5 10101011
6 01000000
7 11100010
8 00010010
9
00010010
00001001
01001001
10100010
01010110
11100101
01110011
00000110
1 2 3 4 5 6 7 8
00111100 10101001 11111010 10110110
0x00 0x3c 0xa9 0xfa 0xb6
1 11011100
2 10101111
3
4 11011101
5
6
7
8 10111111
10111111
11011101
10101111
11011100
10110110
11111010
10101001
00111100
// Generate matrix based on lookup table
func gen_matrix(lookup [256]byte) (m [8]byte) {
c := lookup[0]
m80 := lookup[0x80] ^ c
m40 := lookup[0x40] ^ c
m20 := lookup[0x20] ^ c
m10 := lookup[0x10] ^ c
m08 := lookup[0x08] ^ c
m04 := lookup[0x04] ^ c
m02 := lookup[0x02] ^ c
m01 := lookup[0x01] ^ c
m[0] = (m80 & 0x80) ^ ((m40 & 0x80) >> 1) ^ ((m20 & 0x80) >> 2) ^ ((m10 & 0x80) >> 3) ^ ((m08 & 0x80) >> 4) ^ ((m04 & 0x80) >> 5) ^ ((m02 & 0x80) >> 6) ^ ((m01 & 0x80) >> 7)
m[1] = ((m80 & 0x40) << 1) ^ (m40 & 0x40) ^ ((m20 & 0x40) >> 1) ^ ((m10 & 0x40) >> 2) ^ ((m08 & 0x40) >> 3) ^ ((m04 & 0x40) >> 4) ^ ((m02 & 0x40) >> 5) ^ ((m01 & 0x40) >> 6)
m[2] = ((m80 & 0x20) << 2) ^ ((m40 & 0x20) << 1) ^ (m20 & 0x20) ^ ((m10 & 0x20) >> 1) ^ ((m08 & 0x20) >> 2) ^ ((m04 & 0x20) >> 3) ^ ((m02 & 0x20) >> 4) ^ ((m01 & 0x20) >> 5)
m[3] = ((m80 & 0x10) << 3) ^ ((m40 & 0x10) << 2) ^ ((m20 & 0x10) << 1) ^ (m10 & 0x10) ^ ((m08 & 0x10) >> 1) ^ ((m04 & 0x10) >> 2) ^ ((m02 & 0x10) >> 3) ^ ((m01 & 0x10) >> 4)
m[4] = ((m80 & 0x08) << 4) ^ ((m40 & 0x08) << 3) ^ ((m20 & 0x08) << 2) ^ ((m10 & 0x08) << 1) ^ (m08 & 0x08) ^ ((m04 & 0x08) >> 1) ^ ((m02 & 0x08) >> 2) ^ ((m01 & 0x08) >> 3)
m[5] = ((m80 & 0x04) << 5) ^ ((m40 & 0x04) << 4) ^ ((m20 & 0x04) << 3) ^ ((m10 & 0x04) << 2) ^ ((m08 & 0x04) << 1) ^ (m04 & 0x04) ^ ((m02 & 0x04) >> 1) ^ ((m01 & 0x04) >> 2)
m[6] = ((m80 & 0x02) << 6) ^ ((m40 & 0x02) << 5) ^ ((m20 & 0x02) << 4) ^ ((m10 & 0x02) << 3) ^ ((m08 & 0x02) << 2) ^ ((m04 & 0x02) << 1) ^ (m02 & 0x02) ^ ((m01 & 0x02) >> 1)
m[7] = ((m80 & 0x01) << 7) ^ ((m40 & 0x01) << 6) ^ ((m20 & 0x01) << 5) ^ ((m10 & 0x01) << 4) ^ ((m08 & 0x01) << 2) ^ ((m04 & 0x01) << 2) ^ ((m02 & 0x01) << 1) ^ (m01 & 0x01)
return
}
Similar python code:
from pyfinite import genericmatrix
def XOR(x, y): return x ^ y
def AND(x, y): return x & y
def DIV(x, y): return x
def matrix_from_cols(cols):
m = genericmatrix.GenericMatrix(size=(8, 8), zeroElement=0, identityElement=1, add=XOR, mul=AND, sub=XOR, div=DIV)
for i in range (8):
k = 7 - i
j = 1 << k
m.SetRow(i, [(cols[0] & j) >> k, (cols[1] & j) >> k, (cols[2] & j) >> k, (cols[3] & j) >> k, (cols[4] & j) >> k, (cols[5] & j) >> k, (cols[6] & j) >> k, (cols[7] & j) >> k])
return m
def gen_matrix_based_table(table):
return matrix_from_cols([table[0x80] ^ table[0], table[0x40] ^ table[0], table[0x20] ^ table[0], table[0x10] ^ table[0], table[0x08] ^ table[0], table[0x04] ^ table[0], table[0x02] ^ table[0], table[0x01] ^ table[0]])
def gen_matrix_based_high_low(high, low):
table = []
for i in range(16):
for j in range(16):
table.append(high[i] ^ low[j])
return gen_matrix_based_table(table)
print(gen_matrix_based_high_low([0x00,0x50,0x14,0x44,0x89,0xd9,0x9d,0xcd,0xde,0x8e,0xca,0x9a,0x57,0x07,0x43,0x13], [0xd3,0x59,0x38,0xb2,0xcc,0x46,0x27,0xad,0x36,0xbc,0xdd,0x57,0x29,0xa3,0xc2,0x48]))
16字节State是这样存储的:
STATE先逆ShiftRows, 再ShiftRows回到初始STATE。
|
逆ShiftRows后=> |
|
再ShiftRows后=> |
|