前言
第四届上海大学生网络安全大赛中有一个 misc easy_py ,给了一个 pyc 文件,打比赛期间现场学了一下 (其实学了好久,这道题真是折腾死我了,不过还好学到了一点东西)
题目
题目给了一个 pyc 文件,最开始尝试直接反编译,但是发现反编译不出来。文件应该是被破坏了,M4x 说应该跟 python opcode 有关
dis marshal 库
在 Hcamael 师傅这里找到了一些有用的 python 库,用来反编译 pyc 文件,得到一些 pyc 文件信息。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
>>> import dis, marshal
>>> f = open("./easy_py.pyc.bak") # 我备份一份最初的 pyc 文件
>>> f.read(4) # 读出文件开头的 magic 表示 python 编译的版本
'\x03\xf3\r\n'
>>> f.read(4) # 读出文件开头的时间信息
'\xbe\xbc\xce['
>>> code = marshal.load(f) # 我试过不读出前面的 magic 和 时间信息,这里会无法运行
>>> dis.disassemble_string(code.co_code) # 查看类似汇编一样的 python 的 opcode
0 JUMP_ABSOLUTE 6
3 LOAD_CONST 13091 (13091)
>> 6 JUMP_ABSOLUTE 9
>> 9 LOAD_CONST 0 (0) # push co_consts[consti] onto the stack
12 LOAD_CONST 1 (1)
15 LOAD_CONST 2 (2)
18 LOAD_CONST 3 (3)
21 LOAD_CONST 4 (4)
24 LOAD_CONST 5 (5)
27 LOAD_CONST 2 (2)
30 LOAD_CONST 6 (6)
33 LOAD_CONST 6 (6)
36 LOAD_CONST 7 (7)
39 LOAD_CONST 8 (8)
42 LOAD_CONST 9 (9)
45 LOAD_CONST 10 (10)
48 LOAD_CONST 11 (11)
51 LOAD_CONST 12 (12)
54 BUILD_LIST 15
57 STORE_NAME 0 (0) # 存储进名字
60 LOAD_NAME 1 (1)
63 CALL_FUNCTION 0
66 STORE_NAME 2 (2)
69 LOAD_CONST 0 (0)
72 STORE_NAME 3 (3)
75 SETUP_LOOP 91 (to 169) # for 循环 到 169 排的指令
78 LOAD_NAME 2 (2)
81 GET_ITER
82 FOR_ITER 83 (to 168)
85 STORE_NAME 4 (4)
88 LOAD_NAME 5 (5)
91 LOAD_NAME 4 (4)
94 CALL_FUNCTION 1
97 UNARY_INVERT # 取反
98 LOAD_CONST 13 (13)
101 BINARY_AND
102 LOAD_NAME 5 (5)
105 LOAD_NAME 4 (4)
108 CALL_FUNCTION 1
111 LOAD_CONST 18 (18)
114 BINARY_AND
115 BINARY_OR
116 STORE_NAME 4 (4)
119 LOAD_NAME 4 (4)
122 LOAD_NAME 0 (0)
125 LOAD_NAME 3 (3)
128 BINARY_SUBSCR # 对某个列表取第几位的值
129 COMPARE_OP 2 (==)
132 POP_JUMP_IF_FALSE 144
135 LOAD_NAME 3 (3)
138 UNARY_NEGATIVE # TOS = -TOS (取负)
139 LOAD_CONST 14 (14)
142 BINARY_ADD
143 UNARY_NEGATIVE
>> 144 STORE_NAME 3 (3)
147 JUMP_ABSOLUTE 73
150 JUMP_ABSOLUTE 73
153 LOAD_CONST 15 (15)
156 PRINT_ITEM
157 PRINT_NEWLINE
158 LOAD_NAME 6 (6)
161 CALL_FUNCTION 0
164 POP_TOP
165 JUMP_ABSOLUTE 73
>> 168 POP_BLOCK
>> 169 LOAD_CONST 16 (16)
172 PRINT_ITEM
173 PRINT_NEWLINE
174 LOAD_CONST 17 (17)
177 RETURN_VALUE
>>> code.co_consts # 字节码中用到的常量
(0, 10, 7, 1, 29, 14, 22, 31, 57, 30, 9, 52, 27, 102, -1, 'wrong', 'right', None, -103)
>>> code.co_names # 当前代码对象中使用的名字
('cmp', 'raw_input', 'flag', 'm', 'i', 'ord', 'exit')
思路
感觉这道题并不是指令缺少的错误,应该是文件哪个地方被损坏,导致不能正常运行
一开始觉得这道题是 misc 的题,应该就是修复哪里,就可以得到可运行的文件
这道题目很明显最有问题的是 3 LOAD_CONST 13091 (13091)
这一行(看出来的原因:搜了一些这类 opcode 代码对比了一下,code.co_consts 只有 19 项,明显没有下标为 13091 的 const)
而且运行的时候,报错如下
1
2
3
4
5
6
$ ./easy_py.pyc.bak
a
XXX lineno: 4, opcode: 3
Traceback (most recent call last):
File "easy_py.py", line 4, in <module>
SystemError: unknown opcode
最开始的时候写代码更改参数 LOAD_CONST X(Y) 没成功, 更改指令也没成功
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
$ hd ./easy_py.pyc.bak
00000000 03 f3 0d 0a be bc ce 5b 63 00 00 00 00 00 00 00 |.......[c.......|
00000010 00 0f 00 00 00 40 00 00 00 73 b2 00 00 00 71 06 |.....@...s....q.|
00000020 00 64 23 33 71 09 00 64 00 00 64 01 00 64 02 00 |.d#3q..d..d..d..|
00000030 64 03 00 64 04 00 64 05 00 64 02 00 64 06 00 64 |d..d..d..d..d..d|
00000040 06 00 64 07 00 64 08 00 64 09 00 64 0a 00 64 0b |..d..d..d..d..d.|
00000050 00 64 0c 00 67 0f 00 5a 00 00 65 01 00 83 00 00 |.d..g..Z..e.....|
00000060 5a 02 00 64 00 00 5a 03 00 78 5b 00 65 02 00 44 |Z..d..Z..x[.e..D|
00000070 5d 53 00 5a 04 00 65 05 00 65 04 00 83 01 00 0f |]S.Z..e..e......|
00000080 64 0d 00 40 65 05 00 65 04 00 83 01 00 64 12 00 |d..@e..e.....d..|
00000090 40 42 5a 04 00 65 04 00 65 00 00 65 03 00 19 6b |@BZ..e..e..e...k|
000000a0 02 00 72 90 00 65 03 00 0b 64 0e 00 17 0b 5a 03 |..r..e...d....Z.|
000000b0 00 71 49 00 71 49 00 64 0f 00 47 48 65 06 00 83 |.qI.qI.d..GHe...|
000000c0 00 00 01 71 49 00 57 64 10 00 47 48 64 11 00 53 |...qI.Wd..GHd..S|
000000d0 28 13 00 00 00 69 00 00 00 00 69 0a 00 00 00 69 |(....i....i....i|
000000e0 07 00 00 00 69 01 00 00 00 69 1d 00 00 00 69 0e |....i....i....i.|
000000f0 00 00 00 69 16 00 00 00 69 1f 00 00 00 69 39 00 |...i....i....i9.|
00000100 00 00 69 1e 00 00 00 69 09 00 00 00 69 34 00 00 |..i....i....i4..|
00000110 00 69 1b 00 00 00 69 66 00 00 00 69 ff ff ff ff |.i....if...i....|
00000120 74 05 00 00 00 77 72 6f 6e 67 74 05 00 00 00 72 |t....wrongt....r|
00000130 69 67 68 74 4e 69 99 ff ff ff 28 07 00 00 00 74 |ightNi....(....t|
00000140 03 00 00 00 63 6d 70 74 09 00 00 00 72 61 77 5f |....cmpt....raw_|
00000150 69 6e 70 75 74 74 04 00 00 00 66 6c 61 67 74 01 |inputt....flagt.|
00000160 00 00 00 6d 74 01 00 00 00 69 74 03 00 00 00 6f |...mt....it....o|
00000170 72 64 74 04 00 00 00 65 78 69 74 28 00 00 00 00 |rdt....exit(....|
00000180 28 00 00 00 00 28 00 00 00 00 73 0a 00 00 00 65 |(....(....s....e|
00000190 61 73 79 5f 70 79 2e 70 79 74 08 00 00 00 3c 6d |asy_py.pyt....<m|
000001a0 6f 64 75 6c 65 3e 01 00 00 00 73 14 00 00 00 33 |odule>....s....3|
000001b0 01 09 01 06 01 0d 01 1f 01 10 01 0c 01 06 02 05 |................|
000001c0 01 0b 02 |...|
000001c3
即这里文件偏移 0x21 0x22 0x23 处的三个字节 (64 23 33,对应字节码偏移 3 4 5),虽然没有能反编译出来正确的文件,但是在执行 uncompyle2 ./easy_py.pyc > easy_py.py 时
出现了能看的指令代码 (这是我在 nop 掉 3 4 5 之后得到的结果)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
$ uncompyle2 ./easy_py.pyc > easy_py.py
### Can't uncompyle ./easy_py.pyc
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/uncompyle2/__init__.py", line 197, in main
uncompyle_file(infile, outstream, showasm, showast, deob)
File "/usr/local/lib/python2.7/dist-packages/uncompyle2/__init__.py", line 130, in uncompyle_file
uncompyle(version, co, outstream, showasm, showast, deob)
File "/usr/local/lib/python2.7/dist-packages/uncompyle2/__init__.py", line 98, in uncompyle
ast = walker.build_ast(tokens, customize)
File "/usr/local/lib/python2.7/dist-packages/uncompyle2/Walker.py", line 1471, in build_ast
raise ParserError(e, tokens)
ParserError: --- This code section failed: ---
0 JUMP_ABSOLUTE '6'
3 NOP None
4 NOP None
5 NOP None
6 JUMP_ABSOLUTE '9'
9 LOAD_CONST 0
12 LOAD_CONST 10
15 LOAD_CONST 7
18 LOAD_CONST 1
21 LOAD_CONST 29
24 LOAD_CONST 14
27 LOAD_CONST 7
30 LOAD_CONST 22
33 LOAD_CONST 22
36 LOAD_CONST 31
39 LOAD_CONST 57
42 LOAD_CONST 30
45 LOAD_CONST 9
48 LOAD_CONST 52
51 LOAD_CONST 27
54 BUILD_LIST_15 None
57 STORE_NAME 'cmp'
60 LOAD_NAME 'raw_input'
63 CALL_FUNCTION_0 None
66 STORE_NAME 'flag'
69 LOAD_CONST 0
72 STORE_NAME 'm'
75 SETUP_LOOP '169'
78 LOAD_NAME 'flag'
81 GET_ITER None
82 FOR_ITER '168'
85 STORE_NAME 'i'
88 LOAD_NAME 'ord'
91 LOAD_NAME 'i'
94 CALL_FUNCTION_1 None
97 UNARY_INVERT None
98 LOAD_CONST 102
101 BINARY_AND None
102 LOAD_NAME 'ord'
105 LOAD_NAME 'i'
108 CALL_FUNCTION_1 None
111 LOAD_CONST -103
114 BINARY_AND None
115 BINARY_OR None
116 STORE_NAME 'i'
119 LOAD_NAME 'i'
122 LOAD_NAME 'cmp'
125 LOAD_NAME 'm'
128 BINARY_SUBSCR None
129 COMPARE_OP '=='
132 POP_JUMP_IF_FALSE '144'
135 LOAD_NAME 'm'
138 UNARY_NEGATIVE None
139 LOAD_CONST -1
142 BINARY_ADD None
143 UNARY_NEGATIVE None
144 STORE_NAME 'm'
147 JUMP_BACK '73'
150 JUMP_BACK '73'
153 LOAD_CONST 'wrong'
156 PRINT_ITEM None
157 PRINT_NEWLINE_CONT None
158 LOAD_NAME 'exit'
161 CALL_FUNCTION_0 None
164 POP_TOP None
165 JUMP_BACK '73'
168 POP_BLOCK None
169_0 COME_FROM '75'
169 LOAD_CONST 'right'
172 PRINT_ITEM None
173 PRINT_NEWLINE_CONT None
Syntax error at or near `NOP' token at offset 3
还原程序
最后读指令粗略还原程序代码
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Rough reconstruction of easy_py.pyc, read back from its disassembly."""

# The challenge reads this with raw_input(); it is left empty here so the
# reconstruction can be run without blocking on stdin.
flag = ""
# Expected transformed value for each character of the flag.
cmp = [0, 10, 7, 1, 29, 14, 7, 22, 22, 31, 57, 30, 9, 52, 27]
m = 0
for i in flag:
    # The bytecode uses the constants 102 and -103 (-103 == ~102); masking
    # -103 to 8 bits gives the same result for byte-sized characters.
    i = (~ord(i) & 102) | (ord(i) & (-103 & 0xff))
    # In the disassembly a match advances m (m = -(-m + -1)) and the loop
    # continues; the 'wrong' + exit() sequence is taken to be the mismatch
    # path (the jump targets in the damaged file are unreliable).
    if i != cmp[m]:
        print('wrong')
        exit()
    m += 1
print('right')
re 脚本
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Recover the easy_py flag by inverting the per-character transform."""

# Expected transformed value for each character of the flag.
cmp = [0, 10, 7, 1, 29, 14, 7, 22, 22, 31, 57, 30, 9, 52, 27]

# The check computes (~c & 102) | (c & (-103 & 0xff)).  Because
# -103 & 0xff == 0x99 is the 8-bit complement of 102, this equals c ^ 102
# for any byte value c.  XOR is its own inverse, so each character is
# recovered directly instead of brute-forcing it with an unbounded loop.
KEY = 102
flag = ''.join(chr(value ^ KEY) for value in cmp)
print(flag)
# flag{happy_xoR}
还是不明白为什么我试过直接删除那几个字节之后还是不能反编译出来
最后
感觉这道题还是很有意思的,主要以前也想去了解一下高级语言是怎么架在低级语言上面的,怎么实现的,这道题刚好需要这样的知识
https://0x48.pw/2017/03/20/0x2f/
https://docs.python.org/2/library/dis.html