python opcode

pyc; opcode

Posted by Vang3lis on November 5, 2018

前言

第四届上海大学生网络安全大赛中有一个 misc easy_py ,给了一个 pyc 文件,打比赛期间现场学了一下 (其实学了好久,这道题真是折腾死我了,不过还好学到了一点东西)

题目

题目给了一个 pyc 文件,最开始进行反编译,但是发现题目不能反编译出来。文件是被破坏了,M4x 说应该跟 python opcode 有关

dis marshal 库

在 Hcamael 师傅这里找到了一些有用的 python 库(dis 和 marshal),可以用来反汇编 pyc 文件,得到一些 pyc 文件信息。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
>>> import dis, marshal
>>> f = open("./easy_py.pyc.bak")                 # 我备份一份最初的 pyc 文件
>>> f.read(4)                                     # 读出文件开头的 magic 表示 python 编译的版本
'\x03\xf3\r\n'
>>> f.read(4)                                     # 读出文件开头的时间信息
'\xbe\xbc\xce['
>>> code = marshal.load(f)                        # 我试过不读出前面的 magic 和 时间信息,这里会无法运行
>>> dis.disassemble_string(code.co_code)          # 查看类似汇编一样的 python 的 opcode 
          0 JUMP_ABSOLUTE       6
          3 LOAD_CONST      13091 (13091)         
    >>    6 JUMP_ABSOLUTE       9
    >>    9 LOAD_CONST          0 (0)             # push co_consts[consti] onto the stack
         12 LOAD_CONST          1 (1)
         15 LOAD_CONST          2 (2)
         18 LOAD_CONST          3 (3)
         21 LOAD_CONST          4 (4)
         24 LOAD_CONST          5 (5)
         27 LOAD_CONST          2 (2)
         30 LOAD_CONST          6 (6)
         33 LOAD_CONST          6 (6)
         36 LOAD_CONST          7 (7)
         39 LOAD_CONST          8 (8)
         42 LOAD_CONST          9 (9)
         45 LOAD_CONST         10 (10)
         48 LOAD_CONST         11 (11)
         51 LOAD_CONST         12 (12)
         54 BUILD_LIST         15
         57 STORE_NAME          0 (0)             # 存储进名字
         60 LOAD_NAME           1 (1)
         63 CALL_FUNCTION       0
         66 STORE_NAME          2 (2)
         69 LOAD_CONST          0 (0)
         72 STORE_NAME          3 (3)
         75 SETUP_LOOP         91 (to 169)        # for 循环 到 169 排的指令
         78 LOAD_NAME           2 (2)
         81 GET_ITER
         82 FOR_ITER           83 (to 168)
         85 STORE_NAME          4 (4)
         88 LOAD_NAME           5 (5)
         91 LOAD_NAME           4 (4)
         94 CALL_FUNCTION       1
         97 UNARY_INVERT                          # 取反
         98 LOAD_CONST         13 (13)
        101 BINARY_AND             
        102 LOAD_NAME           5 (5)
        105 LOAD_NAME           4 (4)
        108 CALL_FUNCTION       1
        111 LOAD_CONST         18 (18)
        114 BINARY_AND
        115 BINARY_OR
        116 STORE_NAME          4 (4)
        119 LOAD_NAME           4 (4)
        122 LOAD_NAME           0 (0)
        125 LOAD_NAME           3 (3)
        128 BINARY_SUBSCR                         # 对某个列表取第几位的值
        129 COMPARE_OP          2 (==)
        132 POP_JUMP_IF_FALSE   144               
        135 LOAD_NAME           3 (3)
        138 UNARY_NEGATIVE                        # TOS = -TOS
        139 LOAD_CONST         14 (14)
        142 BINARY_ADD
        143 UNARY_NEGATIVE
    >>  144 STORE_NAME          3 (3)
        147 JUMP_ABSOLUTE      73
        150 JUMP_ABSOLUTE      73
        153 LOAD_CONST         15 (15)
        156 PRINT_ITEM
        157 PRINT_NEWLINE
        158 LOAD_NAME           6 (6)
        161 CALL_FUNCTION       0
        164 POP_TOP
        165 JUMP_ABSOLUTE      73
    >>  168 POP_BLOCK
    >>  169 LOAD_CONST         16 (16)
        172 PRINT_ITEM
        173 PRINT_NEWLINE
        174 LOAD_CONST         17 (17)
        177 RETURN_VALUE
>>> code.co_consts                                # 字节码中用到的常量
(0, 10, 7, 1, 29, 14, 22, 31, 57, 30, 9, 52, 27, 102, -1, 'wrong', 'right', None, -103)
>>> code.co_names                                 # 当前对象中使用的对象名
('cmp', 'raw_input', 'flag', 'm', 'i', 'ord', 'exit')

思路

感觉这道题并不是指令缺少的错误,应该是文件哪个地方被损坏,导致不能正常运行

一开始觉得这道题是 misc 的题,应该就是修复哪里,就可以得到可运行的文件

这道题目最明显有问题的是 3 LOAD_CONST 13091 (13091) 这一排(看出来的原因:搜了一些这类 opcode 代码对比查看,而 code.co_consts 里一共只有 19 个常量,明显没有下标为 13091 的 const)

而且运行的时候,报错如下

1
2
3
4
5
6
$ ./easy_py.pyc.bak
a
XXX lineno: 4, opcode: 3
Traceback (most recent call last):
  File "easy_py.py", line 4, in <module>
SystemError: unknown opcode

最开始的时候写代码更改参数 LOAD_CONST X(Y) 没成功, 更改指令也没成功

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
$ hd ./easy_py.pyc.bak
00000000  03 f3 0d 0a be bc ce 5b  63 00 00 00 00 00 00 00  |.......[c.......|
00000010  00 0f 00 00 00 40 00 00  00 73 b2 00 00 00 71 06  |.....@...s....q.|
00000020  00 64 23 33 71 09 00 64  00 00 64 01 00 64 02 00  |.d#3q..d..d..d..|
00000030  64 03 00 64 04 00 64 05  00 64 02 00 64 06 00 64  |d..d..d..d..d..d|
00000040  06 00 64 07 00 64 08 00  64 09 00 64 0a 00 64 0b  |..d..d..d..d..d.|
00000050  00 64 0c 00 67 0f 00 5a  00 00 65 01 00 83 00 00  |.d..g..Z..e.....|
00000060  5a 02 00 64 00 00 5a 03  00 78 5b 00 65 02 00 44  |Z..d..Z..x[.e..D|
00000070  5d 53 00 5a 04 00 65 05  00 65 04 00 83 01 00 0f  |]S.Z..e..e......|
00000080  64 0d 00 40 65 05 00 65  04 00 83 01 00 64 12 00  |d..@e..e.....d..|
00000090  40 42 5a 04 00 65 04 00  65 00 00 65 03 00 19 6b  |@BZ..e..e..e...k|
000000a0  02 00 72 90 00 65 03 00  0b 64 0e 00 17 0b 5a 03  |..r..e...d....Z.|
000000b0  00 71 49 00 71 49 00 64  0f 00 47 48 65 06 00 83  |.qI.qI.d..GHe...|
000000c0  00 00 01 71 49 00 57 64  10 00 47 48 64 11 00 53  |...qI.Wd..GHd..S|
000000d0  28 13 00 00 00 69 00 00  00 00 69 0a 00 00 00 69  |(....i....i....i|
000000e0  07 00 00 00 69 01 00 00  00 69 1d 00 00 00 69 0e  |....i....i....i.|
000000f0  00 00 00 69 16 00 00 00  69 1f 00 00 00 69 39 00  |...i....i....i9.|
00000100  00 00 69 1e 00 00 00 69  09 00 00 00 69 34 00 00  |..i....i....i4..|
00000110  00 69 1b 00 00 00 69 66  00 00 00 69 ff ff ff ff  |.i....if...i....|
00000120  74 05 00 00 00 77 72 6f  6e 67 74 05 00 00 00 72  |t....wrongt....r|
00000130  69 67 68 74 4e 69 99 ff  ff ff 28 07 00 00 00 74  |ightNi....(....t|
00000140  03 00 00 00 63 6d 70 74  09 00 00 00 72 61 77 5f  |....cmpt....raw_|
00000150  69 6e 70 75 74 74 04 00  00 00 66 6c 61 67 74 01  |inputt....flagt.|
00000160  00 00 00 6d 74 01 00 00  00 69 74 03 00 00 00 6f  |...mt....it....o|
00000170  72 64 74 04 00 00 00 65  78 69 74 28 00 00 00 00  |rdt....exit(....|
00000180  28 00 00 00 00 28 00 00  00 00 73 0a 00 00 00 65  |(....(....s....e|
00000190  61 73 79 5f 70 79 2e 70  79 74 08 00 00 00 3c 6d  |asy_py.pyt....<m|
000001a0  6f 64 75 6c 65 3e 01 00  00 00 73 14 00 00 00 33  |odule>....s....3|
000001b0  01 09 01 06 01 0d 01 1f  01 10 01 0c 01 06 02 05  |................|
000001c0  01 0b 02                                          |...|
000001c3

即这里的 0x21 0x22 0x23 这几位,虽然没有能反编译出来正确的文件,但是在 uncompyle2 ./easy_py.pyc > easy_py.py 出现了能看的指令代码 (这是我在 nop 掉偏移 3 4 5 这三个字节之后得到的结果)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
$ uncompyle2 ./easy_py.pyc  > easy_py.py
### Can't uncompyle ./easy_py.pyc
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/uncompyle2/__init__.py", line 197, in main
    uncompyle_file(infile, outstream, showasm, showast, deob)
  File "/usr/local/lib/python2.7/dist-packages/uncompyle2/__init__.py", line 130, in uncompyle_file
    uncompyle(version, co, outstream, showasm, showast, deob)
  File "/usr/local/lib/python2.7/dist-packages/uncompyle2/__init__.py", line 98, in uncompyle
    ast = walker.build_ast(tokens, customize)
  File "/usr/local/lib/python2.7/dist-packages/uncompyle2/Walker.py", line 1471, in build_ast
    raise ParserError(e, tokens)
ParserError: --- This code section failed: ---

0	JUMP_ABSOLUTE     '6'
3	NOP               None
4	NOP               None
5	NOP               None
6	JUMP_ABSOLUTE     '9'
9	LOAD_CONST        0
12	LOAD_CONST        10
15	LOAD_CONST        7
18	LOAD_CONST        1
21	LOAD_CONST        29
24	LOAD_CONST        14
27	LOAD_CONST        7
30	LOAD_CONST        22
33	LOAD_CONST        22
36	LOAD_CONST        31
39	LOAD_CONST        57
42	LOAD_CONST        30
45	LOAD_CONST        9
48	LOAD_CONST        52

51	LOAD_CONST        27
54	BUILD_LIST_15     None
57	STORE_NAME        'cmp'

60	LOAD_NAME         'raw_input'
63	CALL_FUNCTION_0   None

66	STORE_NAME        'flag'
69	LOAD_CONST        0
72	STORE_NAME        'm'
75	SETUP_LOOP        '169'
78	LOAD_NAME         'flag'
81	GET_ITER          None
82	FOR_ITER          '168'
85	STORE_NAME        'i'
88	LOAD_NAME         'ord'
91	LOAD_NAME         'i'
94	CALL_FUNCTION_1   None
97	UNARY_INVERT      None
98	LOAD_CONST        102
101	BINARY_AND        None
102	LOAD_NAME         'ord'
105	LOAD_NAME         'i'
108	CALL_FUNCTION_1   None
111	LOAD_CONST        -103
114	BINARY_AND        None
115	BINARY_OR         None
116	STORE_NAME        'i'
119	LOAD_NAME         'i'
122	LOAD_NAME         'cmp'
125	LOAD_NAME         'm'
128	BINARY_SUBSCR     None
129	COMPARE_OP        '=='
132	POP_JUMP_IF_FALSE '144'
135	LOAD_NAME         'm'

138	UNARY_NEGATIVE    None
139	LOAD_CONST        -1
142	BINARY_ADD        None
143	UNARY_NEGATIVE    None

144	STORE_NAME        'm'
147	JUMP_BACK         '73'
150	JUMP_BACK         '73'
153	LOAD_CONST        'wrong'
156	PRINT_ITEM        None
157	PRINT_NEWLINE_CONT None
158	LOAD_NAME         'exit'
161	CALL_FUNCTION_0   None
164	POP_TOP           None
165	JUMP_BACK         '73'
168	POP_BLOCK         None
169_0	COME_FROM         '75'
169	LOAD_CONST        'right'
172	PRINT_ITEM        None
173	PRINT_NEWLINE_CONT None

Syntax error at or near `NOP' token at offset 3

还原程序

最后读指令粗略还原程序代码

1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Rough reconstruction of easy_py.pyc, read off its disassembly.

The bytecode dump is damaged (some jump targets do not land on instruction
boundaries), so the control flow below is the apparent intent of the
program rather than a byte-exact decompilation.
"""

# Table built by BUILD_LIST 15 and stored under the name `cmp`.  The name is
# taken from the pyc's co_names, which is why it shadows the Python 2 builtin.
cmp = [0, 10, 7, 1, 29, 14, 7, 22, 22, 31, 57, 30, 9, 52, 27]


def check(flag):
    """Return True when every character of *flag* matches the table.

    Each character is transformed and compared with ``cmp[m]``; on a match
    ``m`` advances by one, on the first mismatch the check fails.  As in the
    bytecode, the length of the input is not verified: an empty string or a
    correct prefix passes, and an input whose first 15 characters match but
    which is longer raises IndexError.
    """
    m = 0
    for i in flag:
        # 102 and -103 are co_consts[13] and co_consts[18]; because
        # -103 == ~102 the whole expression is equivalent to ord(i) ^ 102.
        i = (~ord(i) & 102) | (ord(i) & -103)
        if i != cmp[m]:
            return False
        # The bytecode computes -(-m + -1), i.e. m + 1.
        m += 1
    return True


if __name__ == "__main__":
    try:
        read_line = raw_input  # Python 2, which the pyc was compiled for
    except NameError:
        read_line = input  # Python 3 fallback so the sketch still runs
    if not check(read_line()):
        print("wrong")
        exit()
    print("right")

re 脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Recover the easy_py flag by brute-forcing each entry of the table."""

cmp = [0, 10, 7, 1, 29, 14, 7, 22, 22, 31, 57, 30, 9, 52, 27]


def _encode(value):
    """Apply the checker's per-character transform to an integer code."""
    return (~value & 102) | (value & (-103 & 0xff))


def _first_preimage(target):
    """Return the smallest non-negative integer that encodes to *target*."""
    candidate = 0
    while _encode(candidate) != target:
        candidate += 1
    return candidate


# One character per table entry, in table order.
flag = ''.join(chr(_first_preimage(target)) for target in cmp)
print(flag)

# flag{happy_xoR}

我也试过直接删除那几个字节,但还是不明白为什么这样仍然不能反编译出来

最后

感觉这道题还是很有意思的,主要以前也想去了解一下高级语言是怎么架在低级语言上面的,怎么实现的,这道题刚好需要这样的知识

https://0x48.pw/2017/03/20/0x2f/
https://docs.python.org/2/library/dis.html