深入Pyd逆向
字数 1504 2025-08-22 12:22:42
PYD文件逆向分析全面指南
1. PYD文件基础认知
PYD文件是Python的扩展模块,本质上是Windows动态链接库(DLL)的一种特殊形式,与.so文件(Linux)和.dll文件(Windows)类似。与PYC文件不同,PYD文件无法直接恢复源码,必须通过逆向工程手段分析其逻辑。
关键特性:
- 编译生成,非解释执行
- 包含机器码而非字节码
- 需要逆向分析而非反编译
- 通常用于性能敏感或需要保护的核心逻辑
2. PYD文件编译实践
2.1 示例代码准备
test.py:
import base64

# RC4 key bytes, and the Base64 text that the encrypted flag must equal.
key = [ord(i) for i in "key"]
res = "GVhilbQktqU="


# NOTE: the statements below are intentionally kept one-to-one with the
# original listing, because the decompiled output discussed in section 4
# corresponds to exactly these operations.
def check(flag):
    """Return True when ``flag`` (16 hex digits) encrypts to ``res``.

    The flag is parsed as a base-16 integer, serialized to 8 big-endian
    bytes, encrypted with ``encrypt`` under the module-level ``key``,
    Base64-encoded and compared with ``res``.

    Raises:
        ValueError: if ``flag`` has 16 characters but is not valid base-16.
    """
    if len(flag) != 16:
        return False
    flag = int(flag, 16)
    flag = flag.to_bytes(8, 'big')
    flag = encrypt(flag, key)
    flag = base64.b64encode(bytes(flag)).decode()
    if flag == res:
        return True
    return False


def encrypt(ptext, key):
    """Encrypt the byte values in ``ptext`` with RC4 and return a list of ints.

    Args:
        ptext: iterable of integers in range(256) (e.g. a ``bytes`` object).
        key: non-empty sequence of integer key bytes.

    Returns:
        A list with one output byte (int) per input byte.
    """
    S = list(range(256))
    j = 0
    out = []
    # Key scheduling: permute S under control of the key.
    for i in range(256):
        j = (j + S[i] + key[i % len(key)]) % 256
        S[i], S[j] = S[j], S[i]
    # Keystream generation: one keystream byte is XORed into each input byte.
    i = j = 0
    for t in ptext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        out.append(t ^ S[(S[i] + S[j]) % 256])
    return out


def logic():
    """Prompt for a flag on stdin and print whether ``check`` accepts it."""
    flag = input("Enter flag: ")
    if check(flag):
        print("Correct flag!")
    else:
        print("Wrong flag!")


__test__ = {}
setup.py:
from setuptools import setup, Extension
from Cython.Build import cythonize

# Single extension module "test" built from test.py.
# "/Zi" and "/DEBUG" are MSVC compiler/linker switches that request
# debug information for the resulting binary.
test_extension = Extension(
    name="test",
    sources=["test.py"],
    extra_compile_args=["/Zi"],
    extra_link_args=["/DEBUG"],
)
ext_modules = [test_extension]

# annotate=True asks Cython to also write its HTML annotation report.
setup(
    name="test",
    ext_modules=cythonize(ext_modules, annotate=True),
)
2.2 编译命令
python setup.py build_ext --inplace
编译后生成:
- test.pyd - 编译后的扩展模块
- test.pdb - 调试信息文件(PDB)
3. PYD文件逆向分析关键点
3.1 关键初始化函数
PYD文件包含多个初始化函数,分析时应重点关注:
-
__Pyx_CreateStringTabAndInitStrings
- 初始化所有字符串常量
- 特征:_pyx_mstate_global->__pyx_n_s_ + 字符串名
- 包含程序字符串、变量名和模块属性
-
__Pyx_InitCachedBuiltins
- 初始化Python内置函数引用
- 示例:
_pyx_builtin_input = _Pyx_PyObject_GetAttrStrNoError(_pyx_mstate_global->__pyx_b, pyx_n_s_input)
-
__Pyx_InitConstants
- 初始化整数常量
- 示例:
_pyx_mstate_global->__pyx_int_0 = PyLong_FromLong(0LL)
-
__Pyx_InitCachedConstants
- 初始化元组等复合常量
- 示例:
_pyx_mstate_global->__pyx_tuple_ = PyTuple_Pack(2LL, __pyx_int_8, __pyx_n_s_big)
-
PyInit_test
- 模块主初始化函数
- 通过 __pyx_pymod_exec_test 执行
- 负责导入依赖库和设置全局变量
3.2 全局变量存储结构
- _pyx_mstate_global->__pyx_d - 模块的字典对象,存储全局变量
- _pyx_mstate_global->__pyx_b - builtins模块引用
- _pyx_mstate_global->__pyx_int_* - 各种整数常量
4. 核心逻辑逆向分析
4.1 主逻辑流程
-
输入获取:
v5 = _Pyx_PyObject_Call(_pyx_builtin_input, _pyx_mstate_global->__pyx_tuple__3, a3); -
check函数调用:
AttrStrNoError = _PyDict_GetItem_KnownHash(_pyx_mstate_global->__pyx_d, pyx_n_s_check, ...); v20 = _Pyx_PyObject_FastCallDict(AttrStrNoError, &__pyx_callargs[-v16 + 1], ...); -
结果判断:
IsTrue = v20 == (PyObject*)&_Py_TrueStruct; if (IsTrue) { v24 = _Pyx_PyObject_Call(_pyx_builtin_print, _pyx_mstate_global->__pyx_tuple__4, v21); } else { v24 = _Pyx_PyObject_Call(_pyx_builtin_print, _pyx_mstate_global->__pyx_tuple__5, v21); }
4.2 check函数逆向
-
长度检查:
v3 = PyObject_Size(__pyx_self); if (v3 != 16) goto LABEL_4; -
十六进制转换:
v8 = PyTuple_New(2LL); v8[1].ob_type = v2; // flag v8[2].ob_refcnt = (__int64)__pyx_int_16; // base 16 v12 = _Pyx_PyObject_Call(PyLong_Type, v8, v9); -
字节转换:
Attr = PyObject_GetAttr(v12, pyx_n_s_to_bytes); v19 = _Pyx_PyObject_Call(Attr, _pyx_mstate_global->__pyx_tuple_, v18); -
加密调用:
Item_KnownHash = _PyDict_GetItem_KnownHash(_pyx_mstate_global->__pyx_d, v24, ...); BuiltinName = _PyDict_GetItem_KnownHash(v21->__pyx_d, v30, ...); v36 = _Pyx_PyObject_FastCallDict(AttrStrNoError, &__pyx_callargs[-v33 + 1], ...); -
Base64编码:
v43 = _PyDict_GetItem_KnownHash(_pyx_mstate_global->__pyx_d, v42, ...); v47 = PyObject_GetAttr(AttrStrNoError, pyx_n_s_b64encode); v49 = _Pyx_PyObject_FastCallDict(v47, &__pyx_callargs[-v53 - 1], ...); v58 = PyObject_GetAttr(AttrStrNoError, pyx_n_s_decode); v65 = _Pyx_PyObject_FastCallDict(v49, &args[-v61], v62, v59); -
结果比较:
v2 = v65; AttrStrNoError = _PyDict_GetItem_KnownHash(_pyx_mstate_global->__pyx_d, v69, ...); v49 = PyObject_RichCompare(v2, AttrStrNoError, 2LL); // 2表示==操作 IsTrue = _Pyx_PyObject_IsTrue(v49);
4.3 encrypt函数逆向
-
S盒初始化:
v9 = _Pyx_PyObject_Call(_pyx_builtin_range, _pyx_mstate_global->__pyx_tuple__2, __pyx_self); pyx_int_1 = PySequence_List(v9); -
S盒置换:
for (i = 0; i < 256; ++i) { v19 = PyLong_FromLong(i); v20 = (PyObject*)v19; Item = _Pyx_PyObject_GetItem(v13, v20); v9 = PyNumber_Add(pyx_int_0, Item); v23 = PyObject_Size(__pyx_self); v24 = PyLong_FromSsize_t(v23); v4 = PyNumber_Remainder(v6, v24); pyx_int_1 = _Pyx_PyObject_GetItem(__pyx_self, v4); v4 = PyNumber_Add(v9, pyx_int_1); v27 = _Pyx_PyInt_RemainderObjC(v4, _pyx_mstate_global->__pyx_int_256, ...); pyx_int_1 = _Pyx_PyObject_GetItem(obj, v27); v4 = _Pyx_PyObject_GetItem(obj, (PyObject*)v6); PyObject_SetItem(obj, v6, pyx_int_1); PyObject_SetItem(obj, pyx_int_0, v4); } -
加密过程:
Iter = PyObject_GetIter(__pyx_v_key); while (1) { v39 = Iter->tp_iternext(Iter); if (!v39) break; pyx_int_1 = _pyx_mstate_global->__pyx_int_1; v48 = _Pyx_PyInt_RemainderObjC(pyx_int_1, _pyx_mstate_global->__pyx_int_256, ...); v50 = _Pyx_PyObject_GetItem(obj, v48); pyx_int_1 = PyNumber_Add(pyx_int_0, v50); v53 = _Pyx_PyInt_RemainderObjC(pyx_int_1, _pyx_mstate_global->__pyx_int_256, ...); pyx_int_0 = v53; v55 = v53; v9 = _Pyx_PyObject_GetItem(obj, v53); pyx_int_1 = _Pyx_PyObject_GetItem(obj, (PyObject*)v6); PyObject_SetItem(obj, v6, v9); PyObject_SetItem(obj, v55, pyx_int_1); pyx_int_1 = _Pyx_PyObject_GetItem(obj, (PyObject*)v6); v56 = _Pyx_PyObject_GetItem(obj, v55); v5 = PyNumber_Add(pyx_int_1, v56); v9 = _Pyx_PyInt_RemainderObjC(v5, _pyx_mstate_global->__pyx_int_256, ...); v5 = _Pyx_PyObject_GetItem(obj, v9); v9 = PyNumber_Xor(v65, v5); PyList_Append(v64, v9); }
5. 实战案例分析:TEA算法(XTEA变种)逆向
5.1 题目分析
原始Python代码:
import ez

# Driver script of the challenge: packs the 24-character input into six
# integers, encrypts them pairwise with the compiled ``ez`` module and asks
# ``ez.check`` to validate the result.
flag = input()
flag1 = list(flag)
value = []
b = 0
ck = 0
if len(flag1) == 24:
    for i in range(0, len(flag1), 4):
        # Pack 4 characters into one big-endian 32-bit word. The listing
        # showed shifts of 16/8/0/0, which would make the third and fourth
        # characters overlap; 24/16/8/0 is what the solver's
        # ``to_bytes(4, "big")`` unpacking in section 5.4 relies on.
        b = (
            (ord(flag1[i]) << 24)
            | (ord(flag1[i + 1]) << 16)
            | (ord(flag1[i + 2]) << 8)
            | ord(flag1[i + 3])
        )
        value.append(b)
    key = [102, 108, 97, 103]
    flag_encrypt = []
    # Encrypt the six words as three (v0, v1) pairs.
    for i in range(0, 6, 2):
        res = ez.encrypt(value[i], value[i + 1], key)
        flag_encrypt.append(res)
    ck = ez.check(flag_encrypt)
    if ck == 3:
        print('yes!!!,you get right flag')
    else:
        print('wrong!!!')
else:
    print('wrong!!!')
5.2 加密函数逆向
还原后的Python代码:
import ctypes


def encrypt(v0, v1, key):
    """Encrypt the 32-bit word pair ``(v0, v1)`` with the recovered cipher.

    Runs 64 rounds; each round updates ``v0``, advances the running sum by
    the constant 1415865428 (0x54646454), then updates ``v1``.
    ``ctypes.c_uint32`` is used so every stored value wraps modulo 2**32.

    Args:
        v0, v1: unsigned 32-bit integers forming one block.
        key: sequence of at least 4 integers (indexed with ``& 3``).

    Returns:
        Tuple ``(v0, v1)`` of the encrypted 32-bit words.
    """
    v0 = ctypes.c_uint32(v0)
    v1 = ctypes.c_uint32(v1)
    num = ctypes.c_uint32(0)
    for _ in range(64):
        # First half-round: mix v1 and the sum-selected key word into v0.
        t1 = v1.value + ((v1.value << 3) ^ (v1.value >> 6))
        t2 = num.value + key[num.value & 3]
        v0.value = v0.value + (t1 ^ t2)
        num.value = num.value + 1415865428
        # Second half-round: mix the updated v0 into v1, selecting the key
        # word from bits 11-12 of the advanced sum.
        t1 = v0.value + ((v0.value << 3) ^ (v0.value >> 6))
        t2 = num.value + key[(num.value >> 11) & 3]
        v1.value = v1.value + (t1 ^ t2)
    return v0.value, v1.value
5.3 解密函数实现
def decrypt(v0, v1, key):
    """Invert the 64-round cipher from section 5.2 for one word pair.

    Args:
        v0, v1: unsigned 32-bit ciphertext words.
        key: sequence of at least 4 integers (indexed with ``& 3``).

    Returns:
        Tuple ``(v0, v1)`` of the recovered 32-bit plaintext words.
    """
    mask = 0xFFFFFFFF
    delta = 0x54646454
    rounds = 64
    # The encryptor starts its running sum at 0 and adds delta once per
    # round, so decryption starts from rounds * delta (mod 2**32).
    num = (delta * rounds) & mask
    for _ in range(rounds):
        # Undo the second half-round (the one that modified v1).
        t1 = (v0 + ((v0 << 3) ^ (v0 >> 6))) & mask
        t2 = (num + key[(num >> 11) & 3]) & mask
        v1 = (v1 - (t1 ^ t2)) & mask
        num = (num - delta) & mask
        # Undo the first half-round (the one that modified v0).
        t1 = (v1 + ((v1 << 3) ^ (v1 >> 6))) & mask
        t2 = (num + key[num & 3]) & mask
        v0 = (v0 - (t1 ^ t2)) & mask
    return v0, v1
5.4 完整解密流程
# Solver: decrypt the six target words pairwise and turn them back into text.
key = [102, 108, 97, 103]
result = [2129084917, 4147024244, 2971480309, 1606961219, 710575902, 4012634395]
flag = []
# Consecutive entries of ``result`` form one (v0, v1) ciphertext block.
for i in range(0, len(result), 2):
    flag += decrypt(result[i], result[i + 1], key)
# Each recovered word holds 4 characters in big-endian order.
flag_str = "".join(word.to_bytes(4, "big").decode() for word in flag)
print(flag_str)
6. 逆向技巧总结
- 符号恢复:使用已知的PYD文件恢复符号,便于分析
- 字符串追踪:通过 __pyx_n_s_ 前缀查找关键字符串
- 函数定位:通过导入函数名(如 ez.encrypt)查找对应实现
- 类型识别:注意 ctypes.c_uint32 等类型转换操作
- 常量提取:从初始化函数中提取关键常量(如delta值)
- 算法识别:熟悉常见加密算法特征(TEA、RC4等)
通过系统性地分析初始化函数、跟踪数据流和识别算法模式,可以有效逆向PYD文件中的核心逻辑。