开发简单的PHP混淆器与解混淆器
字数 810 2025-08-20 18:17:07
PHP代码混淆与解混淆技术详解
概述
本文详细讲解如何开发PHP代码混淆器与解混淆器,基于PHP-Parser库实现代码的抽象语法树(AST)转换。
基本原理
编译器前端概念
混淆器开发类似于编译器开发,主要分为两个部分:
- 编译器前端:负责代码解析
  - 词法分析:将字符流转换为词素(lexeme)和词法单元(token)
  - 语法分析:将token序列转换为抽象语法树(AST)
- 编译器后端:负责代码生成
PHP解析工具
PHP生态提供了以下工具帮助我们处理代码:
- token_get_all():PHP内置(tokenizer扩展)函数,将PHP代码转换为token数组
- php-parser库(nikic/PHP-Parser,第三方库):将PHP代码解析为AST
基础实现
变量重命名示例
// Token-based variable renaming.
//
// Every variable whose name contains characters outside [A-Za-z0-9_] is
// replaced by a generated name ($v0, $v1, ...). Plain ASCII names (including
// $this and the superglobals) are left untouched. The result is written back
// to $file and $map holds "old name => new name".
//
// $path must be set by the surrounding script.
$file = file_get_contents($path);
if ($file === false) {
    throw new \RuntimeException('Unable to read source file: ' . $path);
}
$variable = 0;
$map = [];
$tokens = token_get_all($file);

// Collect the variable names already present so that a generated name never
// collides with a variable the source already uses (e.g. an existing $v0).
$taken = [];
foreach ($tokens as $token) {
    if (is_array($token) && $token[0] === T_VARIABLE) {
        $taken[$token[1]] = true;
    }
}

// Rebuild the source token by token instead of running str_replace() over the
// raw text: a textual replace would also hit longer names that merely start
// with the same bytes, as well as the contents of strings and comments.
$rewritten = '';
foreach ($tokens as $token) {
    if (!is_array($token)) {
        // Single-character tokens (";", "{", ...) are returned as plain strings.
        $rewritten .= $token;
        continue;
    }
    $text = $token[1];
    if ($token[0] === T_VARIABLE && !preg_match('/^\$[a-zA-Z0-9_]+$/', $text)) {
        if (!isset($map[$text])) {
            do {
                $candidate = '$v' . $variable++;
            } while (isset($taken[$candidate]));
            $map[$text] = $candidate;
        }
        $text = $map[$text];
    }
    $rewritten .= $text;
}
$file = $rewritten;
基于AST的基本框架
use PhpParser\Parser;
use PhpParser\ParserFactory;
use PhpParser\NodeTraverser;
use PhpParser\NodeVisitor\NameResolver;
use PhpParser\PrettyPrinter\Standard;
require './vendor/autoload.php';
// Basic AST pipeline: parse a file, run a visitor over it, print the result.

// Initialise the parser
$parser = (new ParserFactory())->create(ParserFactory::PREFER_PHP7);

// Read the input; file_get_contents() returns false on failure and must not
// be handed to the parser.
$code = file_get_contents('test/test1.php');
if ($code === false) {
    throw new \RuntimeException('Unable to read test/test1.php');
}

// Parse the code into an AST; syntax errors are reported as PhpParser\Error.
try {
    $ast = $parser->parse($code);
} catch (\PhpParser\Error $e) {
    throw new \RuntimeException('Parse error: ' . $e->getMessage(), 0, $e);
}

$traverser = new NodeTraverser();
// Register the custom visitor
$traverser->addVisitor(new CustomVisitor($parser));
// Walk the AST; the traverser returns the (possibly modified) node list
$ast = $traverser->traverse($ast);

// Turn the AST back into code
$prettyPrinter = new Standard();
$ret = $prettyPrinter->prettyPrint($ast);
echo '<?php ' . $ret;
混淆技术实现
字符串ROT13混淆
将字符串替换为ROT13编码的函数调用:
/**
 * Obfuscation pass: replaces each string literal with a call to str_rot13()
 * on the ROT13-encoded literal, so `'abc'` becomes `str_rot13('nop')`.
 *
 * Literals inside constant-expression contexts (parameter defaults, constants,
 * property defaults, static variable initialisers, declare directives, enum
 * cases, attributes) are left alone, because a function call is not allowed
 * there and the generated code would no longer compile.
 */
class StringToROT13 extends NodeVisitorAbstract
{
    /** @var int Number of constant-expression contexts currently being traversed. */
    private $_constExprDepth = 0;

    /**
     * Tracks entry into contexts whose literals must not be rewritten.
     *
     * @param Node $node Node about to be traversed.
     * @return null Always null: the node is kept as is.
     */
    public function enterNode(Node $node)
    {
        if ($this->isConstExprContext($node)) {
            $this->_constExprDepth++;
        }
        return null;
    }

    /**
     * Rewrites a string literal into a str_rot13() call.
     *
     * @param Node $node Node whose children have already been traversed.
     * @return Node|null The replacement call, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if ($this->isConstExprContext($node)) {
            $this->_constExprDepth--;
            return null;
        }
        if ($this->_constExprDepth > 0 || !$node instanceof Node\Scalar\String_) {
            return null;
        }
        return new Expr\FuncCall(
            new Node\Name("str_rot13"),
            [new Node\Arg(new Node\Scalar\String_(str_rot13($node->value)))]
        );
    }

    /**
     * Tells whether $node opens a context that only accepts constant expressions.
     *
     * instanceof never triggers autoloading, so naming node classes that only
     * exist in some PHP-Parser versions is harmless.
     *
     * @param Node $node Node to classify.
     * @return bool
     */
    private function isConstExprContext(Node $node)
    {
        return $node instanceof Node\Param
            || $node instanceof Node\Const_
            || $node instanceof Node\Stmt\Static_
            || $node instanceof Node\Stmt\PropertyProperty
            || $node instanceof Node\PropertyItem
            || $node instanceof Node\Stmt\DeclareDeclare
            || $node instanceof Node\DeclareItem
            || $node instanceof Node\Stmt\EnumCase
            || $node instanceof Node\AttributeGroup;
    }
}
常量数组混淆
将代码中分散的字面常量(字符串、数字以及 true/false/null)统一收集到一个全局数组中,并把原位置替换为对该数组的下标引用:
/**
 * Obfuscation pass: moves literal constants (strings, numbers, true/false/null)
 * into one global array and replaces each occurrence with an index into it.
 *
 * After traversal an initialiser of the form
 * `$<name> = unserialize(base64_decode('...'));` is injected at the top of the
 * program, and every function, method and closure body gets a `global $<name>;`
 * statement so the array is reachable from its scope.
 *
 * Literals in constant-expression contexts (parameter defaults, constants,
 * property defaults, static initialisers, declare directives, enum cases,
 * attributes) are kept, since a variable fetch is not valid there.
 */
class ConstantToArray extends NodeVisitorAbstract
{
    /** @var string Name (without `$`) of the global array holding the constants. */
    private $_variableName = '';
    /** @var array<string,int> Maps serialize()d constant value to its array index. */
    private $_constants = [];
    /** @var int Number of constant-expression contexts currently being traversed. */
    private $_constExprDepth = 0;
    /** @var mixed Parser used to build the initialiser statement; must expose parse(). */
    private $_parser;

    /**
     * @param mixed $_parser Parser whose parse() method turns PHP source into statements
     *                       (presumably a PhpParser\Parser - confirm against the caller).
     */
    public function __construct($_parser)
    {
        // generate_random_variable() is defined outside this class.
        $this->_variableName = generate_random_variable(5);
        $this->_parser = $_parser;
    }

    /**
     * Injects the statement that initialises the constant array.
     *
     * @param Node[] $nodes Top-level statements after traversal.
     * @return Node[] The statements with the initialiser inserted.
     */
    public function afterTraverse(array $nodes)
    {
        // Indexes were handed out in insertion order, so the key order is the array order.
        $values = [];
        foreach (array_keys($this->_constants) as $serialized) {
            $values[] = unserialize($serialized);
        }
        $items = base64_encode(serialize($values));
        $init = $this->_parser->parse(
            "<?php \${$this->_variableName}=unserialize(base64_decode('$items'));"
        );

        $nodes = array_values($nodes);
        $total = count($nodes);

        // declare(strict_types=1) and friends must remain the first statements.
        $offset = 0;
        while ($offset < $total
            && ($nodes[$offset] instanceof Node\Stmt\Nop
                || ($nodes[$offset] instanceof Node\Stmt\Declare_ && $nodes[$offset]->stmts === null))
        ) {
            $offset++;
        }

        // Nothing may precede a namespace declaration, so initialise inside the
        // first namespace instead; the variable is still a global one.
        if ($offset < $total && $nodes[$offset] instanceof Node\Stmt\Namespace_) {
            $nodes[$offset]->stmts = array_merge($init, $nodes[$offset]->stmts ?: []);
            return $nodes;
        }

        array_splice($nodes, $offset, 0, $init);
        return $nodes;
    }

    /**
     * Makes the constant array visible in function-like scopes and tracks
     * entry into constant-expression contexts.
     *
     * @param Node $node Node about to be traversed.
     * @return null Always null: the node is modified in place.
     */
    public function enterNode(Node $node)
    {
        $hasOwnScope = $node instanceof Node\Stmt\Function_
            || $node instanceof Node\Stmt\ClassMethod
            || $node instanceof Node\Expr\Closure;
        // Abstract and interface methods have no body (stmts is null).
        if ($hasOwnScope && is_array($node->stmts)) {
            $global = new Node\Stmt\Global_([new Expr\Variable($this->_variableName)]);
            array_unshift($node->stmts, $global);
        }
        if ($this->isConstExprContext($node)) {
            $this->_constExprDepth++;
        }
        return null;
    }

    /**
     * Replaces a literal constant with a fetch from the constant array.
     *
     * @param Node $node Node whose children have already been traversed.
     * @return Node|null The array fetch, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if ($this->isConstExprContext($node)) {
            $this->_constExprDepth--;
            return null;
        }
        if ($this->_constExprDepth > 0) {
            return null;
        }
        if ($node instanceof Node\Scalar) {
            // Magic constants and interpolated strings carry no single `value`;
            // reading it would silently register null in place of the node.
            if ($node instanceof Node\Scalar\MagicConst || !property_exists($node, 'value')) {
                return null;
            }
            return $this->fetchFor($node->value);
        }
        if ($node instanceof Node\Expr\ConstFetch && $node->name instanceof Node\Name) {
            switch ($node->name->toLowerString()) {
                case 'true':
                    return $this->fetchFor(true);
                case 'false':
                    return $this->fetchFor(false);
                case 'null':
                    return $this->fetchFor(null);
                default:
                    return null;
            }
        }
        return null;
    }

    /**
     * Registers $value (if new) and builds the `$<name>[index]` expression for it.
     *
     * @param mixed $value Constant value to store.
     * @return Expr\ArrayDimFetch
     */
    private function fetchFor($value)
    {
        // serialize() keeps 1, '1', 1.0 and true apart as distinct keys.
        $key = serialize($value);
        if (!isset($this->_constants[$key])) {
            $this->_constants[$key] = count($this->_constants);
        }
        return new Expr\ArrayDimFetch(
            new Expr\Variable($this->_variableName),
            new Node\Scalar\LNumber($this->_constants[$key])
        );
    }

    /**
     * Tells whether $node opens a context that only accepts constant expressions.
     *
     * instanceof never triggers autoloading, so naming node classes that only
     * exist in some PHP-Parser versions is harmless.
     *
     * @param Node $node Node to classify.
     * @return bool
     */
    private function isConstExprContext(Node $node)
    {
        return $node instanceof Node\Param
            || $node instanceof Node\Const_
            || $node instanceof Node\Stmt\Static_
            || $node instanceof Node\Stmt\PropertyProperty
            || $node instanceof Node\PropertyItem
            || $node instanceof Node\Stmt\DeclareDeclare
            || $node instanceof Node\DeclareItem
            || $node instanceof Node\Stmt\EnumCase
            || $node instanceof Node\AttributeGroup;
    }
}
解混淆技术实现
ROT13字符串还原
/**
 * Deobfuscation pass: folds `str_rot13('<literal>')` back into the decoded
 * string literal.
 */
class ROT13ToString extends NodeVisitorAbstract
{
    /**
     * Replaces a str_rot13() call on a string literal with the decoded literal.
     *
     * @param Node $node Node whose children have already been traversed.
     * @return Node|null The decoded string literal, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall || !$node->name instanceof Node\Name) {
            return null;
        }
        // PHP function names are case-insensitive.
        if ($node->name->toLowerString() !== 'str_rot13') {
            return null;
        }
        // Only fold the exact shape str_rot13('<literal>'): one plain,
        // non-unpacked argument that is a string literal.
        if (count($node->args) !== 1) {
            return null;
        }
        $arg = $node->args[0];
        if (!$arg instanceof Node\Arg || $arg->unpack || !$arg->value instanceof Node\Scalar\String_) {
            return null;
        }
        return new Node\Scalar\String_(str_rot13($arg->value->value));
    }
}
常量数组还原
/**
 * Deobfuscation pass: undoes the "constant array" obfuscation.
 *
 * It looks for `$x = unserialize(base64_decode('<literal>'));`, decodes the
 * literal, remembers `$x` as the constant array, and replaces every
 * `$x[<index>]` it leaves afterwards with the literal stored at that index.
 */
class ArrayToConstant extends NodeVisitorAbstract
{
    /** @var string Name of the constant array variable; '' until the initialiser is seen. */
    private $_variableName = '';
    /** @var array Decoded constant values, keyed by index. */
    private $_constants = [];

    /**
     * Detects the constant array initialiser and captures its contents.
     *
     * @param Node $node Node about to be traversed.
     * @return Node|null A neutralised `$x = 0` assignment replacing the
     *                   initialiser, or null to keep the node.
     */
    public function enterNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Assign
            || !$node->var instanceof Node\Expr\Variable
            || !is_string($node->var->name)
        ) {
            return null;
        }
        $encoded = $this->firstArgOfCall($node->expr, 'unserialize');
        if ($encoded === null || count($node->expr->args) !== 1) {
            return null;
        }
        $payload = $this->firstArgOfCall($encoded, 'base64_decode');
        if (!$payload instanceof Node\Scalar\String_) {
            return null;
        }

        // Non-strict decoding on purpose: it mirrors what the obfuscated code
        // itself does at runtime.
        $decoded = base64_decode($payload->value);
        // The payload comes from untrusted code: never instantiate classes
        // while unserializing it.
        $array = unserialize($decoded, ['allowed_classes' => false]);
        if (!is_array($array)) {
            return null;
        }

        $this->_variableName = $node->var->name;
        $this->_constants = $array;
        return new Node\Expr\Assign($node->var, Node\Scalar\LNumber::fromString("0"));
    }

    /**
     * Replaces a fetch from the constant array with the literal it refers to.
     *
     * @param Node $node Node whose children have already been traversed.
     * @return Node|null The literal node, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if ($this->_variableName === '') {
            return null;
        }
        if (!$node instanceof Node\Expr\ArrayDimFetch
            || !$node->var instanceof Node\Expr\Variable
            || $node->var->name !== $this->_variableName
        ) {
            return null;
        }
        // Only literal indexes can be resolved statically; `$x[]` has no dim at all.
        $dim = $node->dim;
        if (!$dim instanceof Node\Scalar\LNumber && !$dim instanceof Node\Scalar\String_) {
            return null;
        }
        if (!array_key_exists($dim->value, $this->_constants)) {
            return null;
        }

        $val = $this->_constants[$dim->value];
        if (is_string($val)) {
            return new Node\Scalar\String_($val);
        }
        if (is_float($val)) {
            return new Node\Scalar\DNumber($val);
        }
        if (is_int($val)) {
            return new Node\Scalar\LNumber($val);
        }
        if (is_bool($val) || $val === null) {
            // json_encode() yields exactly "true", "false" or "null" here.
            return new Node\Expr\ConstFetch(new Node\Name\FullyQualified(json_encode($val)));
        }
        // Arrays and objects have no literal form this pass can emit; keep the fetch.
        return null;
    }

    /**
     * Returns the first argument expression of a call to the given global function.
     *
     * @param mixed  $node     Candidate call node.
     * @param string $function Lower-case function name to match.
     * @return Node|null The first argument's expression, or null when $node is
     *                   not such a call or has no plain first argument.
     */
    private function firstArgOfCall($node, $function)
    {
        if (!$node instanceof Node\Expr\FuncCall
            || !$node->name instanceof Node\Name
            || $node->name->toLowerString() !== $function
            || !isset($node->args[0])
            || !$node->args[0] instanceof Node\Arg
            || $node->args[0]->unpack
        ) {
            return null;
        }
        return $node->args[0]->value;
    }
}
开发注意事项
混淆器开发要点
- 收集大量PHP样本,处理各种语法兼容问题
- 利用信息不对称性,将混淆代码与业务代码混合
- 尽可能破坏原始代码结构,去除可识别信息(如变量名)
解混淆器开发要点
- 准确识别混淆模式及其依赖的外部信息
- 提取运行时才能获取的密钥和数据
- 当信息无法完全恢复时,使用规则还原近似信息
高级混淆技术
- 控制流混淆:打乱代码执行流程,如yakpro-po工具
- 模块化设计:使用组合模式,便于维护和扩展
- 多层混淆:多种混淆技术叠加使用
总结
本文详细介绍了PHP代码混淆与解混淆的核心技术,包括:
- 基于AST的代码转换原理
- 基础混淆技术实现(变量重命名、字符串编码)
- 高级混淆技术(常量数组转换)
- 对应的解混淆技术实现
- 开发中的注意事项和最佳实践
通过合理运用这些技术,可以开发出比市面上大多数混淆器更强大的工具。解混淆器的开发思路与混淆器类似,但侧重点不同,需要针对特定混淆模式进行精确识别和还原。