Ghidra-Processor创建教程——从汇编代码到伪代码
字数 827 2025-08-24 07:48:33
Ghidra Processor创建教程:从汇编代码到伪代码
前言
本教程是Ghidra Processor创建系列的第二部分,重点讲解如何将汇编代码翻译为伪代码(P-code)。在第一部分中,我们已经学习了如何将二进制代码转换为汇编代码,现在我们将在此基础上增加语义解析功能。
P-code简介
Ghidra P-code是专为逆向工程设计的寄存器传输语言,能够对许多不同的处理器进行建模。在创建Ghidra Processor时,将二进制翻译为指令后,下一步就是生成伪代码。这可以通过将指令定义为一系列P-code指令来实现,Ghidra会根据这些P-code指令生成伪C代码。
基础概念
定义P-code操作
在开始之前,我们需要定义一些基本的P-code操作:
# User-defined p-code operations. They are opaque to Ghidra: the decompiler
# renders each one as a call-like expression with the given name.
define pcodeop bss_;     # wraps an operand; its result is dereferenced as a BSS address
define pcodeop stack_;   # wraps an operand; its result is dereferenced as a stack address
define pcodeop syscall;  # stands for the system-call operation
这些是用户自定义的P-code操作:bss_和stack_接收一个操作数,其返回值在后续语义中被当作BSS段或栈中的地址进行解引用;syscall用于表示系统调用。
上下文寄存器
我们需要定义一个上下文寄存器,用来在解码同一条指令的过程中记录两项信息:addrmode记录数据长度(data_length)对应的模式,switchmode记录inst_switch的取值(两者都由后文的dl子构造器设置):
# Context fields carried while one instruction is being decoded.
define context contextreg
    addrmode   = (0,2)    # bits 0-2
    switchmode = (3,6)    # bits 3-6
;
指令定义
1. MOV指令
MOV指令有多种形式,我们需要为每种形式定义P-code语义。
基本形式
# mov, register form (inst_switch=0): copy rm into rn.
:mov dl rn, rm is op=1; dl & inst_switch=0; rn; rm {
    rn = rm;
}
访问BSS段
# mov, BSS load (inst_switch=1): pass the 64-bit immediate through bss_ and
# load rn from the resulting address.
:mov dl rn, "bss"[imm64] is op=1; dl & inst_switch=1; rn; imm64 {
    local src_ptr:8 = bss_(*[const]:8 imm64:8);
    rn = *src_ptr;
}
# mov, BSS store (inst_switch=2): same address computation, but rn is
# written to the resulting address.
:mov dl "bss"[imm64], rn is op=1; dl & inst_switch=2; imm64; rn {
    local dst_ptr:8 = bss_(*[const]:8 imm64:8);
    *dst_ptr = rn;
}
访问栈
# mov, stack load (inst_switch=3): pass the 64-bit immediate through stack_
# and load rn from the resulting address.
:mov dl rn, "stack"[imm64] is op=1; dl & inst_switch=3; rn; imm64 {
    local src_ptr:8 = stack_(*[const]:8 imm64:8);
    rn = *src_ptr;
}
# mov, stack store (inst_switch=4): write rn to the address produced by stack_.
:mov dl "stack"[imm64], rn is op=1; dl & inst_switch=4; imm64; rn {
    local dst_ptr:8 = stack_(*[const]:8 imm64:8);
    *dst_ptr = rn;
}
立即数
# mov, immediate form (inst_switch=5): load rn with the value exported by
# the imm subtable.
:mov dl rn, imm is op=1; dl & inst_switch=5; rn; imm {
    rn = imm;
}
寄存器间接寻址
# Register-indirect moves. The register operand is passed through bss_ or
# stack_, and the result is used as the memory address.

# inst_switch=0xb: store rm to bss_(rn).
:mov dl "bss"[rn], rm is op=1; dl & inst_switch=0xb; rn; rm {
    local dst_ptr:8 = bss_(rn);
    *dst_ptr = rm;
}
# inst_switch=0xc: load rn from bss_(rm).
:mov dl rn, "bss"[rm] is op=1; dl & inst_switch=0xc; rn; rm {
    local src_ptr:8 = bss_(rm);
    rn = *src_ptr;
}
# inst_switch=0xd: store rm to stack_(rn).
:mov dl "stack"[rn], rm is op=1; dl & inst_switch=0xd; rn; rm {
    local dst_ptr:8 = stack_(rn);
    *dst_ptr = rm;
}
# inst_switch=0xe: load rn from stack_(rm).
:mov dl rn, "stack"[rm] is op=1; dl & inst_switch=0xe; rn; rm {
    local src_ptr:8 = stack_(rm);
    rn = *src_ptr;
}
2. 算术指令
我们需要重构算术指令的定义,使其能够处理不同的操作数类型。
操作数定义
# Second operand of the arithmetic instructions, chosen by the switchmode
# context field: a register when switchmode=0, an immediate when switchmode=5.
oprand: rm  is rm  & switchmode=0 { export rm; }
oprand: imm is imm & switchmode=5 { export imm; }
具体算术指令
# Two-operand arithmetic and logic. Each instruction computes
# rn = rn <op> oprand, where oprand is a register or an immediate
# (see the oprand subtable).

# op=2: addition.
:add dl rn, oprand is op=2; dl & inst_switch; rn; oprand {
    rn = rn + oprand;
}
# op=3: despite the mnemonic, this subtracts oprand from rn.
:dec dl rn, oprand is op=3; dl & inst_switch; rn; oprand {
    rn = rn - oprand;
}
# op=4: multiplication.
:mul dl rn, oprand is op=4; dl & inst_switch; rn; oprand {
    rn = rn * oprand;
}
# op=5: division ("/" is the unsigned divide operator in SLEIGH).
:div dl rn, oprand is op=5; dl & inst_switch; rn; oprand {
    rn = rn / oprand;
}
# op=6: remainder ("%" is the unsigned remainder operator in SLEIGH).
:mod dl rn, oprand is op=6; dl & inst_switch; rn; oprand {
    rn = rn % oprand;
}
# op=7: bitwise exclusive or.
:xor dl rn, oprand is op=7; dl & inst_switch; rn; oprand {
    rn = rn ^ oprand;
}
# op=8: bitwise or.
:or dl rn, oprand is op=8; dl & inst_switch; rn; oprand {
    rn = rn | oprand;
}
# op=9: bitwise and.
:and dl rn, oprand is op=9; dl & inst_switch; rn; oprand {
    rn = rn & oprand;
}
# op=0xa: left shift.
:shl dl rn, oprand is op=0xa; dl & inst_switch; rn; oprand {
    rn = rn << oprand;
}
# op=0xb: right shift (">>" is the logical right shift in SLEIGH).
:shr dl rn, oprand is op=0xb; dl & inst_switch; rn; oprand {
    rn = rn >> oprand;
}
# op=0xc, inst_switch=6: single-operand bitwise complement of rn.
:not dl rn is op=0xc; dl & inst_switch=6; rn {
    rn = ~rn;
}
3. 栈操作指令
# pop: load rn from the slot addressed by stack_(sp), then raise sp by 8.
:pop dl rn is op=0xd; dl & inst_switch=6; rn {
    local top_ptr:8 = stack_(sp);
    rn = *top_ptr;
    sp = sp + 8;
}
# push: lower sp by 8 first, then store rn at the slot addressed by stack_(sp).
:push dl rn is op=0xe; dl & inst_switch=6; rn {
    sp = sp - 8;
    local top_ptr:8 = stack_(sp);
    *top_ptr = rn;
}
4. 分支指令
CALL指令
# call through a register (inst_switch=6): push the address of the next
# instruction, then perform an indirect call to the address held in rn.
:call rn is op=0x10; inst_switch=6; rn {
    sp = sp - 8;
    local stack_addr:8 = stack_(sp);
    *:8 stack_addr = inst_next;
    call [rn];
}
# Relative call (inst_switch=7): push the address of the next instruction,
# then call the target computed by the rel subtable.
#  - dl is matched as an invisible operand because it is the constructor that
#    sets the addrmode context field, and every rel variant requires
#    addrmode=1..4. Without it, rel can never match.
#  - rel exports a ram location, so the call must be direct ("call rel").
#    "call [rel]" would instead read a pointer stored at that location and
#    call through it.
:call rel is op=0x10; dl & inst_switch=7; rel {
    sp = sp - 8;
    local stack_addr:8 = stack_(sp);
    *:8 stack_addr = inst_next;
    call rel;
}
RET指令
# ret: read the return address from the slot addressed by stack_(sp) into pc,
# release the slot by raising sp by 8, and return to that address.
:ret is op=0x11; inst_switch {
    local ret_slot:8 = stack_(sp);
    pc = *ret_slot;
    sp = sp + 8;
    return [pc];
}
CMP指令
# One-byte flag register written by cmp and read by je/jne.
define register offset=0x300 size=1 [ZF];
# cmp, register form (inst_switch=0): ZF becomes 1 when rn equals rm, else 0.
:cmp dl rn, rm is op=0x12; dl & inst_switch=0; rn; rm {
    ZF = (rn == rm);
}
# cmp, immediate form (inst_switch=5): ZF becomes 1 when rn equals imm, else 0.
:cmp dl rn, imm is op=0x12; dl & inst_switch=5; rn; imm {
    ZF = (rn == imm);
}
跳转指令
# Jumps. The target comes from the addr subtable.
# NOTE(review): addr has a register variant that exports rn itself. A direct
# "goto addr" on a register varnode is probably not the intended indirect
# jump ("goto [rn]") - confirm against the ISA.

# op=0x13: unconditional jump.
:jmp addr is op=0x13; addr {
    goto addr;
}
# op=0x14: jump when ZF is 1, i.e. the last cmp found its operands equal.
:je addr is op=0x14; addr {
    if (ZF == 1) goto addr;
}
# op=0x15: jump when ZF is 0, i.e. the last cmp found its operands different.
:jne addr is op=0x15; addr {
    if (ZF == 0) goto addr;
}
5. 系统调用指令
# syscall: modelled as the opaque syscall pcodeop, which takes r0-r3 as
# inputs and writes its result back into r0.
:syscall is op=0x20; inst_switch {
    r0 = syscall(r0, r1, r2, r3);
}
6. HALT指令
# halt: modelled as a branch back to the instruction's own address, so
# control never advances past it.
:halt is op=0; inst_switch & data_length {
    goto inst_start;
}
完整定义
以下是本部分涉及的处理器定义汇总(地址空间以及r0–r15、sp、pc等寄存器的define语句未包含在此清单中):
# Instruction tokens.
# opbyte: one byte holding either an opcode (bits 0-5) or a register index
# (bits 0-3). The fields overlap, so each use occupies its own byte.
define token opbyte(8) op = (0, 5) rn = (0, 3) rm = (0, 3);
# oplength: addressing-form selector (bits 0-3) and data-length code (bits 4-6).
define token oplength(8) inst_switch = (0, 3) data_length = (4, 6);
# Immediate operands of 8, 16, 32, and 64 bits. simm8 is the signed view of
# the 8-bit field.
define token data8(8) imm8 = (0, 7) simm8 = (0, 7) signed;
define token data16(16) imm16 = (0, 15);
define token data32(32) imm32 = (0, 31);
define token data64(64) imm64_8 = (0, 7) imm64_16 = (0, 15) imm64_32 = (0, 31) imm64 = (0, 63);
# Context fields set by the dl subtable while an instruction is decoded.
define context contextreg addrmode = (0,2) switchmode = (3,6);
# One-byte flag register written by cmp and read by je/jne. The constructors
# below use it, so it must be declared in the specification.
# NOTE(review): assumes the register space, r0-r15, sp, pc, and contextreg
# are declared before this point - they are not part of this listing.
define register offset=0x300 size=1 [ZF];
# Opaque user-defined p-code operations.
define pcodeop bss_;
define pcodeop stack_;
define pcodeop syscall;
# Map the 4-bit rn/rm field values 0..15 onto registers r0..r15.
attach variables [rn rm] [r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15];
# dl: data-length prefix. Every non-empty variant records a width class in
# addrmode and copies inst_switch into switchmode, so operand subtables
# matched later in the same instruction can select on them.
# data_length=0 prints nothing and leaves the context untouched.
dl: ""      is data_length=0                  { }
dl: "byte"  is data_length=1  & inst_switch   [ addrmode = 1; switchmode = inst_switch; ] { }
dl: "word"  is data_length=2  & inst_switch   [ addrmode = 2; switchmode = inst_switch; ] { }
dl: "dword" is data_length=3  & inst_switch   [ addrmode = 3; switchmode = inst_switch; ] { }
dl: "qword" is data_length>=4 & inst_switch   [ addrmode = 4; switchmode = inst_switch; ] { }
# imm: immediate operand whose encoded width is chosen by addrmode
# (1 = 8 bits, 2 = 16 bits, 3 = 32 bits, 4 = 64 bits). Every variant exports
# an 8-byte constant.
imm: imm8  is addrmode=1; imm8  { export *[const]:8 imm8; }
imm: imm16 is addrmode=2; imm16 { export *[const]:8 imm16; }
imm: imm32 is addrmode=3; imm32 { export *[const]:8 imm32; }
imm: imm64 is addrmode=4; imm64 { export *[const]:8 imm64; }
# rel: pc-relative target. reloc is computed at disassembly time as the
# address of the next instruction plus the encoded offset, and is exported as
# a location in the ram space.
# NOTE(review): only the 8-bit form uses a signed field (simm8). The 16-, 32-,
# and 64-bit offsets are unsigned as declared, so they cannot express a
# backward displacement - confirm against the ISA.
rel: reloc is simm8 & addrmode=1 [ reloc = inst_next + simm8; ] { export *[ram]:8 reloc; }
rel: reloc is imm16 & addrmode=2 [ reloc = inst_next + imm16; ] { export *[ram]:8 reloc; }
rel: reloc is imm32 & addrmode=3 [ reloc = inst_next + imm32; ] { export *[ram]:8 reloc; }
rel: reloc is imm64 & addrmode=4 [ reloc = inst_next + imm64; ] { export *[ram]:8 reloc; }
# addr: jump-target operand, selected by inst_switch.
#   6 - the register rn itself is exported
#   7 - a pc-relative target from the rel subtable (dl sets addrmode for it)
#   8 - the location obtained by passing a 64-bit immediate through bss_
# NOTE(review): the three variants export different kinds of varnode
# (a register, a ram location, a dereference with no explicit size) - confirm
# that the consumers' "goto addr" behaves as intended for each.
addr: rn is inst_switch=6; rn { export rn; }
addr: rel is dl & inst_switch=7; rel { export rel; }
addr: "bss"[imm64] is inst_switch=8; imm64 {
    local tgt_ptr:8 = bss_(*[const]:8 imm64:8);
    export *tgt_ptr;
}
# oprand: second operand of the arithmetic instructions. switchmode (copied
# from inst_switch by dl) picks a register (0) or an immediate (5).
oprand: rm  is rm  & switchmode=0 { export rm; }
oprand: imm is imm & switchmode=5 { export imm; }
# halt: modelled as a branch back to the instruction's own address.
:halt is op=0; inst_switch & data_length {
    goto inst_start;
}
# mov family (op=1). inst_switch selects the addressing form.

# 0: register to register.
:mov dl rn, rm is op=1; dl & inst_switch=0; rn; rm {
    rn = rm;
}
# 1: load rn from the address bss_ produces for a 64-bit immediate.
:mov dl rn, "bss"[imm64] is op=1; dl & inst_switch=1; rn; imm64 {
    local src_ptr:8 = bss_(*[const]:8 imm64:8);
    rn = *src_ptr;
}
# 2: store rn to the address bss_ produces for a 64-bit immediate.
:mov dl "bss"[imm64], rn is op=1; dl & inst_switch=2; imm64; rn {
    local dst_ptr:8 = bss_(*[const]:8 imm64:8);
    *dst_ptr = rn;
}
# 3: load rn from the address stack_ produces for a 64-bit immediate.
:mov dl rn, "stack"[imm64] is op=1; dl & inst_switch=3; rn; imm64 {
    local src_ptr:8 = stack_(*[const]:8 imm64:8);
    rn = *src_ptr;
}
# 4: store rn to the address stack_ produces for a 64-bit immediate.
:mov dl "stack"[imm64], rn is op=1; dl & inst_switch=4; imm64; rn {
    local dst_ptr:8 = stack_(*[const]:8 imm64:8);
    *dst_ptr = rn;
}
# 5: load rn with an immediate.
:mov dl rn, imm is op=1; dl & inst_switch=5; rn; imm {
    rn = imm;
}
# 0xb: store rm to bss_(rn).
:mov dl "bss"[rn], rm is op=1; dl & inst_switch=0xb; rn; rm {
    local dst_ptr:8 = bss_(rn);
    *dst_ptr = rm;
}
# 0xc: load rn from bss_(rm).
:mov dl rn, "bss"[rm] is op=1; dl & inst_switch=0xc; rn; rm {
    local src_ptr:8 = bss_(rm);
    rn = *src_ptr;
}
# 0xd: store rm to stack_(rn).
:mov dl "stack"[rn], rm is op=1; dl & inst_switch=0xd; rn; rm {
    local dst_ptr:8 = stack_(rn);
    *dst_ptr = rm;
}
# 0xe: load rn from stack_(rm).
:mov dl rn, "stack"[rm] is op=1; dl & inst_switch=0xe; rn; rm {
    local src_ptr:8 = stack_(rm);
    rn = *src_ptr;
}
# Two-operand arithmetic and logic (op=2..0xb): rn = rn <op> oprand, where
# oprand is a register or an immediate. "dec" subtracts despite its mnemonic.
# In SLEIGH, "/", "%" and ">>" are the unsigned forms.
:add dl rn, oprand is op=2; dl & inst_switch; rn; oprand {
    rn = rn + oprand;
}
:dec dl rn, oprand is op=3; dl & inst_switch; rn; oprand {
    rn = rn - oprand;
}
:mul dl rn, oprand is op=4; dl & inst_switch; rn; oprand {
    rn = rn * oprand;
}
:div dl rn, oprand is op=5; dl & inst_switch; rn; oprand {
    rn = rn / oprand;
}
:mod dl rn, oprand is op=6; dl & inst_switch; rn; oprand {
    rn = rn % oprand;
}
:xor dl rn, oprand is op=7; dl & inst_switch; rn; oprand {
    rn = rn ^ oprand;
}
:or dl rn, oprand is op=8; dl & inst_switch; rn; oprand {
    rn = rn | oprand;
}
:and dl rn, oprand is op=9; dl & inst_switch; rn; oprand {
    rn = rn & oprand;
}
:shl dl rn, oprand is op=0xa; dl & inst_switch; rn; oprand {
    rn = rn << oprand;
}
:shr dl rn, oprand is op=0xb; dl & inst_switch; rn; oprand {
    rn = rn >> oprand;
}
# op=0xc, inst_switch=6: single-operand bitwise complement of rn.
:not dl rn is op=0xc; dl & inst_switch=6; rn {
    rn = ~rn;
}
# pop: load rn from the slot addressed by stack_(sp), then raise sp by 8.
:pop dl rn is op=0xd; dl & inst_switch=6; rn {
    local top_ptr:8 = stack_(sp);
    rn = *top_ptr;
    sp = sp + 8;
}
# push: lower sp by 8 first, then store rn at the slot addressed by stack_(sp).
:push dl rn is op=0xe; dl & inst_switch=6; rn {
    sp = sp - 8;
    local top_ptr:8 = stack_(sp);
    *top_ptr = rn;
}
# call through a register (inst_switch=6): push the address of the next
# instruction, then perform an indirect call to the address held in rn.
:call rn is op=0x10; inst_switch=6; rn {
    sp = sp - 8;
    local stack_addr:8 = stack_(sp);
    *:8 stack_addr = inst_next;
    call [rn];
}
# Relative call (inst_switch=7): push the address of the next instruction,
# then call the target computed by the rel subtable.
#  - dl is matched as an invisible operand because it is the constructor that
#    sets the addrmode context field, and every rel variant requires
#    addrmode=1..4. Without it, rel can never match.
#  - rel exports a ram location, so the call must be direct ("call rel").
#    "call [rel]" would instead read a pointer stored at that location and
#    call through it.
:call rel is op=0x10; dl & inst_switch=7; rel {
    sp = sp - 8;
    local stack_addr:8 = stack_(sp);
    *:8 stack_addr = inst_next;
    call rel;
}
# ret: read the return address from the slot addressed by stack_(sp) into pc,
# raise sp by 8, and return to that address.
:ret is op=0x11; inst_switch {
    local ret_slot:8 = stack_(sp);
    pc = *ret_slot;
    sp = sp + 8;
    return [pc];
}
# cmp: only equality is recorded. ZF becomes 1 when the operands are equal,
# else 0.
# Register form (inst_switch=0).
:cmp dl rn, rm is op=0x12; dl & inst_switch=0; rn; rm {
    ZF = (rn == rm);
}
# Immediate form (inst_switch=5).
:cmp dl rn, imm is op=0x12; dl & inst_switch=5; rn; imm {
    ZF = (rn == imm);
}
# Jumps whose semantics are modelled. The target comes from the addr subtable.
# NOTE(review): addr has a register variant that exports rn itself. A direct
# "goto addr" on a register varnode is probably not the intended indirect
# jump ("goto [rn]") - confirm against the ISA.

# op=0x13: unconditional jump.
:jmp addr is op=0x13; addr {
    goto addr;
}
# op=0x14: jump when ZF is 1.
:je addr is op=0x14; addr {
    if (ZF == 1) goto addr;
}
# op=0x15: jump when ZF is 0.
:jne addr is op=0x15; addr {
    if (ZF == 0) goto addr;
}
# Ordered conditional jumps (op=0x16..0x1d). cmp records only equality in ZF,
# so these conditions cannot be expressed yet. They are marked "unimpl"
# instead of being given an empty body. An empty body turns the instruction
# into a NOP and silently drops the branch from the control flow, whereas
# "unimpl" still disassembles it but reports it as unimplemented.
:jle addr is op=0x16; addr unimpl
:jg addr is op=0x17; addr unimpl
:jl addr is op=0x18; addr unimpl
:jge addr is op=0x19; addr unimpl
:jbe addr is op=0x1a; addr unimpl
:ja addr is op=0x1b; addr unimpl
:jnb addr is op=0x1c; addr unimpl
:jb addr is op=0x1d; addr unimpl
# syscall: modelled as the opaque syscall pcodeop, which takes r0-r3 as
# inputs and writes its result back into r0.
:syscall is op=0x20; inst_switch {
    r0 = syscall(r0, r1, r2, r3);
}
总结
通过本教程,我们学习了如何为Ghidra Processor定义P-code语义,将汇编指令转换为伪代码。关键点包括:
- 定义P-code操作符(如bss_, stack_, syscall)
- 处理不同寻址模式的MOV指令
- 实现算术运算指令
- 处理栈操作(PUSH/POP)
- 实现分支指令(CALL/RET/JMP等)
- 定义系统调用和HALT指令
完整的处理器定义包含了所有这些元素的实现,能够将二进制代码转换为可读性更高的伪代码,大大提高了逆向工程的效率。