Angr 符号执行 模拟执行

简介

angr 是一个多架构的二进制分析工具包,能够对二进制文件执行动态符号执行(如 Mayhem、KLEE 等)和各种静态分析;angr 的核心特性包括控制流图(CFG)恢复、污点分析、符号执行、路径探索以及自动漏洞挖掘等,使用 Python 编写,接口灵活,适合快速搭建复杂的二进制分析工具

符号执行

输入: 符号变量X → 程序执行 → 收集所有可能的路径和约束条件

1
2
3
4
5
6
7
8
# 当程序读取输入时,angr 创建符号变量
input_symbol = angr.BVS('stdin_0', 64) # 64位符号变量

# 程序执行过程中收集约束条件
if input == "HelloAngr":
goto good_address # 这个路径到达我们的目标
else:
goto bad_address # 这个路径不是我们要的

安装

1
2
3
python3 -m venv angr_env
source angr_env/bin/activate
pip install angr

本文章的 angr 版本是 9.2.177

基本使用

参考 jakespringer/angr_ctf

00_angr_find

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
int __cdecl main(int argc, const char **argv, const char **envp)
{
int i; // [esp+1Ch] [ebp-1Ch]
char s1[9]; // [esp+23h] [ebp-15h] BYREF
unsigned int v6; // [esp+2Ch] [ebp-Ch]

v6 = __readgsdword(0x14u);
printf("Enter the password: ");
__isoc99_scanf("%8s", s1);
for ( i = 0; i <= 7; ++i )
s1[i] = complex_function(s1[i], i);
if ( !strcmp(s1, "JACEJGCS") )
puts("Good Job.");
else
puts("Try again.");
return 0;
}

int __cdecl complex_function(int a1, int a2)
{
if ( a1 <= 64 || a1 > 90 )
{
puts("Try again.");
exit(1);
}
return (3 * a2 + a1 - 65) % 26 + 65;
}

关键汇编

1
2
3
4
5
.text:08048675 loc_8048675:                            ; CODE XREF: main+9A↑j
.text:08048675 sub esp, 0Ch
.text:08048678 push offset aGoodJob ; "Good Job."
.text:0804867D call _puts
.text:08048682 add esp, 10h

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import angr
import sys

def main(argv):
# 创建 Angr 项目
path_to_binary = "./00_angr_find" # 文件路径
project = angr.Project(path_to_binary)

# 设置初始状态
initial_state = project.factory.entry_state() # 从程序入口点开始执行

# 创建模拟管理器
simulation = project.factory.simgr(initial_state) # simulation_manager,管理程序执行不同的路径

# 符号执行搜索
print_good_address = 0x0804867D # 打印 "Good Job" 的地址
simulation.explore(find=print_good_address)

# 检查是否找到
if simulation.found: # 包含所有到达目标地址的状态列表
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno())) # 提取该状态下输入到 stdin 的内容
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

运行结果可以看到成功找到了解决方案并且得到正确输入

01_angr_avoid

用反编译器查看 main 函数时会反编译失败,提示 “The function is too large to analyze”

查看源码这道题应该是因为定义了一个巨大的宏字符串常量,宏展开导致 main 内嵌了超大字符串,编译后 main 变成超大函数

1
2
3
4
#define USERDEF "{{ userdef }}"
#define LEN_USERDEF {{ len_userdef }}

strncpy(password, USERDEF, LEN_USERDEF);

字符串搜索找到 “Good Job” 的地方

为了节省时间提高分析效率,在这里可以使用到 angr 中的 avoid 功能

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import angr
import sys

def main(argv):
path_to_binary = "./01_angr_avoid"
project = angr.Project(path_to_binary)

# 从程序入口开始执行并初始化程序的内存布局,寄存器状态
initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

print_good_address = 0x080485e5 # 打印 "Good Job" 的地址
will_not_succeed_address = 0x080485ef # should_succeed 检查失败跳转的地址,会从这里一直跳转到打印 "Try again"

simulation.explore(find=print_good_address, avoid=will_not_succeed_address)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)


02_angr_find_condition

反编译看一下

先用 00 的方法试一下,find 的地址直接取了 main 函数中输出 “Good Job” 的地址 0x08049012

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import angr
import sys

def main(argv):
path_to_binary = "./02_angr_find_condition"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

print_good_address = 0x08049012

simulation.explore(find=print_good_address)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

直接成功了

看看官方解答
在不知道具体的成功指令地址,只知道成功特征(比如输出 “Good Job”)的情况下,可以通过设置条件来找到满足特定条件的路径

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import angr
import sys

def main(argv):
path_to_binary = "./02_angr_find_condition"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Good Job." in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Try again." in stdout_output

simulation.explore(find=is_successful,avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')



if __name__ == '__main__':
main(sys.argv)

依旧能得到正确结果

03_angr_symbolic_register

其中的三个 complex_function 就是很复杂的三个函数

直接找输出 “Good Job” 的地址 0x080489ee

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import angr
import sys

def main(argv):
path_to_binary = "./03_angr_symbolic_registers"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

print_good_address = 0x080489ee

simulation.explore(find=print_good_address)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')



if __name__ == '__main__':
main(sys.argv)

可以得到正确结果

这道题的背景应该是针对旧版本的 Angr,当 scanf 读取多个输入时,例如 scanf(“%u %u”),Angr 内置的 scanf 不能直接模拟,需要手动创建符号变量并将其存储在正确的内存位置或寄存器中

大概是如下的模式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import angr
import claripy # angr 的约束求解器,用于创建符号变量
import sys

def main(argv):
path_to_binary = "./03_angr_symbolic_registers"
project = angr.Project(path_to_binary)

start_address = 0x08048937 # scanf 调用之后第一次访问输入数据的地址
initial_state = project.factory.blank_state(addr=start_address)

# 创建符号位向量
password0_size_in_bits = 32 # 32位整数
password0 = claripy.BVS('password0', password0_size_in_bits)
password1_size_in_bits = 32
password1 = claripy.BVS('password1', password1_size_in_bits)
password2_size_in_bits = 32
password2 = claripy.BVS('password2', password2_size_in_bits)

# 将符号变量注入寄存器
initial_state.regs.eax = password0
initial_state.regs.ebx = password1
initial_state.regs.edx = password2

# 创建符号执行管理器
simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]
solution0 = solution_state.solver.eval(password0)
solution1 = solution_state.solver.eval(password1)
solution2 = solution_state.solver.eval(password2)

solution = "{} {} {}".format(solution0,solution1,solution2)
print(solution)

if __name__ == '__main__':
main(sys.argv)

04_angr_symbolic_stack

搜索字符串定位输出到 “Good Job” 的地址 080486e9

和之前的脚本一样,只需要改个 path 和 print_good_address

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import angr
import sys

def main(argv):
path_to_binary = "./04_angr_symbolic_stack"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

print_good_address = 0x080486E9

simulation.explore(find=print_good_address)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')



if __name__ == '__main__':
main(sys.argv)

得到正确输入

这道题是在学习符号化栈

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import angr
import claripy
import sys

def main(argv):
path_to_binary = "./04_angr_symbolic_stack"
project = angr.Project(path_to_binary)

# 从scanf调用完成且栈清理后开始
start_address = 0x08048697 # mov eax, dword [ebp-0xc]
initial_state = project.factory.blank_state(addr=start_address)

# 设置栈帧
# 08048679 55 push ebp ; 保存旧的ebp
# 0804867a 89e5 mov ebp, esp ; 设置新的栈帧基址
# 0804867c 83ec18 sub esp, 0x18 ; 为局部变量分配栈空间
initial_state.regs.ebp = initial_state.regs.esp + 0x18

# 创建符号变量
password0 = claripy.BVS('password0', 32)
password1 = claripy.BVS('password1', 32)

# 从汇编可以看出:
# password0 位于 ebp - 0xc 到 ebp - 0x9 (4字节)
# password1 位于 ebp - 0x10 到 ebp - 0xd (4字节)
# 用 .memory.store() 主动把符号变量写入栈中对应的位置
initial_state.memory.store(initial_state.regs.ebp - 0xc, password0, endness='Iend_LE') # 指定字节序列为小端序
initial_state.memory.store(initial_state.regs.ebp - 0x10, password1, endness='Iend_LE')

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]
solution0 = solution_state.solver.eval(password0)
solution1 = solution_state.solver.eval(password1)
print(f"{solution0} {solution1}")
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

# 1704280884 2382341151

05_angr_symbolic_memory

直接用 print_good_address = 0x08048672 依旧可以得到正确输入

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import angr
import sys

def main(argv):
path_to_binary = "./05_angr_symbolic_memory"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

print_good_address = 0x08048672

simulation.explore(find=print_good_address)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')



if __name__ == '__main__':
main(sys.argv)

这道题是在学习符号化内存,本质上是在符号化 .bss 段(未初始化的全局变量)

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import angr
import claripy
import sys

def main(argv):
path_to_binary = "./05_angr_symbolic_memory"
project = angr.Project(path_to_binary)

start_address = 0x08048601
initial_state = project.factory.blank_state(
addr=start_address,
add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
)

password0 = claripy.BVS('password0', 64)
password1 = claripy.BVS('password1', 64)
password2 = claripy.BVS('password2', 64)
password3 = claripy.BVS('password3', 64)


# 080485e0 68d8a11b0a push 0xa1ba1d8 {var_30}
# 080485e5 68d0a11b0a push 0xa1ba1d0 {var_34}
# 080485ea 68c8a11b0a push 0xa1ba1c8 {var_38}
# 080485ef 68c0a11b0a push user_input {var_3c}

password0_address = 0xa1ba1c0 # user_input
initial_state.memory.store(password0_address, password0)

password1_address = 0xa1ba1c8
initial_state.memory.store(password1_address, password1)

password2_address = 0xa1ba1d0
initial_state.memory.store(password2_address, password2)

password3_address = 0xa1ba1d8
initial_state.memory.store(password3_address, password3)

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]

solution0 = solution_state.solver.eval(password0, cast_to=bytes) # 指定输出类型为字节型
solution1 = solution_state.solver.eval(password1, cast_to=bytes)
solution2 = solution_state.solver.eval(password2, cast_to=bytes)
solution3 = solution_state.solver.eval(password3, cast_to=bytes)

# 去掉空字节并解码为字符串
solution = f"{solution0.rstrip(b'\\x00').decode()} {solution1.rstrip(b'\\x00').decode()} {solution2.rstrip(b'\\x00').decode()} {solution3.rstrip(b'\\x00').decode()}"
print(solution)
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

# NAXTHGNR JVSFTPWE LMGAUHWC XMDCPAL

06_angr_symbolic_dynamic_memory

用 print_good_address = 0x0804875e 可以得到正确输入

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import angr
import sys

def main(argv):
path_to_binary = "./06_angr_symbolic_dynamic_memory"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

print_good_address = 0x0804875e

simulation.explore(find=print_good_address)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')



if __name__ == '__main__':
main(sys.argv)

从汇编中可以看出,这道题的输入不是直接存储在栈上,而是通过 malloc 动态分配内存,然后将输入存储在这些动态分配的内存中

这就需要符号化动态内存
伪造一个地址,指向任意未使用的内存区域,
Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import angr
import claripy
import sys

def main(argv):
path_to_binary = "./06_angr_symbolic_dynamic_memory"
project = angr.Project(path_to_binary)

start_address = 0x08048699 # mov dword [ebp-0xc {i}], 0x0
initial_state = project.factory.blank_state(
addr=start_address,
add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
)

password0 = claripy.BVS('password0', 64)
password1 = claripy.BVS('password1', 64)

# 在 x86 汇编中:
# a3 a4c8bc0a = mov [0xabc8a4], eax
# a1 a4c8bc0a = mov eax, [0xabc8a4]

# 伪造堆地址 - 选择任意未使用的内存区域
fake_heap_address0 = 0x10000000
fake_heap_address1 = 0x10000010


# 08048623 e8e8fdffff call malloc ; 第一次malloc调用
# 08048628 83c410 add esp, 0x10
# 0804862b a3a4c8bc0a mov dword [buffer0], eax

# 08048635 e8d6fdffff call malloc ; 第二次malloc调用
# 0804863a 83c410 add esp, 0x10
# 0804863d a3acc8bc0a mov dword [buffer1], eax

pointer_to_malloc_memory_address0 = 0xabc8a4 # buffer0
pointer_to_malloc_memory_address1 = 0xabc8ac # buffer1

# 将全局指针指向伪造的堆地址
initial_state.memory.store(pointer_to_malloc_memory_address0, fake_heap_address0, endness=project.arch.memory_endness)
initial_state.memory.store(pointer_to_malloc_memory_address1, fake_heap_address1, endness=project.arch.memory_endness)

# 将符号值存储在伪造的堆地址处
# scanf会将输入存储在这些地址
initial_state.memory.store(fake_heap_address0, password0)
initial_state.memory.store(fake_heap_address1, password1)

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]

solution0 = solution_state.solver.eval(password0, cast_to=bytes)
solution1 = solution_state.solver.eval(password1, cast_to=bytes)

solution = solution0.decode() + ' ' + solution1.decode()
print(solution)
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

从上面的几个例子也能看到有的地方新版的 angr 已经可以自动符号化,不需要手动创建符号化

07_angr_symbolic_file

这个挑战和其他的不同,涉及到文件操作

其中 ignore_me 函数是将用户从控制台的输入写入一个文件(OJKSQYDP.txt),在 main 函数中通过 fread 从这个文件读取数据并进行验证

需要做的是:

  1. 确定程序中 fread 从哪个文件读取数据
  2. 使用 Angr 模拟文件系统,并用自定义的文件来替换真实文件
  3. 用符号变量初始化这个文件,使其被 fread 读取并传播到程序的执行状态中
  4. 使用 Angr 求解器来计算出正确的符号输入

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import angr 
import claripy
import sys

def main(argv):
path_to_binary = "./07_angr_symbolic_file"
project = angr.Project(path_to_binary)

# 起始地址选在调用 ignore_me 函数之后
start_address = 0x080488d6
initial_state = project.factory.blank_state(
addr=start_address,
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

filename = "OJKSQYDP.txt"
symbolic_file_size_bytes = 64

# 创建符号位向量作为密码
password = claripy.BVS('password', symbolic_file_size_bytes * 8)

# 创建一个模拟文件系统
password_file = angr.SimFile(filename, content=password)

# 将文件添加到文件系统
initial_state.fs.insert(filename, password_file)

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]

solution_bytes = solution_state.solver.eval(password, cast_to=bytes)

printable_chars = ''.join(chr(b) if 32 <= b <= 126 else '.' for b in solution_bytes)
print("Solution (printable): {}".format(printable_chars))
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

运行结果

08_angr_constraints

check 函数是这种模式

1
2
3
4
5
6
7
8
9
int check_equals_AABBCCDDEEFFGGHH(char* to_check, size_t length) {
uint32_t num_correct = 0;
for (int i = 0; i < length; ++i) {
if (to_check[i] == REFERENCE_PASSWORD[i]) { // 每个字符一一对比
num_correct += 1;
}
}
return num_correct == length; // 只有全部相等才返回真
}

这道题里面的比较函数是逐字符比较的,虽然和 strcmp(to_check, “AABBCCDDEEFFGGHH”) == 0 是同样的效果,但是用 angr 进行符号执行时,每一个要比较的字符 to_check[i] 都是一个未知的符号变量,所以这个 if 是一个分支点,每比较一种字符都有两种可能的路经(相等或不相等),这里长度是 16,总共有 2^16 = 65536 种可能的路径,这就产生了 angr 中的一个问题: 路径爆炸,路径爆炸会导致符号执行变得非常慢,甚至无法在合理时间内完成

要解决这个问题,可以使用 angr 的约束求解功能

在比较发生前停止,把被比较的数据直接约束为参考值,然后让 SMT 求解器反解出输入 password

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import angr 
import claripy
import sys

def main(argv):
path_to_binary = "./08_angr_constraints"
project = angr.Project(path_to_binary)

start_address = 0x08048625 # scanf 调用之后
initial_state = project.factory.blank_state(
addr=start_address,
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

password = claripy.BVS('password',16*8)
password_address = 0x804a050 # buffer 地址
initial_state.memory.store(password_address,password)

simulation = project.factory.simgr(initial_state)

address_to_check_constraint = 0x08048669 # 调用 check 函数前的地址
simulation.explore(find=address_to_check_constraint)

if simulation.found:
solution_state = simulation.found[0]
constrained_parameter_address = 0x804a050
constrained_parameter_size_bytes = 16 # 16 字节
constrained_parameter_bitvector = solution_state.memory.load(
constrained_parameter_address,
constrained_parameter_size_bytes
)

constrained_parameter_desired_value = b"AUPDNNPROEZRJWKB" # 目标字符串
solution_state.add_constraints(constrained_parameter_bitvector == constrained_parameter_desired_value)

solution = solution_state.solver.eval(password, cast_to=bytes)
# print(solution)
print(solution.decode('utf-8', errors='ignore'))
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

09_angr_hooks

程序读取了两次输入,第一次输入经过 complex_function 之后与目标字符串 password 比较,第二次输入与变换后的 password 比较

如果用符号执行解决的话,需要关注的依旧是检查函数 check_equals_XYMKBKUHNIQYNQXE,除了上文条件约束的方法,还可以使用 Angr 的 hook 功能
@project.hook(addr, length=n)
在执行到 addr 时,调用 Python 函数替代原本的机器码
length=n 告诉 angr 这段原始机器码占 n 个字节,执行完 hook 后需要跳过这些字节,避免重复执行原指令

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import angr
import claripy
import sys

def main(argv):
path_to_binary = "./09_angr_hooks"
project = angr.Project(path_to_binary)


initial_state = project.factory.entry_state(
add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
)

check_equals_called_address = 0x080486b3

# angr.Hook 中的 length 参数指定了在 hook 执行完之后,执行引擎需要跳过的字节数
instruction_to_skip_length = 5
@project.hook(check_equals_called_address, length=instruction_to_skip_length)
def skip_check_equals_(state):
user_input_buffer_address = 0x804a054
user_input_buffer_length = 16

user_input_string = state.memory.load(
user_input_buffer_address,
user_input_buffer_length
)

check_against_string = b"XYMKBKUHNIQYNQXE"

# 在 gcc 中,如果返回值是整数类型,会用 eax 来存储返回值
# 我们需要在 check_against_string == user_input_string 时将 eax 设为 1,否则设为 0
# 使用 claripy 内置的处理条件表达式的函数
state.regs.eax = claripy.If(
user_input_string == check_against_string,
claripy.BVV(1, 32),
claripy.BVV(0, 32)
)

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]

solution = solution_state.posix.dumps(sys.stdin.fileno()).decode()
print(solution)
else:
raise Exception("Could not find a solution")

if __name__ == "__main__":
main(sys.argv)

10_angr_simprocedures

这个挑战与之前的相似,也是要处理 check 函数,但是通过汇编可以看到,check_equals_ 被调用的次数太多了,如果在每个调用点单独进行 hook,那就不太实际了;而且这道题的输入也是存储在动态的栈地址,

可以使用 SimProcedure 来编写自己的 check_equals_ 实现,然后 hook check_equals_ 这个符号,从而一次性替换所有对 check_equals_ 的调用

定义一个继承自 angr.SimProcedure 的类,以便利用 Angr 的 SimProcedure 功能
例如

1
2
3
4
5
6
7
int add_if_positive(int a, int b) {
if (a > 0 && b > 0) {
return a + b;
} else {
return 0;
}
}

这个函数可以用下面的方式进行模拟:

1
2
3
4
5
6
7
class ReplacementAddIfPositive(angr.SimProcedure):
def run(self, a, b):
if a >= 0 and b >=0:
return a + b
else:
return 0

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import angr
import claripy
import sys

def main(argv):
path_to_binary = "./10_angr_simprocedures"
project = angr.Project(path_to_binary)


initial_state = project.factory.entry_state(
add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
)

class ReplacementCheckEquals(angr.SimProcedure):
def run(self, to_check, length):
# 这里不写具体的地址,而是通过参数获取
user_input_buffer_address = to_check
user_input_buffer_length = length

# 使用 self.state 来访问 SimProcedure 中程序当前的状态
user_input_string = self.state.memory.load(
user_input_buffer_address,
user_input_buffer_length
)

check_against_string = b"ORSDDWXHZURJRBDH"
return claripy.If(
user_input_string == check_against_string,
claripy.BVV(1, 32),
claripy.BVV(0, 32)
)


check_equals_symbol = "check_equals_ORSDDWXHZURJRBDH"
project.hook_symbol(check_equals_symbol, ReplacementCheckEquals())
simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]

solution = solution_state.posix.dumps(sys.stdin.fileno()).decode()
print(solution)
else:
raise Exception("Could not find a solution")

if __name__ == "__main__":
main(sys.argv)

11_angr_sim_scanf

这道题的背景是旧版的 Angr 不支持 scanf 读取多个输入,所以 hook 掉 scanf,替换成自己实现的函数
但是新版的 Angr 已经支持 scanf 读取多个输入了,用最开始的方法直接符号执行也能成功

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import angr
import sys

def main(argv):
path_to_binary = "./11_angr_sim_scanf"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Good Job." in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Try again." in stdout_output

simulation.explore(find=is_successful,avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

贴一下 hook scanf 的代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import angr
import claripy
import sys

def main(argv):
path_to_binary = "./11_angr_sim_scanf"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state()

class ReplacementScanf(angr.SimProcedure):
def run(self, format_string, scanf0_address, scanf1_address):
scanf0 = claripy.BVS('scanf0', 32)
scanf1 = claripy.BVS('scanf1', 32)

# 存储到指定的内存地址
self.state.memory.store(scanf0_address, scanf0, endness=project.arch.memory_endness)
self.state.memory.store(scanf1_address, scanf1, endness=project.arch.memory_endness)

# 保存到 globals 以便后续提取
self.state.globals['solution0'] = scanf0
self.state.globals['solution1'] = scanf1

return 2 # 返回成功读取的参数数量

# Hook scanf符号
project.hook_symbol('__isoc99_scanf', ReplacementScanf())

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Good Job.' in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b'Try again.' in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]

# 从globals中提取解
solution0 = solution_state.solver.eval(solution_state.globals['solution0'])
solution1 = solution_state.solver.eval(solution_state.globals['solution1'])

print(f"{solution0} {solution1}")
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

# 1448564819 1398294103

12_angr_veritesting

与之前的不同,这道题是对每个输入字符单独混淆并且每次字符立即比较,产生了大量的分支

使用 Angr 中的一个优化技术 Veritesting 减少符号执行的路径爆炸问题
simulation = project.factory.simgr(initial_state, veritesting=True)

Veritesting 结合了静态分析和动态分析,当遇到可能导致路径爆炸的代码块时,它暂停盲目的路径分叉,将接下来的一片代码区域当作一个整体进行静态分析和合并,生成统一的路径约束,用条件表达式(如 if-then-else)表示不同路径的结果,从而避免每遇到分支就分裂状态

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import angr
import sys

def main(argv):
path_to_binary = "./12_angr_veritesting"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
}
)

# 使用 veritesting 来减少路径爆炸
simulation = project.factory.simgr(initial_state,veritesting=True)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Good Job." in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Try again." in stdout_output

simulation.explore(find=is_successful,avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()))
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

13_angr_static_binary

对于静态链接的二进制文件,需要手动 hook 这些库函数,把它们替换成 Angr 内置的 SimProcedure,这样就可以跳过库函数内部复杂的执行
下面是 angr 已有的一些 SimProcedures,它们实现了标准库函数
更多的可以参考 https://github.com/angr/angr/tree/master/angr/procedures/libc
angr.SIM_PROCEDURES[‘libc’][‘malloc’]

angr.SIM_PROCEDURES[‘libc’][‘fopen’]

angr.SIM_PROCEDURES[‘libc’][‘fclose’]

angr.SIM_PROCEDURES[‘libc’][‘fwrite’]

angr.SIM_PROCEDURES[‘libc’][‘getchar’]

angr.SIM_PROCEDURES[‘libc’][‘strncmp’]

angr.SIM_PROCEDURES[‘libc’][‘strcmp’]

angr.SIM_PROCEDURES[‘libc’][‘scanf’]

angr.SIM_PROCEDURES[‘libc’][‘printf’]

angr.SIM_PROCEDURES[‘libc’][‘puts’]

angr.SIM_PROCEDURES[‘libc’][‘exit’]

angr.SIM_PROCEDURES[‘glibc’][‘__libc_start_main’]

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import angr
import sys

def main(argv):
path_to_binary = "./13_angr_static_binary"
project = angr.Project(path_to_binary)

initial_state = project.factory.entry_state(
add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
)

# 里面填写的这些地址是静态链接二进制文件中库函数的入口地址
project.hook(0x0804ed40, angr.SIM_PROCEDURES['libc']['printf']())
project.hook(0x0804ed80, angr.SIM_PROCEDURES['libc']['scanf']())
project.hook(0x0804f350, angr.SIM_PROCEDURES['libc']['puts']())
project.hook(0x08048d10, angr.SIM_PROCEDURES['glibc']['__libc_start_main']())

simulation = project.factory.simgr(initial_state)

def is_successful(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Good Job." in stdout_output

def should_abort(state):
stdout_output = state.posix.dumps(sys.stdout.fileno())
return b"Try again." in stdout_output

simulation.explore(find=is_successful, avoid=should_abort)

if simulation.found:
solution_state = simulation.found[0]
print(solution_state.posix.dumps(sys.stdin.fileno()).decode())
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

14_angr_shared_library

这道题给了一个共享链接库文件,程序中的 validate 函数就是从这个库文件中导入的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
000106d7    uint32_t validate(char* arg1, int32_t arg2)

000106d7 55 push ebp {__saved_ebp}
000106d8 89e5 mov ebp, esp {__saved_ebp}
000106da 56 push esi {__saved_esi}
000106db 53 push ebx {__saved_ebx} {_GLOBAL_OFFSET_TABLE_}
000106dc 83ec20 sub esp, 0x20
000106df e81cfeffff call __x86.get_pc_thunk.bx
000106e4 81c31c190000 add ebx, 0x191c {_GLOBAL_OFFSET_TABLE_}
000106ea 837d0c07 cmp dword [ebp+0xc {arg2}], 0x7
000106ee 7f0a jg 0x106fa

000106f0 b800000000 mov eax, 0x0
000106f5 e983000000 jmp 0x1077d

000106fa c745f400000000 mov dword [ebp-0xc {i}], 0x0
00010701 eb0f jmp 0x10712

00010703 8d55dc lea edx, [ebp-0x24 {var_28}]
00010706 8b45f4 mov eax, dword [ebp-0xc {i}]
00010709 01d0 add eax, edx {var_28}
0001070b c60000 mov byte [eax], 0x0
0001070e 8345f401 add dword [ebp-0xc {i}], 0x1

00010712 837df413 cmp dword [ebp-0xc {i}], 0x13
00010716 7eeb jle 0x10703

00010718 8d45dc lea eax, [ebp-0x24 {var_28}]
0001071b c7005056424c mov dword [eax {var_28}], 0x4c425650
00010721 c7400456544654 mov dword [eax+0x4 {var_24}], 0x54465456
00010728 c745f000000000 mov dword [ebp-0x10 {i_1}], 0x0
0001072f eb2c jmp 0x1075d

00010731 8b55f0 mov edx, dword [ebp-0x10 {i_1}]
00010734 8b4508 mov eax, dword [ebp+0x8 {arg1}]
00010737 8d3402 lea esi, [edx+eax]
0001073a 8b55f0 mov edx, dword [ebp-0x10 {i_1}]
0001073d 8b4508 mov eax, dword [ebp+0x8 {arg1}]
00010740 01d0 add eax, edx
00010742 0fb600 movzx eax, byte [eax]
00010745 0fbec0 movsx eax, al
00010748 83ec08 sub esp, 0x8
0001074b ff75f0 push dword [ebp-0x10 {i_1}] {var_38_1}
0001074e 50 push eax {var_3c_1}
0001074f e86cfdffff call complex_function
00010754 83c410 add esp, 0x10
00010757 8806 mov byte [esi], al
00010759 8345f001 add dword [ebp-0x10 {i_1}], 0x1

0001075d 837df007 cmp dword [ebp-0x10 {i_1}], 0x7
00010761 7ece jle 0x10731

00010763 83ec08 sub esp, 0x8
00010766 8d45dc lea eax, [ebp-0x24 {var_28}]
00010769 50 push eax {var_28} {var_38_2}
0001076a ff7508 push dword [ebp+0x8 {arg1}] {var_3c_2}
0001076d e82efdffff call strcmp
00010772 83c410 add esp, 0x10
00010775 85c0 test eax, eax
00010777 0f94c0 sete al
0001077a 0fb6c0 movzx eax, al

0001077d 8d65f8 lea esp, [ebp-0x8]
00010780 5b pop ebx {__saved_ebx}
00010781 5e pop esi {__saved_esi}
00010782 5d pop ebp {__saved_ebp}
00010783 c3 retn {__return_addr}

Angr 代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import angr
import claripy
import sys

def main(argv):
# 注意这里填的是共享库文件,而不是调用这个库的可执行文件
path_to_binary = "./lib14_angr_shared_library.so"
base = 0x10000
project = angr.Project(path_to_binary, load_options={
'main_opts' : {
'base_addr' : base
}
})
# 设置符号输入缓冲区地址,这个地址将作为 validate 函数的第一个参数传入,代表 char *buffer
buffer_pointer = claripy.BVV(0x3000000, 32) # 32 位架构指针占用 32 字节

# 构造初始符号执行状态(call_state)
validate_function_address = base + 0x6d7

initial_state = project.factory.call_state(
validate_function_address,
buffer_pointer,
claripy.BVV(8, 32),
add_options = {
angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS
})

password = claripy.BVS('password', 8*8)
initial_state.memory.store(buffer_pointer, password)
simulation = project.factory.simgr(initial_state)

check_constraint_addres = base + 0x77d # 该地址处在函数返回之前,返回值(eax)已经确定
simulation.explore(find=check_constraint_addres)

if simulation.found:
solution_state = simulation.found[0]
solution_state.add_constraints(solution_state.regs.eax != 0)
solution = solution_state.solver.eval(password,cast_to=bytes).decode()
print(solution)
else:
raise Exception('Could not find the solution')

if __name__ == '__main__':
main(sys.argv)

参考文章:
https://github.com/jakespringer/angr_ctf/tree/master/solutions
https://3cly.github.io/2023/04/16/Angr%E7%AC%A6%E5%8F%B7%E6%89%A7%E8%A1%8C%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/
https://www.kn0sky.com/?p=0e2f9462-df20-4705-83c2-4ff36b5b0c40#angr-%E5%88%9D%E5%AD%A6%E7%AC%94%E8%AE%B0


Angr 符号执行 模拟执行
http://example.com/2025/10/02/angr/
作者
Eleven
发布于
2025年10月2日
许可协议