第7章:Native层分析技术
7.1 ELF文件格式分析
7.1.1 ELF文件结构概述
ELF(Executable and Linkable Format)是Linux系统中可执行文件、目标文件、共享库和核心转储的标准文件格式。Android的Native库(.so文件)也采用ELF格式。
7.1.2 ELF文件头分析
import struct
from enum import IntEnum
class ELFAnalyzer:
    """Static parser for ELF images (e.g. Android .so libraries).

    Works directly on an in-memory byte buffer: parses the ELF header,
    the section header table, string tables and symbol tables, and can
    produce a compact analysis summary.
    """

    class ELFClass(IntEnum):
        # e_ident[EI_CLASS] values
        ELFCLASSNONE = 0
        ELFCLASS32 = 1
        ELFCLASS64 = 2

    class ELFData(IntEnum):
        # e_ident[EI_DATA] values
        ELFDATANONE = 0
        ELFDATA2LSB = 1  # little endian
        ELFDATA2MSB = 2  # big endian

    class ELFType(IntEnum):
        ET_NONE = 0   # no file type
        ET_REL = 1    # relocatable file
        ET_EXEC = 2   # executable file
        ET_DYN = 3    # shared object file
        ET_CORE = 4   # core file

    class ELFMachine(IntEnum):
        EM_NONE = 0
        EM_386 = 3        # Intel 80386
        EM_ARM = 40       # ARM
        EM_X86_64 = 62    # AMD x86-64
        EM_AARCH64 = 183  # ARM 64-bit

    def __init__(self, elf_data):
        """elf_data: raw bytes of the entire ELF file."""
        self.data = elf_data
        self.header = None             # dict of e_* fields (see parse_header)
        self.is_64bit = False
        self.is_little_endian = True
        self.sections = []             # section header dicts (parse_section_headers)
        self.symbols = []              # merged .symtab/.dynsym entries
        self.strings = {}              # table_name -> {offset: decoded string}

    def parse_header(self):
        """Parse and return the ELF file header as a dict.

        Raises:
            ValueError: buffer too small or bad magic number.
        """
        # e_ident is 16 bytes; the full header size depends on the ELF
        # class and is validated again once that class is known.
        if len(self.data) < 16:
            raise ValueError("File too small to be a valid ELF file")
        if self.data[:4] != b'\x7fELF':
            raise ValueError("Invalid ELF magic number")

        ei_class = self.data[4]
        ei_data = self.data[5]

        self.is_64bit = (ei_class == self.ELFClass.ELFCLASS64)
        self.is_little_endian = (ei_data == self.ELFData.ELFDATA2LSB)

        endian = '<' if self.is_little_endian else '>'
        if self.is_64bit:
            header_format = f'{endian}16s H H I Q Q Q I H H H H H H'
            header_size = 64  # sizeof(Elf64_Ehdr)
        else:
            header_format = f'{endian}16s H H I I I I I H H H H H H'
            header_size = 52  # sizeof(Elf32_Ehdr)
        # BUGFIX: previously a flat 64-byte minimum was required, which
        # rejected valid ELF32 files between 52 and 63 bytes long.
        if len(self.data) < header_size:
            raise ValueError("File too small to be a valid ELF file")

        fields = struct.unpack(header_format, self.data[:header_size])
        names = ('e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry',
                 'e_phoff', 'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize',
                 'e_phnum', 'e_shentsize', 'e_shnum', 'e_shstrndx')
        self.header = dict(zip(names, fields))

        # Human-readable convenience fields for reporting.
        self.header['class'] = 'ELF64' if self.is_64bit else 'ELF32'
        self.header['data'] = 'Little Endian' if self.is_little_endian else 'Big Endian'
        self.header['type_name'] = self.get_type_name(self.header['e_type'])
        self.header['machine_name'] = self.get_machine_name(self.header['e_machine'])
        return self.header

    def get_type_name(self, e_type):
        """Map an e_type value to a readable name."""
        type_names = {
            self.ELFType.ET_NONE: "None",
            self.ELFType.ET_REL: "Relocatable",
            self.ELFType.ET_EXEC: "Executable",
            self.ELFType.ET_DYN: "Shared Object",
            self.ELFType.ET_CORE: "Core File",
        }
        return type_names.get(e_type, f"Unknown ({e_type})")

    def get_machine_name(self, e_machine):
        """Map an e_machine value to a readable architecture name."""
        machine_names = {
            self.ELFMachine.EM_NONE: "None",
            self.ELFMachine.EM_386: "Intel 80386",
            self.ELFMachine.EM_ARM: "ARM",
            self.ELFMachine.EM_X86_64: "AMD x86-64",
            self.ELFMachine.EM_AARCH64: "ARM 64-bit",
        }
        return machine_names.get(e_machine, f"Unknown ({e_machine})")

    def parse_section_headers(self):
        """Parse the section header table into self.sections and return it."""
        if not self.header:
            self.parse_header()
        if self.header['e_shnum'] == 0:
            return []

        endian = '<' if self.is_little_endian else '>'
        if self.is_64bit:
            sh_format = f'{endian}I I Q Q Q Q I I Q Q'
            sh_size = 64  # sizeof(Elf64_Shdr)
        else:
            sh_format = f'{endian}I I I I I I I I I I'
            sh_size = 40  # sizeof(Elf32_Shdr)

        field_names = ('sh_name', 'sh_type', 'sh_flags', 'sh_addr',
                       'sh_offset', 'sh_size', 'sh_link', 'sh_info',
                       'sh_addralign', 'sh_entsize')
        table_start = self.header['e_shoff']
        for index in range(self.header['e_shnum']):
            entry_off = table_start + index * sh_size
            if entry_off + sh_size > len(self.data):
                break  # truncated section header table
            values = struct.unpack(sh_format, self.data[entry_off:entry_off + sh_size])
            section = dict(zip(field_names, values))
            section['index'] = index
            section['type_name'] = self.get_section_type_name(section['sh_type'])
            self.sections.append(section)

        # Resolve section names through .shstrtab (index e_shstrndx).
        if self.header['e_shstrndx'] < len(self.sections):
            shstrtab = self.sections[self.header['e_shstrndx']]
            self.parse_string_table(shstrtab, 'section_names')
        for section in self.sections:
            section['name'] = self.get_string('section_names', section['sh_name'])
        return self.sections

    def get_section_type_name(self, sh_type):
        """Map an sh_type value to its SHT_* name."""
        type_names = {
            0: "SHT_NULL",
            1: "SHT_PROGBITS",
            2: "SHT_SYMTAB",
            3: "SHT_STRTAB",
            4: "SHT_RELA",
            5: "SHT_HASH",
            6: "SHT_DYNAMIC",
            7: "SHT_NOTE",
            8: "SHT_NOBITS",
            9: "SHT_REL",
            10: "SHT_SHLIB",
            11: "SHT_DYNSYM",
            0x70000001: "SHT_ARM_EXIDX",
            0x70000003: "SHT_ARM_ATTRIBUTES",
        }
        return type_names.get(sh_type, f"Unknown ({sh_type})")

    def parse_string_table(self, section, table_name):
        """Parse a string-table section into self.strings[table_name].

        The resulting dict maps byte offset within the table to the
        NUL-terminated string starting there.
        """
        if section['sh_size'] == 0:
            return
        offset = section['sh_offset']
        size = section['sh_size']
        if offset + size > len(self.data):
            return  # table lies (partly) outside the buffer; ignore it
        table = self.data[offset:offset + size]

        parsed = {}
        pos = 0
        while pos < len(table):
            end = table.find(b'\x00', pos)
            if end == -1:
                break
            # BUGFIX: also record empty strings (offset 0 is the canonical
            # empty name, used e.g. for unnamed symbols/sections); the old
            # code skipped them and get_string() returned "<unknown_0>".
            parsed[pos] = table[pos:end].decode('utf-8', errors='replace')
            pos = end + 1
        self.strings[table_name] = parsed

    def get_string(self, table_name, offset):
        """Look up a string by byte offset; returns a placeholder if absent."""
        table = self.strings.get(table_name)
        if table is not None and offset in table:
            return table[offset]
        return f"<unknown_{offset}>"

    def parse_symbol_table(self):
        """Parse all SHT_SYMTAB/SHT_DYNSYM sections into self.symbols."""
        if not self.sections:
            self.parse_section_headers()
        for section in self.sections:
            if section['sh_type'] in [2, 11]:  # SHT_SYMTAB, SHT_DYNSYM
                self.parse_single_symbol_table(section)
        return self.symbols

    def parse_single_symbol_table(self, symtab_section):
        """Parse one symbol table section and append entries to self.symbols."""
        if symtab_section['sh_size'] == 0:
            return
        # sh_link points at the associated string table section.
        if symtab_section['sh_link'] >= len(self.sections):
            return
        strtab_section = self.sections[symtab_section['sh_link']]
        strtab_name = f"symtab_strings_{symtab_section['index']}"
        self.parse_string_table(strtab_section, strtab_name)

        endian = '<' if self.is_little_endian else '>'
        if self.is_64bit:
            # Elf64_Sym field order differs from Elf32_Sym.
            sym_format = f'{endian}I B B H Q Q'
            sym_size = 24
            field_order = ('st_name', 'st_info', 'st_other', 'st_shndx',
                           'st_value', 'st_size')
        else:
            sym_format = f'{endian}I I I B B H'
            sym_size = 16
            field_order = ('st_name', 'st_value', 'st_size', 'st_info',
                           'st_other', 'st_shndx')

        base = symtab_section['sh_offset']
        for i in range(symtab_section['sh_size'] // sym_size):
            entry_off = base + i * sym_size
            if entry_off + sym_size > len(self.data):
                break  # truncated symbol table
            values = struct.unpack(sym_format, self.data[entry_off:entry_off + sym_size])
            symbol = dict(zip(field_order, values))

            # st_info packs binding (high nibble) and type (low nibble);
            # the low 2 bits of st_other carry the visibility.
            symbol['bind'] = (symbol['st_info'] >> 4) & 0xf
            symbol['type'] = symbol['st_info'] & 0xf
            symbol['visibility'] = symbol['st_other'] & 0x3
            symbol['name'] = self.get_string(strtab_name, symbol['st_name'])
            symbol['bind_name'] = self.get_symbol_bind_name(symbol['bind'])
            symbol['type_name'] = self.get_symbol_type_name(symbol['type'])
            self.symbols.append(symbol)

    def get_symbol_bind_name(self, bind):
        """Map a symbol binding value to its STB_* name."""
        bind_names = {
            0: "STB_LOCAL",
            1: "STB_GLOBAL",
            2: "STB_WEAK",
        }
        return bind_names.get(bind, f"Unknown ({bind})")

    def get_symbol_type_name(self, sym_type):
        """Map a symbol type value to its STT_* name."""
        type_names = {
            0: "STT_NOTYPE",
            1: "STT_OBJECT",
            2: "STT_FUNC",
            3: "STT_SECTION",
            4: "STT_FILE",
            5: "STT_COMMON",
            6: "STT_TLS",
        }
        return type_names.get(sym_type, f"Unknown ({sym_type})")

    def get_analysis_summary(self):
        """Parse everything (lazily) and return a compact summary dict."""
        if not self.header:
            self.parse_header()
        if not self.sections:
            self.parse_section_headers()
        if not self.symbols:
            self.parse_symbol_table()

        code_sections = [s for s in self.sections if s['name'] in ['.text', '.init', '.fini']]
        data_sections = [s for s in self.sections if s['name'] in ['.data', '.rodata', '.bss']]
        function_symbols = [s for s in self.symbols if s['type'] == 2]  # STT_FUNC

        return {
            'file_info': {
                'class': self.header['class'],
                'data': self.header['data'],
                'type': self.header['type_name'],
                'machine': self.header['machine_name'],
                'entry_point': f"0x{self.header['e_entry']:x}",
            },
            'sections': {
                'total': len(self.sections),
                'code_sections': len(code_sections),
                'data_sections': len(data_sections),
            },
            'symbols': {
                'total': len(self.symbols),
                'functions': len(function_symbols),
            },
            'code_sections_info': [
                {
                    'name': s['name'],
                    'address': f"0x{s['sh_addr']:x}",
                    'size': s['sh_size'],
                    'offset': s['sh_offset'],
                } for s in code_sections
            ],
            # Global, defined functions only; listing capped at 20 entries.
            'exported_functions': [
                {
                    'name': s['name'],
                    'address': f"0x{s['st_value']:x}",
                    'size': s['st_size'],
                } for s in function_symbols
                if s['bind'] == 1 and s['st_value'] != 0  # STB_GLOBAL, defined
            ][:20],
        }
# Usage example
def analyze_so_file(so_path):
    """Parse *so_path* as an ELF and print a human-readable summary.

    Returns the ELFAnalyzer instance for further inspection.
    """
    with open(so_path, 'rb') as fh:
        raw = fh.read()

    analyzer = ELFAnalyzer(raw)
    summary = analyzer.get_analysis_summary()
    info = summary['file_info']
    secs = summary['sections']
    syms = summary['symbols']

    print("=== ELF Analysis Summary ===")
    print(f"File: {so_path}")
    print(f"Class: {info['class']}")
    print(f"Data: {info['data']}")
    print(f"Type: {info['type']}")
    print(f"Machine: {info['machine']}")
    print(f"Entry Point: {info['entry_point']}")
    print(f"\nSections: {secs['total']}")
    print(f"Code Sections: {secs['code_sections']}")
    print(f"Data Sections: {secs['data_sections']}")
    print(f"\nSymbols: {syms['total']}")
    print(f"Functions: {syms['functions']}")
    print(f"\nExported Functions:")
    for entry in summary['exported_functions']:
        print(f"  {entry['name']} @ {entry['address']} (size: {entry['size']})")
    return analyzer
7.1.3 动态链接分析
class DynamicLinkingAnalyzer:
    """Analyzes an ELF's dynamic-linking metadata: .dynamic entries,
    DT_NEEDED dependencies and relocation tables."""

    def __init__(self, elf_analyzer):
        """elf_analyzer: an ELFAnalyzer with header and sections parsed."""
        self.elf = elf_analyzer
        self.dynamic_section = None
        self.dynamic_entries = []
        self.needed_libraries = []
        self.imported_symbols = []
        self.exported_symbols = []

    def find_dynamic_section(self):
        """Locate the SHT_DYNAMIC section; caches and returns it (or None)."""
        for section in self.elf.sections:
            if section['sh_type'] == 6:  # SHT_DYNAMIC
                self.dynamic_section = section
                return section
        return None

    def parse_dynamic_entries(self):
        """Parse Elf{32,64}_Dyn entries; stops at the DT_NULL terminator."""
        if not self.dynamic_section:
            self.find_dynamic_section()
        if not self.dynamic_section:
            return []

        endian = '<' if self.elf.is_little_endian else '>'
        if self.elf.is_64bit:
            entry_format = f'{endian}Q Q'
            entry_size = 16
        else:
            entry_format = f'{endian}I I'
            entry_size = 8

        base = self.dynamic_section['sh_offset']
        count = self.dynamic_section['sh_size'] // entry_size
        for i in range(count):
            entry_off = base + i * entry_size
            if entry_off + entry_size > len(self.elf.data):
                break  # truncated .dynamic section
            d_tag, d_val = struct.unpack(
                entry_format, self.elf.data[entry_off:entry_off + entry_size])
            self.dynamic_entries.append({
                'd_tag': d_tag,
                'd_val': d_val,
                'tag_name': self.get_dynamic_tag_name(d_tag),
            })
            if d_tag == 0:  # DT_NULL terminates the table
                break
        return self.dynamic_entries

    def get_dynamic_tag_name(self, tag):
        """Map a d_tag value to its DT_* name."""
        tag_names = {
            0: "DT_NULL",
            1: "DT_NEEDED",
            2: "DT_PLTRELSZ",
            3: "DT_PLTGOT",
            4: "DT_HASH",
            5: "DT_STRTAB",
            6: "DT_SYMTAB",
            7: "DT_RELA",
            8: "DT_RELASZ",
            9: "DT_RELAENT",
            10: "DT_STRSZ",
            11: "DT_SYMENT",
            12: "DT_INIT",
            13: "DT_FINI",
            14: "DT_SONAME",
            15: "DT_RPATH",
            16: "DT_SYMBOLIC",
            17: "DT_REL",
            18: "DT_RELSZ",
            19: "DT_RELENT",
            20: "DT_PLTREL",
            21: "DT_DEBUG",
            22: "DT_TEXTREL",
            23: "DT_JMPREL",
            24: "DT_BIND_NOW",
            25: "DT_INIT_ARRAY",
            26: "DT_FINI_ARRAY",
            27: "DT_INIT_ARRAYSZ",
            28: "DT_FINI_ARRAYSZ",
        }
        return tag_names.get(tag, f"Unknown ({tag})")

    def extract_needed_libraries(self):
        """Resolve DT_NEEDED entries to names via the dynamic string table."""
        if not self.dynamic_entries:
            self.parse_dynamic_entries()

        # DT_STRTAB holds the virtual address of the dynamic string table.
        strtab_addr = None
        for entry in self.dynamic_entries:
            if entry['d_tag'] == 5:  # DT_STRTAB
                strtab_addr = entry['d_val']
                break
        # BUGFIX: compare against None so a (theoretical) address of 0 is
        # not confused with "no DT_STRTAB entry present".
        if strtab_addr is None:
            return []

        # Map the virtual address back to a section.
        strtab_section = None
        for section in self.elf.sections:
            if section['sh_addr'] == strtab_addr:
                strtab_section = section
                break
        if not strtab_section:
            return []

        self.elf.parse_string_table(strtab_section, 'dynamic_strings')
        for entry in self.dynamic_entries:
            if entry['d_tag'] == 1:  # DT_NEEDED
                self.needed_libraries.append(
                    self.elf.get_string('dynamic_strings', entry['d_val']))
        return self.needed_libraries

    def analyze_relocations(self):
        """Collect relocation records from every SHT_REL/SHT_RELA section."""
        relocations = []
        for section in self.elf.sections:
            if section['sh_type'] in [4, 9]:  # SHT_RELA, SHT_REL
                relocations.extend(self.parse_relocation_section(section))
        return relocations

    def parse_relocation_section(self, section):
        """Parse one relocation section into a list of record dicts.

        The symbol index / relocation type are unpacked from r_info using
        the class-specific split: ELF64 packs (sym << 32) | type, ELF32
        packs (sym << 8) | type.
        """
        relocations = []
        is_rela = (section['sh_type'] == 4)  # SHT_RELA has an explicit addend

        endian = '<' if self.elf.is_little_endian else '>'
        if self.elf.is_64bit:
            rel_format = f'{endian}Q Q q' if is_rela else f'{endian}Q Q'
            rel_size = 24 if is_rela else 16
        else:
            rel_format = f'{endian}I I i' if is_rela else f'{endian}I I'
            rel_size = 12 if is_rela else 8

        base = section['sh_offset']
        for i in range(section['sh_size'] // rel_size):
            rel_off = base + i * rel_size
            if rel_off + rel_size > len(self.elf.data):
                break  # truncated relocation section
            rel_data = struct.unpack(rel_format,
                                     self.elf.data[rel_off:rel_off + rel_size])
            r_info = rel_data[1]
            # BUGFIX: the old code used the ELF32 split (>> 8 / & 0xff) for
            # both classes. ELF64 keeps the symbol index in the high 32 bits
            # and the relocation type in the low 32 bits (ELF64_R_SYM/TYPE).
            if self.elf.is_64bit:
                r_type = r_info & 0xffffffff
                r_sym = r_info >> 32
            else:
                r_type = r_info & 0xff
                r_sym = r_info >> 8
            relocations.append({
                'r_offset': rel_data[0],
                'r_info': r_info,
                'r_addend': rel_data[2] if is_rela else 0,
                'type': r_type,
                'symbol': r_sym,
            })
        return relocations

    def generate_dynamic_analysis_report(self):
        """Render a markdown report of the dynamic-linking analysis."""
        self.parse_dynamic_entries()
        self.extract_needed_libraries()
        relocations = self.analyze_relocations()

        report = "# Dynamic Linking Analysis Report\n\n"

        report += "## Needed Libraries\n\n"
        if self.needed_libraries:
            for lib in self.needed_libraries:
                report += f"- {lib}\n"
        else:
            report += "No library dependencies found.\n"
        report += "\n"

        report += "## Dynamic Entries\n\n"
        for entry in self.dynamic_entries[:20]:  # cap the listing at 20
            report += f"- **{entry['tag_name']}:** 0x{entry['d_val']:x}\n"
        if len(self.dynamic_entries) > 20:
            report += f"... and {len(self.dynamic_entries) - 20} more entries\n"
        report += "\n"

        report += "## Relocations\n\n"
        report += f"Total relocations: {len(relocations)}\n"
        if relocations:
            report += "\nFirst 10 relocations:\n"
            for i, rel in enumerate(relocations[:10]):
                report += (f"{i+1}. Offset: 0x{rel['r_offset']:x}, "
                           f"Type: {rel['type']}, Symbol: {rel['symbol']}\n")
        return report
# Usage example
def analyze_dynamic_linking(so_path):
    """Run the dynamic-linking analysis on *so_path* and print the report."""
    with open(so_path, 'rb') as fh:
        payload = fh.read()

    elf = ELFAnalyzer(payload)
    elf.parse_header()
    elf.parse_section_headers()

    dynamic_analyzer = DynamicLinkingAnalyzer(elf)
    print(dynamic_analyzer.generate_dynamic_analysis_report())
    return dynamic_analyzer
7.2 ARM汇编代码分析
7.2.1 ARM指令集深入分析
import struct
import capstone
class ARMDisassembler:
    """ARM/ARM64 disassembler built on the Capstone engine.

    Collects per-instruction metadata, then groups instructions into
    heuristic functions and basic blocks and renders a markdown report.
    """

    def __init__(self, architecture='arm', mode='arm'):
        """Create a disassembler.

        architecture: 'arm' or 'arm64'
        mode: 'arm', 'thumb' or 'arm64'
        """
        self.arch = architecture
        self.mode = mode
        # Pick the matching Capstone engine configuration.
        if architecture == 'arm64':
            self.cs = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM)
        elif mode == 'thumb':
            self.cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
        else:
            self.cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
        self.cs.detail = True  # needed for operand and group info
        self.instructions = []
        self.functions = []
        self.basic_blocks = []

    def disassemble(self, code_bytes, base_address=0x1000):
        """Disassemble *code_bytes* starting at *base_address*.

        Returns the list of instruction-info dicts (also kept on self).
        """
        self.instructions = []
        for insn in self.cs.disasm(code_bytes, base_address):
            record = {
                'address': insn.address,
                'mnemonic': insn.mnemonic,
                'op_str': insn.op_str,
                'bytes': insn.bytes,
                'size': insn.size,
                'groups': insn.groups,
                'operands': [],
            }
            if insn.operands:
                record['operands'] = [self.analyze_operand(op) for op in insn.operands]
            record['type'] = self.classify_instruction(insn)
            self.instructions.append(record)
        return self.instructions

    def analyze_operand(self, operand):
        """Normalize one Capstone operand into a plain dict."""
        info = {
            'type': operand.type,
            'value': None,
            'register': None,
            'immediate': None,
            'memory': None,
        }
        if operand.type == capstone.CS_OP_REG:
            info['register'] = operand.reg
            info['value'] = self.cs.reg_name(operand.reg)
        elif operand.type == capstone.CS_OP_IMM:
            info['immediate'] = operand.imm
            info['value'] = f"#0x{operand.imm:x}"
        elif operand.type == capstone.CS_OP_MEM:
            mem = operand.mem
            info['memory'] = {'base': mem.base, 'index': mem.index, 'disp': mem.disp}
            # Render a readable [base, index, #disp] form.
            rendered = "["
            if mem.base != 0:
                rendered += self.cs.reg_name(mem.base)
            if mem.index != 0:
                rendered += f", {self.cs.reg_name(mem.index)}"
            if mem.disp != 0:
                rendered += f", #0x{mem.disp:x}"
            info['value'] = rendered + "]"
        return info

    def classify_instruction(self, insn):
        """Coarse classification used by the CFG and function heuristics."""
        groups = insn.groups
        if capstone.CS_GRP_JUMP in groups:
            return 'jump'
        if capstone.CS_GRP_CALL in groups:
            return 'call'
        if capstone.CS_GRP_RET in groups:
            return 'return'
        if capstone.CS_GRP_BRANCH_RELATIVE in groups:
            return 'branch'
        mnemonic = insn.mnemonic
        if mnemonic.startswith('ld') or mnemonic.startswith('st'):
            return 'memory'
        if mnemonic in ['add', 'sub', 'mul', 'div', 'and', 'orr', 'eor']:
            return 'arithmetic'
        if mnemonic in ['mov', 'mvn']:
            return 'data_transfer'
        if mnemonic.startswith('cmp') or mnemonic.startswith('tst'):
            return 'comparison'
        return 'other'

    def identify_functions(self):
        """Heuristically split the instruction stream into functions.

        A new function is assumed to start at a call instruction that
        directly follows a jump or return (heuristic, not exact).
        """
        found = []
        current = None
        for idx, insn in enumerate(self.instructions):
            starts_new = (insn['type'] == 'call' and idx > 0 and
                          self.instructions[idx - 1]['type'] in ['jump', 'return'])
            if starts_new:
                if current:
                    current['end_address'] = self.instructions[idx - 1]['address']
                    found.append(current)
                current = {
                    'start_address': insn['address'],
                    'instructions': [],
                    'calls': [],
                    'returns': [],
                }
            if current:
                current['instructions'].append(insn)
                if insn['type'] == 'call':
                    current['calls'].append(insn)
                elif insn['type'] == 'return':
                    current['returns'].append(insn)
        # Close the trailing function, if any.
        if current:
            current['end_address'] = self.instructions[-1]['address']
            found.append(current)
        self.functions = found
        return found

    def analyze_control_flow(self):
        """Partition instructions into basic blocks (leader algorithm)."""
        blocks = []
        leaders = {0}  # the first instruction is always a leader
        for idx, insn in enumerate(self.instructions):
            if insn['type'] in ['jump', 'branch', 'call']:
                # The instruction after a control transfer starts a block.
                if idx + 1 < len(self.instructions):
                    leaders.add(idx + 1)
                # A resolvable branch target starts a block too.
                if insn['type'] in ['jump', 'branch'] and insn['operands']:
                    for op in insn['operands']:
                        if op['type'] == capstone.CS_OP_IMM:
                            target = op['immediate']
                            for pos, candidate in enumerate(self.instructions):
                                if candidate['address'] == target:
                                    leaders.add(pos)
                                    break

        ordered = sorted(leaders)
        for i, start in enumerate(ordered):
            if start >= len(self.instructions):
                continue
            end = ordered[i + 1] - 1 if i + 1 < len(ordered) else len(self.instructions) - 1
            blocks.append({
                'id': len(blocks),
                'start_address': self.instructions[start]['address'],
                'end_address': self.instructions[end]['address'],
                'instructions': self.instructions[start:end + 1],
                'successors': [],
                'predecessors': [],
            })
        self.basic_blocks = blocks
        return blocks

    def generate_disassembly_report(self):
        """Render a markdown report of the disassembly analysis."""
        self.identify_functions()
        self.analyze_control_flow()

        parts = [
            f"# ARM Disassembly Report\n\n",
            f"**Architecture:** {self.arch}\n",
            f"**Mode:** {self.mode}\n",
            f"**Instructions:** {len(self.instructions)}\n",
            f"**Functions:** {len(self.functions)}\n",
            f"**Basic Blocks:** {len(self.basic_blocks)}\n\n",
        ]

        # Histogram of instruction classes, most frequent first.
        histogram = {}
        for insn in self.instructions:
            histogram[insn['type']] = histogram.get(insn['type'], 0) + 1
        parts.append("## Instruction Statistics\n\n")
        for kind, count in sorted(histogram.items(), key=lambda kv: kv[1], reverse=True):
            parts.append(f"- **{kind}:** {count}\n")
        parts.append("\n")

        if self.functions:
            parts.append("## Functions\n\n")
            for i, func in enumerate(self.functions[:10]):  # first 10 only
                parts.append(f"### Function {i + 1}\n")
                parts.append(f"- **Start:** 0x{func['start_address']:x}\n")
                parts.append(f"- **End:** 0x{func['end_address']:x}\n")
                parts.append(f"- **Instructions:** {len(func['instructions'])}\n")
                parts.append(f"- **Calls:** {len(func['calls'])}\n")
                parts.append(f"- **Returns:** {len(func['returns'])}\n\n")

        parts.append("## Disassembly (First 50 Instructions)\n\n")
        parts.append("```assembly\n")
        for insn in self.instructions[:50]:
            parts.append(f"0x{insn['address']:08x}: {insn['mnemonic']} {insn['op_str']}\n")
        if len(self.instructions) > 50:
            parts.append(f"... and {len(self.instructions) - 50} more instructions\n")
        parts.append("```\n")
        return "".join(parts)
# Usage example
def disassemble_arm_code(code_bytes, arch='arm', mode='arm'):
    """Disassemble raw ARM machine code and print the analysis report."""
    engine = ARMDisassembler(arch, mode)
    engine.disassemble(code_bytes)
    print(engine.generate_disassembly_report())
    return engine

# Sample ARM code used as a quick smoke test.
arm_code = bytes([
    0x04, 0x00, 0x2d, 0xe9,  # push {r2}
    0x00, 0x00, 0x90, 0xe5,  # ldr r0, [r0]
    0x01, 0x10, 0x81, 0xe2,  # add r1, r1, #1
    0x00, 0x00, 0x51, 0xe3,  # cmp r1, #0
    0x02, 0x00, 0x00, 0x1a,  # bne 0x1014
    0x04, 0x00, 0xbd, 0xe8,  # pop {r2}
    0x1e, 0xff, 0x2f, 0xe1,  # bx lr
])
disasm = disassemble_arm_code(arm_code)
7.2.2 函数调用约定分析
class ARMCallingConventionAnalyzer:
    """Analyzes AAPCS/AAPCS64 calling-convention usage in disassembled code."""

    def __init__(self, disassembler):
        self.disasm = disassembler
        # Register-role tables for the two ARM procedure-call standards.
        self.calling_conventions = {
            'AAPCS': {
                'param_registers': ['r0', 'r1', 'r2', 'r3'],
                'return_register': 'r0',
                'callee_saved': ['r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'r11'],
                'caller_saved': ['r0', 'r1', 'r2', 'r3', 'r12'],
                'stack_pointer': 'sp',
                'frame_pointer': 'r11',
                'link_register': 'lr',
            },
            'AAPCS64': {
                'param_registers': ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'],
                'return_register': 'x0',
                'callee_saved': ['x19', 'x20', 'x21', 'x22', 'x23', 'x24',
                                 'x25', 'x26', 'x27', 'x28'],
                'caller_saved': ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7',
                                 'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14',
                                 'x15', 'x16', 'x17', 'x18'],
                'stack_pointer': 'sp',
                'frame_pointer': 'x29',
                'link_register': 'x30',
            },
        }
        self.current_convention = 'AAPCS' if disassembler.arch == 'arm' else 'AAPCS64'

    def analyze_function_prologue(self, function):
        """Inspect the first instructions for stack setup and register saves."""
        result = {
            'stack_allocation': 0,
            'saved_registers': [],
            'frame_setup': False,
            'instructions': [],
        }
        conv = self.calling_conventions[self.current_convention]

        for insn in function['instructions'][:10]:  # prologue window
            result['instructions'].append(insn)
            ops = insn['operands']
            if (insn['mnemonic'] == 'sub' and len(ops) >= 3 and
                    ops[0]['value'] == conv['stack_pointer']):
                # sub sp, sp, #imm -> local stack-frame allocation
                if ops[2]['type'] == capstone.CS_OP_IMM:
                    result['stack_allocation'] = ops[2]['immediate']
            elif insn['mnemonic'] in ['push', 'str', 'stp']:
                # record callee-saved registers being spilled
                for op in ops:
                    if (op['type'] == capstone.CS_OP_REG and
                            op['value'] in conv['callee_saved']):
                        result['saved_registers'].append(op['value'])
            elif (insn['mnemonic'] == 'mov' and len(ops) >= 2 and
                    ops[0]['value'] == conv['frame_pointer'] and
                    ops[1]['value'] == conv['stack_pointer']):
                result['frame_setup'] = True
        return result

    def analyze_function_epilogue(self, function):
        """Inspect the last instructions for stack teardown and restores."""
        result = {
            'stack_deallocation': 0,
            'restored_registers': [],
            'return_instruction': None,
            'instructions': [],
        }
        conv = self.calling_conventions[self.current_convention]

        for insn in function['instructions'][-10:]:  # epilogue window
            result['instructions'].append(insn)
            ops = insn['operands']
            if (insn['mnemonic'] == 'add' and len(ops) >= 3 and
                    ops[0]['value'] == conv['stack_pointer']):
                # add sp, sp, #imm -> stack-frame release
                if ops[2]['type'] == capstone.CS_OP_IMM:
                    result['stack_deallocation'] = ops[2]['immediate']
            elif insn['mnemonic'] in ['pop', 'ldr', 'ldp']:
                for op in ops:
                    if (op['type'] == capstone.CS_OP_REG and
                            op['value'] in conv['callee_saved']):
                        result['restored_registers'].append(op['value'])
            elif insn['type'] == 'return':
                result['return_instruction'] = insn
        return result

    def analyze_function_calls(self, function):
        """For each call, extract its target and argument-register setup."""
        conv = self.calling_conventions[self.current_convention]
        calls = []
        for idx, insn in enumerate(function['instructions']):
            if insn['type'] != 'call':
                continue
            record = {
                'instruction': insn,
                'target': None,
                'arguments': [],
                'preparation_instructions': [],
            }
            # Call target: immediate (direct) or register (indirect).
            if insn['operands']:
                for op in insn['operands']:
                    if op['type'] == capstone.CS_OP_IMM:
                        record['target'] = f"0x{op['immediate']:x}"
                    elif op['type'] == capstone.CS_OP_REG:
                        record['target'] = op['value']
            # Scan up to 10 preceding instructions for argument setup.
            for prev in function['instructions'][max(0, idx - 10):idx]:
                if (prev['mnemonic'] in ['mov', 'ldr', 'add'] and
                        prev['operands'] and
                        prev['operands'][0]['value'] in conv['param_registers']):
                    record['arguments'].append({
                        'register': prev['operands'][0]['value'],
                        'instruction': prev,
                    })
                    record['preparation_instructions'].append(prev)
            calls.append(record)
        return calls

    def generate_calling_convention_report(self):
        """Render a markdown report of convention details and per-function analysis."""
        if not self.disasm.functions:
            self.disasm.identify_functions()

        conv = self.calling_conventions[self.current_convention]
        report = f"# ARM Calling Convention Analysis\n\n"
        report += f"**Convention:** {self.current_convention}\n\n"
        report += "## Calling Convention Details\n\n"
        report += f"- **Parameter Registers:** {', '.join(conv['param_registers'])}\n"
        report += f"- **Return Register:** {conv['return_register']}\n"
        report += f"- **Callee Saved:** {', '.join(conv['callee_saved'])}\n"
        report += f"- **Stack Pointer:** {conv['stack_pointer']}\n"
        report += f"- **Frame Pointer:** {conv['frame_pointer']}\n"
        report += f"- **Link Register:** {conv['link_register']}\n\n"

        for i, function in enumerate(self.disasm.functions[:5]):  # first 5 only
            report += f"## Function {i + 1} Analysis\n\n"
            report += (f"**Address:** 0x{function['start_address']:x} - "
                       f"0x{function['end_address']:x}\n\n")

            prologue = self.analyze_function_prologue(function)
            report += "### Prologue\n"
            report += f"- **Stack Allocation:** {prologue['stack_allocation']} bytes\n"
            report += f"- **Saved Registers:** {', '.join(prologue['saved_registers'])}\n"
            report += f"- **Frame Setup:** {'Yes' if prologue['frame_setup'] else 'No'}\n\n"

            epilogue = self.analyze_function_epilogue(function)
            report += "### Epilogue\n"
            report += f"- **Stack Deallocation:** {epilogue['stack_deallocation']} bytes\n"
            report += f"- **Restored Registers:** {', '.join(epilogue['restored_registers'])}\n"
            ret = epilogue['return_instruction']
            report += f"- **Return Instruction:** {ret['mnemonic'] if ret else 'None'}\n\n"

            calls = self.analyze_function_calls(function)
            if calls:
                report += "### Function Calls\n"
                for j, call in enumerate(calls):
                    report += f"#### Call {j + 1}\n"
                    report += f"- **Target:** {call['target']}\n"
                    report += f"- **Arguments:** {len(call['arguments'])}\n"
                    for arg in call['arguments']:
                        report += (f"  - {arg['register']}: "
                                   f"{arg['instruction']['mnemonic']} "
                                   f"{arg['instruction']['op_str']}\n")
                    report += "\n"
        return report
# Usage example
def analyze_calling_convention(so_path):
    """Disassemble the .text section of *so_path* and analyze its calling convention."""
    with open(so_path, 'rb') as fh:
        raw = fh.read()

    elf_analyzer = ELFAnalyzer(raw)
    elf_analyzer.parse_header()
    elf_analyzer.parse_section_headers()

    # Locate the executable code section.
    text_section = None
    for section in elf_analyzer.sections:
        if section['name'] == '.text':
            text_section = section
            break
    if not text_section:
        print("No .text section found")
        return

    start = text_section['sh_offset']
    code_bytes = raw[start:start + text_section['sh_size']]

    # e_machine == 183 (EM_AARCH64) selects the 64-bit disassembler.
    arch = 'arm64' if elf_analyzer.header['e_machine'] == 183 else 'arm'
    disasm = ARMDisassembler(arch)
    disasm.disassemble(code_bytes, text_section['sh_addr'])

    cc_analyzer = ARMCallingConventionAnalyzer(disasm)
    print(cc_analyzer.generate_calling_convention_report())
    return cc_analyzer
7.3 Native代码逆向技术
7.3.1 静态分析技术
class NativeStaticAnalyzer:
"""Native代码静态分析器"""
def __init__(self, elf_analyzer, disassembler):
self.elf = elf_analyzer
self.disasm = disassembler
self.cross_references = {}
self.string_references = {}
self.function_graph = {}
self.data_structures = []
def build_cross_references(self):
"""构建交叉引用"""
# 分析指令间的引用关系
for inst in self.disasm.instructions:
if inst['type'] in ['call', 'jump', 'branch']:
# 分析跳转目标
for op in inst['operands']:
if op['type'] == capstone.CS_OP_IMM:
target_addr = op['immediate']
if target_addr not in self.cross_references:
self.cross_references[target_addr] = []
self.cross_references[target_addr].append({
'from': inst['address'],
'type': inst['type'],
'instruction': inst
})
return self.cross_references
def analyze_string_references(self):
"""分析字符串引用"""
# 查找字符串常量段
string_sections = []
for section in self.elf.sections:
if section['name'] in ['.rodata', '.data', '.rodata.str1.1']:
string_sections.append(section)
# 提取字符串
strings = {}
for section in string_sections:
section_strings = self.extract_strings_from_section(section)
strings.update(section_strings)
# 分析字符串引用
for inst in self.disasm.instructions:
if inst['mnemonic'] in ['ldr', 'adr', 'mov']:
for op in inst['operands']:
if op['type'] == capstone.CS_OP_IMM:
addr = op['immediate']
if addr in strings:
if addr not in self.string_references:
self.string_references[addr] = []
self.string_references[addr].append({
'from': inst['address'],
'instruction': inst,
'string': strings[addr]
})
return self.string_references
def extract_strings_from_section(self, section):
"""从段中提取字符串"""
strings = {}
if section['sh_size'] == 0:
return strings
offset = section['sh_offset']
size = section['sh_size']
data = self.elf.data[offset:offset + size]
current_offset = 0
while current_offset < len(data):
# 查找可打印字符串
start = current_offset
while (current_offset < len(data) and
32 <= data[current_offset] <= 126): # 可打印ASCII字符
current_offset += 1
# 如果找到足够长的字符串
if current_offset - start >= 4:
try:
string_value = data[start:current_offset].decode('ascii')
string_addr = section['sh_addr'] + start
strings[string_addr] = string_value
except UnicodeDecodeError:
pass
current_offset += 1
return strings
def build_function_call_graph(self):
"""构建函数调用图"""
if not self.disasm.functions:
self.disasm.identify_functions()
for function in self.disasm.functions:
func_addr = function['start_address']
self.function_graph[func_addr] = {
'calls': [],
'called_by': []
}
# 分析函数内的调用
for inst in function['instructions']:
if inst['type'] == 'call':
for op in inst['operands']:
if op['type'] == capstone.CS_OP_IMM:
target_addr = op['immediate']
self.function_graph[func_addr]['calls'].append(target_addr)
# 更新被调用函数的信息
if target_addr not in self.function_graph:
self.function_graph[target_addr] = {
'calls': [],
'called_by': []
}
self.function_graph[target_addr]['called_by'].append(func_addr)
return self.function_graph
def analyze_data_structures(self):
    """Infer candidate data structures from base+offset memory accesses.

    Groups memory operands by "<base reg>+<displacement>"; any pattern
    touched at least twice is reported with size and type hints.
    """
    # Group accesses by their base-register + displacement signature.
    accesses_by_pattern = {}
    for inst in self.disasm.instructions:
        if inst['type'] != 'memory':
            continue
        for operand in inst['operands']:
            if operand['type'] != capstone.CS_OP_MEM or not operand['memory']:
                continue
            mem = operand['memory']
            # Only base-register accesses with a non-zero displacement
            # suggest a structured layout.
            if mem['base'] == 0 or mem['disp'] == 0:
                continue
            key = f"{self.disasm.cs.reg_name(mem['base'])}+{mem['disp']}"
            accesses_by_pattern.setdefault(key, []).append({
                'instruction': inst,
                # ARM loads start with "ld" (ldr/ldrb/...); everything
                # else is treated as a write.
                'access_type': 'read' if inst['mnemonic'].startswith('ld') else 'write'
            })
    # Promote repeated patterns to data-structure candidates.
    inferred = []
    for pattern, accesses in accesses_by_pattern.items():
        if len(accesses) < 2:  # need at least 2 accesses to qualify
            continue
        inferred.append({
            'pattern': pattern,
            'accesses': accesses,
            'size_hint': max(a['instruction']['operands'][0].get('size', 4)
                             for a in accesses if a['instruction']['operands']),
            'type_hint': self.infer_data_type(accesses)
        })
    self.data_structures = inferred
    return inferred
def infer_data_type(self, accesses):
    """Classify an access pattern by its read/write balance.

    Returns 'const_data' when reads dominate (more than twice the
    writes), 'mutable_data' when writes outnumber reads, and
    'mixed_data' otherwise.
    """
    reads = sum(1 for a in accesses if a['access_type'] == 'read')
    writes = sum(1 for a in accesses if a['access_type'] == 'write')
    if reads > 2 * writes:
        return 'const_data'
    if writes > reads:
        return 'mutable_data'
    return 'mixed_data'
def detect_crypto_patterns(self):
    """Detect hardware-crypto instruction pairs in the disassembly.

    Scans adjacent instruction pairs for ARMv8 Crypto Extension
    sequences and returns one record per match.
    """
    # (first mnemonic, second mnemonic) -> algorithm family
    pair_signatures = {
        ('aese', 'aesmc'): 'AES',        # AES encrypt round
        ('aesd', 'aesimc'): 'AES',       # AES decrypt round
        ('sha1h', 'sha1c'): 'SHA',       # SHA-1
        ('sha256h', 'sha256h2'): 'SHA',  # SHA-256
    }
    hits = []
    insts = self.disasm.instructions
    for first, second in zip(insts, insts[1:]):
        algo = pair_signatures.get((first['mnemonic'], second['mnemonic']))
        if algo is None:
            continue
        hits.append({
            'type': algo,
            'address': first['address'],
            'pattern': [first['mnemonic'], second['mnemonic']],
            'instructions': [first, second]
        })
    return hits
def generate_static_analysis_report(self):
    """Run all static analyses and render a Markdown summary.

    Side effects: populates cross references, string references, the
    function call graph and data-structure candidates on self.

    Returns:
        str: the formatted Markdown report.
    """
    self.build_cross_references()
    self.analyze_string_references()
    self.build_function_call_graph()
    self.analyze_data_structures()
    crypto_patterns = self.detect_crypto_patterns()
    report = "# Native Static Analysis Report\n\n"
    # Cross-reference statistics
    report += "## Cross References\n\n"
    report += f"Total cross references: {len(self.cross_references)}\n\n"
    if self.cross_references:
        report += "Top referenced addresses:\n"
        sorted_refs = sorted(self.cross_references.items(),
                             key=lambda x: len(x[1]), reverse=True)
        for addr, refs in sorted_refs[:10]:
            report += f"- **0x{addr:x}:** {len(refs)} references\n"
        report += "\n"
    # String references
    report += "## String References\n\n"
    report += f"Total string references: {len(self.string_references)}\n\n"
    if self.string_references:
        report += "Strings found:\n"
        for addr, refs in list(self.string_references.items())[:10]:
            string_value = refs[0]['string']
            report += f"- **0x{addr:x}:** \"{string_value}\" ({len(refs)} references)\n"
        report += "\n"
    # Function call graph
    report += "## Function Call Graph\n\n"
    report += f"Total functions: {len(self.function_graph)}\n\n"
    if self.function_graph:
        # Functions with the most callers first
        most_called = sorted(self.function_graph.items(),
                             key=lambda x: len(x[1]['called_by']), reverse=True)
        report += "Most called functions:\n"
        for addr, info in most_called[:5]:
            report += f"- **0x{addr:x}:** called by {len(info['called_by'])} functions\n"
        report += "\n"
    # Data-structure analysis
    report += "## Data Structures\n\n"
    report += f"Potential data structures: {len(self.data_structures)}\n\n"
    # Loop variable renamed from 'struct' to 'ds' — the original name
    # shadowed the 'struct' stdlib module imported at the top of this file.
    for i, ds in enumerate(self.data_structures[:5]):
        report += f"### Structure {i + 1}\n"
        report += f"- **Pattern:** {ds['pattern']}\n"
        report += f"- **Accesses:** {len(ds['accesses'])}\n"
        report += f"- **Type Hint:** {ds['type_hint']}\n\n"
    # Crypto-pattern detection
    report += "## Cryptographic Patterns\n\n"
    if crypto_patterns:
        report += f"Found {len(crypto_patterns)} cryptographic patterns:\n\n"
        for pattern in crypto_patterns:
            report += f"- **{pattern['type']}** at 0x{pattern['address']:x}\n"
    else:
        report += "No cryptographic patterns detected.\n"
    return report
# Usage example
def perform_native_static_analysis(so_path):
    """Run the full static-analysis pipeline on a shared library.

    Parses the ELF, disassembles (a prefix of) its .text section and
    prints a Markdown static-analysis report.

    Returns:
        NativeStaticAnalyzer on success, None when .text is missing.
    """
    with open(so_path, 'rb') as f:
        elf_data = f.read()
    elf_analyzer = ELFAnalyzer(elf_data)
    summary = elf_analyzer.get_analysis_summary()
    # Locate the code section to disassemble.
    text_section = next(
        (s for s in elf_analyzer.sections if s['name'] == '.text'), None)
    if text_section is None:
        print("No .text section found")
        return
    code_offset = text_section['sh_offset']
    code_size = min(text_section['sh_size'], 10000)  # cap analysis size
    code_bytes = elf_data[code_offset:code_offset + code_size]
    # Disassemble; 183 is EM_AARCH64, everything else falls back to arm.
    arch = 'arm64' if elf_analyzer.header['e_machine'] == 183 else 'arm'
    disasm = ARMDisassembler(arch)
    disasm.disassemble(code_bytes, text_section['sh_addr'])
    # Static analysis + report
    static_analyzer = NativeStaticAnalyzer(elf_analyzer, disasm)
    report = static_analyzer.generate_static_analysis_report()
    print("=== ELF Summary ===")
    print(f"Architecture: {summary['file_info']['machine']}")
    print(f"Entry Point: {summary['file_info']['entry_point']}")
    print(f"Exported Functions: {len(summary['exported_functions'])}")
    print("\n=== Static Analysis Report ===")
    print(report)
    return static_analyzer
7.3.2 动态分析准备
class NativeDynamicAnalysisPrep:
"""Prepare native-code dynamic analysis from static-analysis results.

Consumes a NativeStaticAnalyzer and derives hook points, then emits
ready-to-use Frida / GDB scripts and an analysis plan.
"""
def __init__(self, static_analyzer):
# static_analyzer: NativeStaticAnalyzer with elf/disasm results populated.
self.static = static_analyzer
self.hook_points = []
self.trace_points = []
self.breakpoints = []
def identify_hook_points(self):
"""Collect candidate hook points from exports, strings, syscalls and crypto."""
hook_points = []
# 1. Exported functions as hook points
for symbol in self.static.elf.symbols:
if (symbol['bind'] == 1 and # STB_GLOBAL
symbol['type'] == 2 and # STT_FUNC
symbol['st_value'] != 0):
hook_points.append({
'type': 'exported_function',
'name': symbol['name'],
'address': f"0x{symbol['st_value']:x}",
'size': symbol['st_size'],
'reason': 'Exported function - likely API entry point'
})
# 2. String-reference sites as hook points
for addr, refs in self.static.string_references.items():
string_value = refs[0]['string']
# Only strings containing security-sensitive keywords are hooked
sensitive_keywords = [
'password', 'key', 'token', 'secret', 'auth',
'encrypt', 'decrypt', 'hash', 'sign', 'verify',
'http', 'https', 'url', 'api', 'server',
'file', 'read', 'write', 'open', 'close'
]
if any(keyword in string_value.lower() for keyword in sensitive_keywords):
for ref in refs:
hook_points.append({
'type': 'string_reference',
'name': f"string_ref_{addr:x}",
'address': f"0x{ref['from']:x}",
'string': string_value,
'reason': f'References sensitive string: "{string_value}"'
})
# 3. System-call sites
for inst in self.static.disasm.instructions:
if inst['mnemonic'] in ['svc', 'swi']: # system-call instructions
hook_points.append({
'type': 'syscall',
'name': f"syscall_{inst['address']:x}",
'address': f"0x{inst['address']:x}",
'instruction': f"{inst['mnemonic']} {inst['op_str']}",
'reason': 'System call - potential security-relevant operation'
})
# 4. Crypto-algorithm locations
crypto_patterns = self.static.detect_crypto_patterns()
for pattern in crypto_patterns:
hook_points.append({
'type': 'crypto_operation',
'name': f"{pattern['type'].lower()}_{pattern['address']:x}",
'address': f"0x{pattern['address']:x}",
'crypto_type': pattern['type'],
'reason': f'{pattern["type"]} cryptographic operation detected'
})
self.hook_points = hook_points
return hook_points
def generate_frida_script(self):
"""Generate a Frida hook script covering the identified hook points."""
if not self.hook_points:
self.identify_hook_points()
script = """
// Auto-generated Frida script for Native analysis
console.log("[+] Starting Native analysis...");
// Get module base address
var module_base = Module.findBaseAddress("TARGET_MODULE_NAME");
if (!module_base) {
console.log("[-] Target module not found");
Java.perform(function() {
// Module might be loaded later, try again
setTimeout(function() {
module_base = Module.findBaseAddress("TARGET_MODULE_NAME");
if (module_base) {
console.log("[+] Module found at: " + module_base);
setupHooks();
}
}, 1000);
});
} else {
console.log("[+] Module base address: " + module_base);
setupHooks();
}
function setupHooks() {
"""
# Emit one hook stanza per hook point
for hook in self.hook_points:
if hook['type'] == 'exported_function':
script += f"""
// Hook exported function: {hook['name']}
try {{
var func_addr = module_base.add({hook['address']});
Interceptor.attach(func_addr, {{
onEnter: function(args) {{
console.log("[+] Entering {hook['name']} at " + func_addr);
console.log(" Reason: {hook['reason']}");
// Log arguments (adjust based on calling convention)
for (var i = 0; i < 4; i++) {{
console.log(" arg" + i + ": " + args[i]);
}}
// Save context for onLeave
this.start_time = Date.now();
}},
onLeave: function(retval) {{
var duration = Date.now() - this.start_time;
console.log("[+] Leaving {hook['name']}");
console.log(" Return value: " + retval);
console.log(" Duration: " + duration + "ms");
}}
}});
console.log("[+] Hooked {hook['name']} at " + func_addr);
}} catch (e) {{
console.log("[-] Failed to hook {hook['name']}: " + e);
}}
"""
elif hook['type'] == 'string_reference':
script += f"""
// Hook string reference: {hook['string'][:50]}...
try {{
var ref_addr = module_base.add({hook['address']});
Interceptor.attach(ref_addr, {{
onEnter: function(args) {{
console.log("[+] String reference at " + ref_addr);
console.log(" String: {hook['string']}");
console.log(" Reason: {hook['reason']}");
// Dump memory around the reference
console.log(" Memory dump:");
console.log(hexdump(ref_addr, {{length: 64}}));
}}
}});
console.log("[+] Hooked string reference at " + ref_addr);
}} catch (e) {{
console.log("[-] Failed to hook string reference: " + e);
}}
"""
elif hook['type'] == 'crypto_operation':
script += f"""
// Hook crypto operation: {hook['crypto_type']}
try {{
var crypto_addr = module_base.add({hook['address']});
Interceptor.attach(crypto_addr, {{
onEnter: function(args) {{
console.log("[+] Crypto operation ({hook['crypto_type']}) at " + crypto_addr);
console.log(" Reason: {hook['reason']}");
// Log crypto-specific information
console.log(" Registers:");
console.log(" R0: " + this.context.r0);
console.log(" R1: " + this.context.r1);
console.log(" R2: " + this.context.r2);
console.log(" R3: " + this.context.r3);
// Dump potential key/data buffers
if (this.context.r0.toInt32() > 0x1000) {{
console.log(" Buffer at R0:");
console.log(hexdump(this.context.r0, {{length: 32}}));
}}
}}
}});
console.log("[+] Hooked crypto operation at " + crypto_addr);
}} catch (e) {{
console.log("[-] Failed to hook crypto operation: " + e);
}}
"""
script += """
}
// Memory monitoring functions
function monitorMemoryWrites(start_addr, size) {
Memory.protect(start_addr, size, 'r--');
Process.setExceptionHandler(function(details) {
if (details.type === 'access-violation') {
console.log("[+] Memory write detected at: " + details.address);
console.log(" From: " + details.context.pc);
console.log(" Data: " + hexdump(details.address, {length: 16}));
// Restore write permission temporarily
Memory.protect(start_addr, size, 'rw-');
return true;
}
return false;
});
}
// Utility functions
function dumpRegisters(context) {
console.log("Registers:");
console.log(" R0: " + context.r0);
console.log(" R1: " + context.r1);
console.log(" R2: " + context.r2);
console.log(" R3: " + context.r3);
console.log(" SP: " + context.sp);
console.log(" LR: " + context.lr);
console.log(" PC: " + context.pc);
}
console.log("[+] Frida script loaded successfully");
"""
return script
def generate_gdb_script(self):
"""Generate a GDB remote-debugging script with breakpoints on exports."""
if not self.hook_points:
self.identify_hook_points()
script = """# Auto-generated GDB script for Native analysis
set confirm off
set pagination off
# Connect to target
target remote :1234
# Set breakpoints
"""
for hook in self.hook_points:
if hook['type'] == 'exported_function':
script += f"""
# Breakpoint for {hook['name']}
break *{hook['address']}
commands
echo [+] Hit breakpoint at {hook['name']} ({hook['address']})\\n
echo Reason: {hook['reason']}\\n
info registers
x/10i $pc
continue
end
"""
script += """
# Custom commands
define dump_context
echo === Register Context ===\\n
info registers
echo === Stack Dump ===\\n
x/20xw $sp
echo === Code Context ===\\n
x/10i $pc
end
define trace_calls
set logging file trace.log
set logging on
set trace-commands on
continue
end
# Start execution
echo [+] GDB script loaded, starting execution...\\n
continue
"""
return script
def generate_analysis_plan(self):
"""Build a recommended tool/step/expectation plan from the hook-point mix."""
plan = {
'static_analysis_complete': True,
'hook_points_identified': len(self.hook_points),
'recommended_tools': [],
'analysis_steps': [],
'expected_findings': []
}
# Recommended tools, chosen by which hook-point categories are present
if any(h['type'] == 'crypto_operation' for h in self.hook_points):
plan['recommended_tools'].append('Frida (for crypto analysis)')
plan['expected_findings'].append('Cryptographic operations and key material')
if any(h['type'] == 'string_reference' for h in self.hook_points):
plan['recommended_tools'].append('String tracing tools')
plan['expected_findings'].append('Sensitive string usage patterns')
if any(h['type'] == 'exported_function' for h in self.hook_points):
plan['recommended_tools'].append('API monitoring tools')
plan['expected_findings'].append('API call patterns and parameters')
# Analysis steps
plan['analysis_steps'] = [
"1. Load target application/library",
"2. Attach Frida and load generated script",
"3. Exercise application functionality",
"4. Monitor hook point activations",
"5. Analyze captured data and control flow",
"6. Identify security-relevant behaviors",
"7. Generate detailed analysis report"
]
return plan
def generate_prep_report(self):
"""Render a Markdown report of hook points plus the analysis plan."""
self.identify_hook_points()
plan = self.generate_analysis_plan()
report = "# Native Dynamic Analysis Preparation Report\n\n"
# Hook-point statistics by type
hook_types = {}
for hook in self.hook_points:
hook_type = hook['type']
hook_types[hook_type] = hook_types.get(hook_type, 0) + 1
report += "## Hook Points Summary\n\n"
report += f"Total hook points identified: {len(self.hook_points)}\n\n"
for hook_type, count in hook_types.items():
report += f"- **{hook_type}:** {count}\n"
report += "\n"
# Detailed hook-point list per type
report += "## Detailed Hook Points\n\n"
for hook_type in hook_types.keys():
type_hooks = [h for h in self.hook_points if h['type'] == hook_type]
report += f"### {hook_type.replace('_', ' ').title()}\n\n"
for hook in type_hooks[:10]: # show only the first 10
report += f"- **{hook['name']}** at {hook['address']}\n"
report += f" - Reason: {hook['reason']}\n"
if 'string' in hook:
report += f" - String: \"{hook['string']}\"\n"
report += "\n"
if len(type_hooks) > 10:
report += f"... and {len(type_hooks) - 10} more {hook_type} hooks\n\n"
# Analysis plan
report += "## Analysis Plan\n\n"
report += f"**Recommended Tools:**\n"
for tool in plan['recommended_tools']:
report += f"- {tool}\n"
report += "\n"
report += f"**Analysis Steps:**\n"
for step in plan['analysis_steps']:
report += f"{step}\n"
report += "\n"
report += f"**Expected Findings:**\n"
for finding in plan['expected_findings']:
report += f"- {finding}\n"
return report
# Usage example
def prepare_native_dynamic_analysis(so_path):
    """Run static analysis, then emit Frida/GDB scripts and a prep report.

    Writes native_analysis.js and native_analysis.gdb to the current
    working directory and returns the NativeDynamicAnalysisPrep instance.
    """
    # Static analysis first — the prep step consumes its results.
    static_analyzer = perform_native_static_analysis(so_path)
    prep = NativeDynamicAnalysisPrep(static_analyzer)
    report = prep.generate_prep_report()
    print("\n=== Dynamic Analysis Preparation ===")
    print(report)
    # Generate and persist both hook scripts.
    scripts = {
        "native_analysis.js": prep.generate_frida_script(),
        "native_analysis.gdb": prep.generate_gdb_script(),
    }
    for filename, content in scripts.items():
        with open(filename, "w") as out:
            out.write(content)
    print("\n[+] Generated analysis scripts:")
    print(" - native_analysis.js (Frida script)")
    print(" - native_analysis.gdb (GDB script)")
    return prep
7.4 本章小结
本章深入介绍了Native层分析技术:
- ELF文件格式分析:理解ELF文件结构、节头表、符号表、动态链接等
- ARM汇编代码分析:掌握ARM指令集、反汇编技术、函数调用约定
- Native代码逆向技术:学习静态分析、交叉引用、数据结构分析等
- 动态分析准备:识别Hook点、生成分析脚本、制定分析计划
Native层分析是安卓逆向工程的高级技能,需要深入理解底层系统架构和汇编语言。通过这些技术可以分析加密算法、发现隐藏功能、理解性能关键代码。
学习检查清单:
- 理解ELF文件格式和结构
- 掌握ARM汇编指令集基础
- 熟悉ARM调用约定
- 能够进行静态代码分析
- 了解交叉引用和数据流分析
- 掌握加密算法模式识别
- 能够准备动态分析环境
- 会生成Frida和GDB分析脚本
下一章预告:
在下一章中,我们将学习高级静态分析技术,包括代码混淆对抗、自动化分析工具开发、大规模代码分析等高级主题。
243

被折叠的评论
为什么被折叠?



