0基础安卓逆向原理与实践:第7章:Native层分析技术

第7章:Native层分析技术

7.1 ELF文件格式分析

7.1.1 ELF文件结构概述

ELF(Executable and Linkable Format)是Linux系统中可执行文件、目标文件、共享库和核心转储的标准文件格式。Android的Native库(.so文件)也采用ELF格式。

主要Section:
- .text - 代码段
- .data - 已初始化数据
- .bss - 未初始化数据
- .rodata - 只读数据
- .symtab - 符号表
- .strtab - 字符串表
- .dynamic - 动态链接信息

ELF文件结构(按文件中的布局顺序):
- ELF Header
- Program Header Table
- Section 1
- Section 2
- ……
- Section N
- Section Header Table

7.1.2 ELF文件头分析

import struct
from enum import IntEnum

class ELFAnalyzer:
    """In-memory parser for ELF images such as Android native libraries (.so).

    Nothing is parsed until one of the ``parse_*`` methods (or
    ``get_analysis_summary``) is called.  Results are cached on the instance:

    * ``header``   - dict of ELF header fields, ``None`` until parsed
    * ``sections`` - list of section-header dicts
    * ``symbols``  - list of symbol dicts read from SHT_SYMTAB/SHT_DYNSYM sections
    * ``strings``  - ``{table_name: {start_offset: string}}`` per parsed string table
    """

    class ELFClass(IntEnum):
        """Values of ``e_ident[4]`` (file class)."""
        ELFCLASSNONE = 0
        ELFCLASS32 = 1
        ELFCLASS64 = 2

    class ELFData(IntEnum):
        """Values of ``e_ident[5]`` (data encoding)."""
        ELFDATANONE = 0
        ELFDATA2LSB = 1  # Little endian
        ELFDATA2MSB = 2  # Big endian

    class ELFType(IntEnum):
        """Values of ``e_type``."""
        ET_NONE = 0      # No file type
        ET_REL = 1       # Relocatable file
        ET_EXEC = 2      # Executable file
        ET_DYN = 3       # Shared object file
        ET_CORE = 4      # Core file

    class ELFMachine(IntEnum):
        """Values of ``e_machine`` recognised by this analyzer."""
        EM_NONE = 0
        EM_386 = 3       # Intel 80386
        EM_ARM = 40      # ARM
        EM_X86_64 = 62   # AMD x86-64
        EM_AARCH64 = 183 # ARM 64-bit

    # Header sizes of the two ELF classes, in bytes.
    _ELF32_HEADER_SIZE = 52
    _ELF64_HEADER_SIZE = 64

    _HEADER_FIELDS = (
        'e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry',
        'e_phoff', 'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize',
        'e_phnum', 'e_shentsize', 'e_shnum', 'e_shstrndx',
    )

    _SECTION_FIELDS = (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
    )

    _SECTION_TYPE_NAMES = {
        0: "SHT_NULL",
        1: "SHT_PROGBITS",
        2: "SHT_SYMTAB",
        3: "SHT_STRTAB",
        4: "SHT_RELA",
        5: "SHT_HASH",
        6: "SHT_DYNAMIC",
        7: "SHT_NOTE",
        8: "SHT_NOBITS",
        9: "SHT_REL",
        10: "SHT_SHLIB",
        11: "SHT_DYNSYM",
        0x70000001: "SHT_ARM_EXIDX",
        0x70000003: "SHT_ARM_ATTRIBUTES",
    }

    _SYMBOL_BIND_NAMES = {
        0: "STB_LOCAL",
        1: "STB_GLOBAL",
        2: "STB_WEAK",
    }

    _SYMBOL_TYPE_NAMES = {
        0: "STT_NOTYPE",
        1: "STT_OBJECT",
        2: "STT_FUNC",
        3: "STT_SECTION",
        4: "STT_FILE",
        5: "STT_COMMON",
        6: "STT_TLS",
    }

    def __init__(self, elf_data):
        """Store the raw image; ``elf_data`` is the complete file content as bytes."""
        self.data = elf_data
        self.header = None
        self.is_64bit = False
        self.is_little_endian = True
        self.sections = []
        self.symbols = []
        self.strings = {}
        # Raw bytes of every parsed string table, keyed like ``self.strings``.
        # Needed to resolve offsets that point into the middle of a stored
        # string (linkers share common suffixes between names).
        self._raw_string_tables = {}

    def _endian_prefix(self):
        """Return the ``struct`` byte-order prefix matching the parsed header."""
        return '<' if self.is_little_endian else '>'

    def parse_header(self):
        """Parse the ELF file header and cache it in ``self.header``.

        Returns:
            dict with the raw ``e_*`` fields plus the derived keys ``class``,
            ``data``, ``type_name`` and ``machine_name``.

        Raises:
            ValueError: the data is shorter than an ELF header of its class,
                or does not start with the ELF magic number.
        """
        # The ELF32 header is the smallest valid header; the class-specific
        # length is re-checked below once the class is known.
        if len(self.data) < self._ELF32_HEADER_SIZE:
            raise ValueError("File too small to be a valid ELF file")

        if self.data[:4] != b'\x7fELF':
            raise ValueError("Invalid ELF magic number")

        self.is_64bit = (self.data[4] == self.ELFClass.ELFCLASS64)
        self.is_little_endian = (self.data[5] == self.ELFData.ELFDATA2LSB)
        endian = self._endian_prefix()

        if self.is_64bit:
            header_format = f'{endian}16s H H I Q Q Q I H H H H H H'
            header_size = self._ELF64_HEADER_SIZE
        else:
            header_format = f'{endian}16s H H I I I I I H H H H H H'
            header_size = self._ELF32_HEADER_SIZE

        if len(self.data) < header_size:
            raise ValueError("File too small to be a valid ELF file")

        header = dict(zip(self._HEADER_FIELDS,
                          struct.unpack_from(header_format, self.data, 0)))

        # Human-readable companions of the raw fields
        header['class'] = 'ELF64' if self.is_64bit else 'ELF32'
        header['data'] = 'Little Endian' if self.is_little_endian else 'Big Endian'
        header['type_name'] = self.get_type_name(header['e_type'])
        header['machine_name'] = self.get_machine_name(header['e_machine'])

        self.header = header
        return self.header

    def get_type_name(self, e_type):
        """Return a readable name for an ``e_type`` value."""
        type_names = {
            self.ELFType.ET_NONE: "None",
            self.ELFType.ET_REL: "Relocatable",
            self.ELFType.ET_EXEC: "Executable",
            self.ELFType.ET_DYN: "Shared Object",
            self.ELFType.ET_CORE: "Core File",
        }
        return type_names.get(e_type, f"Unknown ({e_type})")

    def get_machine_name(self, e_machine):
        """Return a readable name for an ``e_machine`` value."""
        machine_names = {
            self.ELFMachine.EM_NONE: "None",
            self.ELFMachine.EM_386: "Intel 80386",
            self.ELFMachine.EM_ARM: "ARM",
            self.ELFMachine.EM_X86_64: "AMD x86-64",
            self.ELFMachine.EM_AARCH64: "ARM 64-bit",
        }
        return machine_names.get(e_machine, f"Unknown ({e_machine})")

    def parse_section_headers(self):
        """Parse the section header table into ``self.sections``.

        The header is parsed first if that has not happened yet.  Each call
        rebuilds the list from scratch, so calling it repeatedly is safe.
        Every section dict receives a ``name`` key; when the name cannot be
        resolved it is the ``<unknown_N>`` placeholder from ``get_string``.

        Returns:
            The list of section dicts (empty when ``e_shnum`` is 0).
        """
        if not self.header:
            self.parse_header()

        # Start from a fresh list so a second call does not duplicate entries.
        self.sections = []

        if self.header['e_shnum'] == 0:
            return []

        endian = self._endian_prefix()
        if self.is_64bit:
            sh_format = f'{endian}I I Q Q Q Q I I Q Q'
            sh_size = 64
        else:
            sh_format = f'{endian}I I I I I I I I I I'
            sh_size = 40

        table_offset = self.header['e_shoff']

        for index in range(self.header['e_shnum']):
            offset = table_offset + index * sh_size

            # Stop at the first entry that would run past the end of the file
            if offset + sh_size > len(self.data):
                break

            section = {'index': index}
            section.update(zip(self._SECTION_FIELDS,
                               struct.unpack_from(sh_format, self.data, offset)))
            section['type_name'] = self.get_section_type_name(section['sh_type'])
            self.sections.append(section)

        # Section names live in the string table selected by e_shstrndx
        shstrndx = self.header['e_shstrndx']
        if shstrndx < len(self.sections):
            self.parse_string_table(self.sections[shstrndx], 'section_names')

        # Always attach a name so later consumers can rely on the key even
        # when e_shstrndx is missing or invalid.
        for section in self.sections:
            section['name'] = self.get_string('section_names', section['sh_name'])

        return self.sections

    def get_section_type_name(self, sh_type):
        """Return the ``SHT_*`` name for a section type value."""
        return self._SECTION_TYPE_NAMES.get(sh_type, f"Unknown ({sh_type})")

    def parse_string_table(self, section, table_name):
        """Index the string table stored in ``section`` under ``table_name``.

        ``self.strings[table_name]`` maps the start offset of every non-empty,
        NUL-terminated string to its decoded text.  Nothing is stored when the
        section is empty or extends beyond the file.
        """
        if section['sh_size'] == 0:
            return

        offset = section['sh_offset']
        size = section['sh_size']

        if offset + size > len(self.data):
            return

        string_data = self.data[offset:offset + size]

        strings = {}
        start = 0
        while start < len(string_data):
            end = string_data.find(b'\x00', start)
            if end == -1:
                # Trailing bytes without a terminator are not a valid string
                break

            if end > start:
                strings[start] = string_data[start:end].decode('utf-8', errors='replace')

            start = end + 1

        self.strings[table_name] = strings
        self._raw_string_tables[table_name] = string_data

    def get_string(self, table_name, offset):
        """Return the string at ``offset`` in a previously parsed string table.

        Offsets may point into the middle of a stored string; the text from
        that offset up to the next NUL byte is returned in that case.  Unknown
        tables, out-of-range offsets, unterminated strings and offsets that
        resolve to an empty string yield the placeholder ``<unknown_{offset}>``.
        """
        table = self.strings.get(table_name)
        if table is not None and offset in table:
            return table[offset]

        raw = self._raw_string_tables.get(table_name)
        if raw is not None and 0 <= offset < len(raw):
            end = raw.find(b'\x00', offset)
            if end > offset:
                return raw[offset:end].decode('utf-8', errors='replace')

        return f"<unknown_{offset}>"

    def parse_symbol_table(self):
        """Parse every SHT_SYMTAB and SHT_DYNSYM section into ``self.symbols``.

        Section headers are parsed first if needed.  The symbol list is rebuilt
        on every call, so repeated calls do not duplicate entries.

        Returns:
            The list of symbol dicts.
        """
        if not self.sections:
            self.parse_section_headers()

        self.symbols = []

        for section in self.sections:
            if section['sh_type'] in (2, 11):  # SHT_SYMTAB or SHT_DYNSYM
                self.parse_single_symbol_table(section)

        return self.symbols

    def parse_single_symbol_table(self, symtab_section):
        """Append the symbols of one symbol-table section to ``self.symbols``.

        The section's ``sh_link`` selects the string table holding the symbol
        names; the section is skipped when it is empty or ``sh_link`` is not a
        valid section index.
        """
        if symtab_section['sh_size'] == 0:
            return

        if symtab_section['sh_link'] >= len(self.sections):
            return

        strtab_section = self.sections[symtab_section['sh_link']]
        strtab_name = f"symtab_strings_{symtab_section['index']}"
        self.parse_string_table(strtab_section, strtab_name)

        endian = self._endian_prefix()

        # Field order differs between the 32-bit and 64-bit symbol layouts
        if self.is_64bit:
            sym_format = f'{endian}I B B H Q Q'
            sym_size = 24
            field_names = ('st_name', 'st_info', 'st_other', 'st_shndx',
                           'st_value', 'st_size')
        else:
            sym_format = f'{endian}I I I B B H'
            sym_size = 16
            field_names = ('st_name', 'st_value', 'st_size', 'st_info',
                           'st_other', 'st_shndx')

        base = symtab_section['sh_offset']
        num_symbols = symtab_section['sh_size'] // sym_size

        for i in range(num_symbols):
            sym_offset = base + i * sym_size

            if sym_offset + sym_size > len(self.data):
                break

            symbol = dict(zip(field_names,
                              struct.unpack_from(sym_format, self.data, sym_offset)))

            # st_info packs binding (high nibble) and type (low nibble)
            symbol['bind'] = (symbol['st_info'] >> 4) & 0xf
            symbol['type'] = symbol['st_info'] & 0xf
            symbol['visibility'] = symbol['st_other'] & 0x3

            symbol['name'] = self.get_string(strtab_name, symbol['st_name'])
            symbol['bind_name'] = self.get_symbol_bind_name(symbol['bind'])
            symbol['type_name'] = self.get_symbol_type_name(symbol['type'])

            self.symbols.append(symbol)

    def get_symbol_bind_name(self, bind):
        """Return the ``STB_*`` name for a symbol binding value."""
        return self._SYMBOL_BIND_NAMES.get(bind, f"Unknown ({bind})")

    def get_symbol_type_name(self, sym_type):
        """Return the ``STT_*`` name for a symbol type value."""
        return self._SYMBOL_TYPE_NAMES.get(sym_type, f"Unknown ({sym_type})")

    def get_analysis_summary(self):
        """Return a nested dict summarising header, sections and symbols.

        Anything not parsed yet is parsed on demand.  ``exported_functions``
        lists at most the first 20 global function symbols that are defined in
        this file.
        """
        if not self.header:
            self.parse_header()
        if not self.sections:
            self.parse_section_headers()
        if not self.symbols:
            self.parse_symbol_table()

        code_sections = [s for s in self.sections if s['name'] in ('.text', '.init', '.fini')]
        data_sections = [s for s in self.sections if s['name'] in ('.data', '.rodata', '.bss')]
        function_symbols = [s for s in self.symbols if s['type'] == 2]  # STT_FUNC

        return {
            'file_info': {
                'class': self.header['class'],
                'data': self.header['data'],
                'type': self.header['type_name'],
                'machine': self.header['machine_name'],
                'entry_point': f"0x{self.header['e_entry']:x}"
            },
            'sections': {
                'total': len(self.sections),
                'code_sections': len(code_sections),
                'data_sections': len(data_sections)
            },
            'symbols': {
                'total': len(self.symbols),
                'functions': len(function_symbols)
            },
            'code_sections_info': [
                {
                    'name': s['name'],
                    'address': f"0x{s['sh_addr']:x}",
                    'size': s['sh_size'],
                    'offset': s['sh_offset']
                } for s in code_sections
            ],
            'exported_functions': [
                {
                    'name': s['name'],
                    'address': f"0x{s['st_value']:x}",
                    'size': s['st_size']
                } for s in function_symbols
                # STB_GLOBAL and defined here: an undefined (imported) symbol
                # is marked by st_shndx == SHN_UNDEF (0), not by its value.
                if s['bind'] == 1 and s['st_shndx'] != 0
            ][:20]  # Only the first 20 are reported
        }

# 使用示例
def analyze_so_file(so_path):
    """Read the ELF file at ``so_path``, print its analysis summary, return the analyzer.

    The summary comes from ``ELFAnalyzer.get_analysis_summary`` and is written
    to stdout; the returned ``ELFAnalyzer`` instance holds the parsed data.
    """
    with open(so_path, 'rb') as so_file:
        raw_image = so_file.read()

    analyzer = ELFAnalyzer(raw_image)
    summary = analyzer.get_analysis_summary()

    file_info = summary['file_info']
    section_stats = summary['sections']
    symbol_stats = summary['symbols']

    # Fixed part of the report, one entry per printed line
    lines = [
        "=== ELF Analysis Summary ===",
        f"File: {so_path}",
        f"Class: {file_info['class']}",
        f"Data: {file_info['data']}",
        f"Type: {file_info['type']}",
        f"Machine: {file_info['machine']}",
        f"Entry Point: {file_info['entry_point']}",
        f"\nSections: {section_stats['total']}",
        f"Code Sections: {section_stats['code_sections']}",
        f"Data Sections: {section_stats['data_sections']}",
        f"\nSymbols: {symbol_stats['total']}",
        f"Functions: {symbol_stats['functions']}",
        "\nExported Functions:",
    ]
    # One indented line per exported function
    lines.extend(
        f"  {func['name']} @ {func['address']} (size: {func['size']})"
        for func in summary['exported_functions']
    )

    for line in lines:
        print(line)

    return analyzer

7.1.3 动态链接分析

class DynamicLinkingAnalyzer:
    """Inspect the dynamic-linking data of an ELF image.

    Works on an ``ELFAnalyzer`` whose section headers have already been
    parsed: it reads ``elf.sections``, ``elf.data``, ``elf.is_64bit`` and
    ``elf.is_little_endian``, and uses ``elf.parse_string_table`` /
    ``elf.get_string`` to resolve library names.
    """

    def __init__(self, elf_analyzer):
        """Bind the analyzer to ``elf_analyzer`` and start with empty results."""
        self.elf = elf_analyzer
        self.dynamic_section = None
        self.dynamic_entries = []
        self.needed_libraries = []
        self.imported_symbols = []
        self.exported_symbols = []

    def find_dynamic_section(self):
        """Return the first SHT_DYNAMIC section (also cached), or ``None``."""
        for section in self.elf.sections:
            if section['sh_type'] == 6:  # SHT_DYNAMIC
                self.dynamic_section = section
                return section
        return None

    def parse_dynamic_entries(self):
        """Parse the dynamic section into ``self.dynamic_entries``.

        Parsing stops after the DT_NULL terminator (which is included) or at
        the first entry that would run past the end of the file.  The list is
        rebuilt on every call, so repeated calls do not duplicate entries.

        Returns:
            List of ``{'d_tag', 'd_val', 'tag_name'}`` dicts; empty when the
            file has no dynamic section.
        """
        if not self.dynamic_section:
            self.find_dynamic_section()

        if not self.dynamic_section:
            return []

        endian = '<' if self.elf.is_little_endian else '>'

        if self.elf.is_64bit:
            entry_format = f'{endian}Q Q'
            entry_size = 16
        else:
            entry_format = f'{endian}I I'
            entry_size = 8

        base = self.dynamic_section['sh_offset']
        num_entries = self.dynamic_section['sh_size'] // entry_size

        entries = []
        for i in range(num_entries):
            entry_offset = base + i * entry_size

            if entry_offset + entry_size > len(self.elf.data):
                break

            d_tag, d_val = struct.unpack_from(entry_format, self.elf.data, entry_offset)
            entries.append({
                'd_tag': d_tag,
                'd_val': d_val,
                'tag_name': self.get_dynamic_tag_name(d_tag)
            })

            # DT_NULL terminates the dynamic array
            if d_tag == 0:
                break

        self.dynamic_entries = entries
        return self.dynamic_entries

    def get_dynamic_tag_name(self, tag):
        """Return the ``DT_*`` name for a dynamic tag value."""
        tag_names = {
            0: "DT_NULL",
            1: "DT_NEEDED",
            2: "DT_PLTRELSZ",
            3: "DT_PLTGOT",
            4: "DT_HASH",
            5: "DT_STRTAB",
            6: "DT_SYMTAB",
            7: "DT_RELA",
            8: "DT_RELASZ",
            9: "DT_RELAENT",
            10: "DT_STRSZ",
            11: "DT_SYMENT",
            12: "DT_INIT",
            13: "DT_FINI",
            14: "DT_SONAME",
            15: "DT_RPATH",
            16: "DT_SYMBOLIC",
            17: "DT_REL",
            18: "DT_RELSZ",
            19: "DT_RELENT",
            20: "DT_PLTREL",
            21: "DT_DEBUG",
            22: "DT_TEXTREL",
            23: "DT_JMPREL",
            24: "DT_BIND_NOW",
            25: "DT_INIT_ARRAY",
            26: "DT_FINI_ARRAY",
            27: "DT_INIT_ARRAYSZ",
            28: "DT_FINI_ARRAYSZ"
        }
        return tag_names.get(tag, f"Unknown ({tag})")

    def extract_needed_libraries(self):
        """Resolve every DT_NEEDED entry to a library name.

        The dynamic string table is located by matching the DT_STRTAB address
        against the ``sh_addr`` of the parsed sections.  The result list is
        rebuilt on every call.

        Returns:
            List of library names; empty when there is no DT_STRTAB entry or
            no section starts at that address.
        """
        if not self.dynamic_entries:
            self.parse_dynamic_entries()

        # Rebuild from scratch so repeated calls do not accumulate duplicates
        self.needed_libraries = []

        strtab_addr = next(
            (e['d_val'] for e in self.dynamic_entries if e['d_tag'] == 5),  # DT_STRTAB
            None,
        )
        if not strtab_addr:
            return []

        strtab_section = next(
            (s for s in self.elf.sections if s['sh_addr'] == strtab_addr),
            None,
        )
        if not strtab_section:
            return []

        self.elf.parse_string_table(strtab_section, 'dynamic_strings')

        # For DT_NEEDED, d_val is an offset into the dynamic string table
        self.needed_libraries = [
            self.elf.get_string('dynamic_strings', e['d_val'])
            for e in self.dynamic_entries
            if e['d_tag'] == 1  # DT_NEEDED
        ]

        return self.needed_libraries

    def analyze_relocations(self):
        """Return the relocations of all SHT_RELA and SHT_REL sections."""
        relocations = []

        for section in self.elf.sections:
            if section['sh_type'] in (4, 9):  # SHT_RELA or SHT_REL
                relocations.extend(self.parse_relocation_section(section))

        return relocations

    def parse_relocation_section(self, section):
        """Parse one relocation section.

        Returns:
            List of dicts with ``r_offset``, ``r_info``, ``r_addend`` (0 for
            SHT_REL sections), and the ``type`` and ``symbol`` index decoded
            from ``r_info``.
        """
        relocations = []

        is_rela = (section['sh_type'] == 4)  # SHT_RELA
        endian = '<' if self.elf.is_little_endian else '>'

        if self.elf.is_64bit:
            if is_rela:
                rel_format = f'{endian}Q Q q'
                rel_size = 24
            else:
                rel_format = f'{endian}Q Q'
                rel_size = 16
        else:
            if is_rela:
                rel_format = f'{endian}I I i'
                rel_size = 12
            else:
                rel_format = f'{endian}I I'
                rel_size = 8

        base = section['sh_offset']
        num_relocs = section['sh_size'] // rel_size

        for i in range(num_relocs):
            rel_offset = base + i * rel_size

            if rel_offset + rel_size > len(self.elf.data):
                break

            rel_data = struct.unpack_from(rel_format, self.elf.data, rel_offset)
            r_info = rel_data[1]

            # r_info packs symbol index and relocation type differently per class:
            #   ELF64: symbol = r_info >> 32, type = low 32 bits
            #   ELF32: symbol = r_info >> 8,  type = low 8 bits
            if self.elf.is_64bit:
                rel_type = r_info & 0xffffffff
                sym_index = r_info >> 32
            else:
                rel_type = r_info & 0xff
                sym_index = r_info >> 8

            relocations.append({
                'r_offset': rel_data[0],
                'r_info': r_info,
                'r_addend': rel_data[2] if is_rela else 0,
                'type': rel_type,
                'symbol': sym_index
            })

        return relocations

    def generate_dynamic_analysis_report(self):
        """Build a Markdown report of needed libraries, dynamic entries and relocations.

        Re-parses the dynamic section and library list on every call.  At most
        20 dynamic entries and 10 relocations are listed individually.
        """
        self.parse_dynamic_entries()
        self.extract_needed_libraries()
        relocations = self.analyze_relocations()

        parts = ["# Dynamic Linking Analysis Report\n\n"]

        # Needed libraries
        parts.append("## Needed Libraries\n\n")
        if self.needed_libraries:
            parts.extend(f"- {lib}\n" for lib in self.needed_libraries)
        else:
            parts.append("No library dependencies found.\n")
        parts.append("\n")

        # Dynamic entries (first 20 only)
        parts.append("## Dynamic Entries\n\n")
        parts.extend(
            f"- **{entry['tag_name']}:** 0x{entry['d_val']:x}\n"
            for entry in self.dynamic_entries[:20]
        )
        if len(self.dynamic_entries) > 20:
            parts.append(f"... and {len(self.dynamic_entries) - 20} more entries\n")
        parts.append("\n")

        # Relocations (first 10 only)
        parts.append("## Relocations\n\n")
        parts.append(f"Total relocations: {len(relocations)}\n")

        if relocations:
            parts.append("\nFirst 10 relocations:\n")
            parts.extend(
                f"{i+1}. Offset: 0x{rel['r_offset']:x}, Type: {rel['type']}, Symbol: {rel['symbol']}\n"
                for i, rel in enumerate(relocations[:10])
            )

        return "".join(parts)

# 使用示例
def analyze_dynamic_linking(so_path):
    """Print the dynamic-linking report for the ELF file at ``so_path``.

    The file is parsed with ``ELFAnalyzer`` (header and section headers), the
    report is produced by ``DynamicLinkingAnalyzer`` and written to stdout.

    Returns:
        The ``DynamicLinkingAnalyzer`` that produced the report.
    """
    with open(so_path, 'rb') as so_file:
        raw_image = so_file.read()

    # The dynamic analyzer relies on the section headers being parsed already
    parsed_elf = ELFAnalyzer(raw_image)
    parsed_elf.parse_header()
    parsed_elf.parse_section_headers()

    linking = DynamicLinkingAnalyzer(parsed_elf)
    print(linking.generate_dynamic_analysis_report())

    return linking

7.2 ARM汇编代码分析

7.2.1 ARM指令集深入分析

import struct
import capstone

class ARMDisassembler:
    """Capstone-based disassembler with simple function and control-flow analysis.

    ``disassemble`` fills ``self.instructions`` with one dict per decoded
    instruction; ``identify_functions``, ``analyze_control_flow`` and
    ``generate_disassembly_report`` work on that list.
    """

    # Instruction categories after which the next instruction starts a new
    # basic block.
    _BLOCK_ENDING_TYPES = ('jump', 'branch', 'call', 'return')

    def __init__(self, architecture='arm', mode='arm'):
        """Create the Capstone engine.

        Args:
            architecture: 'arm' or 'arm64'; any value other than 'arm64'
                selects the 32-bit ARM engine.
            mode: 'arm', 'thumb' or 'arm64'; only 'thumb' changes the engine
                mode, and only for the 32-bit ARM architecture.
        """
        self.arch = architecture
        self.mode = mode

        if architecture == 'arm64':
            self.cs = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM)
        elif mode == 'thumb':
            self.cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
        else:
            self.cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)

        # Detail mode is required to read operands and instruction groups
        self.cs.detail = True

        self.instructions = []
        self.functions = []
        self.basic_blocks = []

    @staticmethod
    def _format_imm(value):
        """Format an integer as ``#0x..`` with the sign in front of the prefix."""
        sign = '-' if value < 0 else ''
        return f"#{sign}0x{abs(value):x}"

    def disassemble(self, code_bytes, base_address=0x1000):
        """Disassemble ``code_bytes`` as if loaded at ``base_address``.

        Replaces ``self.instructions`` and returns it.  Each entry holds
        ``address``, ``mnemonic``, ``op_str``, ``bytes``, ``size``, ``groups``,
        the analysed ``operands`` and the ``type`` from ``classify_instruction``.
        """
        self.instructions = []

        for instruction in self.cs.disasm(code_bytes, base_address):
            inst_info = {
                'address': instruction.address,
                'mnemonic': instruction.mnemonic,
                'op_str': instruction.op_str,
                'bytes': instruction.bytes,
                'size': instruction.size,
                'groups': instruction.groups,
                'operands': []
            }

            if instruction.operands:
                inst_info['operands'] = [
                    self.analyze_operand(op) for op in instruction.operands
                ]

            inst_info['type'] = self.classify_instruction(instruction)

            self.instructions.append(inst_info)

        return self.instructions

    def analyze_operand(self, operand):
        """Convert a Capstone operand into a plain dict.

        Exactly one of ``register``, ``immediate`` or ``memory`` is filled for
        register, immediate and memory operands; ``value`` is a printable form.
        Other operand kinds keep all of them as ``None``.
        """
        op_info = {
            'type': operand.type,
            'value': None,
            'register': None,
            'immediate': None,
            'memory': None
        }

        if operand.type == capstone.CS_OP_REG:
            op_info['register'] = operand.reg
            op_info['value'] = self.cs.reg_name(operand.reg)
        elif operand.type == capstone.CS_OP_IMM:
            op_info['immediate'] = operand.imm
            op_info['value'] = self._format_imm(operand.imm)
        elif operand.type == capstone.CS_OP_MEM:
            mem = operand.mem
            op_info['memory'] = {
                'base': mem.base,
                'index': mem.index,
                'disp': mem.disp
            }
            # Printable form "[base, index, #disp]", omitting parts equal to 0
            mem_str = "["
            if mem.base != 0:
                mem_str += self.cs.reg_name(mem.base)
            if mem.index != 0:
                mem_str += f", {self.cs.reg_name(mem.index)}"
            if mem.disp != 0:
                mem_str += f", {self._format_imm(mem.disp)}"
            mem_str += "]"
            op_info['value'] = mem_str

        return op_info

    def classify_instruction(self, instruction):
        """Return a coarse category for a Capstone instruction.

        Capstone groups are checked first (jump, call, return, relative
        branch); otherwise the mnemonic decides between 'memory',
        'arithmetic', 'data_transfer', 'comparison' and 'other'.
        """
        groups = instruction.groups
        mnemonic = instruction.mnemonic

        if capstone.CS_GRP_JUMP in groups:
            return 'jump'
        if capstone.CS_GRP_CALL in groups:
            return 'call'
        if capstone.CS_GRP_RET in groups:
            return 'return'
        if capstone.CS_GRP_BRANCH_RELATIVE in groups:
            return 'branch'
        if mnemonic.startswith(('ld', 'st')):
            return 'memory'
        if mnemonic in ('add', 'sub', 'mul', 'div', 'and', 'orr', 'eor'):
            return 'arithmetic'
        if mnemonic in ('mov', 'mvn'):
            return 'data_transfer'
        if mnemonic.startswith(('cmp', 'tst')):
            return 'comparison'
        return 'other'

    def identify_functions(self):
        """Split ``self.instructions`` into functions using a simple heuristic.

        A new function is opened at a 'call' instruction that directly follows
        a 'jump' or 'return' instruction; instructions seen before the first
        such point belong to no function.  Stores the result in
        ``self.functions`` and returns it.
        """
        functions = []
        current_function = None

        for i, inst in enumerate(self.instructions):
            starts_function = (
                inst['type'] == 'call' and i > 0 and
                self.instructions[i - 1]['type'] in ('jump', 'return')
            )
            if starts_function:
                # Close the function that was being collected
                if current_function:
                    current_function['end_address'] = self.instructions[i - 1]['address']
                    functions.append(current_function)

                current_function = {
                    'start_address': inst['address'],
                    'instructions': [],
                    'calls': [],
                    'returns': []
                }

            if current_function:
                current_function['instructions'].append(inst)

                if inst['type'] == 'call':
                    current_function['calls'].append(inst)
                elif inst['type'] == 'return':
                    current_function['returns'].append(inst)

        # The last open function ends at the last instruction
        if current_function:
            current_function['end_address'] = self.instructions[-1]['address']
            functions.append(current_function)

        self.functions = functions
        return functions

    def analyze_control_flow(self):
        """Partition ``self.instructions`` into basic blocks.

        Block leaders are the first instruction, every instruction following a
        jump, branch, call or return, and every jump/branch target given as an
        immediate that matches an instruction address.  ``successors`` and
        ``predecessors`` are created empty.  Stores the blocks in
        ``self.basic_blocks`` and returns them.
        """
        instructions = self.instructions

        # One pass to map addresses to indices instead of rescanning the
        # instruction list for every branch target.
        index_by_address = {}
        for idx, inst in enumerate(instructions):
            index_by_address.setdefault(inst['address'], idx)

        leaders = {0}  # The first instruction is always a leader

        for i, inst in enumerate(instructions):
            kind = inst['type']
            if kind not in self._BLOCK_ENDING_TYPES:
                continue

            # Whatever follows a control transfer starts a new block
            if i + 1 < len(instructions):
                leaders.add(i + 1)

            # Statically known targets start a block as well
            if kind in ('jump', 'branch'):
                for op in inst['operands']:
                    if op['type'] == capstone.CS_OP_IMM:
                        target_idx = index_by_address.get(op['immediate'])
                        if target_idx is not None:
                            leaders.add(target_idx)

        basic_blocks = []
        sorted_leaders = sorted(leaders)

        for pos, start_idx in enumerate(sorted_leaders):
            if pos + 1 < len(sorted_leaders):
                end_idx = sorted_leaders[pos + 1] - 1
            else:
                end_idx = len(instructions) - 1

            # Guards the empty-instruction case, where leader 0 does not exist
            if start_idx < len(instructions):
                basic_blocks.append({
                    'id': len(basic_blocks),
                    'start_address': instructions[start_idx]['address'],
                    'end_address': instructions[end_idx]['address'],
                    'instructions': instructions[start_idx:end_idx + 1],
                    'successors': [],
                    'predecessors': []
                })

        self.basic_blocks = basic_blocks
        return basic_blocks

    def generate_disassembly_report(self):
        """Return a Markdown report for the current instruction list.

        Runs ``identify_functions`` and ``analyze_control_flow`` first.  The
        report lists per-category instruction counts, up to 10 functions and
        the first 50 instructions.
        """
        self.identify_functions()
        self.analyze_control_flow()

        parts = [
            "# ARM Disassembly Report\n\n",
            f"**Architecture:** {self.arch}\n",
            f"**Mode:** {self.mode}\n",
            f"**Instructions:** {len(self.instructions)}\n",
            f"**Functions:** {len(self.functions)}\n",
            f"**Basic Blocks:** {len(self.basic_blocks)}\n\n",
        ]

        # Instruction statistics, most frequent category first
        inst_types = {}
        for inst in self.instructions:
            inst_types[inst['type']] = inst_types.get(inst['type'], 0) + 1

        parts.append("## Instruction Statistics\n\n")
        for inst_type, count in sorted(inst_types.items(), key=lambda x: x[1], reverse=True):
            parts.append(f"- **{inst_type}:** {count}\n")
        parts.append("\n")

        # Function details (first 10 only)
        if self.functions:
            parts.append("## Functions\n\n")
            for i, func in enumerate(self.functions[:10]):
                parts.append(f"### Function {i + 1}\n")
                parts.append(f"- **Start:** 0x{func['start_address']:x}\n")
                parts.append(f"- **End:** 0x{func['end_address']:x}\n")
                parts.append(f"- **Instructions:** {len(func['instructions'])}\n")
                parts.append(f"- **Calls:** {len(func['calls'])}\n")
                parts.append(f"- **Returns:** {len(func['returns'])}\n\n")

        # Listing (first 50 instructions only)
        parts.append("## Disassembly (First 50 Instructions)\n\n")
        parts.append("```assembly\n")
        for inst in self.instructions[:50]:
            parts.append(f"0x{inst['address']:08x}: {inst['mnemonic']} {inst['op_str']}\n")

        if len(self.instructions) > 50:
            parts.append(f"... and {len(self.instructions) - 50} more instructions\n")

        parts.append("```\n")

        return "".join(parts)

# 使用示例
def disassemble_arm_code(code_bytes, arch='arm', mode='arm'):
    """Disassemble ``code_bytes``, print the resulting report and return the disassembler.

    ``arch`` and ``mode`` are passed straight to ``ARMDisassembler``.
    """
    engine = ARMDisassembler(arch, mode)
    # The decoded instructions are kept on the engine itself
    engine.disassemble(code_bytes)

    print(engine.generate_disassembly_report())

    return engine

# Sample ARM (32-bit ARM mode, little-endian) machine code for the disassembler.
# Addresses in the comments assume disassemble()'s default base_address of 0x1000.
arm_code = bytes([
    0x04, 0x00, 0x2d, 0xe9,  # push {r2}
    0x00, 0x00, 0x90, 0xe5,  # ldr r0, [r0]
    0x01, 0x10, 0x81, 0xe2,  # add r1, r1, #1
    0x00, 0x00, 0x51, 0xe3,  # cmp r1, #0
    0x02, 0x00, 0x00, 0x1a,  # bne 0x1020 (0x1010 + 8 + (2 << 2); beyond the end of this snippet)
    0x04, 0x00, 0xbd, 0xe8,  # pop {r2}
    0x1e, 0xff, 0x2f, 0xe1   # bx lr
])

# Runs at module level: disassembles the sample and prints the report.
disasm = disassemble_arm_code(arm_code)

7.2.2 函数调用约定分析

class ARMCallingConventionAnalyzer:
    """Heuristic analyzer for ARM procedure-call conventions.

    Operates on the function and instruction dictionaries exposed by the
    disassembler handed to the constructor and describes, per function,
    the prologue, the epilogue and the call sites in terms of the register
    roles of AAPCS (32-bit ARM) or AAPCS64 (AArch64).
    """

    def __init__(self, disassembler):
        """Remember the disassembler and pick the convention from its arch.

        Args:
            disassembler: Object exposing ``arch`` and ``functions`` (and
                ``identify_functions()``, used for lazy function
                discovery).  ``arch == 'arm'`` selects ``'AAPCS'``; any
                other value selects ``'AAPCS64'``.
        """
        self.disasm = disassembler
        self.calling_conventions = {
            'AAPCS': {
                'param_registers': ['r0', 'r1', 'r2', 'r3'],
                'return_register': 'r0',
                'callee_saved': ['r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'r11'],
                'caller_saved': ['r0', 'r1', 'r2', 'r3', 'r12'],
                'stack_pointer': 'sp',
                'frame_pointer': 'r11',
                'link_register': 'lr'
            },
            'AAPCS64': {
                'param_registers': ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'],
                'return_register': 'x0',
                'callee_saved': ['x19', 'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26', 'x27', 'x28'],
                'caller_saved': ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16', 'x17', 'x18'],
                'stack_pointer': 'sp',
                'frame_pointer': 'x29',
                'link_register': 'x30'
            }
        }

        self.current_convention = 'AAPCS' if disassembler.arch == 'arm' else 'AAPCS64'

    def analyze_function_prologue(self, function):
        """Inspect the first ten instructions of ``function`` for prologue idioms.

        Args:
            function: Function dictionary with an ``'instructions'`` list.

        Returns:
            dict with ``stack_allocation`` (immediate of the last
            ``sub sp, ..., #imm`` seen, otherwise 0), ``saved_registers``
            (callee-saved registers that appear as register operands of
            ``push``/``str``/``stp``), ``frame_setup`` (True once a
            ``mov <frame pointer>, <stack pointer>`` is seen) and
            ``instructions`` (the scanned window).
        """
        prologue_info = {
            'stack_allocation': 0,
            'saved_registers': [],
            'frame_setup': False,
            'instructions': []
        }

        convention = self.calling_conventions[self.current_convention]

        for inst in function['instructions'][:10]:
            prologue_info['instructions'].append(inst)

            # Stack allocation: sub sp, sp, #imm
            if (inst['mnemonic'] == 'sub' and
                    len(inst['operands']) >= 3 and
                    inst['operands'][0]['value'] == convention['stack_pointer']):
                if inst['operands'][2]['type'] == capstone.CS_OP_IMM:
                    prologue_info['stack_allocation'] = inst['operands'][2]['immediate']

            # Register spills: collect the callee-saved registers stored.
            elif inst['mnemonic'] in ['push', 'str', 'stp']:
                for op in inst['operands']:
                    if op['type'] == capstone.CS_OP_REG:
                        reg_name = op['value']
                        if reg_name in convention['callee_saved']:
                            prologue_info['saved_registers'].append(reg_name)

            # Frame pointer setup: mov fp, sp
            elif (inst['mnemonic'] == 'mov' and
                  len(inst['operands']) >= 2 and
                  inst['operands'][0]['value'] == convention['frame_pointer'] and
                  inst['operands'][1]['value'] == convention['stack_pointer']):
                prologue_info['frame_setup'] = True

        return prologue_info

    def analyze_function_epilogue(self, function):
        """Inspect the last ten instructions of ``function`` for epilogue idioms.

        Args:
            function: Function dictionary with an ``'instructions'`` list.

        Returns:
            dict with ``stack_deallocation`` (immediate of the last
            ``add sp, ..., #imm`` seen, otherwise 0), ``restored_registers``
            (callee-saved registers that appear as register operands of
            ``pop``/``ldr``/``ldp``), ``return_instruction`` (the last
            instruction whose ``'type'`` is ``'return'``, or ``None``) and
            ``instructions`` (the scanned window).
        """
        epilogue_info = {
            'stack_deallocation': 0,
            'restored_registers': [],
            'return_instruction': None,
            'instructions': []
        }

        convention = self.calling_conventions[self.current_convention]

        for inst in function['instructions'][-10:]:
            epilogue_info['instructions'].append(inst)

            # Stack release: add sp, sp, #imm
            if (inst['mnemonic'] == 'add' and
                    len(inst['operands']) >= 3 and
                    inst['operands'][0]['value'] == convention['stack_pointer']):
                if inst['operands'][2]['type'] == capstone.CS_OP_IMM:
                    epilogue_info['stack_deallocation'] = inst['operands'][2]['immediate']

            # Register reloads: collect the callee-saved registers restored.
            elif inst['mnemonic'] in ['pop', 'ldr', 'ldp']:
                for op in inst['operands']:
                    if op['type'] == capstone.CS_OP_REG:
                        reg_name = op['value']
                        if reg_name in convention['callee_saved']:
                            epilogue_info['restored_registers'].append(reg_name)

            # Deliberately NOT part of the elif chain above: an instruction
            # can restore registers and return at the same time (for
            # example a pop that also loads pc), and it must still be
            # reported as the return instruction when it is typed 'return'.
            if inst['type'] == 'return':
                epilogue_info['return_instruction'] = inst

        return epilogue_info

    def analyze_function_calls(self, function):
        """Describe every call site inside ``function``.

        For each instruction typed ``'call'`` the target is taken from its
        immediate operand (formatted as hex) or register operand, and the
        up-to-ten preceding instructions are scanned for ``mov``/``ldr``/
        ``add`` whose first operand is a parameter register.

        Returns:
            list of dicts with ``instruction``, ``target``, ``arguments``
            (``{'register', 'instruction'}`` entries) and
            ``preparation_instructions``.
        """
        calls_info = []

        convention = self.calling_conventions[self.current_convention]
        instructions = function['instructions']

        for i, inst in enumerate(instructions):
            if inst['type'] != 'call':
                continue

            call_info = {
                'instruction': inst,
                'target': None,
                'arguments': [],
                'preparation_instructions': []
            }

            # Call target: the last immediate/register operand wins.
            if inst['operands']:
                for op in inst['operands']:
                    if op['type'] == capstone.CS_OP_IMM:
                        call_info['target'] = f"0x{op['immediate']:x}"
                    elif op['type'] == capstone.CS_OP_REG:
                        call_info['target'] = op['value']

            # Argument setup: look back over the preceding instructions.
            for prep_inst in instructions[max(0, i - 10):i]:
                if (prep_inst['mnemonic'] in ['mov', 'ldr', 'add'] and
                        prep_inst['operands'] and
                        prep_inst['operands'][0]['value'] in convention['param_registers']):
                    call_info['arguments'].append({
                        'register': prep_inst['operands'][0]['value'],
                        'instruction': prep_inst
                    })
                    call_info['preparation_instructions'].append(prep_inst)

            calls_info.append(call_info)

        return calls_info

    def generate_calling_convention_report(self):
        """Build a Markdown report for the first five functions.

        Triggers ``identify_functions()`` on the disassembler when it has
        no functions yet, then lists the register roles of the active
        convention followed by prologue, epilogue and call-site details
        for each analyzed function.

        Returns:
            str: The report text.
        """
        if not self.disasm.functions:
            self.disasm.identify_functions()

        convention = self.calling_conventions[self.current_convention]

        parts = [
            "# ARM Calling Convention Analysis\n\n",
            f"**Convention:** {self.current_convention}\n\n",
            "## Calling Convention Details\n\n",
            f"- **Parameter Registers:** {', '.join(convention['param_registers'])}\n",
            f"- **Return Register:** {convention['return_register']}\n",
            f"- **Callee Saved:** {', '.join(convention['callee_saved'])}\n",
            f"- **Stack Pointer:** {convention['stack_pointer']}\n",
            f"- **Frame Pointer:** {convention['frame_pointer']}\n",
            f"- **Link Register:** {convention['link_register']}\n\n",
        ]

        # Only the first five functions are analyzed to keep the report short.
        for i, function in enumerate(self.disasm.functions[:5]):
            parts.append(f"## Function {i + 1} Analysis\n\n")
            parts.append(
                f"**Address:** 0x{function['start_address']:x} - 0x{function['end_address']:x}\n\n"
            )

            prologue = self.analyze_function_prologue(function)
            parts.append("### Prologue\n")
            parts.append(f"- **Stack Allocation:** {prologue['stack_allocation']} bytes\n")
            parts.append(f"- **Saved Registers:** {', '.join(prologue['saved_registers'])}\n")
            parts.append(f"- **Frame Setup:** {'Yes' if prologue['frame_setup'] else 'No'}\n\n")

            epilogue = self.analyze_function_epilogue(function)
            return_inst = epilogue['return_instruction']
            return_name = return_inst['mnemonic'] if return_inst else 'None'
            parts.append("### Epilogue\n")
            parts.append(f"- **Stack Deallocation:** {epilogue['stack_deallocation']} bytes\n")
            parts.append(f"- **Restored Registers:** {', '.join(epilogue['restored_registers'])}\n")
            parts.append(f"- **Return Instruction:** {return_name}\n\n")

            calls = self.analyze_function_calls(function)
            if calls:
                parts.append("### Function Calls\n")
                for j, call in enumerate(calls):
                    parts.append(f"#### Call {j + 1}\n")
                    parts.append(f"- **Target:** {call['target']}\n")
                    parts.append(f"- **Arguments:** {len(call['arguments'])}\n")
                    for arg in call['arguments']:
                        parts.append(
                            f"  - {arg['register']}: {arg['instruction']['mnemonic']} {arg['instruction']['op_str']}\n"
                        )
                    parts.append("\n")

        return ''.join(parts)

# 使用示例
def analyze_calling_convention(so_path):
    """Run the calling-convention analysis on the ``.text`` of a shared object.

    Reads the file at ``so_path``, parses its ELF header and section
    headers, disassembles the ``.text`` section and prints the report
    produced by ``ARMCallingConventionAnalyzer``.

    Returns:
        The analyzer instance, or ``None`` (after printing a message) when
        the file has no ``.text`` section.
    """
    with open(so_path, 'rb') as so_file:
        elf_data = so_file.read()

    elf_analyzer = ELFAnalyzer(elf_data)
    elf_analyzer.parse_header()
    elf_analyzer.parse_section_headers()

    # First section named ".text", if any.
    text_section = next(
        (candidate for candidate in elf_analyzer.sections
         if candidate['name'] == '.text'),
        None,
    )
    if not text_section:
        print("No .text section found")
        return None

    # Slice the section's bytes out of the file image.
    begin = text_section['sh_offset']
    end = begin + text_section['sh_size']
    code_bytes = elf_data[begin:end]

    # e_machine 183 is EM_AARCH64; everything else is treated as 32-bit ARM.
    if elf_analyzer.header['e_machine'] == 183:
        arch = 'arm64'
    else:
        arch = 'arm'
    disasm = ARMDisassembler(arch)
    disasm.disassemble(code_bytes, text_section['sh_addr'])

    cc_analyzer = ARMCallingConventionAnalyzer(disasm)
    print(cc_analyzer.generate_calling_convention_report())
    return cc_analyzer

7.3 Native代码逆向技术

7.3.1 静态分析技术

class NativeStaticAnalyzer:
    """Static analysis passes over a parsed ELF image and its disassembly.

    Combines the section/symbol view of ``elf_analyzer`` with the
    instruction/function view of ``disassembler`` to derive cross
    references, string references, a call graph, repeated memory-access
    patterns and crypto-instruction sightings.
    """

    def __init__(self, elf_analyzer, disassembler):
        """Store both inputs and start with empty result containers."""
        self.elf = elf_analyzer
        self.disasm = disassembler
        self.cross_references = {}
        self.string_references = {}
        self.function_graph = {}
        self.data_structures = []

    def build_cross_references(self):
        """Map every immediate branch/call target to the instructions using it.

        The result is rebuilt from scratch on each call, so running the
        pass again does not duplicate entries.

        Returns:
            dict: ``target address -> [{'from', 'type', 'instruction'}, ...]``.
        """
        references = {}
        for inst in self.disasm.instructions:
            if inst['type'] not in ('call', 'jump', 'branch'):
                continue
            for op in inst['operands']:
                if op['type'] != capstone.CS_OP_IMM:
                    continue
                references.setdefault(op['immediate'], []).append({
                    'from': inst['address'],
                    'type': inst['type'],
                    'instruction': inst
                })

        self.cross_references = references
        return self.cross_references

    def analyze_string_references(self):
        """Find ``ldr``/``adr``/``mov`` immediates that equal a string address.

        Strings are extracted from the ``.rodata``, ``.data`` and
        ``.rodata.str1.1`` sections.  Only an immediate equal to the start
        address of an extracted string counts as a reference.  The result
        is rebuilt from scratch on each call.

        Returns:
            dict: ``string address -> [{'from', 'instruction', 'string'}, ...]``.
        """
        strings = {}
        for section in self.elf.sections:
            if section['name'] in ('.rodata', '.data', '.rodata.str1.1'):
                strings.update(self.extract_strings_from_section(section))

        references = {}
        for inst in self.disasm.instructions:
            if inst['mnemonic'] not in ('ldr', 'adr', 'mov'):
                continue
            for op in inst['operands']:
                if op['type'] != capstone.CS_OP_IMM:
                    continue
                addr = op['immediate']
                if addr in strings:
                    references.setdefault(addr, []).append({
                        'from': inst['address'],
                        'instruction': inst,
                        'string': strings[addr]
                    })

        self.string_references = references
        return self.string_references

    def extract_strings_from_section(self, section):
        """Collect printable-ASCII runs of length >= 4 from ``section``.

        Args:
            section: Section dict with ``sh_size``, ``sh_offset`` and
                ``sh_addr``; the bytes are read from ``self.elf.data``.

        Returns:
            dict: ``virtual address of the run -> decoded text``.
        """
        strings = {}

        if section['sh_size'] == 0:
            return strings

        offset = section['sh_offset']
        data = self.elf.data[offset:offset + section['sh_size']]
        length = len(data)

        position = 0
        while position < length:
            start = position
            # Advance over bytes in the printable ASCII range 0x20..0x7E.
            while position < length and 32 <= data[position] <= 126:
                position += 1

            if position - start >= 4:
                # Every byte is in 32..126, so the ASCII decode cannot fail.
                strings[section['sh_addr'] + start] = data[start:position].decode('ascii')

            # Skip the byte that ended the run (or the non-printable byte).
            position += 1

        return strings

    def build_function_call_graph(self):
        """Build caller/callee edges from immediate call targets.

        Triggers ``identify_functions()`` on the disassembler when it has
        no functions yet.  The graph is rebuilt from scratch on each call.
        A node created earlier as somebody's call target is reused when
        that function itself is visited, so its ``called_by`` list is kept.

        Returns:
            dict: ``address -> {'calls': [...], 'called_by': [...]}``.
        """
        if not self.disasm.functions:
            self.disasm.identify_functions()

        graph = {}
        for function in self.disasm.functions:
            func_addr = function['start_address']
            # setdefault, not assignment: the node may already exist because
            # an earlier function calls this one.
            node = graph.setdefault(func_addr, {'calls': [], 'called_by': []})

            for inst in function['instructions']:
                if inst['type'] != 'call':
                    continue
                for op in inst['operands']:
                    if op['type'] != capstone.CS_OP_IMM:
                        continue
                    target_addr = op['immediate']
                    node['calls'].append(target_addr)
                    callee = graph.setdefault(
                        target_addr, {'calls': [], 'called_by': []})
                    callee['called_by'].append(func_addr)

        self.function_graph = graph
        return self.function_graph

    def analyze_data_structures(self):
        """Group memory instructions by ``base register + displacement``.

        Only operands with a non-zero base and a non-zero displacement are
        considered; a pattern seen at least twice is reported.

        Returns:
            list of dicts with ``pattern``, ``accesses``, ``size_hint``
            and ``type_hint``; also stored in ``self.data_structures``.
        """
        memory_accesses = {}

        for inst in self.disasm.instructions:
            if inst['type'] != 'memory':
                continue
            # Mnemonics starting with "ld" are loads; everything else is
            # counted as a write.
            access_type = 'read' if inst['mnemonic'].startswith('ld') else 'write'
            for op in inst['operands']:
                if op['type'] != capstone.CS_OP_MEM or not op['memory']:
                    continue
                mem = op['memory']
                if mem['base'] == 0 or mem['disp'] == 0:
                    continue

                base_reg = self.disasm.cs.reg_name(mem['base'])
                key = f"{base_reg}+{mem['disp']}"
                memory_accesses.setdefault(key, []).append({
                    'instruction': inst,
                    'access_type': access_type
                })

        data_structures = [
            {
                'pattern': access_pattern,
                'accesses': accesses,
                'size_hint': max(
                    acc['instruction']['operands'][0].get('size', 4)
                    for acc in accesses
                    if acc['instruction']['operands']
                ),
                'type_hint': self.infer_data_type(accesses)
            }
            for access_pattern, accesses in memory_accesses.items()
            if len(accesses) >= 2
        ]

        self.data_structures = data_structures
        return data_structures

    def infer_data_type(self, accesses):
        """Classify a group of accesses by its read/write ratio.

        Returns ``'const_data'`` when reads outnumber writes by more than
        two to one, ``'mutable_data'`` when writes outnumber reads, and
        ``'mixed_data'`` otherwise.
        """
        read_count = sum(1 for acc in accesses if acc['access_type'] == 'read')
        write_count = sum(1 for acc in accesses if acc['access_type'] == 'write')

        if read_count > write_count * 2:
            return 'const_data'
        if write_count > read_count:
            return 'mutable_data'
        return 'mixed_data'

    def detect_crypto_patterns(self):
        """Report adjacent instruction pairs that match AES/SHA mnemonics.

        Returns:
            list of dicts with ``type`` (``'AES'`` or ``'SHA'``),
            ``address`` (of the first instruction), ``pattern`` and
            ``instructions`` (the matching pair).
        """
        # (label, [first mnemonic, second mnemonic]); AES entries come first
        # so matches are emitted in the same order for a given position.
        known_pairs = [
            ('AES', ['aese', 'aesmc']),       # AES encryption round
            ('AES', ['aesd', 'aesimc']),      # AES decryption round
            ('SHA', ['sha1h', 'sha1c']),      # SHA-1
            ('SHA', ['sha256h', 'sha256h2']), # SHA-256
        ]

        crypto_patterns = []
        instructions = self.disasm.instructions
        for first, second in zip(instructions, instructions[1:]):
            for label, pattern in known_pairs:
                if first['mnemonic'] == pattern[0] and second['mnemonic'] == pattern[1]:
                    crypto_patterns.append({
                        'type': label,
                        'address': first['address'],
                        'pattern': pattern,
                        'instructions': [first, second]
                    })

        return crypto_patterns

    def generate_static_analysis_report(self):
        """Run every analysis pass and render the results as Markdown.

        Returns:
            str: The report text.
        """
        self.build_cross_references()
        self.analyze_string_references()
        self.build_function_call_graph()
        self.analyze_data_structures()
        crypto_patterns = self.detect_crypto_patterns()

        parts = ["# Native Static Analysis Report\n\n"]

        # Cross references: the count is the number of distinct targets.
        parts.append("## Cross References\n\n")
        parts.append(f"Total cross references: {len(self.cross_references)}\n\n")

        if self.cross_references:
            parts.append("Top referenced addresses:\n")
            sorted_refs = sorted(self.cross_references.items(),
                                 key=lambda item: len(item[1]), reverse=True)
            for addr, refs in sorted_refs[:10]:
                parts.append(f"- **0x{addr:x}:** {len(refs)} references\n")
            parts.append("\n")

        # String references
        parts.append("## String References\n\n")
        parts.append(f"Total string references: {len(self.string_references)}\n\n")

        if self.string_references:
            parts.append("Strings found:\n")
            for addr, refs in list(self.string_references.items())[:10]:
                string_value = refs[0]['string']
                parts.append(
                    f"- **0x{addr:x}:** \"{string_value}\" ({len(refs)} references)\n"
                )
            parts.append("\n")

        # Function call graph
        parts.append("## Function Call Graph\n\n")
        parts.append(f"Total functions: {len(self.function_graph)}\n\n")

        if self.function_graph:
            most_called = sorted(self.function_graph.items(),
                                 key=lambda item: len(item[1]['called_by']),
                                 reverse=True)
            parts.append("Most called functions:\n")
            for addr, info in most_called[:5]:
                parts.append(
                    f"- **0x{addr:x}:** called by {len(info['called_by'])} functions\n"
                )
            parts.append("\n")

        # Data structures
        parts.append("## Data Structures\n\n")
        parts.append(f"Potential data structures: {len(self.data_structures)}\n\n")

        for i, struct in enumerate(self.data_structures[:5]):
            parts.append(f"### Structure {i + 1}\n")
            parts.append(f"- **Pattern:** {struct['pattern']}\n")
            parts.append(f"- **Accesses:** {len(struct['accesses'])}\n")
            parts.append(f"- **Type Hint:** {struct['type_hint']}\n\n")

        # Cryptographic patterns
        parts.append("## Cryptographic Patterns\n\n")
        if crypto_patterns:
            parts.append(f"Found {len(crypto_patterns)} cryptographic patterns:\n\n")
            for pattern in crypto_patterns:
                parts.append(f"- **{pattern['type']}** at 0x{pattern['address']:x}\n")
        else:
            parts.append("No cryptographic patterns detected.\n")

        return ''.join(parts)

# 使用示例
def perform_native_static_analysis(so_path):
    """Run the full static-analysis pipeline on a shared object and print it.

    Reads the file at ``so_path``, obtains the ELF summary, disassembles at
    most the first 10000 bytes of ``.text`` and prints the ELF summary
    followed by the ``NativeStaticAnalyzer`` report.

    Returns:
        The ``NativeStaticAnalyzer`` instance, or ``None`` (after printing
        a message) when the file has no ``.text`` section.
    """
    with open(so_path, 'rb') as so_file:
        elf_data = so_file.read()

    elf_analyzer = ELFAnalyzer(elf_data)
    summary = elf_analyzer.get_analysis_summary()

    # First section named ".text", if any.
    text_section = next(
        (candidate for candidate in elf_analyzer.sections
         if candidate['name'] == '.text'),
        None,
    )
    if not text_section:
        print("No .text section found")
        return None

    # Cap the amount of code analyzed to keep the run short.
    begin = text_section['sh_offset']
    end = begin + min(text_section['sh_size'], 10000)
    code_bytes = elf_data[begin:end]

    # e_machine 183 is EM_AARCH64; everything else is treated as 32-bit ARM.
    if elf_analyzer.header['e_machine'] == 183:
        arch = 'arm64'
    else:
        arch = 'arm'
    disasm = ARMDisassembler(arch)
    disasm.disassemble(code_bytes, text_section['sh_addr'])

    static_analyzer = NativeStaticAnalyzer(elf_analyzer, disasm)
    report = static_analyzer.generate_static_analysis_report()

    file_info = summary['file_info']
    print("=== ELF Summary ===")
    print(f"Architecture: {file_info['machine']}")
    print(f"Entry Point: {file_info['entry_point']}")
    print(f"Exported Functions: {len(summary['exported_functions'])}")

    print("\n=== Static Analysis Report ===")
    print(report)

    return static_analyzer

7.3.2 动态分析准备

class NativeDynamicAnalysisPrep:
    """Native动态分析准备"""
    
    def __init__(self, static_analyzer):
        self.static = static_analyzer
        self.hook_points = []
        self.trace_points = []
        self.breakpoints = []
    
    def identify_hook_points(self):
        """识别Hook点"""
        hook_points = []
        
        # 1. 导出函数作为Hook点
        for symbol in self.static.elf.symbols:
            if (symbol['bind'] == 1 and  # STB_GLOBAL
                symbol['type'] == 2 and  # STT_FUNC
                symbol['st_value'] != 0):
                
                hook_points.append({
                    'type': 'exported_function',
                    'name': symbol['name'],
                    'address': f"0x{symbol['st_value']:x}",
                    'size': symbol['st_size'],
                    'reason': 'Exported function - likely API entry point'
                })
        
        # 2. 字符串引用点作为Hook点
        for addr, refs in self.static.string_references.items():
            string_value = refs[0]['string']
            
            # 关注敏感字符串
            sensitive_keywords = [
                'password', 'key', 'token', 'secret', 'auth',
                'encrypt', 'decrypt', 'hash', 'sign', 'verify',
                'http', 'https', 'url', 'api', 'server',
                'file', 'read', 'write', 'open', 'close'
            ]
            
            if any(keyword in string_value.lower() for keyword in sensitive_keywords):
                for ref in refs:
                    hook_points.append({
                        'type': 'string_reference',
                        'name': f"string_ref_{addr:x}",
                        'address': f"0x{ref['from']:x}",
                        'string': string_value,
                        'reason': f'References sensitive string: "{string_value}"'
                    })
        
        # 3. 系统调用点
        for inst in self.static.disasm.instructions:
            if inst['mnemonic'] in ['svc', 'swi']:  # 系统调用指令
                hook_points.append({
                    'type': 'syscall',
                    'name': f"syscall_{inst['address']:x}",
                    'address': f"0x{inst['address']:x}",
                    'instruction': f"{inst['mnemonic']} {inst['op_str']}",
                    'reason': 'System call - potential security-relevant operation'
                })
        
        # 4. 加密算法相关
        crypto_patterns = self.static.detect_crypto_patterns()
        for pattern in crypto_patterns:
            hook_points.append({
                'type': 'crypto_operation',
                'name': f"{pattern['type'].lower()}_{pattern['address']:x}",
                'address': f"0x{pattern['address']:x}",
                'crypto_type': pattern['type'],
                'reason': f'{pattern["type"]} cryptographic operation detected'
            })
        
        self.hook_points = hook_points
        return hook_points
    
    def generate_frida_script(self):
        """生成Frida Hook脚本"""
        if not self.hook_points:
            self.identify_hook_points()
        
        script = """
// Auto-generated Frida script for Native analysis
console.log("[+] Starting Native analysis...");

// Get module base address
var module_base = Module.findBaseAddress("TARGET_MODULE_NAME");
if (!module_base) {
    console.log("[-] Target module not found");
    Java.perform(function() {
        // Module might be loaded later, try again
        setTimeout(function() {
            module_base = Module.findBaseAddress("TARGET_MODULE_NAME");
            if (module_base) {
                console.log("[+] Module found at: " + module_base);
                setupHooks();
            }
        }, 1000);
    });
} else {
    console.log("[+] Module base address: " + module_base);
    setupHooks();
}

function setupHooks() {
"""
        
        # 添加Hook点
        for hook in self.hook_points:
            if hook['type'] == 'exported_function':
                script += f"""
    // Hook exported function: {hook['name']}
    try {{
        var func_addr = module_base.add({hook['address']});
        Interceptor.attach(func_addr, {{
            onEnter: function(args) {{
                console.log("[+] Entering {hook['name']} at " + func_addr);
                console.log("    Reason: {hook['reason']}");
                
                // Log arguments (adjust based on calling convention)
                for (var i = 0; i < 4; i++) {{
                    console.log("    arg" + i + ": " + args[i]);
                }}
                
                // Save context for onLeave
                this.start_time = Date.now();
            }},
            onLeave: function(retval) {{
                var duration = Date.now() - this.start_time;
                console.log("[+] Leaving {hook['name']}");
                console.log("    Return value: " + retval);
                console.log("    Duration: " + duration + "ms");
            }}
        }});
        console.log("[+] Hooked {hook['name']} at " + func_addr);
    }} catch (e) {{
        console.log("[-] Failed to hook {hook['name']}: " + e);
    }}
"""
            
            elif hook['type'] == 'string_reference':
                script += f"""
    // Hook string reference: {hook['string'][:50]}...
    try {{
        var ref_addr = module_base.add({hook['address']});
        Interceptor.attach(ref_addr, {{
            onEnter: function(args) {{
                console.log("[+] String reference at " + ref_addr);
                console.log("    String: {hook['string']}");
                console.log("    Reason: {hook['reason']}");
                
                // Dump memory around the reference
                console.log("    Memory dump:");
                console.log(hexdump(ref_addr, {{length: 64}}));
            }}
        }});
        console.log("[+] Hooked string reference at " + ref_addr);
    }} catch (e) {{
        console.log("[-] Failed to hook string reference: " + e);
    }}
"""
            
            elif hook['type'] == 'crypto_operation':
                script += f"""
    // Hook crypto operation: {hook['crypto_type']}
    try {{
        var crypto_addr = module_base.add({hook['address']});
        Interceptor.attach(crypto_addr, {{
            onEnter: function(args) {{
                console.log("[+] Crypto operation ({hook['crypto_type']}) at " + crypto_addr);
                console.log("    Reason: {hook['reason']}");
                
                // Log crypto-specific information
                console.log("    Registers:");
                console.log("    R0: " + this.context.r0);
                console.log("    R1: " + this.context.r1);
                console.log("    R2: " + this.context.r2);
                console.log("    R3: " + this.context.r3);
                
                // Dump potential key/data buffers
                if (this.context.r0.toInt32() > 0x1000) {{
                    console.log("    Buffer at R0:");
                    console.log(hexdump(this.context.r0, {{length: 32}}));
                }}
            }}
        }});
        console.log("[+] Hooked crypto operation at " + crypto_addr);
    }} catch (e) {{
        console.log("[-] Failed to hook crypto operation: " + e);
    }}
"""
        
        script += """
}

// Memory monitoring functions
function monitorMemoryWrites(start_addr, size) {
    Memory.protect(start_addr, size, 'r--');
    
    Process.setExceptionHandler(function(details) {
        if (details.type === 'access-violation') {
            console.log("[+] Memory write detected at: " + details.address);
            console.log("    From: " + details.context.pc);
            console.log("    Data: " + hexdump(details.address, {length: 16}));
            
            // Restore write permission temporarily
            Memory.protect(start_addr, size, 'rw-');
            return true;
        }
        return false;
    });
}

// Utility functions
function dumpRegisters(context) {
    console.log("Registers:");
    console.log("  R0: " + context.r0);
    console.log("  R1: " + context.r1);
    console.log("  R2: " + context.r2);
    console.log("  R3: " + context.r3);
    console.log("  SP: " + context.sp);
    console.log("  LR: " + context.lr);
    console.log("  PC: " + context.pc);
}

console.log("[+] Frida script loaded successfully");
"""
        
        return script
    
    def generate_gdb_script(self):
        """生成GDB调试脚本"""
        if not self.hook_points:
            self.identify_hook_points()
        
        script = """# Auto-generated GDB script for Native analysis
set confirm off
set pagination off

# Connect to target
target remote :1234

# Set breakpoints
"""
        
        for hook in self.hook_points:
            if hook['type'] == 'exported_function':
                script += f"""
# Breakpoint for {hook['name']}
break *{hook['address']}
commands
    echo [+] Hit breakpoint at {hook['name']} ({hook['address']})\\n
    echo Reason: {hook['reason']}\\n
    info registers
    x/10i $pc
    continue
end
"""
        
        script += """
# Custom commands
define dump_context
    echo === Register Context ===\\n
    info registers
    echo === Stack Dump ===\\n
    x/20xw $sp
    echo === Code Context ===\\n
    x/10i $pc
end

define trace_calls
    set logging file trace.log
    set logging on
    set trace-commands on
    continue
end

# Start execution
echo [+] GDB script loaded, starting execution...\\n
continue
"""
        
        return script
    
    def generate_analysis_plan(self):
        """生成分析计划"""
        plan = {
            'static_analysis_complete': True,
            'hook_points_identified': len(self.hook_points),
            'recommended_tools': [],
            'analysis_steps': [],
            'expected_findings': []
        }
        
        # 推荐工具
        if any(h['type'] == 'crypto_operation' for h in self.hook_points):
            plan['recommended_tools'].append('Frida (for crypto analysis)')
            plan['expected_findings'].append('Cryptographic operations and key material')
        
        if any(h['type'] == 'string_reference' for h in self.hook_points):
            plan['recommended_tools'].append('String tracing tools')
            plan['expected_findings'].append('Sensitive string usage patterns')
        
        if any(h['type'] == 'exported_function' for h in self.hook_points):
            plan['recommended_tools'].append('API monitoring tools')
            plan['expected_findings'].append('API call patterns and parameters')
        
        # 分析步骤
        plan['analysis_steps'] = [
            "1. Load target application/library",
            "2. Attach Frida and load generated script",
            "3. Exercise application functionality",
            "4. Monitor hook point activations",
            "5. Analyze captured data and control flow",
            "6. Identify security-relevant behaviors",
            "7. Generate detailed analysis report"
        ]
        
        return plan
    
    def generate_prep_report(self):
        """生成准备报告"""
        self.identify_hook_points()
        plan = self.generate_analysis_plan()
        
        report = "# Native Dynamic Analysis Preparation Report\n\n"
        
        # Hook点统计
        hook_types = {}
        for hook in self.hook_points:
            hook_type = hook['type']
            hook_types[hook_type] = hook_types.get(hook_type, 0) + 1
        
        report += "## Hook Points Summary\n\n"
        report += f"Total hook points identified: {len(self.hook_points)}\n\n"
        
        for hook_type, count in hook_types.items():
            report += f"- **{hook_type}:** {count}\n"
        report += "\n"
        
        # 详细Hook点列表
        report += "## Detailed Hook Points\n\n"
        
        for hook_type in hook_types.keys():
            type_hooks = [h for h in self.hook_points if h['type'] == hook_type]
            report += f"### {hook_type.replace('_', ' ').title()}\n\n"
            
            for hook in type_hooks[:10]:  # 只显示前10个
                report += f"- **{hook['name']}** at {hook['address']}\n"
                report += f"  - Reason: {hook['reason']}\n"
                if 'string' in hook:
                    report += f"  - String: \"{hook['string']}\"\n"
                report += "\n"
            
            if len(type_hooks) > 10:
                report += f"... and {len(type_hooks) - 10} more {hook_type} hooks\n\n"
        
        # 分析计划
        report += "## Analysis Plan\n\n"
        report += f"**Recommended Tools:**\n"
        for tool in plan['recommended_tools']:
            report += f"- {tool}\n"
        report += "\n"
        
        report += f"**Analysis Steps:**\n"
        for step in plan['analysis_steps']:
            report += f"{step}\n"
        report += "\n"
        
        report += f"**Expected Findings:**\n"
        for finding in plan['expected_findings']:
            report += f"- {finding}\n"
        
        return report

# 使用示例
def prepare_native_dynamic_analysis(so_path):
    """Run static analysis on a native library and emit dynamic-analysis scripts.

    Prints the preparation report to stdout, then writes the generated Frida
    script to ``native_analysis.js`` and the generated GDB script to
    ``native_analysis.gdb`` in the current working directory.

    Args:
        so_path: value passed straight to ``perform_native_static_analysis``.

    Returns:
        The ``NativeDynamicAnalysisPrep`` instance used to build the report
        and both scripts.
    """
    # Static analysis first; its result seeds the dynamic-analysis prep.
    static_analyzer = perform_native_static_analysis(so_path)

    prep = NativeDynamicAnalysisPrep(static_analyzer)
    report = prep.generate_prep_report()

    print("\n=== Dynamic Analysis Preparation ===")
    print(report)

    frida_script = prep.generate_frida_script()
    gdb_script = prep.generate_gdb_script()

    # Write with an explicit UTF-8 encoding: hook names, reasons and strings
    # are interpolated into both scripts, and a non-ASCII character would
    # otherwise be encoded with (or rejected by) the platform's locale codec.
    with open("native_analysis.js", "w", encoding="utf-8") as f:
        f.write(frida_script)

    with open("native_analysis.gdb", "w", encoding="utf-8") as f:
        f.write(gdb_script)

    print("\n[+] Generated analysis scripts:")
    print("  - native_analysis.js (Frida script)")
    print("  - native_analysis.gdb (GDB script)")

    return prep

7.4 本章小结

本章深入介绍了Native层分析技术:

  1. ELF文件格式分析:理解ELF文件结构、节头表、符号表、动态链接等
  2. ARM汇编代码分析:掌握ARM指令集、反汇编技术、函数调用约定
  3. Native代码逆向技术:学习静态分析、交叉引用、数据结构分析等
  4. 动态分析准备:识别Hook点、生成分析脚本、制定分析计划

Native层分析是安卓逆向工程的高级技能,需要深入理解底层系统架构和汇编语言。通过这些技术可以分析加密算法、发现隐藏功能、理解性能关键代码。

学习检查清单

  • 理解ELF文件格式和结构
  • 掌握ARM汇编指令集基础
  • 熟悉ARM调用约定
  • 能够进行静态代码分析
  • 了解交叉引用和数据流分析
  • 掌握加密算法模式识别
  • 能够准备动态分析环境
  • 会生成Frida和GDB分析脚本

下一章预告
在下一章中,我们将学习高级静态分析技术,包括代码混淆对抗、自动化分析工具开发、大规模代码分析等高级主题。

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

THMAIL

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值