0基础安卓逆向原理与实践:第6章:Java层静态分析

6.1 Java字节码基础

6.1.1 Java字节码概述

Java字节码是Java源代码编译后的中间表示形式,运行在Java虚拟机(JVM)上。理解字节码结构对于逆向分析至关重要。

[图:字节码结构与编译流程示意——class文件由魔数、版本信息、常量池、访问标志、类信息、字段信息、方法信息、属性信息组成;Java源码经javac编译器编译为Java字节码(.class),再由JVM解释/编译为机器码执行]

字节码文件结构:

// ClassFile结构(JVM规范)
ClassFile {
    u4             magic;                    // 魔数 0xCAFEBABE
    u2             minor_version;            // 次版本号
    u2             major_version;            // 主版本号
    u2             constant_pool_count;      // 常量池计数
    cp_info        constant_pool[constant_pool_count-1];  // 常量池
    u2             access_flags;             // 访问标志
    u2             this_class;               // 当前类索引
    u2             super_class;              // 父类索引
    u2             interfaces_count;         // 接口计数
    u2             interfaces[interfaces_count];  // 接口索引表
    u2             fields_count;             // 字段计数
    field_info     fields[fields_count];     // 字段表
    u2             methods_count;            // 方法计数
    method_info    methods[methods_count];   // 方法表
    u2             attributes_count;         // 属性计数
    attribute_info attributes[attributes_count];  // 属性表
}
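上面的结构很容易用Python实地验证。下面的小函数只解析class文件最前面的8个字节(魔数与版本号),仅作示意:

```python
import struct

def parse_class_header(data: bytes) -> dict:
    """解析class文件前8字节:魔数(u4) + 次版本号(u2) + 主版本号(u2),大端序"""
    magic, minor, major = struct.unpack('>IHH', data[:8])
    if magic != 0xCAFEBABE:
        raise ValueError("不是有效的class文件")
    # 主版本号52对应Java 8,61对应Java 17
    return {'magic': hex(magic), 'minor': minor, 'major': major}
```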

6.1.2 字节码指令集

常用字节码指令分类:

class BytecodeInstructions:
    """字节码指令分类"""
    
    # 加载和存储指令
    LOAD_STORE = {
        'aload': '从局部变量表加载引用类型值到栈顶',
        'iload': '从局部变量表加载int类型值到栈顶',
        'lload': '从局部变量表加载long类型值到栈顶',
        'fload': '从局部变量表加载float类型值到栈顶',
        'dload': '从局部变量表加载double类型值到栈顶',
        'astore': '将栈顶引用类型值存入局部变量表',
        'istore': '将栈顶int类型值存入局部变量表',
        'lstore': '将栈顶long类型值存入局部变量表',
        'fstore': '将栈顶float类型值存入局部变量表',
        'dstore': '将栈顶double类型值存入局部变量表'
    }
    
    # 运算指令
    ARITHMETIC = {
        'iadd': '执行int类型加法',
        'isub': '执行int类型减法',
        'imul': '执行int类型乘法',
        'idiv': '执行int类型除法',
        'irem': '执行int类型取余',
        'ineg': '执行int类型取负',
        'ishl': '执行int类型左移',
        'ishr': '执行int类型右移',
        'iushr': '执行int类型无符号右移',
        'iand': '执行int类型按位与',
        'ior': '执行int类型按位或',
        'ixor': '执行int类型按位异或'
    }
    
    # 类型转换指令
    TYPE_CONVERSION = {
        'i2l': 'int转long',
        'i2f': 'int转float',
        'i2d': 'int转double',
        'l2i': 'long转int',
        'f2i': 'float转int',
        'd2i': 'double转int',
        'i2b': 'int转byte',
        'i2c': 'int转char',
        'i2s': 'int转short'
    }
    
    # 对象创建与访问指令
    OBJECT_MANIPULATION = {
        'new': '创建对象实例',
        'newarray': '创建基本类型数组',
        'anewarray': '创建引用类型数组',
        'arraylength': '获取数组长度',
        'getfield': '获取对象字段值',
        'putfield': '设置对象字段值',
        'getstatic': '获取静态字段值',
        'putstatic': '设置静态字段值',
        'baload': '从byte数组加载值',
        'bastore': '向byte数组存储值'
    }
    
    # 操作数栈管理指令
    STACK_MANAGEMENT = {
        'pop': '弹出栈顶一个字长的数据',
        'pop2': '弹出栈顶两个字长的数据',
        'dup': '复制栈顶一个字长数据并压入栈顶',
        'dup2': '复制栈顶两个字长数据并压入栈顶',
        'swap': '交换栈顶两个字长数据'
    }
    
    # 控制转移指令
    CONTROL_TRANSFER = {
        'ifeq': '当栈顶int类型数值等于0时跳转',
        'ifne': '当栈顶int类型数值不等于0时跳转',
        'iflt': '当栈顶int类型数值小于0时跳转',
        'ifge': '当栈顶int类型数值大于等于0时跳转',
        'ifgt': '当栈顶int类型数值大于0时跳转',
        'ifle': '当栈顶int类型数值小于等于0时跳转',
        'if_icmpeq': '比较栈顶两int类型数值,相等时跳转',
        'if_icmpne': '比较栈顶两int类型数值,不相等时跳转',
        'goto': '无条件跳转',
        'jsr': '跳转至指定16位offset位置,并将jsr下一条指令地址压入栈顶(已弃用)',
        'ret': '返回至局部变量表指定index处保存的指令位置(已弃用)'
    }
    
    # 方法调用和返回指令
    METHOD_INVOCATION = {
        'invokevirtual': '调用实例方法',
        'invokespecial': '调用超类构造方法、实例初始化方法、私有方法',
        'invokestatic': '调用静态方法',
        'invokeinterface': '调用接口方法',
        'invokedynamic': '调用动态解析的调用点(Java 7+引入,Lambda表达式等基于此实现)',
        'ireturn': '从当前方法返回int',
        'lreturn': '从当前方法返回long',
        'freturn': '从当前方法返回float',
        'dreturn': '从当前方法返回double',
        'areturn': '从当前方法返回对象引用',
        'return': '从当前方法返回void'
    }

# 字节码指令示例分析
def analyze_bytecode_example():
    """分析字节码指令示例"""
    java_code = """
    public int add(int a, int b) {
        int result = a + b;
        return result;
    }
    """
    
    bytecode = """
    0: iload_1        // 加载参数a到栈顶
    1: iload_2        // 加载参数b到栈顶
    2: iadd           // 执行加法运算
    3: istore_3       // 将结果存储到局部变量3(result)
    4: iload_3        // 加载result到栈顶
    5: ireturn        // 返回int值
    """
    
    print("Java源码:")
    print(java_code)
    print("\n对应字节码:")
    print(bytecode)
    
    return bytecode
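为了直观理解这段字节码的执行过程,可以用一个迷你解释器模拟JVM的操作数栈与局部变量表。以下仅为教学示意,并非真实的JVM实现:

```python
def simulate_add(a: int, b: int) -> int:
    """逐条模拟上述add方法的字节码:操作数栈 + 局部变量表"""
    local_vars = {1: a, 2: b}          # 实例方法槽0为this,槽1/2为参数a/b
    stack = []
    stack.append(local_vars[1])        # 0: iload_1
    stack.append(local_vars[2])        # 1: iload_2
    v2, v1 = stack.pop(), stack.pop()
    stack.append(v1 + v2)              # 2: iadd
    local_vars[3] = stack.pop()        # 3: istore_3
    stack.append(local_vars[3])        # 4: iload_3
    return stack.pop()                 # 5: ireturn
```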

6.1.3 DEX字节码格式

Android使用DEX(Dalvik Executable)格式。与基于操作数栈的JVM字节码不同,DEX指令集基于寄存器,并且会把应用的所有类合并到一个文件中、共享常量池,从而减小体积、加快加载:

import struct

class DEXAnalyzer:
    """DEX文件分析器"""
    
    def __init__(self, dex_data):
        self.data = dex_data
        self.header = None
        self.strings = []
        self.types = []
        self.methods = []
        self.classes = []
    
    def parse_header(self):
        """解析DEX文件头"""
        if len(self.data) < 112:
            raise ValueError("Invalid DEX file: header too short")
        
        # DEX文件头共0x70(112)字节:magic(8) + checksum(4) + signature(20) + 20个uint32字段
        header_format = '<8sI20s20I'
        header_data = struct.unpack(header_format, self.data[:112])
        
        self.header = {
            'magic': header_data[0],                    # DEX魔数
            'checksum': header_data[1],                 # Adler-32校验和
            'signature': header_data[2],                # SHA-1签名(20字节)
            'file_size': header_data[3],                # 文件大小
            'header_size': header_data[4],              # 头部大小(恒为0x70)
            'endian_tag': header_data[5],               # 字节序标记
            'link_size': header_data[6],                # 链接段大小
            'link_off': header_data[7],                 # 链接段偏移
            'map_off': header_data[8],                  # 映射表偏移
            'string_ids_size': header_data[9],          # 字符串ID数量
            'string_ids_off': header_data[10],          # 字符串ID偏移
            'type_ids_size': header_data[11],           # 类型ID数量
            'type_ids_off': header_data[12],            # 类型ID偏移
            'proto_ids_size': header_data[13],          # 原型ID数量
            'proto_ids_off': header_data[14],           # 原型ID偏移
            'field_ids_size': header_data[15],          # 字段ID数量
            'field_ids_off': header_data[16],           # 字段ID偏移
            'method_ids_size': header_data[17],         # 方法ID数量
            'method_ids_off': header_data[18],          # 方法ID偏移
            'class_defs_size': header_data[19],         # 类定义数量
            'class_defs_off': header_data[20],          # 类定义偏移
            'data_size': header_data[21],               # 数据段大小
            'data_off': header_data[22]                 # 数据段偏移
        }
        
        # 验证DEX魔数
        if self.header['magic'][:4] != b'dex\n':
            raise ValueError("Invalid DEX magic number")
        
        return self.header
    
    def parse_string_ids(self):
        """解析字符串ID表"""
        if not self.header:
            self.parse_header()
        
        string_ids_off = self.header['string_ids_off']
        string_ids_size = self.header['string_ids_size']
        
        # 每个字符串ID占4字节
        for i in range(string_ids_size):
            offset_pos = string_ids_off + i * 4
            string_data_off = struct.unpack('<I', self.data[offset_pos:offset_pos+4])[0]
            
            # 读取字符串数据
            string_data = self.read_uleb128_string(string_data_off)
            self.strings.append(string_data)
        
        return self.strings
    
    def read_uleb128_string(self, offset):
        """读取DEX字符串(前缀为ULEB128编码的UTF-16码元数,内容为MUTF-8,以NUL结尾)"""
        # 注意:长度前缀是UTF-16码元数而非字节数,因此按NUL终止符截取字节
        _utf16_len, new_offset = self.read_uleb128(offset)
        end = self.data.index(b'\x00', new_offset)
        string_data = self.data[new_offset:end]
        try:
            return string_data.decode('utf-8')
        except UnicodeDecodeError:
            # MUTF-8与标准UTF-8在补充字符和\u0000的编码上存在差异,这里做宽容处理
            return string_data.decode('utf-8', errors='replace')
    
    def read_uleb128(self, offset):
        """读取ULEB128编码的整数"""
        result = 0
        shift = 0
        current_offset = offset
        
        while True:
            byte = self.data[current_offset]
            current_offset += 1
            
            result |= (byte & 0x7F) << shift
            
            if (byte & 0x80) == 0:
                break
            
            shift += 7
        
        return result, current_offset
    
    def parse_type_ids(self):
        """解析类型ID表"""
        if not self.strings:
            self.parse_string_ids()
        
        type_ids_off = self.header['type_ids_off']
        type_ids_size = self.header['type_ids_size']
        
        for i in range(type_ids_size):
            offset_pos = type_ids_off + i * 4
            descriptor_idx = struct.unpack('<I', self.data[offset_pos:offset_pos+4])[0]
            
            if descriptor_idx < len(self.strings):
                type_descriptor = self.strings[descriptor_idx]
                self.types.append(type_descriptor)
            else:
                self.types.append(f"INVALID_TYPE_{descriptor_idx}")
        
        return self.types
    
    def parse_method_ids(self):
        """解析方法ID表"""
        if not self.types:
            self.parse_type_ids()
        
        method_ids_off = self.header['method_ids_off']
        method_ids_size = self.header['method_ids_size']
        
        for i in range(method_ids_size):
            offset_pos = method_ids_off + i * 8  # 每个方法ID占8字节
            class_idx, proto_idx, name_idx = struct.unpack('<H H I', 
                                                          self.data[offset_pos:offset_pos+8])
            
            method_info = {
                'class_idx': class_idx,
                'proto_idx': proto_idx,
                'name_idx': name_idx,
                'class_name': self.types[class_idx] if class_idx < len(self.types) else f"INVALID_CLASS_{class_idx}",
                'method_name': self.strings[name_idx] if name_idx < len(self.strings) else f"INVALID_NAME_{name_idx}"
            }
            
            self.methods.append(method_info)
        
        return self.methods
    
    def parse_class_defs(self):
        """解析类定义表"""
        if not self.methods:
            self.parse_method_ids()
        
        class_defs_off = self.header['class_defs_off']
        class_defs_size = self.header['class_defs_size']
        
        for i in range(class_defs_size):
            offset_pos = class_defs_off + i * 32  # 每个类定义占32字节
            class_data = struct.unpack('<I I I I I I I I', 
                                     self.data[offset_pos:offset_pos+32])
            
            class_info = {
                'class_idx': class_data[0],
                'access_flags': class_data[1],
                'superclass_idx': class_data[2],
                'interfaces_off': class_data[3],
                'source_file_idx': class_data[4],
                'annotations_off': class_data[5],
                'class_data_off': class_data[6],
                'static_values_off': class_data[7]
            }
            
            # 解析类名
            if class_info['class_idx'] < len(self.types):
                class_info['class_name'] = self.types[class_info['class_idx']]
            
            # 解析父类名
            if class_info['superclass_idx'] != 0xFFFFFFFF and class_info['superclass_idx'] < len(self.types):
                class_info['superclass_name'] = self.types[class_info['superclass_idx']]
            
            self.classes.append(class_info)
        
        return self.classes
    
    def get_analysis_summary(self):
        """获取分析摘要"""
        if not self.classes:
            self.parse_class_defs()
        
        return {
            'file_size': len(self.data),
            'header_info': self.header,
            'strings_count': len(self.strings),
            'types_count': len(self.types),
            'methods_count': len(self.methods),
            'classes_count': len(self.classes),
            'top_strings': self.strings[:10] if self.strings else [],
            'top_types': self.types[:10] if self.types else [],
            'class_names': [cls.get('class_name', 'Unknown') for cls in self.classes[:10]]
        }

# 使用示例
def analyze_dex_file(dex_path):
    """分析DEX文件"""
    with open(dex_path, 'rb') as f:
        dex_data = f.read()
    
    analyzer = DEXAnalyzer(dex_data)
    summary = analyzer.get_analysis_summary()
    
    print("=== DEX File Analysis ===")
    print(f"File size: {summary['file_size']:,} bytes")
    print(f"Strings: {summary['strings_count']}")
    print(f"Types: {summary['types_count']}")
    print(f"Methods: {summary['methods_count']}")
    print(f"Classes: {summary['classes_count']}")
    
    print("\nTop 10 strings:")
    for i, string in enumerate(summary['top_strings']):
        print(f"  {i}: {string}")
    
    print("\nTop 10 classes:")
    for i, class_name in enumerate(summary['class_names']):
        print(f"  {i}: {class_name}")
    
    return analyzer
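DEX类型表里存放的是"Lcom/example/Foo;"这样的类型描述符,输出分析结果时通常需要把它们转成可读的类型名。下面是一个简单的转换函数(示意实现):

```python
# 基本类型描述符到Java类型名的映射
PRIMITIVES = {'V': 'void', 'Z': 'boolean', 'B': 'byte', 'S': 'short',
              'C': 'char', 'I': 'int', 'J': 'long', 'F': 'float', 'D': 'double'}

def demangle_descriptor(desc: str) -> str:
    """将DEX/JVM类型描述符转成可读类型名,如 [Ljava/lang/String; -> java.lang.String[]"""
    dims = 0
    while desc.startswith('['):      # 每个前导'['代表一个数组维度
        dims += 1
        desc = desc[1:]
    if desc.startswith('L') and desc.endswith(';'):
        name = desc[1:-1].replace('/', '.')   # 引用类型:L全限定名;
    else:
        name = PRIMITIVES.get(desc, desc)     # 基本类型或未知描述符
    return name + '[]' * dims
```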

6.2 Java代码反编译

6.2.1 反编译工具对比

主流Java反编译工具:

工具       | 类型       | 优点                  | 缺点               | 适用场景
-----------|------------|-----------------------|--------------------|-------------
JD-GUI     | GUI工具    | 界面友好,支持多种格式 | 对复杂代码支持有限 | 快速浏览代码
JD-Core    | 命令行     | 轻量级,集成方便      | 功能相对简单       | 自动化处理
CFR        | 命令行     | 支持现代Java特性      | 学习曲线陡峭       | 复杂代码分析
Procyon    | 命令行     | 支持Lambda表达式      | 性能较慢           | 现代Java代码
Fernflower | 命令行     | IntelliJ内置引擎      | 配置复杂           | 专业开发
Jadx       | GUI+命令行 | 专门针对Android       | 仅支持Android      | Android逆向

6.2.2 使用Jadx进行Android反编译

Jadx安装和基本使用:

# 下载Jadx
wget https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip
unzip jadx-1.4.7.zip
cd jadx-1.4.7

# GUI模式
./bin/jadx-gui

# 命令行模式
./bin/jadx -d output_dir input.apk

# 高级参数
./bin/jadx --help

Jadx高级参数:

# 基本反编译
jadx -d output_dir app.apk

# 不反编译资源文件
jadx -d output_dir --no-res app.apk

# 不反编译源码
jadx -d output_dir --no-src app.apk

# 显示详细信息
jadx -d output_dir -v app.apk

# 设置线程数
jadx -d output_dir -j 4 app.apk

# 跳过反编译错误
jadx -d output_dir --skip-errors app.apk

# 输出格式设置
jadx -d output_dir --output-format java app.apk

# 反混淆选项
jadx -d output_dir --deobf app.apk

# 显示反编译不一致/有问题的代码(而不是将其注释掉)
jadx -d output_dir --show-bad-code app.apk
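反编译完成后,往往需要先对输出规模有个整体印象。下面的小工具统计输出目录(即-d参数指定的目录)中.java文件的数量与总行数,仅作示意:

```python
from pathlib import Path

def summarize_jadx_output(output_dir: str) -> dict:
    """递归统计反编译输出目录中的.java文件数与总行数"""
    java_files = list(Path(output_dir).rglob('*.java'))
    total_lines = sum(
        len(f.read_text(encoding='utf-8', errors='ignore').splitlines())
        for f in java_files
    )
    return {'java_files': len(java_files), 'total_lines': total_lines}
```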

6.2.3 自定义反编译脚本

#!/usr/bin/env python3
"""
自定义Android反编译脚本
"""

import os
import subprocess
import zipfile
import tempfile
import shutil
from pathlib import Path
import argparse

class AndroidDecompiler:
    def __init__(self, apk_path, output_dir):
        self.apk_path = Path(apk_path)
        self.output_dir = Path(output_dir)
        self.temp_dir = None
        
    def decompile_full(self):
        """完整反编译流程"""
        print(f"Starting decompilation of {self.apk_path}")
        
        # 创建输出目录
        self.output_dir.mkdir(parents=True, exist_ok=True)
        
        # 创建临时目录
        self.temp_dir = Path(tempfile.mkdtemp())
        
        try:
            # 1. 使用APKTool反编译资源
            self.decompile_with_apktool()
            
            # 2. 使用Jadx反编译Java代码
            self.decompile_with_jadx()
            
            # 3. 使用dex2jar转换DEX
            self.convert_with_dex2jar()
            
            # 4. 提取字符串和其他信息
            self.extract_strings()
            
            # 5. 生成分析报告
            self.generate_report()
            
            print(f"Decompilation completed: {self.output_dir}")
            
        finally:
            # 清理临时目录
            if self.temp_dir and self.temp_dir.exists():
                shutil.rmtree(self.temp_dir)
    
    def decompile_with_apktool(self):
        """使用APKTool反编译"""
        print("Running APKTool...")
        
        apktool_output = self.output_dir / "apktool"
        cmd = [
            "apktool", "d", str(self.apk_path),
            "-o", str(apktool_output),
            "-f"  # 强制覆盖
        ]
        
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                print("APKTool decompilation successful")
            else:
                print(f"APKTool error: {result.stderr}")
        except subprocess.TimeoutExpired:
            print("APKTool timeout")
        except FileNotFoundError:
            print("APKTool not found, skipping resource decompilation")
    
    def decompile_with_jadx(self):
        """使用Jadx反编译"""
        print("Running Jadx...")
        
        jadx_output = self.output_dir / "jadx"
        cmd = [
            "jadx",
            "-d", str(jadx_output),
            "-j", "4",  # 4个线程
            "--skip-errors",
            str(self.apk_path)
        ]
        
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
            if result.returncode == 0:
                print("Jadx decompilation successful")
            else:
                print(f"Jadx error: {result.stderr}")
        except subprocess.TimeoutExpired:
            print("Jadx timeout")
        except FileNotFoundError:
            print("Jadx not found, skipping Java decompilation")
    
    def convert_with_dex2jar(self):
        """使用dex2jar转换"""
        print("Running dex2jar...")
        
        jar_output = self.output_dir / f"{self.apk_path.stem}.jar"
        cmd = [
            "d2j-dex2jar",
            str(self.apk_path),
            "-o", str(jar_output)
        ]
        
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                print(f"dex2jar conversion successful: {jar_output}")
                
                # 使用JD-Core反编译JAR
                self.decompile_jar_with_jd_core(jar_output)
            else:
                print(f"dex2jar error: {result.stderr}")
        except subprocess.TimeoutExpired:
            print("dex2jar timeout")
        except FileNotFoundError:
            print("dex2jar not found, skipping JAR conversion")
    
    def decompile_jar_with_jd_core(self, jar_path):
        """使用JD-Core反编译JAR文件"""
        print("Running JD-Core...")
        
        jd_output = self.output_dir / "jd-core"
        jd_output.mkdir(exist_ok=True)
        
        # 这里需要JD-Core的命令行版本
        # 或者使用Java API调用
        try:
            # 简化实现:直接解压JAR文件
            with zipfile.ZipFile(jar_path, 'r') as jar:
                jar.extractall(jd_output)
            print(f"JAR extracted to: {jd_output}")
        except Exception as e:
            print(f"JAR extraction error: {e}")
    
    def extract_strings(self):
        """提取字符串"""
        print("Extracting strings...")
        
        strings_file = self.output_dir / "strings.txt"
        cmd = ["strings", str(self.apk_path)]
        
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                with open(strings_file, 'w', encoding='utf-8') as f:
                    f.write(result.stdout)
                print(f"Strings extracted to: {strings_file}")
            else:
                print("Failed to extract strings")
        except FileNotFoundError:
            print("strings command not found")
    
    def generate_report(self):
        """生成分析报告"""
        import datetime  # 局部导入,避免依赖外部date命令(跨平台)
        print("Generating analysis report...")
        
        report_file = self.output_dir / "analysis_report.md"
        
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Decompilation Report\n\n")
            f.write(f"**APK File:** {self.apk_path.name}\n")
            f.write(f"**Output Directory:** {self.output_dir}\n")
            f.write(f"**Decompilation Date:** {datetime.datetime.now().isoformat(timespec='seconds')}\n\n")
            
            # 统计反编译结果
            f.write("## Decompilation Results\n\n")
            
            # APKTool结果
            apktool_dir = self.output_dir / "apktool"
            if apktool_dir.exists():
                f.write(f"- **APKTool Output:** {apktool_dir}\n")
                f.write(f"  - Files: {len(list(apktool_dir.rglob('*')))}\n")
            
            # Jadx结果
            jadx_dir = self.output_dir / "jadx"
            if jadx_dir.exists():
                f.write(f"- **Jadx Output:** {jadx_dir}\n")
                java_files = list(jadx_dir.rglob('*.java'))
                f.write(f"  - Java files: {len(java_files)}\n")
            
            # JAR文件
            jar_file = self.output_dir / f"{self.apk_path.stem}.jar"
            if jar_file.exists():
                f.write(f"- **JAR File:** {jar_file}\n")
                f.write(f"  - Size: {jar_file.stat().st_size:,} bytes\n")
            
            # 字符串文件
            strings_file = self.output_dir / "strings.txt"
            if strings_file.exists():
                f.write(f"- **Strings File:** {strings_file}\n")
                with open(strings_file, 'r', encoding='utf-8') as sf:
                    lines = sf.readlines()
                    f.write(f"  - Lines: {len(lines)}\n")
        
        print(f"Analysis report generated: {report_file}")

def main():
    parser = argparse.ArgumentParser(description='Android APK Decompiler')
    parser.add_argument('apk', help='Path to APK file')
    parser.add_argument('-o', '--output', required=True, help='Output directory')
    parser.add_argument('--apktool-only', action='store_true', help='Only run APKTool')
    parser.add_argument('--jadx-only', action='store_true', help='Only run Jadx')
    
    args = parser.parse_args()
    
    if not Path(args.apk).exists():
        print(f"Error: APK file not found: {args.apk}")
        return 1
    
    decompiler = AndroidDecompiler(args.apk, args.output)
    
    if args.apktool_only:
        decompiler.decompile_with_apktool()
    elif args.jadx_only:
        decompiler.decompile_with_jadx()
    else:
        decompiler.decompile_full()
    
    return 0

if __name__ == "__main__":
    exit(main())

6.3 静态代码分析技术

6.3.1 抽象语法树(AST)分析

import javalang  # 第三方库:pip install javalang
from pathlib import Path

class JavaASTAnalyzer:
    """Java抽象语法树分析器"""
    
    def __init__(self, java_file_path):
        self.java_file = Path(java_file_path)
        self.tree = None
        self.analysis_results = {}
    
    def parse_java_file(self):
        """解析Java文件生成AST"""
        try:
            with open(self.java_file, 'r', encoding='utf-8') as f:
                java_code = f.read()
            
            # 使用javalang解析Java代码
            self.tree = javalang.parse.parse(java_code)
            return self.tree
        except Exception as e:
            print(f"Failed to parse Java file: {e}")
            return None
    
    def analyze_class_structure(self):
        """分析类结构"""
        if not self.tree:
            self.parse_java_file()
        
        classes = []
        interfaces = []
        
        for path, node in self.tree.filter(javalang.tree.ClassDeclaration):
            class_info = {
                'name': node.name,
                'modifiers': node.modifiers,
                'extends': node.extends.name if node.extends else None,
                'implements': [impl.name for impl in node.implements] if node.implements else [],
                'fields': [],
                'methods': [],
                'constructors': []
            }
            
            # 分析字段
            for field_path, field_node in node.filter(javalang.tree.FieldDeclaration):
                for declarator in field_node.declarators:
                    field_info = {
                        'name': declarator.name,
                        'type': field_node.type.name,
                        'modifiers': field_node.modifiers
                    }
                    class_info['fields'].append(field_info)
            
            # 分析方法
            for method_path, method_node in node.filter(javalang.tree.MethodDeclaration):
                method_info = {
                    'name': method_node.name,
                    'return_type': method_node.return_type.name if method_node.return_type else 'void',
                    'modifiers': method_node.modifiers,
                    'parameters': []
                }
                
                if method_node.parameters:
                    for param in method_node.parameters:
                        param_info = {
                            'name': param.name,
                            'type': param.type.name
                        }
                        method_info['parameters'].append(param_info)
                
                class_info['methods'].append(method_info)
            
            # 分析构造函数
            for ctor_path, ctor_node in node.filter(javalang.tree.ConstructorDeclaration):
                ctor_info = {
                    'name': ctor_node.name,
                    'modifiers': ctor_node.modifiers,
                    'parameters': []
                }
                
                if ctor_node.parameters:
                    for param in ctor_node.parameters:
                        param_info = {
                            'name': param.name,
                            'type': param.type.name
                        }
                        ctor_info['parameters'].append(param_info)
                
                class_info['constructors'].append(ctor_info)
            
            classes.append(class_info)
        
        # 分析接口
        for path, node in self.tree.filter(javalang.tree.InterfaceDeclaration):
            interface_info = {
                'name': node.name,
                'modifiers': node.modifiers,
                'extends': [ext.name for ext in node.extends] if node.extends else [],
                'methods': []
            }
            
            for method_path, method_node in node.filter(javalang.tree.MethodDeclaration):
                method_info = {
                    'name': method_node.name,
                    'return_type': method_node.return_type.name if method_node.return_type else 'void',
                    'parameters': []
                }
                
                if method_node.parameters:
                    for param in method_node.parameters:
                        param_info = {
                            'name': param.name,
                            'type': param.type.name
                        }
                        method_info['parameters'].append(param_info)
                
                interface_info['methods'].append(method_info)
            
            interfaces.append(interface_info)
        
        self.analysis_results['classes'] = classes
        self.analysis_results['interfaces'] = interfaces
        
        return {'classes': classes, 'interfaces': interfaces}
    
    def analyze_method_calls(self):
        """分析方法调用"""
        if not self.tree:
            self.parse_java_file()
        
        method_calls = []
        
        for path, node in self.tree.filter(javalang.tree.MethodInvocation):
            call_info = {
                'method_name': node.member,
                'qualifier': str(node.qualifier) if node.qualifier else None,
                'arguments': len(node.arguments) if node.arguments else 0
            }
            method_calls.append(call_info)
        
        self.analysis_results['method_calls'] = method_calls
        return method_calls
    
    def analyze_imports(self):
        """分析导入语句"""
        if not self.tree:
            self.parse_java_file()
        
        imports = []
        
        if self.tree.imports:
            for import_node in self.tree.imports:
                import_info = {
                    'path': import_node.path,
                    'static': import_node.static,
                    'wildcard': import_node.wildcard
                }
                imports.append(import_info)
        
        self.analysis_results['imports'] = imports
        return imports
    
    def analyze_string_literals(self):
        """分析字符串字面量"""
        if not self.tree:
            self.parse_java_file()
        
        string_literals = []
        
        for path, node in self.tree.filter(javalang.tree.Literal):
            if isinstance(node.value, str) and node.value.startswith('"'):
                string_literals.append(node.value[1:-1])  # 去掉引号
        
        self.analysis_results['string_literals'] = string_literals
        return string_literals
    
    def generate_analysis_report(self):
        """生成分析报告"""
        if not self.analysis_results:
            self.analyze_class_structure()
            self.analyze_method_calls()
            self.analyze_imports()
            self.analyze_string_literals()
        
        report = f"# Java Code Analysis Report\n\n"
        report += f"**File:** {self.java_file.name}\n\n"
        
        # 类和接口统计
        classes = self.analysis_results.get('classes', [])
        interfaces = self.analysis_results.get('interfaces', [])
        
        report += f"## Structure Overview\n\n"
        report += f"- **Classes:** {len(classes)}\n"
        report += f"- **Interfaces:** {len(interfaces)}\n"
        report += f"- **Imports:** {len(self.analysis_results.get('imports', []))}\n"
        report += f"- **Method Calls:** {len(self.analysis_results.get('method_calls', []))}\n"
        report += f"- **String Literals:** {len(self.analysis_results.get('string_literals', []))}\n\n"
        
        # 详细类信息
        if classes:
            report += f"## Classes\n\n"
            for cls in classes:
                report += f"### {cls['name']}\n"
                if cls['extends']:
                    report += f"- **Extends:** {cls['extends']}\n"
                if cls['implements']:
                    report += f"- **Implements:** {', '.join(cls['implements'])}\n"
                report += f"- **Fields:** {len(cls['fields'])}\n"
                report += f"- **Methods:** {len(cls['methods'])}\n"
                report += f"- **Constructors:** {len(cls['constructors'])}\n\n"
        
        # 方法调用统计
        method_calls = self.analysis_results.get('method_calls', [])
        if method_calls:
            report += f"## Method Calls\n\n"
            call_counts = {}
            for call in method_calls:
                method_name = call['method_name']
                call_counts[method_name] = call_counts.get(method_name, 0) + 1
            
            sorted_calls = sorted(call_counts.items(), key=lambda x: x[1], reverse=True)
            for method_name, count in sorted_calls[:10]:
                report += f"- **{method_name}:** {count} calls\n"
            report += "\n"
        
        return report

# 使用示例
def analyze_java_directory(directory_path):
    """分析Java目录中的所有文件"""
    directory = Path(directory_path)
    java_files = list(directory.rglob('*.java'))
    
    print(f"Found {len(java_files)} Java files")
    
    all_results = {
        'total_files': len(java_files),
        'total_classes': 0,
        'total_methods': 0,
        'common_imports': {},
        'common_method_calls': {}
    }
    
    for java_file in java_files:
        print(f"Analyzing: {java_file}")
        
        try:
            analyzer = JavaASTAnalyzer(java_file)
            analyzer.analyze_class_structure()
            analyzer.analyze_method_calls()
            analyzer.analyze_imports()
            
            # 统计信息
            classes = analyzer.analysis_results.get('classes', [])
            all_results['total_classes'] += len(classes)
            
            for cls in classes:
                all_results['total_methods'] += len(cls['methods'])
            
            # 统计导入
            imports = analyzer.analysis_results.get('imports', [])
            for imp in imports:
                import_path = imp['path']
                all_results['common_imports'][import_path] = all_results['common_imports'].get(import_path, 0) + 1
            
            # 统计方法调用
            method_calls = analyzer.analysis_results.get('method_calls', [])
            for call in method_calls:
                method_name = call['method_name']
                all_results['common_method_calls'][method_name] = all_results['common_method_calls'].get(method_name, 0) + 1
        
        except Exception as e:
            print(f"Error analyzing {java_file}: {e}")
    
    # 生成总体报告
    print("\n=== Analysis Summary ===")
    print(f"Total files: {all_results['total_files']}")
    print(f"Total classes: {all_results['total_classes']}")
    print(f"Total methods: {all_results['total_methods']}")
    
    print("\nTop 10 imports:")
    sorted_imports = sorted(all_results['common_imports'].items(), key=lambda x: x[1], reverse=True)
    for import_path, count in sorted_imports[:10]:
        print(f"  {import_path}: {count}")
    
    print("\nTop 10 method calls:")
    sorted_calls = sorted(all_results['common_method_calls'].items(), key=lambda x: x[1], reverse=True)
    for method_name, count in sorted_calls[:10]:
        print(f"  {method_name}: {count}")
    
    return all_results
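上面的频次统计使用了 dict.get 累加再排序取前10的写法;标准库的 collections.Counter 可以把"计数并取 Top-N"写得更简洁。下面是一个独立的小示例(导入路径数据为虚构):

```python
from collections import Counter

# 用 Counter 重写"统计频次并取 Top-N"的惯用写法
imports = ['java.util.List', 'java.io.File', 'java.util.List',
           'java.util.Map', 'java.util.List', 'java.io.File']

counter = Counter(imports)
top2 = counter.most_common(2)   # 按出现次数降序取前2项
print(top2)  # [('java.util.List', 3), ('java.io.File', 2)]
```

在 analyze_java_directory 中,common_imports 和 common_method_calls 都可以直接换成 Counter,排序那两行则换成 most_common(10)。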

6.3.2 数据流分析

class DataFlowAnalyzer:
    """数据流分析器"""
    
    def __init__(self, java_code):
        self.java_code = java_code
        self.variables = {}
        self.assignments = []
        self.uses = []
    
    def analyze_variable_flow(self):
        """分析变量数据流"""
        # 简化的数据流分析实现
        lines = self.java_code.split('\n')
        
        for line_no, line in enumerate(lines, 1):
            line = line.strip()
            
            # 检测变量声明和赋值(排除 ==、!=、<=、>= 等比较运算符)
            eq = line.find('=')
            if (eq > 0 and line[eq - 1] not in '=!<>'
                    and (eq + 1 >= len(line) or line[eq + 1] != '=')
                    and not line.startswith('//')):
                self.analyze_assignment(line, line_no)
            
            # 检测变量使用
            self.analyze_variable_usage(line, line_no)
    
    def analyze_assignment(self, line, line_no):
        """分析赋值语句"""
        # 简化的赋值分析
        if '=' in line:
            parts = line.split('=', 1)
            if len(parts) == 2:
                left = parts[0].strip()
                right = parts[1].strip().rstrip(';')
                
                # 提取变量名(简化处理,兼容 += 等复合赋值)
                left = left.rstrip('+-*/%&|^').strip()
                if not left:
                    return
                var_name = left.split()[-1]
                
                assignment = {
                    'line': line_no,
                    'variable': var_name,
                    'value': right,
                    'type': 'assignment'
                }
                
                self.assignments.append(assignment)
                
                # 更新变量定义位置
                if var_name not in self.variables:
                    self.variables[var_name] = []
                self.variables[var_name].append(line_no)
    
    def analyze_variable_usage(self, line, line_no):
        """分析变量使用"""
        # 检查已知变量在当前行的使用(简化的词边界匹配,只看首次出现)
        if line.startswith('//'):
            return
        for var_name in self.variables:
            idx = line.find(var_name)
            if idx == -1:
                continue
            # 词边界检查,避免 'x' 误匹配 'max'、'example' 等更长的标识符
            before = line[idx - 1] if idx > 0 else ' '
            end = idx + len(var_name)
            after = line[end] if end < len(line) else ' '
            if before.isalnum() or before == '_' or after.isalnum() or after == '_':
                continue
            # 简单检查是否为使用(非声明/赋值左侧)
            if f'{var_name} =' not in line and f'{var_name}=' not in line:
                self.uses.append({
                    'line': line_no,
                    'variable': var_name,
                    'context': line,
                    'type': 'use'
                })
    
    def find_def_use_chains(self):
        """查找定义-使用链"""
        chains = []
        
        for var_name, def_lines in self.variables.items():
            var_uses = [use for use in self.uses if use['variable'] == var_name]
            
            for def_line in def_lines:
                # 找到该定义之后的使用
                subsequent_uses = [use for use in var_uses if use['line'] > def_line]
                
                if subsequent_uses:
                    chain = {
                        'variable': var_name,
                        'definition': def_line,
                        'uses': [use['line'] for use in subsequent_uses]
                    }
                    chains.append(chain)
        
        return chains
    
    def detect_unused_variables(self):
        """检测未使用的变量"""
        unused = []
        
        for var_name, def_lines in self.variables.items():
            var_uses = [use for use in self.uses if use['variable'] == var_name]
            
            if not var_uses:
                unused.extend([(var_name, line) for line in def_lines])
        
        return unused
    
    def generate_dataflow_report(self):
        """生成数据流分析报告"""
        self.analyze_variable_flow()
        chains = self.find_def_use_chains()
        unused = self.detect_unused_variables()
        
        report = "# Data Flow Analysis Report\n\n"
        
        report += f"## Summary\n\n"
        report += f"- **Variables:** {len(self.variables)}\n"
        report += f"- **Assignments:** {len(self.assignments)}\n"
        report += f"- **Uses:** {len(self.uses)}\n"
        report += f"- **Def-Use Chains:** {len(chains)}\n"
        report += f"- **Unused Variables:** {len(unused)}\n\n"
        
        if unused:
            report += f"## Unused Variables\n\n"
            for var_name, line in unused:
                report += f"- **{var_name}** (line {line})\n"
            report += "\n"
        
        if chains:
            report += f"## Def-Use Chains\n\n"
            for chain in chains[:10]:  # 显示前10个
                report += f"### {chain['variable']}\n"
                report += f"- **Definition:** line {chain['definition']}\n"
                report += f"- **Uses:** lines {', '.join(map(str, chain['uses']))}\n\n"
        
        return report

# 使用示例
java_code_example = """
public class Example {
    private int count = 0;
    private String name;
    
    public void method1() {
        int x = 10;
        int y = 20;
        int result = x + y;
        System.out.println(result);
        
        String temp = "hello";
        // temp is unused after this point
    }
    
    public void method2() {
        count++;
        name = "example";
        System.out.println(name);
    }
}
"""

analyzer = DataFlowAnalyzer(java_code_example)
report = analyzer.generate_dataflow_report()
print(report)
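作为补充,下面是一个独立于上述类的最小 def-use 链示例:除了演示"定义之后的使用"这一核心匹配,还加入了上面简化实现省略的一点——变量被重新定义后,前一条链应当终结(行号数据为虚构):

```python
# 极简 def-use 链:变量 -> 定义行号列表;(变量, 使用行号)列表
defs = {'x': [2, 7], 'y': [3]}
uses = [('x', 4), ('y', 5), ('x', 9)]

chains = []
for var, def_lines in sorted(defs.items()):
    for d in def_lines:
        # 该定义只在下一次定义之前有效(kill 语义)
        later = [l for l in def_lines if l > d]
        limit = min(later) if later else float('inf')
        linked = [u for v, u in uses if v == var and d < u <= limit]
        if linked:
            chains.append((var, d, linked))

print(chains)
```

x 在第2行的定义只链接到第4行的使用;第9行的使用归属第7行的重新定义。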

6.3.3 控制流分析

import re
from collections import defaultdict, deque

class ControlFlowAnalyzer:
    """控制流分析器"""
    
    def __init__(self, java_code):
        self.java_code = java_code
        self.lines = java_code.split('\n')
        self.basic_blocks = []
        self.cfg = defaultdict(list)  # 控制流图
        self.dominators = {}
        
    def identify_basic_blocks(self):
        """识别基本块"""
        leaders = set([0])  # 第一行总是leader
        
        # 找到所有的leader(基本块起始行)
        for i, line in enumerate(self.lines):
            line = line.strip()
            
            # 分支语句的目标是leader
            if re.search(r'\b(if|while|for|switch|try|catch)\b', line):
                # 分支语句的下一行是leader
                if i + 1 < len(self.lines):
                    leaders.add(i + 1)
            
            # 跳转语句后的下一行是leader
            if re.search(r'\b(return|break|continue|throw)\b', line):
                if i + 1 < len(self.lines):
                    leaders.add(i + 1)
            
            # 标签(如case分支)是leader
            if ':' in line and '//' not in line:
                leaders.add(i)
        
        # 创建基本块
        sorted_leaders = sorted(leaders)
        
        for i in range(len(sorted_leaders)):
            start = sorted_leaders[i]
            end = sorted_leaders[i + 1] - 1 if i + 1 < len(sorted_leaders) else len(self.lines) - 1
            
            # 过滤空行和注释
            block_lines = []
            for line_no in range(start, end + 1):
                if line_no < len(self.lines):
                    line = self.lines[line_no].strip()
                    if line and not line.startswith('//'):
                        block_lines.append((line_no, line))
            
            if block_lines:
                basic_block = {
                    'id': len(self.basic_blocks),
                    'start_line': block_lines[0][0],
                    'end_line': block_lines[-1][0],
                    'lines': block_lines,
                    'successors': [],
                    'predecessors': []
                }
                self.basic_blocks.append(basic_block)
        
        return self.basic_blocks
    
    def build_control_flow_graph(self):
        """构建控制流图"""
        if not self.basic_blocks:
            self.identify_basic_blocks()
        
        for i, block in enumerate(self.basic_blocks):
            last_line = block['lines'][-1][1] if block['lines'] else ""
            
            # 分析最后一行指令确定后继
            if re.search(r'\breturn\b', last_line):
                # return语句没有后继
                pass
            elif re.search(r'\bif\s*\(', last_line):
                # if语句有两个后继:条件为真和条件为假
                # 简化处理:假设下一个块是条件为真的分支
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
                
                # 查找else分支或条件为假的分支
                # 这里需要更复杂的解析逻辑
                
            elif re.search(r'\bwhile\s*\(', last_line):
                # while循环:回到循环开始,或者退出循环
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)  # 循环体
                # 还需要添加回边和退出边
                
            elif re.search(r'\bfor\s*\(', last_line):
                # for循环类似while循环
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
                
            else:
                # 普通语句:顺序执行到下一个基本块
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
        
        return self.cfg
    
    def add_edge(self, from_block, to_block):
        """添加控制流边"""
        if to_block < len(self.basic_blocks):
            self.cfg[from_block].append(to_block)
            self.basic_blocks[from_block]['successors'].append(to_block)
            self.basic_blocks[to_block]['predecessors'].append(from_block)
    
    def compute_dominators(self):
        """计算支配关系"""
        if not self.cfg:
            self.build_control_flow_graph()
        
        n = len(self.basic_blocks)
        if n == 0:
            return self.dominators
        
        # 初始化支配集合
        for i in range(n):
            if i == 0:  # 入口节点
                self.dominators[i] = {0}
            else:
                self.dominators[i] = set(range(n))
        
        # 迭代计算直到收敛
        changed = True
        while changed:
            changed = False
            
            for i in range(1, n):  # 跳过入口节点
                # Dom(n) = {n} ∪ (∩ Dom(p) for all predecessors p of n)
                new_dom = {i}
                
                predecessors = self.basic_blocks[i]['predecessors']
                if predecessors:
                    intersection = set(range(n))
                    for pred in predecessors:
                        intersection &= self.dominators[pred]
                    new_dom |= intersection
                
                if new_dom != self.dominators[i]:
                    self.dominators[i] = new_dom
                    changed = True
        
        return self.dominators
    
    def find_loops(self):
        """查找循环"""
        if not self.dominators:
            self.compute_dominators()
        
        loops = []
        
        # 查找回边(指向支配者的边)
        for from_block in range(len(self.basic_blocks)):
            for to_block in self.cfg[from_block]:
                if to_block in self.dominators[from_block]:
                    # 找到回边,确定循环
                    loop = self.find_loop_nodes(to_block, from_block)
                    if loop:
                        loops.append({
                            'header': to_block,
                            'back_edge': (from_block, to_block),
                            'nodes': loop
                        })
        
        return loops
    
    def find_loop_nodes(self, header, back_edge_source):
        """查找循环中的所有节点"""
        loop_nodes = {header}
        worklist = deque([back_edge_source])
        
        while worklist:
            node = worklist.popleft()
            if node not in loop_nodes:
                loop_nodes.add(node)
                
                # 添加所有前驱节点
                for pred in self.basic_blocks[node]['predecessors']:
                    if pred not in loop_nodes:
                        worklist.append(pred)
        
        return loop_nodes
    
    def generate_cfg_report(self):
        """生成控制流图分析报告"""
        self.build_control_flow_graph()
        self.compute_dominators()
        loops = self.find_loops()
        
        report = "# Control Flow Analysis Report\n\n"
        
        report += f"## Summary\n\n"
        report += f"- **Basic Blocks:** {len(self.basic_blocks)}\n"
        report += f"- **Control Flow Edges:** {sum(len(successors) for successors in self.cfg.values())}\n"
        report += f"- **Loops:** {len(loops)}\n\n"
        
        # 基本块信息
        report += f"## Basic Blocks\n\n"
        for block in self.basic_blocks:
            report += f"### Block {block['id']}\n"
            report += f"- **Lines:** {block['start_line']}-{block['end_line']}\n"
            report += f"- **Successors:** {block['successors']}\n"
            report += f"- **Predecessors:** {block['predecessors']}\n"
            
            # 显示代码
            report += f"- **Code:**\n"
            for line_no, line in block['lines'][:3]:  # 只显示前3行
                report += f"  {line_no}: {line}\n"
            if len(block['lines']) > 3:
                report += f"  ... ({len(block['lines']) - 3} more lines)\n"
            report += "\n"
        
        # 支配关系
        report += f"## Dominance Relations\n\n"
        for node, dominators in self.dominators.items():
            dom_list = sorted(list(dominators))
            report += f"- **Block {node}:** dominated by {dom_list}\n"
        report += "\n"
        
        # 循环信息
        if loops:
            report += f"## Loops\n\n"
            for i, loop in enumerate(loops):
                report += f"### Loop {i + 1}\n"
                report += f"- **Header:** Block {loop['header']}\n"
                report += f"- **Back Edge:** Block {loop['back_edge'][0]} → Block {loop['back_edge'][1]}\n"
                report += f"- **Nodes:** {sorted(list(loop['nodes']))}\n\n"
        
        return report

# 使用示例
java_code_with_control_flow = """
public void complexMethod(int n) {
    int i = 0;
    int sum = 0;
    
    if (n > 0) {
        while (i < n) {
            sum += i;
            i++;
            
            if (sum > 100) {
                break;
            }
        }
    } else {
        sum = -1;
    }
    
    for (int j = 0; j < 5; j++) {
        System.out.println(j);
    }
    
    return;
}
"""

cfg_analyzer = ControlFlowAnalyzer(java_code_with_control_flow)
cfg_report = cfg_analyzer.generate_cfg_report()
print(cfg_report)
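为了直观理解 compute_dominators 的迭代公式和 find_loops 的回边判定,下面用一个手工构造的4节点CFG走一遍同样的算法(节点编号与边均为虚构示例):

```python
from collections import defaultdict

# 手工构建的CFG:0 -> 1, 1 -> 2, 1 -> 3, 2 -> 1(回边)
succ = {0: [1], 1: [2, 3], 2: [1], 3: []}
pred = defaultdict(list)
for u, vs in succ.items():
    for v in vs:
        pred[v].append(u)

n = 4
dom = {0: {0}}              # 入口节点只被自己支配
for i in range(1, n):
    dom[i] = set(range(n))  # 其余节点初始化为全集

# 迭代 Dom(n) = {n} ∪ (∩ Dom(p) for p in pred(n)) 直到收敛
changed = True
while changed:
    changed = False
    for i in range(1, n):
        new = {i}
        if pred[i]:
            new |= set.intersection(*(dom[p] for p in pred[i]))
        if new != dom[i]:
            dom[i] = new
            changed = True

# 回边判定:边 2 -> 1 的目标 1 在 dom[2] 中,因此是以 Block 1 为头的循环
print(dom)
print(1 in dom[2])
```

收敛后 dom[2] = {0, 1, 2},即 Block 2 被 0、1 和自身支配;这正是 find_loops 中"目标节点支配源节点的边是回边"这一判定的依据。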

6.4 安全漏洞检测

6.4.1 常见Java安全漏洞

import re
from pathlib import Path

class JavaSecurityAnalyzer:
    """Java安全漏洞分析器"""
    
    def __init__(self):
        self.vulnerability_patterns = {
            'sql_injection': [
                r'Statement\s+\w+\s*=.*\.createStatement\(\)',
                r'\.executeQuery\s*\(\s*["\'].*\+.*["\']',
                r'\.executeUpdate\s*\(\s*["\'].*\+.*["\']',
                r'PreparedStatement.*\+.*\)'
            ],
            'xss': [
                r'response\.getWriter\(\)\.print\w*\([^)]*request\.getParameter',
                r'out\.print\w*\([^)]*request\.getParameter',
                r'response\.getWriter\(\)\.print\w*\([^)]*\+[^)]*\)',
            ],
            'path_traversal': [
                r'new\s+File\s*\([^)]*request\.getParameter',
                r'new\s+FileInputStream\s*\([^)]*request\.getParameter',
                r'new\s+FileOutputStream\s*\([^)]*request\.getParameter',
                r'Files\.read\w*\([^)]*request\.getParameter'
            ],
            'command_injection': [
                r'Runtime\.getRuntime\(\)\.exec\s*\([^)]*request\.getParameter',
                r'ProcessBuilder\s*\([^)]*request\.getParameter',
                r'\.exec\s*\([^)]*\+[^)]*\)'
            ],
            'weak_crypto': [
                r'Cipher\.getInstance\s*\(\s*["\']DES',
                r'Cipher\.getInstance\s*\(\s*["\'][^"\']*ECB',
                r'MessageDigest\.getInstance\s*\(\s*["\']MD5["\']',
                r'MessageDigest\.getInstance\s*\(\s*["\']SHA-?1["\']'
            ],
            'hardcoded_secrets': [
                r'password\s*=\s*["\'][^"\']{8,}["\']',
                r'secret\s*=\s*["\'][^"\']{8,}["\']',
                r'api[_-]?key\s*=\s*["\'][^"\']{8,}["\']',
                r'token\s*=\s*["\'][^"\']{8,}["\']'
            ],
            'insecure_random': [
                r'new\s+Random\s*\(\s*\)',
                r'new\s+Random\s*\(\s*System\.currentTimeMillis',
                r'Math\.random\s*\(\)'
            ],
            'unsafe_reflection': [
                r'Class\.forName\s*\([^)]*request\.getParameter',
                r'\.newInstance\s*\(\s*\)',
                r'Method\.invoke\s*\([^)]*request\.getParameter'
            ]
        }
        
        self.findings = []
    
    def analyze_file(self, java_file_path):
        """分析单个Java文件"""
        file_path = Path(java_file_path)
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            
            lines = content.split('\n')
            
            for line_no, line in enumerate(lines, 1):
                self.analyze_line(file_path, line_no, line)
            
        except Exception as e:
            print(f"Error analyzing {java_file_path}: {e}")
    
    def analyze_line(self, file_path, line_no, line):
        """分析单行代码"""
        line = line.strip()
        
        if not line or line.startswith('//'):
            return
        
        for vuln_type, patterns in self.vulnerability_patterns.items():
            for pattern in patterns:
                if re.search(pattern, line, re.IGNORECASE):
                    finding = {
                        'file': str(file_path),
                        'line': line_no,
                        'vulnerability': vuln_type,
                        'pattern': pattern,
                        'code': line,
                        'severity': self.get_severity(vuln_type),
                        'description': self.get_description(vuln_type)
                    }
                    self.findings.append(finding)
    
    def get_severity(self, vuln_type):
        """获取漏洞严重程度"""
        severity_map = {
            'sql_injection': 'HIGH',
            'xss': 'HIGH',
            'path_traversal': 'HIGH',
            'command_injection': 'CRITICAL',
            'weak_crypto': 'MEDIUM',
            'hardcoded_secrets': 'HIGH',
            'insecure_random': 'MEDIUM',
            'unsafe_reflection': 'HIGH'
        }
        return severity_map.get(vuln_type, 'MEDIUM')
    
    def get_description(self, vuln_type):
        """获取漏洞描述"""
        descriptions = {
            'sql_injection': 'Potential SQL injection vulnerability',
            'xss': 'Potential Cross-Site Scripting (XSS) vulnerability',
            'path_traversal': 'Potential path traversal vulnerability',
            'command_injection': 'Potential command injection vulnerability',
            'weak_crypto': 'Use of weak cryptographic algorithm',
            'hardcoded_secrets': 'Hardcoded secrets or credentials',
            'insecure_random': 'Use of insecure random number generation',
            'unsafe_reflection': 'Unsafe use of reflection'
        }
        return descriptions.get(vuln_type, 'Security vulnerability detected')
    
    def analyze_directory(self, directory_path):
        """分析目录中的所有Java文件"""
        directory = Path(directory_path)
        java_files = list(directory.rglob('*.java'))
        
        print(f"Analyzing {len(java_files)} Java files...")
        
        for java_file in java_files:
            self.analyze_file(java_file)
        
        return self.findings
    
    def generate_security_report(self):
        """生成安全分析报告"""
        if not self.findings:
            return "No security issues found."
        
        # 按严重程度分组
        by_severity = {'CRITICAL': [], 'HIGH': [], 'MEDIUM': [], 'LOW': []}
        for finding in self.findings:
            severity = finding['severity']
            by_severity[severity].append(finding)
        
        # 按漏洞类型分组
        by_type = {}
        for finding in self.findings:
            vuln_type = finding['vulnerability']
            if vuln_type not in by_type:
                by_type[vuln_type] = []
            by_type[vuln_type].append(finding)
        
        report = "# Java Security Analysis Report\n\n"
        
        # 摘要
        report += "## Summary\n\n"
        report += f"- **Total Issues:** {len(self.findings)}\n"
        for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']:
            count = len(by_severity[severity])
            if count > 0:
                report += f"- **{severity}:** {count}\n"
        report += "\n"
        
        # 按严重程度详细列出
        for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']:
            issues = by_severity[severity]
            if issues:
                report += f"## {severity} Severity Issues\n\n"
                
                for issue in issues:
                    report += f"### {issue['description']}\n"
                    report += f"- **File:** {issue['file']}\n"
                    report += f"- **Line:** {issue['line']}\n"
                    report += f"- **Type:** {issue['vulnerability']}\n"
                    report += f"- **Code:** `{issue['code']}`\n\n"
        
        # 按类型统计
        report += "## Issues by Type\n\n"
        for vuln_type, issues in sorted(by_type.items()):
            report += f"- **{vuln_type}:** {len(issues)} issues\n"
        
        return report
    
    def export_findings_json(self, output_file):
        """导出发现的问题到JSON文件"""
        import json
        
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(self.findings, f, indent=2, ensure_ascii=False)
        
        print(f"Findings exported to: {output_file}")

# 使用示例
def analyze_java_security(directory_path):
    """分析Java代码安全性"""
    analyzer = JavaSecurityAnalyzer()
    findings = analyzer.analyze_directory(directory_path)
    
    if findings:
        report = analyzer.generate_security_report()
        print(report)
        
        # 导出结果
        analyzer.export_findings_json("security_findings.json")
    else:
        print("No security issues found.")
    
    return analyzer

# 测试用例
vulnerable_java_code = """
public class VulnerableExample {
    public void sqlInjection(String userInput) {
        String query = "SELECT * FROM users WHERE name = '" + userInput + "'";
        Statement stmt = connection.createStatement();
        ResultSet rs = stmt.executeQuery(query);
    }
    
    public void xssVulnerability(HttpServletRequest request, HttpServletResponse response) {
        String userInput = request.getParameter("input");
        response.getWriter().print("Hello " + userInput);
    }
    
    public void pathTraversal(HttpServletRequest request) {
        String filename = request.getParameter("file");
        File file = new File("/uploads/" + filename);
    }
    
    public void weakCrypto() {
        Cipher cipher = Cipher.getInstance("DES");
        MessageDigest md = MessageDigest.getInstance("MD5");
        Random random = new Random();
    }
    
    public void hardcodedSecrets() {
        String password = "admin123456";
        String apiKey = "sk-1234567890abcdef";
    }
}
"""

# 创建测试文件并分析
with open("VulnerableExample.java", "w", encoding="utf-8") as f:
    f.write(vulnerable_java_code)

analyzer = JavaSecurityAnalyzer()
analyzer.analyze_file("VulnerableExample.java")
report = analyzer.generate_security_report()
print(report)
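可以用下面的小片段单独验证 sql_injection 规则之一的匹配行为:字符串拼接的查询会命中,而把SQL放在变量里(如预编译场景)则不会(两行Java代码为虚构示例):

```python
import re

# 取自上面 vulnerability_patterns['sql_injection'] 的一条规则
pattern = r'\.executeQuery\s*\(\s*["\'].*\+.*["\']'

vulnerable = 'ResultSet rs = stmt.executeQuery("SELECT * FROM users WHERE name = \'" + userInput + "\'");'
safe = 'ResultSet rs = stmt.executeQuery(sql);'

print(bool(re.search(pattern, vulnerable)))  # 拼接字符串的查询命中
print(bool(re.search(pattern, safe)))        # 无字符串字面量拼接则不命中
```

这也说明了这类规则的局限:拼接发生在变量赋值处而非调用处时会漏报,属于典型的纯模式匹配误差。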

6.4.2 自动化安全扫描

#!/usr/bin/env python3
"""
自动化Java安全扫描工具
"""

import os
import json
import argparse
import subprocess
from pathlib import Path
from datetime import datetime

class AutomatedSecurityScanner:
    """自动化安全扫描器"""
    
    def __init__(self, target_path, output_dir):
        self.target_path = Path(target_path)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        
        self.scan_results = {
            'timestamp': datetime.now().isoformat(),
            'target': str(self.target_path),
            'tools': {},
            'summary': {}
        }
    
    def run_spotbugs(self):
        """运行SpotBugs静态分析"""
        print("Running SpotBugs...")
        
        try:
            # 查找class文件或jar文件
            class_files = list(self.target_path.rglob('*.class'))
            jar_files = list(self.target_path.rglob('*.jar'))
            
            if not class_files and not jar_files:
                print("No .class or .jar files found for SpotBugs analysis")
                return
            
            # 运行SpotBugs
            output_file = self.output_dir / "spotbugs_results.xml"
            
            if jar_files:
                target = str(jar_files[0])  # 分析第一个JAR文件
            else:
                target = str(self.target_path)
            
            cmd = [
                "spotbugs",
                "-textui",
                "-xml:withMessages",
                "-output", str(output_file),
                target
            ]
            
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            
            if result.returncode == 0:
                self.scan_results['tools']['spotbugs'] = {
                    'status': 'success',
                    'output_file': str(output_file),
                    'issues_found': self.parse_spotbugs_results(output_file)
                }
                print(f"SpotBugs completed: {output_file}")
            else:
                self.scan_results['tools']['spotbugs'] = {
                    'status': 'error',
                    'error': result.stderr
                }
                print(f"SpotBugs error: {result.stderr}")
                
        except subprocess.TimeoutExpired:
            print("SpotBugs timeout")
        except FileNotFoundError:
            print("SpotBugs not found, skipping")
    
    def run_pmd(self):
        """运行PMD静态分析"""
        print("Running PMD...")
        
        try:
            java_files = list(self.target_path.rglob('*.java'))
            
            if not java_files:
                print("No .java files found for PMD analysis")
                return
            
            output_file = self.output_dir / "pmd_results.xml"
            
            cmd = [
                "pmd",
                "-d", str(self.target_path),
                "-f", "xml",
                "-r", str(output_file),
                "-R", "category/java/security.xml"
            ]
            
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            
            self.scan_results['tools']['pmd'] = {
                'status': 'success' if result.returncode == 0 else 'completed_with_issues',
                'output_file': str(output_file),
                'issues_found': self.parse_pmd_results(output_file)
            }
            
            print(f"PMD completed: {output_file}")
            
        except subprocess.TimeoutExpired:
            print("PMD timeout")
        except FileNotFoundError:
            print("PMD not found, skipping")
    
    def run_custom_scanner(self):
        """运行自定义安全扫描器"""
        print("Running custom security scanner...")
        
        analyzer = JavaSecurityAnalyzer()
        findings = analyzer.analyze_directory(self.target_path)
        
        # 保存结果
        output_file = self.output_dir / "custom_scan_results.json"
        analyzer.export_findings_json(output_file)
        
        self.scan_results['tools']['custom_scanner'] = {
            'status': 'success',
            'output_file': str(output_file),
            'issues_found': len(findings)
        }
        
        print(f"Custom scanner completed: {len(findings)} issues found")
    
    def parse_spotbugs_results(self, xml_file):
        """解析SpotBugs XML结果"""
        try:
            import xml.etree.ElementTree as ET
            tree = ET.parse(xml_file)
            root = tree.getroot()
            
            bugs = root.findall('.//BugInstance')
            return len(bugs)
        except Exception as e:
            print(f"Error parsing SpotBugs results: {e}")
            return 0
    
    def parse_pmd_results(self, xml_file):
        """解析PMD XML结果"""
        try:
            import xml.etree.ElementTree as ET
            tree = ET.parse(xml_file)
            root = tree.getroot()
            
            violations = root.findall('.//violation')
            return len(violations)
        except Exception as e:
            print(f"Error parsing PMD results: {e}")
            return 0
    
    def run_dependency_check(self):
        """运行依赖漏洞检查"""
        print("Running OWASP Dependency Check...")
        
        try:
            output_dir = self.output_dir / "dependency-check"
            output_dir.mkdir(exist_ok=True)
            
            cmd = [
                "dependency-check",
                "--project", "Security Scan",
                "--scan", str(self.target_path),
                "--out", str(output_dir),
                "--format", "JSON"
            ]
            
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
            
            if result.returncode == 0:
                # 查找生成的JSON报告
                json_reports = list(output_dir.glob("*.json"))
                if json_reports:
                    vulnerabilities = self.parse_dependency_check_results(json_reports[0])
                    self.scan_results['tools']['dependency_check'] = {
                        'status': 'success',
                        'output_file': str(json_reports[0]),
                        'vulnerabilities_found': vulnerabilities
                    }
                    print(f"Dependency Check completed: {vulnerabilities} vulnerabilities found")
            else:
                self.scan_results['tools']['dependency_check'] = {
                    'status': 'error',
                    'error': result.stderr
                }
                
        except subprocess.TimeoutExpired:
            print("Dependency Check timeout")
        except FileNotFoundError:
            print("OWASP Dependency Check not found, skipping")
    
    def parse_dependency_check_results(self, json_file):
        """解析Dependency Check JSON结果"""
        try:
            with open(json_file, 'r') as f:
                data = json.load(f)
            
            total_vulnerabilities = 0
            if 'dependencies' in data:
                for dep in data['dependencies']:
                    if 'vulnerabilities' in dep:
                        total_vulnerabilities += len(dep['vulnerabilities'])
            
            return total_vulnerabilities
        except Exception as e:
            print(f"Error parsing Dependency Check results: {e}")
            return 0
    
    def generate_consolidated_report(self):
        """生成综合报告"""
        print("Generating consolidated report...")
        
        # 计算总体统计
        total_issues = 0
        tools_used = 0
        
        for tool_name, tool_result in self.scan_results['tools'].items():
            if tool_result['status'] in ['success', 'completed_with_issues']:
                tools_used += 1
                
                if 'issues_found' in tool_result:
                    total_issues += tool_result['issues_found']
                elif 'vulnerabilities_found' in tool_result:
                    total_issues += tool_result['vulnerabilities_found']
        
        self.scan_results['summary'] = {
            'tools_used': tools_used,
            'total_issues': total_issues,
            'scan_duration': 'N/A'  # 可以添加时间跟踪
        }
        
        # 生成Markdown报告
        report_file = self.output_dir / "security_scan_report.md"
        
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Automated Security Scan Report\n\n")
            f.write(f"**Target:** {self.scan_results['target']}\n")
            f.write(f"**Scan Date:** {self.scan_results['timestamp']}\n")
            f.write(f"**Tools Used:** {self.scan_results['summary']['tools_used']}\n")
            f.write(f"**Total Issues:** {self.scan_results['summary']['total_issues']}\n\n")
            
            f.write("## Tool Results\n\n")
            
            for tool_name, tool_result in self.scan_results['tools'].items():
                f.write(f"### {tool_name.upper()}\n")
                f.write(f"- **Status:** {tool_result['status']}\n")
                
                if 'output_file' in tool_result:
                    f.write(f"- **Output File:** {tool_result['output_file']}\n")
                
                if 'issues_found' in tool_result:
                    f.write(f"- **Issues Found:** {tool_result['issues_found']}\n")
                elif 'vulnerabilities_found' in tool_result:
                    f.write(f"- **Vulnerabilities Found:** {tool_result['vulnerabilities_found']}\n")
                
                if 'error' in tool_result:
                    f.write(f"- **Error:** {tool_result['error']}\n")
                
                f.write("\n")
        
        # Save raw results as JSON
        json_file = self.output_dir / "scan_results.json"
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(self.scan_results, f, indent=2, ensure_ascii=False)
        
        print(f"Consolidated report generated: {report_file}")
        print(f"JSON results saved: {json_file}")
        
        return report_file
    
    def run_full_scan(self, tools=None):
        """Run the full security scan with the selected tools"""
        if tools is None:
            tools = ['custom', 'spotbugs', 'pmd', 'dependency-check']
        
        print(f"Starting automated security scan of: {self.target_path}")
        print(f"Output directory: {self.output_dir}")
        
        # Run each selected scanning tool
        if 'custom' in tools:
            self.run_custom_scanner()
        if 'spotbugs' in tools:
            self.run_spotbugs()
        if 'pmd' in tools:
            self.run_pmd()
        if 'dependency-check' in tools:
            self.run_dependency_check()
        
        # Generate the consolidated report
        report_file = self.generate_consolidated_report()
        
        print(f"\nScan completed! Report available at: {report_file}")
        
        return self.scan_results

def main():
    parser = argparse.ArgumentParser(description='Automated Java Security Scanner')
    parser.add_argument('target', help='Target directory or file to scan')
    parser.add_argument('-o', '--output', required=True, help='Output directory for results')
    parser.add_argument('--tools', nargs='+',
                       choices=['custom', 'spotbugs', 'pmd', 'dependency-check'],
                       default=['custom', 'spotbugs', 'pmd', 'dependency-check'],
                       help='Tools to run')
    
    args = parser.parse_args()
    
    if not Path(args.target).exists():
        print(f"Error: Target path not found: {args.target}")
        return 1
    
    scanner = AutomatedSecurityScanner(args.target, args.output)
    # Pass the selected tools through instead of silently ignoring --tools
    results = scanner.run_full_scan(tools=args.tools)
    
    # Print a short summary
    print("\n=== Scan Summary ===")
    print(f"Tools used: {results['summary']['tools_used']}")
    print(f"Total issues: {results['summary']['total_issues']}")
    
    return 0

if __name__ == "__main__":
    raise SystemExit(main())
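The `parse_dependency_check_results` helper above boils an OWASP Dependency Check JSON report down to a single vulnerability count. The core logic can be sketched standalone; the sample report below is invented for illustration and real reports contain many more fields:

```python
import json

def count_vulnerabilities(report: dict) -> int:
    """Sum the vulnerabilities reported across all scanned dependencies."""
    total = 0
    for dep in report.get('dependencies', []):
        total += len(dep.get('vulnerabilities', []))
    return total

# Invented sample mimicking the Dependency Check report shape
sample = json.loads("""
{
  "dependencies": [
    {"fileName": "commons-collections-3.2.1.jar",
     "vulnerabilities": [{"name": "CVE-2015-7501"}, {"name": "CVE-2015-6420"}]},
    {"fileName": "clean-lib-1.0.jar"}
  ]
}
""")

print(count_vulnerabilities(sample))  # 2
```

Using `.get()` with default empty lists makes the counter tolerant of dependencies that have no `vulnerabilities` key at all, which is the common case in a real report.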

6.5 Chapter Summary

This chapter covered Java-layer static analysis in depth:

  1. Java bytecode fundamentals: understanding the Java and DEX bytecode formats
  2. Code decompilation: using the major decompilation tools effectively
  3. Static code analysis: AST analysis, data-flow analysis, and control-flow analysis
  4. Security vulnerability detection: recognizing common Java vulnerabilities and automating their discovery

Java-layer static analysis is a core skill in Android reverse engineering: with these techniques you can understand an app's business logic in depth, uncover security vulnerabilities, and analyze malicious behavior.

Learning Checklist

  • Understand the differences between Java bytecode and the DEX format
  • Use decompilers such as Jadx proficiently
  • Apply AST analysis techniques
  • Understand the principles of data-flow and control-flow analysis
  • Recognize common Java security vulnerabilities
  • Automate security scanning
  • Write custom static analysis tools
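For the last checklist item, a custom static analysis tool can start as simply as pattern matching over source text, which is essentially what the custom scanner in this chapter does. The sketch below is a minimal illustration; the rule set and Java snippet are invented examples, not an exhaustive vulnerability catalog:

```python
import re

# Illustrative rules: regex pattern -> finding description
RULES = {
    r'MessageDigest\.getInstance\(\s*"MD5"\s*\)': 'Weak hash algorithm: MD5',
    r'new\s+Random\s*\(': 'java.util.Random is not cryptographically secure',
    r'"DES(?:/|")': 'Weak cipher: DES',
}

def scan_source(source: str):
    """Return (line_number, description) findings for each rule match."""
    findings = []
    for lineno, line in enumerate(source.splitlines(), start=1):
        for pattern, desc in RULES.items():
            if re.search(pattern, line):
                findings.append((lineno, desc))
    return findings

java_snippet = '''MessageDigest md = MessageDigest.getInstance("MD5");
Random r = new Random();'''

for lineno, desc in scan_source(java_snippet):
    print(f"line {lineno}: {desc}")
```

Regex rules are fast to write but blind to context (comments, string literals, aliased imports); the AST-based techniques from earlier in this chapter are the natural next step once pattern matching produces too many false positives.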

Next Chapter Preview
In the next chapter we will study Native-layer analysis, including the structure of SO libraries, ARM assembly analysis, and techniques for reversing native code.
