第6章:Java层静态分析
6.1 Java字节码基础
6.1.1 Java字节码概述
Java字节码是Java源代码编译后的中间表示形式,运行在Java虚拟机(JVM)上。理解字节码结构对于逆向分析至关重要。
字节码文件结构:
// ClassFile structure (as defined by the JVM specification, §4.1)
ClassFile {
    u4 magic;                                     // magic number, always 0xCAFEBABE
    u2 minor_version;                             // minor version
    u2 major_version;                             // major version
    u2 constant_pool_count;                       // constant-pool entry count (1-based)
    cp_info constant_pool[constant_pool_count-1]; // constant pool
    u2 access_flags;                              // class access flags
    u2 this_class;                                // constant-pool index of this class
    u2 super_class;                               // constant-pool index of the superclass
    u2 interfaces_count;                          // number of direct interfaces
    u2 interfaces[interfaces_count];              // interface index table
    u2 fields_count;                              // number of fields
    field_info fields[fields_count];              // field table
    u2 methods_count;                             // number of methods
    method_info methods[methods_count];           // method table
    u2 attributes_count;                          // number of attributes
    attribute_info attributes[attributes_count];  // attribute table
}
6.1.2 字节码指令集
常用字节码指令分类:
class BytecodeInstructions:
    """Categorised reference tables of common JVM bytecode instructions.

    Pure constant data: each class attribute maps an instruction mnemonic to
    a human-readable description (descriptions are in Chinese, kept verbatim
    from the source material).
    """

    # Load/store instructions: move values between the local-variable table
    # and the operand stack.
    LOAD_STORE = {
        'aload': '从局部变量表加载引用类型值到栈顶',
        'iload': '从局部变量表加载int类型值到栈顶',
        'lload': '从局部变量表加载long类型值到栈顶',
        'fload': '从局部变量表加载float类型值到栈顶',
        'dload': '从局部变量表加载double类型值到栈顶',
        'astore': '将栈顶引用类型值存入局部变量表',
        'istore': '将栈顶int类型值存入局部变量表',
        'lstore': '将栈顶long类型值存入局部变量表',
        'fstore': '将栈顶float类型值存入局部变量表',
        'dstore': '将栈顶double类型值存入局部变量表'
    }

    # Arithmetic and bitwise instructions (int variants shown).
    ARITHMETIC = {
        'iadd': '执行int类型加法',
        'isub': '执行int类型减法',
        'imul': '执行int类型乘法',
        'idiv': '执行int类型除法',
        'irem': '执行int类型取余',
        'ineg': '执行int类型取负',
        'ishl': '执行int类型左移',
        'ishr': '执行int类型右移',
        'iushr': '执行int类型无符号右移',
        'iand': '执行int类型按位与',
        'ior': '执行int类型按位或',
        'ixor': '执行int类型按位异或'
    }

    # Primitive type-conversion instructions.
    TYPE_CONVERSION = {
        'i2l': 'int转long',
        'i2f': 'int转float',
        'i2d': 'int转double',
        'l2i': 'long转int',
        'f2i': 'float转int',
        'd2i': 'double转int',
        'i2b': 'int转byte',
        'i2c': 'int转char',
        'i2s': 'int转short'
    }

    # Object/array creation and field access instructions.
    OBJECT_MANIPULATION = {
        'new': '创建对象实例',
        'newarray': '创建基本类型数组',
        'anewarray': '创建引用类型数组',
        'arraylength': '获取数组长度',
        'getfield': '获取对象字段值',
        'putfield': '设置对象字段值',
        'getstatic': '获取静态字段值',
        'putstatic': '设置静态字段值',
        'baload': '从byte数组加载值',
        'bastore': '向byte数组存储值'
    }

    # Operand-stack management instructions.
    STACK_MANAGEMENT = {
        'pop': '弹出栈顶一个字长的数据',
        'pop2': '弹出栈顶两个字长的数据',
        'dup': '复制栈顶一个字长数据并压入栈顶',
        'dup2': '复制栈顶两个字长数据并压入栈顶',
        'swap': '交换栈顶两个字长数据'
    }

    # Control-transfer (conditional/unconditional branch) instructions.
    CONTROL_TRANSFER = {
        'ifeq': '当栈顶int类型数值等于0时跳转',
        'ifne': '当栈顶int类型数值不等于0时跳转',
        'iflt': '当栈顶int类型数值小于0时跳转',
        'ifge': '当栈顶int类型数值大于等于0时跳转',
        'ifgt': '当栈顶int类型数值大于0时跳转',
        'ifle': '当栈顶int类型数值小于等于0时跳转',
        'if_icmpeq': '比较栈顶两int类型数值,相等时跳转',
        'if_icmpne': '比较栈顶两int类型数值,不相等时跳转',
        'goto': '无条件跳转',
        'jsr': '跳转至指定16位offset位置,并将jsr下一条指令地址压入栈顶',
        'ret': '返回至本地变量指定的index的指令位置'
    }

    # Method invocation and return instructions.
    METHOD_INVOCATION = {
        'invokevirtual': '调用实例方法',
        'invokespecial': '调用超类构造方法、实例初始化方法、私有方法',
        'invokestatic': '调用静态方法',
        'invokeinterface': '调用接口方法',
        'invokedynamic': '调用动态方法',
        'ireturn': '从当前方法返回int',
        'lreturn': '从当前方法返回long',
        'freturn': '从当前方法返回float',
        'dreturn': '从当前方法返回double',
        'areturn': '从当前方法返回对象引用',
        'return': '从当前方法返回void'
    }
# Worked example: a Java method side by side with its compiled bytecode.
def analyze_bytecode_example():
    """Print a small Java method and its bytecode, then return the listing.

    Returns:
        str: the bytecode listing (useful for further processing).
    """
    source_snippet = """
public int add(int a, int b) {
int result = a + b;
return result;
}
"""
    bytecode_listing = """
0: iload_1 // 加载参数a到栈顶
1: iload_2 // 加载参数b到栈顶
2: iadd // 执行加法运算
3: istore_3 // 将结果存储到局部变量3(result)
4: iload_3 // 加载result到栈顶
5: ireturn // 返回int值
"""
    print("Java源码:")
    print(source_snippet)
    print("\n对应字节码:")
    print(bytecode_listing)
    return bytecode_listing
6.1.3 DEX字节码格式
Android使用DEX(Dalvik Executable)格式,这是对Java字节码的优化版本:
import struct
class DEXAnalyzer:
    """Parser for the header and ID tables of a DEX (Dalvik Executable) file.

    Parsing is lazy and cumulative: every ``parse_*`` method first runs the
    parses it depends on, so callers may simply construct the analyzer with
    the raw file bytes and call :meth:`get_analysis_summary`.
    """

    def __init__(self, dex_data):
        """Store the raw DEX bytes; no parsing happens here.

        Args:
            dex_data: complete file contents as ``bytes``.
        """
        self.data = dex_data
        self.header = None    # dict produced by parse_header()
        self.strings = []     # decoded string pool
        self.types = []       # type descriptors (resolved through strings)
        self.methods = []     # method_id records
        self.classes = []     # class_def records

    def parse_header(self):
        """Parse and validate the fixed 112-byte DEX header.

        Returns:
            dict: the header fields, also cached on ``self.header``.

        Raises:
            ValueError: if the file is shorter than a header or the magic
                number is not ``dex\\n``.
        """
        if len(self.data) < 112:
            raise ValueError("Invalid DEX file: header too short")
        # Header layout per the DEX format spec: 8-byte magic, u4 checksum,
        # 20-byte SHA-1 signature, then 20 little-endian u4 fields.
        # Bug fix: the previous format string declared the wrong field
        # sizes/count (it did not add up to 112 bytes, so struct.unpack
        # always raised) and the field indices below were misaligned
        # (e.g. 'signature' sliced integers out of the tuple).
        header_data = struct.unpack('<8sI20s20I', self.data[:112])
        self.header = {
            'magic': header_data[0],              # b'dex\n' + version + NUL
            'checksum': header_data[1],           # adler32 of the rest of the file
            'signature': header_data[2],          # SHA-1 of the rest of the file
            'file_size': header_data[3],
            'header_size': header_data[4],        # always 112 (0x70)
            'endian_tag': header_data[5],         # 0x12345678 for little-endian
            'link_size': header_data[6],
            'link_off': header_data[7],
            'map_off': header_data[8],
            'string_ids_size': header_data[9],
            'string_ids_off': header_data[10],
            'type_ids_size': header_data[11],
            'type_ids_off': header_data[12],
            'proto_ids_size': header_data[13],
            'proto_ids_off': header_data[14],
            'field_ids_size': header_data[15],
            'field_ids_off': header_data[16],
            'method_ids_size': header_data[17],
            'method_ids_off': header_data[18],
            'class_defs_size': header_data[19],
            'class_defs_off': header_data[20],
            'data_size': header_data[21],
            'data_off': header_data[22]
        }
        # Every valid DEX file starts with the ASCII magic 'dex\n'.
        if self.header['magic'][:4] != b'dex\n':
            raise ValueError("Invalid DEX magic number")
        return self.header

    def parse_string_ids(self):
        """Resolve the string_ids table into ``self.strings``."""
        if not self.header:
            self.parse_header()
        string_ids_off = self.header['string_ids_off']
        string_ids_size = self.header['string_ids_size']
        # The string_ids section is an array of u4 file offsets, one per
        # string, each pointing at a string_data_item.
        for i in range(string_ids_size):
            offset_pos = string_ids_off + i * 4
            string_data_off = struct.unpack('<I', self.data[offset_pos:offset_pos + 4])[0]
            self.strings.append(self.read_uleb128_string(string_data_off))
        return self.strings

    def read_uleb128_string(self, offset):
        """Read a string_data_item: ULEB128 length followed by the bytes.

        NOTE(review): DEX strings are MUTF-8 and the stored length is in
        UTF-16 code units; slicing by it and decoding as UTF-8 is an
        approximation that holds for ASCII/BMP content. Undecodable
        sequences are replaced rather than raised.
        """
        length, new_offset = self.read_uleb128(offset)
        string_data = self.data[new_offset:new_offset + length]
        # errors='replace' is identical to a strict decode for valid input
        # and degrades gracefully for MUTF-8 oddities.
        return string_data.decode('utf-8', errors='replace')

    def read_uleb128(self, offset):
        """Decode an unsigned LEB128 integer at *offset*.

        Returns:
            tuple: (value, offset of the first byte after the value).
        """
        result = 0
        shift = 0
        current_offset = offset
        while True:
            byte = self.data[current_offset]
            current_offset += 1
            result |= (byte & 0x7F) << shift
            if (byte & 0x80) == 0:  # high bit clear -> last byte
                break
            shift += 7
        return result, current_offset

    def parse_type_ids(self):
        """Resolve the type_ids table (string-pool indices) into descriptors."""
        if not self.strings:
            self.parse_string_ids()
        type_ids_off = self.header['type_ids_off']
        type_ids_size = self.header['type_ids_size']
        for i in range(type_ids_size):
            offset_pos = type_ids_off + i * 4
            descriptor_idx = struct.unpack('<I', self.data[offset_pos:offset_pos + 4])[0]
            if descriptor_idx < len(self.strings):
                self.types.append(self.strings[descriptor_idx])
            else:
                # Out-of-range index: keep a placeholder so that positions
                # in self.types stay aligned with type_ids indices.
                self.types.append(f"INVALID_TYPE_{descriptor_idx}")
        return self.types

    def parse_method_ids(self):
        """Parse the method_ids table (8 bytes per entry)."""
        if not self.types:
            self.parse_type_ids()
        method_ids_off = self.header['method_ids_off']
        method_ids_size = self.header['method_ids_size']
        for i in range(method_ids_size):
            offset_pos = method_ids_off + i * 8
            # method_id_item: u2 class_idx, u2 proto_idx, u4 name_idx.
            class_idx, proto_idx, name_idx = struct.unpack(
                '<HHI', self.data[offset_pos:offset_pos + 8])
            self.methods.append({
                'class_idx': class_idx,
                'proto_idx': proto_idx,
                'name_idx': name_idx,
                'class_name': self.types[class_idx] if class_idx < len(self.types) else f"INVALID_CLASS_{class_idx}",
                'method_name': self.strings[name_idx] if name_idx < len(self.strings) else f"INVALID_NAME_{name_idx}"
            })
        return self.methods

    def parse_class_defs(self):
        """Parse the class_defs table (8 u4 fields = 32 bytes per entry)."""
        if not self.methods:
            self.parse_method_ids()
        class_defs_off = self.header['class_defs_off']
        class_defs_size = self.header['class_defs_size']
        for i in range(class_defs_size):
            offset_pos = class_defs_off + i * 32
            class_data = struct.unpack('<8I', self.data[offset_pos:offset_pos + 32])
            class_info = {
                'class_idx': class_data[0],
                'access_flags': class_data[1],
                'superclass_idx': class_data[2],
                'interfaces_off': class_data[3],
                'source_file_idx': class_data[4],
                'annotations_off': class_data[5],
                'class_data_off': class_data[6],
                'static_values_off': class_data[7]
            }
            # Resolve the class name through the type table when possible.
            if class_info['class_idx'] < len(self.types):
                class_info['class_name'] = self.types[class_info['class_idx']]
            # 0xFFFFFFFF (NO_INDEX) marks a class with no superclass.
            if class_info['superclass_idx'] != 0xFFFFFFFF and class_info['superclass_idx'] < len(self.types):
                class_info['superclass_name'] = self.types[class_info['superclass_idx']]
            self.classes.append(class_info)
        return self.classes

    def get_analysis_summary(self):
        """Parse everything (if not done yet) and return aggregate stats."""
        if not self.classes:
            self.parse_class_defs()
        return {
            'file_size': len(self.data),
            'header_info': self.header,
            'strings_count': len(self.strings),
            'types_count': len(self.types),
            'methods_count': len(self.methods),
            'classes_count': len(self.classes),
            'top_strings': self.strings[:10] if self.strings else [],
            'top_types': self.types[:10] if self.types else [],
            'class_names': [cls.get('class_name', 'Unknown') for cls in self.classes[:10]]
        }
# Usage example: analyze a DEX file from disk and print a short summary.
def analyze_dex_file(dex_path):
    """Run DEXAnalyzer over *dex_path*, print a summary, return the analyzer."""
    with open(dex_path, 'rb') as fh:
        analyzer = DEXAnalyzer(fh.read())
    summary = analyzer.get_analysis_summary()

    print("=== DEX File Analysis ===")
    print(f"File size: {summary['file_size']:,} bytes")
    print(f"Strings: {summary['strings_count']}")
    print(f"Types: {summary['types_count']}")
    print(f"Methods: {summary['methods_count']}")
    print(f"Classes: {summary['classes_count']}")

    print("\nTop 10 strings:")
    for idx, text in enumerate(summary['top_strings']):
        print(f"  {idx}: {text}")

    print("\nTop 10 classes:")
    for idx, cls_name in enumerate(summary['class_names']):
        print(f"  {idx}: {cls_name}")

    return analyzer
6.2 Java代码反编译
6.2.1 反编译工具对比
主流Java反编译工具:
| 工具 | 类型 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|---|
| JD-GUI | GUI工具 | 界面友好,支持多种格式 | 对复杂代码支持有限 | 快速浏览代码 |
| JD-Core | 命令行 | 轻量级,集成方便 | 功能相对简单 | 自动化处理 |
| CFR | 命令行 | 支持现代Java特性 | 学习曲线陡峭 | 复杂代码分析 |
| Procyon | 命令行 | 支持Lambda表达式 | 性能较慢 | 现代Java代码 |
| Fernflower | 命令行 | IntelliJ内置引擎 | 配置复杂 | 专业开发 |
| Jadx | GUI+命令行 | 专门针对Android | 仅支持Android | Android逆向 |
6.2.2 使用Jadx进行Android反编译
Jadx安装和基本使用:
# Download Jadx (pinned release)
wget https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip
unzip jadx-1.4.7.zip
cd jadx-1.4.7
# GUI mode
./bin/jadx-gui
# Command-line mode: decompile input.apk into output_dir
./bin/jadx -d output_dir input.apk
# Show all advanced options
./bin/jadx --help
Jadx高级参数:
# Basic decompilation
jadx -d output_dir app.apk
# Skip resource decompilation
jadx -d output_dir --no-res app.apk
# Skip source decompilation
jadx -d output_dir --no-src app.apk
# Verbose output
jadx -d output_dir -v app.apk
# Set the number of worker threads
jadx -d output_dir -j 4 app.apk
# Continue past decompilation errors
jadx -d output_dir --skip-errors app.apk
# Select the output format
jadx -d output_dir --output-format java app.apk
# Enable deobfuscation
jadx -d output_dir --deobf app.apk
# Keep methods that failed to decompile (as commented bad code)
jadx -d output_dir --show-bad-code app.apk
6.2.3 自定义反编译脚本
#!/usr/bin/env python3
"""
自定义Android反编译脚本
"""
import os
import subprocess
import zipfile
import tempfile
import shutil
from pathlib import Path
import argparse
class AndroidDecompiler:
    """Pipeline that decompiles an APK with several external tools.

    Each stage (APKTool, Jadx, dex2jar, `strings`) is best-effort: a missing
    tool or a timeout is reported and skipped, never fatal, so partial
    results are still produced.
    """

    def __init__(self, apk_path, output_dir):
        self.apk_path = Path(apk_path)      # input APK file
        self.output_dir = Path(output_dir)  # destination for all artifacts
        self.temp_dir = None                # scratch dir, created per run

    def decompile_full(self):
        """Run the whole pipeline, always cleaning up the temp directory."""
        print(f"Starting decompilation of {self.apk_path}")
        # Create the output directory (idempotent, parents included).
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Scratch space for intermediate files.
        self.temp_dir = Path(tempfile.mkdtemp())
        try:
            # 1. Decompile resources with APKTool.
            self.decompile_with_apktool()
            # 2. Decompile Java sources with Jadx.
            self.decompile_with_jadx()
            # 3. Convert DEX code to a JAR with dex2jar.
            self.convert_with_dex2jar()
            # 4. Dump printable strings from the APK.
            self.extract_strings()
            # 5. Write a Markdown summary of what was produced.
            self.generate_report()
            print(f"Decompilation completed: {self.output_dir}")
        finally:
            # Remove the scratch directory even if a stage failed.
            if self.temp_dir and self.temp_dir.exists():
                shutil.rmtree(self.temp_dir)

    def decompile_with_apktool(self):
        """Decompile resources/smali with APKTool (best-effort)."""
        print("Running APKTool...")
        apktool_output = self.output_dir / "apktool"
        cmd = [
            "apktool", "d", str(self.apk_path),
            "-o", str(apktool_output),
            "-f"  # force overwrite of an existing output directory
        ]
        try:
            # 5-minute budget; APKTool can be slow on large APKs.
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                print("APKTool decompilation successful")
            else:
                print(f"APKTool error: {result.stderr}")
        except subprocess.TimeoutExpired:
            print("APKTool timeout")
        except FileNotFoundError:
            # Tool not installed: skip this stage.
            print("APKTool not found, skipping resource decompilation")

    def decompile_with_jadx(self):
        """Decompile Java sources with Jadx (best-effort)."""
        print("Running Jadx...")
        jadx_output = self.output_dir / "jadx"
        cmd = [
            "jadx",
            "-d", str(jadx_output),
            "-j", "4",  # four worker threads
            "--skip-errors",
            str(self.apk_path)
        ]
        try:
            # 10-minute budget; Jadx is the slowest stage.
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
            if result.returncode == 0:
                print("Jadx decompilation successful")
            else:
                print(f"Jadx error: {result.stderr}")
        except subprocess.TimeoutExpired:
            print("Jadx timeout")
        except FileNotFoundError:
            print("Jadx not found, skipping Java decompilation")

    def convert_with_dex2jar(self):
        """Convert the APK's DEX code to a JAR with dex2jar (best-effort)."""
        print("Running dex2jar...")
        jar_output = self.output_dir / f"{self.apk_path.stem}.jar"
        cmd = [
            "d2j-dex2jar",
            str(self.apk_path),
            "-o", str(jar_output)
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                print(f"dex2jar conversion successful: {jar_output}")
                # Follow up by decompiling the produced JAR.
                self.decompile_jar_with_jd_core(jar_output)
            else:
                print(f"dex2jar error: {result.stderr}")
        except subprocess.TimeoutExpired:
            print("dex2jar timeout")
        except FileNotFoundError:
            print("dex2jar not found, skipping JAR conversion")

    def decompile_jar_with_jd_core(self, jar_path):
        """Extract a JAR's contents as a stand-in for JD-Core decompilation.

        NOTE(review): this does NOT decompile; a JD-Core CLI or Java API
        call would be needed for actual source recovery. Currently the JAR
        (class files) is simply unzipped.
        """
        print("Running JD-Core...")
        jd_output = self.output_dir / "jd-core"
        jd_output.mkdir(exist_ok=True)
        try:
            # Simplified implementation: unpack the JAR as a ZIP archive.
            with zipfile.ZipFile(jar_path, 'r') as jar:
                jar.extractall(jd_output)
            print(f"JAR extracted to: {jd_output}")
        except Exception as e:
            print(f"JAR extraction error: {e}")

    def extract_strings(self):
        """Dump printable strings from the APK via the `strings` tool."""
        print("Extracting strings...")
        strings_file = self.output_dir / "strings.txt"
        cmd = ["strings", str(self.apk_path)]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                with open(strings_file, 'w', encoding='utf-8') as f:
                    f.write(result.stdout)
                print(f"Strings extracted to: {strings_file}")
            else:
                print("Failed to extract strings")
        except FileNotFoundError:
            # `strings` is a binutils tool; absent on some systems (Windows).
            print("strings command not found")

    def generate_report(self):
        """Write a Markdown summary of every artifact that was produced."""
        print("Generating analysis report...")
        report_file = self.output_dir / "analysis_report.md"
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write(f"# Decompilation Report\n\n")
            f.write(f"**APK File:** {self.apk_path.name}\n")
            f.write(f"**Output Directory:** {self.output_dir}\n")
            # NOTE(review): shells out to `date`; not portable to Windows —
            # datetime.now() would be the portable choice.
            f.write(f"**Decompilation Date:** {subprocess.run(['date'], capture_output=True, text=True).stdout.strip()}\n\n")
            # Per-stage result summary.
            f.write("## Decompilation Results\n\n")
            # APKTool output.
            apktool_dir = self.output_dir / "apktool"
            if apktool_dir.exists():
                f.write(f"- **APKTool Output:** {apktool_dir}\n")
                f.write(f"  - Files: {len(list(apktool_dir.rglob('*')))}\n")
            # Jadx output.
            jadx_dir = self.output_dir / "jadx"
            if jadx_dir.exists():
                f.write(f"- **Jadx Output:** {jadx_dir}\n")
                java_files = list(jadx_dir.rglob('*.java'))
                f.write(f"  - Java files: {len(java_files)}\n")
            # dex2jar output.
            jar_file = self.output_dir / f"{self.apk_path.stem}.jar"
            if jar_file.exists():
                f.write(f"- **JAR File:** {jar_file}\n")
                f.write(f"  - Size: {jar_file.stat().st_size:,} bytes\n")
            # Extracted strings.
            strings_file = self.output_dir / "strings.txt"
            if strings_file.exists():
                f.write(f"- **Strings File:** {strings_file}\n")
                with open(strings_file, 'r', encoding='utf-8') as sf:
                    lines = sf.readlines()
                f.write(f"  - Lines: {len(lines)}\n")
        print(f"Analysis report generated: {report_file}")
def main():
    """CLI entry point: decompile an APK with the selected tool subset."""
    parser = argparse.ArgumentParser(description='Android APK Decompiler')
    parser.add_argument('apk', help='Path to APK file')
    parser.add_argument('-o', '--output', required=True, help='Output directory')
    parser.add_argument('--apktool-only', action='store_true', help='Only run APKTool')
    parser.add_argument('--jadx-only', action='store_true', help='Only run Jadx')
    args = parser.parse_args()

    # Guard clause: refuse to start on a missing input file.
    if not Path(args.apk).exists():
        print(f"Error: APK file not found: {args.apk}")
        return 1

    decompiler = AndroidDecompiler(args.apk, args.output)
    # Select the requested stage (full pipeline by default) and run it.
    stage = (decompiler.decompile_with_apktool if args.apktool_only
             else decompiler.decompile_with_jadx if args.jadx_only
             else decompiler.decompile_full)
    stage()
    return 0


if __name__ == "__main__":
    exit(main())
6.3 静态代码分析技术
6.3.1 抽象语法树(AST)分析
import ast
import javalang
from pathlib import Path
class JavaASTAnalyzer:
    """AST-based structural analysis of a single Java source file.

    Relies on the third-party ``javalang`` parser. All ``analyze_*``
    methods cache their output in ``self.analysis_results``.
    """

    def __init__(self, java_file_path):
        self.java_file = Path(java_file_path)  # path to the .java file
        self.tree = None                       # javalang CompilationUnit
        self.analysis_results = {}             # cache for analyze_* results

    def parse_java_file(self):
        """Read and parse the file; return the AST, or None on failure."""
        try:
            with open(self.java_file, 'r', encoding='utf-8') as f:
                java_code = f.read()
            # javalang builds a full compilation-unit AST from source text.
            self.tree = javalang.parse.parse(java_code)
            return self.tree
        except Exception as e:
            # Broad catch: javalang raises several parser/tokenizer errors.
            print(f"Failed to parse Java file: {e}")
            return None

    def analyze_class_structure(self):
        """Collect all classes and interfaces with their members.

        Returns:
            dict with 'classes' and 'interfaces' lists; also cached on
            ``self.analysis_results``.
        """
        if not self.tree:
            self.parse_java_file()
        classes = []
        interfaces = []
        for path, node in self.tree.filter(javalang.tree.ClassDeclaration):
            class_info = {
                'name': node.name,
                'modifiers': node.modifiers,
                'extends': node.extends.name if node.extends else None,
                'implements': [impl.name for impl in node.implements] if node.implements else [],
                'fields': [],
                'methods': [],
                'constructors': []
            }
            # Fields: one declaration may declare several variables
            # ("int a, b;"), hence the inner declarator loop.
            for field_path, field_node in node.filter(javalang.tree.FieldDeclaration):
                for declarator in field_node.declarators:
                    field_info = {
                        'name': declarator.name,
                        'type': field_node.type.name,
                        'modifiers': field_node.modifiers
                    }
                    class_info['fields'].append(field_info)
            # Methods with their parameter names and (raw) types.
            for method_path, method_node in node.filter(javalang.tree.MethodDeclaration):
                method_info = {
                    'name': method_node.name,
                    # A None return_type means the method returns void.
                    'return_type': method_node.return_type.name if method_node.return_type else 'void',
                    'modifiers': method_node.modifiers,
                    'parameters': []
                }
                if method_node.parameters:
                    for param in method_node.parameters:
                        param_info = {
                            'name': param.name,
                            'type': param.type.name
                        }
                        method_info['parameters'].append(param_info)
                class_info['methods'].append(method_info)
            # Constructors, same shape as methods but without return type.
            for ctor_path, ctor_node in node.filter(javalang.tree.ConstructorDeclaration):
                ctor_info = {
                    'name': ctor_node.name,
                    'modifiers': ctor_node.modifiers,
                    'parameters': []
                }
                if ctor_node.parameters:
                    for param in ctor_node.parameters:
                        param_info = {
                            'name': param.name,
                            'type': param.type.name
                        }
                        ctor_info['parameters'].append(param_info)
                class_info['constructors'].append(ctor_info)
            classes.append(class_info)
        # Interfaces (may extend multiple parents, unlike classes).
        for path, node in self.tree.filter(javalang.tree.InterfaceDeclaration):
            interface_info = {
                'name': node.name,
                'modifiers': node.modifiers,
                'extends': [ext.name for ext in node.extends] if node.extends else [],
                'methods': []
            }
            for method_path, method_node in node.filter(javalang.tree.MethodDeclaration):
                method_info = {
                    'name': method_node.name,
                    'return_type': method_node.return_type.name if method_node.return_type else 'void',
                    'parameters': []
                }
                if method_node.parameters:
                    for param in method_node.parameters:
                        param_info = {
                            'name': param.name,
                            'type': param.type.name
                        }
                        method_info['parameters'].append(param_info)
                interface_info['methods'].append(method_info)
            interfaces.append(interface_info)
        self.analysis_results['classes'] = classes
        self.analysis_results['interfaces'] = interfaces
        return {'classes': classes, 'interfaces': interfaces}

    def analyze_method_calls(self):
        """Collect every method invocation (callee name, qualifier, arity)."""
        if not self.tree:
            self.parse_java_file()
        method_calls = []
        for path, node in self.tree.filter(javalang.tree.MethodInvocation):
            call_info = {
                'method_name': node.member,
                # Qualifier is the receiver expression text, if any.
                'qualifier': str(node.qualifier) if node.qualifier else None,
                'arguments': len(node.arguments) if node.arguments else 0
            }
            method_calls.append(call_info)
        self.analysis_results['method_calls'] = method_calls
        return method_calls

    def analyze_imports(self):
        """Collect import statements (path, static flag, wildcard flag)."""
        if not self.tree:
            self.parse_java_file()
        imports = []
        if self.tree.imports:
            for import_node in self.tree.imports:
                import_info = {
                    'path': import_node.path,
                    'static': import_node.static,
                    'wildcard': import_node.wildcard
                }
                imports.append(import_info)
        self.analysis_results['imports'] = imports
        return imports

    def analyze_string_literals(self):
        """Collect string literal values (quotes stripped)."""
        if not self.tree:
            self.parse_java_file()
        string_literals = []
        for path, node in self.tree.filter(javalang.tree.Literal):
            # javalang keeps the surrounding quotes in Literal.value, so a
            # leading '"' identifies string (vs numeric/char) literals.
            if isinstance(node.value, str) and node.value.startswith('"'):
                string_literals.append(node.value[1:-1])  # strip the quotes
        self.analysis_results['string_literals'] = string_literals
        return string_literals

    def generate_analysis_report(self):
        """Run all analyses (if needed) and format them as Markdown."""
        if not self.analysis_results:
            self.analyze_class_structure()
            self.analyze_method_calls()
            self.analyze_imports()
            self.analyze_string_literals()
        report = f"# Java Code Analysis Report\n\n"
        report += f"**File:** {self.java_file.name}\n\n"
        # Headline counts.
        classes = self.analysis_results.get('classes', [])
        interfaces = self.analysis_results.get('interfaces', [])
        report += f"## Structure Overview\n\n"
        report += f"- **Classes:** {len(classes)}\n"
        report += f"- **Interfaces:** {len(interfaces)}\n"
        report += f"- **Imports:** {len(self.analysis_results.get('imports', []))}\n"
        report += f"- **Method Calls:** {len(self.analysis_results.get('method_calls', []))}\n"
        report += f"- **String Literals:** {len(self.analysis_results.get('string_literals', []))}\n\n"
        # Per-class details.
        if classes:
            report += f"## Classes\n\n"
            for cls in classes:
                report += f"### {cls['name']}\n"
                if cls['extends']:
                    report += f"- **Extends:** {cls['extends']}\n"
                if cls['implements']:
                    report += f"- **Implements:** {', '.join(cls['implements'])}\n"
                report += f"- **Fields:** {len(cls['fields'])}\n"
                report += f"- **Methods:** {len(cls['methods'])}\n"
                report += f"- **Constructors:** {len(cls['constructors'])}\n\n"
        # Ten most frequent callees.
        method_calls = self.analysis_results.get('method_calls', [])
        if method_calls:
            report += f"## Method Calls\n\n"
            call_counts = {}
            for call in method_calls:
                method_name = call['method_name']
                call_counts[method_name] = call_counts.get(method_name, 0) + 1
            sorted_calls = sorted(call_counts.items(), key=lambda x: x[1], reverse=True)
            for method_name, count in sorted_calls[:10]:
                report += f"- **{method_name}:** {count} calls\n"
            report += "\n"
        return report
# Usage example: batch analysis of every Java file under a directory tree.
def analyze_java_directory(directory_path):
    """Analyse all .java files under *directory_path* and print totals.

    Returns:
        dict with per-tree counts and frequency tables for imports and
        method calls.
    """
    java_files = list(Path(directory_path).rglob('*.java'))
    print(f"Found {len(java_files)} Java files")

    totals = {
        'total_files': len(java_files),
        'total_classes': 0,
        'total_methods': 0,
        'common_imports': {},
        'common_method_calls': {}
    }

    for source_file in java_files:
        print(f"Analyzing: {source_file}")
        try:
            analyzer = JavaASTAnalyzer(source_file)
            analyzer.analyze_class_structure()
            analyzer.analyze_method_calls()
            analyzer.analyze_imports()

            results = analyzer.analysis_results
            found_classes = results.get('classes', [])
            totals['total_classes'] += len(found_classes)
            totals['total_methods'] += sum(len(c['methods']) for c in found_classes)

            # Frequency table of import paths across the tree.
            for imp in results.get('imports', []):
                key = imp['path']
                totals['common_imports'][key] = totals['common_imports'].get(key, 0) + 1

            # Frequency table of invoked method names.
            for call in results.get('method_calls', []):
                key = call['method_name']
                totals['common_method_calls'][key] = totals['common_method_calls'].get(key, 0) + 1
        except Exception as e:
            # Keep going: a single unparsable file must not abort the batch.
            print(f"Error analyzing {source_file}: {e}")

    print("\n=== Analysis Summary ===")
    print(f"Total files: {totals['total_files']}")
    print(f"Total classes: {totals['total_classes']}")
    print(f"Total methods: {totals['total_methods']}")

    print("\nTop 10 imports:")
    for key, count in sorted(totals['common_imports'].items(),
                             key=lambda kv: kv[1], reverse=True)[:10]:
        print(f"  {key}: {count}")

    print("\nTop 10 method calls:")
    for key, count in sorted(totals['common_method_calls'].items(),
                             key=lambda kv: kv[1], reverse=True)[:10]:
        print(f"  {key}: {count}")

    return totals
6.3.2 数据流分析
class DataFlowAnalyzer:
    """Very lightweight, line-based data-flow analysis for Java-like source.

    The analysis is purely textual (no parsing): it records assignments,
    later textual occurrences of known variables, def-use chains, and
    variables that are never read. Accuracy is deliberately approximate.
    """

    def __init__(self, java_code):
        self.java_code = java_code
        self.variables = {}    # variable name -> list of definition line numbers
        self.assignments = []  # assignment records (line/variable/value)
        self.uses = []         # use records (line/variable/context)

    def analyze_variable_flow(self):
        """Scan the source line by line, recording assignments and uses."""
        for line_no, raw in enumerate(self.java_code.split('\n'), 1):
            stripped = raw.strip()
            # A non-comment line containing '=' is treated as an assignment.
            if '=' in stripped and not stripped.startswith('//'):
                self.analyze_assignment(stripped, line_no)
            # Every line is additionally checked for uses of known variables.
            self.analyze_variable_usage(stripped, line_no)

    def analyze_assignment(self, line, line_no):
        """Record a (heuristically detected) assignment on *line*."""
        if '=' not in line:
            return
        lhs, _, rhs = line.partition('=')
        # The last token left of '=' is taken as the variable name; this
        # handles both "int x = ..." and "x = ...".
        var_name = lhs.strip().split()[-1]
        self.assignments.append({
            'line': line_no,
            'variable': var_name,
            'value': rhs.strip().rstrip(';'),
            'type': 'assignment'
        })
        # Remember where the variable was (re)defined.
        self.variables.setdefault(var_name, []).append(line_no)

    def analyze_variable_usage(self, line, line_no):
        """Record textual occurrences of already-known variables on *line*."""
        if line.startswith('//'):
            return
        for var_name in self.variables:
            # A line that (textually) re-assigns the variable does not
            # count as a use.
            if (var_name in line
                    and f'{var_name} =' not in line
                    and f'{var_name}=' not in line):
                self.uses.append({
                    'line': line_no,
                    'variable': var_name,
                    'context': line,
                    'type': 'use'
                })

    def find_def_use_chains(self):
        """Return def-use chains: each definition paired with later uses."""
        chains = []
        for var_name, def_lines in self.variables.items():
            use_lines = [u['line'] for u in self.uses if u['variable'] == var_name]
            for def_line in def_lines:
                later = [ln for ln in use_lines if ln > def_line]
                if later:
                    chains.append({
                        'variable': var_name,
                        'definition': def_line,
                        'uses': later
                    })
        return chains

    def detect_unused_variables(self):
        """Return (name, definition line) pairs for variables never used."""
        used_names = {u['variable'] for u in self.uses}
        return [
            (var_name, line)
            for var_name, def_lines in self.variables.items()
            if var_name not in used_names
            for line in def_lines
        ]

    def generate_dataflow_report(self):
        """Run the analysis and render the findings as Markdown."""
        self.analyze_variable_flow()
        chains = self.find_def_use_chains()
        unused = self.detect_unused_variables()

        parts = [
            "# Data Flow Analysis Report\n\n",
            "## Summary\n\n",
            f"- **Variables:** {len(self.variables)}\n",
            f"- **Assignments:** {len(self.assignments)}\n",
            f"- **Uses:** {len(self.uses)}\n",
            f"- **Def-Use Chains:** {len(chains)}\n",
            f"- **Unused Variables:** {len(unused)}\n\n",
        ]
        if unused:
            parts.append("## Unused Variables\n\n")
            for var_name, line in unused:
                parts.append(f"- **{var_name}** (line {line})\n")
            parts.append("\n")
        if chains:
            parts.append("## Def-Use Chains\n\n")
            for chain in chains[:10]:  # cap the report at ten chains
                parts.append(f"### {chain['variable']}\n")
                parts.append(f"- **Definition:** line {chain['definition']}\n")
                parts.append(f"- **Uses:** lines {', '.join(map(str, chain['uses']))}\n\n")
        return "".join(parts)
# Usage example: run the simplified data-flow analysis on a small class
# (note the deliberately unused local `temp` in method1).
java_code_example = """
public class Example {
private int count = 0;
private String name;
public void method1() {
int x = 10;
int y = 20;
int result = x + y;
System.out.println(result);
String temp = "hello";
// temp is unused after this point
}
public void method2() {
count++;
name = "example";
System.out.println(name);
}
}
"""
analyzer = DataFlowAnalyzer(java_code_example)
report = analyzer.generate_dataflow_report()
print(report)
6.3.3 控制流分析
import re
from collections import defaultdict, deque
class ControlFlowAnalyzer:
    """Heuristic, line-based control-flow analysis for Java-like source.

    Basic blocks are found with regex leader heuristics (no parsing), the
    CFG is deliberately simplified (branch false-edges and loop back edges
    are only partially modelled), dominators use the classic iterative
    data-flow algorithm, and loops are detected from back edges.
    """

    def __init__(self, java_code):
        self.java_code = java_code
        self.lines = java_code.split('\n')
        self.basic_blocks = []        # list of block dicts (see identify_basic_blocks)
        self.cfg = defaultdict(list)  # block id -> list of successor ids
        self.dominators = {}          # block id -> set of dominating block ids

    def identify_basic_blocks(self):
        """Split the source into basic blocks using leader heuristics."""
        leaders = set([0])  # the first line is always a leader
        # Collect all leader lines (where a basic block starts).
        for i, line in enumerate(self.lines):
            line = line.strip()
            # The line after a branching construct starts a new block.
            if re.search(r'\b(if|while|for|switch|try|catch)\b', line):
                if i + 1 < len(self.lines):
                    leaders.add(i + 1)
            # The line after a jump/exit statement starts a new block.
            if re.search(r'\b(return|break|continue|throw)\b', line):
                if i + 1 < len(self.lines):
                    leaders.add(i + 1)
            # A labelled line starts a block.
            # NOTE(review): a bare ':' also matches ternaries and
            # enhanced-for headers — acceptable for this rough analysis.
            if ':' in line and not '//' in line:
                leaders.add(i)
        # Materialise blocks as the line ranges between successive leaders.
        sorted_leaders = sorted(leaders)
        for i in range(len(sorted_leaders)):
            start = sorted_leaders[i]
            end = sorted_leaders[i + 1] - 1 if i + 1 < len(sorted_leaders) else len(self.lines) - 1
            # Keep only non-empty, non-comment lines inside the block.
            block_lines = []
            for line_no in range(start, end + 1):
                if line_no < len(self.lines):
                    line = self.lines[line_no].strip()
                    if line and not line.startswith('//'):
                        block_lines.append((line_no, line))
            if block_lines:
                basic_block = {
                    'id': len(self.basic_blocks),
                    'start_line': block_lines[0][0],
                    'end_line': block_lines[-1][0],
                    'lines': block_lines,
                    'successors': [],
                    'predecessors': []
                }
                self.basic_blocks.append(basic_block)
        return self.basic_blocks

    def build_control_flow_graph(self):
        """Connect basic blocks based on each block's last statement.

        NOTE(review): this is a simplification — else-branches, loop back
        edges, and loop exit edges are acknowledged in comments but not
        actually added.
        """
        if not self.basic_blocks:
            self.identify_basic_blocks()
        for i, block in enumerate(self.basic_blocks):
            last_line = block['lines'][-1][1] if block['lines'] else ""
            # Decide successors from the final statement of the block.
            if re.search(r'\breturn\b', last_line):
                # A return has no successors.
                pass
            elif re.search(r'\bif\s*\(', last_line):
                # An if has two successors (true/false branch).
                # Simplification: only the fall-through (true) edge is added.
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
                # The else / false branch would require real parsing.
            elif re.search(r'\bwhile\s*\(', last_line):
                # while loop: body edge only; the back edge and the exit
                # edge are not modelled.
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
            elif re.search(r'\bfor\s*\(', last_line):
                # for loop: treated like while (body edge only).
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
            else:
                # Ordinary statement: falls through to the next block.
                if i + 1 < len(self.basic_blocks):
                    self.add_edge(i, i + 1)
        return self.cfg

    def add_edge(self, from_block, to_block):
        """Add a CFG edge and keep successor/predecessor lists in sync."""
        if to_block < len(self.basic_blocks):
            self.cfg[from_block].append(to_block)
            self.basic_blocks[from_block]['successors'].append(to_block)
            self.basic_blocks[to_block]['predecessors'].append(from_block)

    def compute_dominators(self):
        """Compute dominator sets with the iterative fixed-point algorithm."""
        if not self.cfg:
            self.build_control_flow_graph()
        n = len(self.basic_blocks)
        if n == 0:
            return self.dominators
        # Initialise: the entry dominates only itself; every other node
        # starts with the full node set.
        for i in range(n):
            if i == 0:  # entry node
                self.dominators[i] = {0}
            else:
                self.dominators[i] = set(range(n))
        # Iterate to a fixed point.
        changed = True
        while changed:
            changed = False
            for i in range(1, n):  # skip the entry node
                # Dom(n) = {n} ∪ (∩ Dom(p) for all predecessors p of n)
                new_dom = {i}
                predecessors = self.basic_blocks[i]['predecessors']
                if predecessors:
                    intersection = set(range(n))
                    for pred in predecessors:
                        intersection &= self.dominators[pred]
                    new_dom |= intersection
                if new_dom != self.dominators[i]:
                    self.dominators[i] = new_dom
                    changed = True
        return self.dominators

    def find_loops(self):
        """Detect natural loops via back edges (edges to a dominator)."""
        if not self.dominators:
            self.compute_dominators()
        loops = []
        # A back edge is an edge whose target dominates its source.
        for from_block in range(len(self.basic_blocks)):
            for to_block in self.cfg[from_block]:
                if to_block in self.dominators[from_block]:
                    # Back edge found: collect the loop body.
                    loop = self.find_loop_nodes(to_block, from_block)
                    if loop:
                        loops.append({
                            'header': to_block,
                            'back_edge': (from_block, to_block),
                            'nodes': loop
                        })
        return loops

    def find_loop_nodes(self, header, back_edge_source):
        """Collect the natural loop of a back edge.

        Walks predecessors backwards from the back-edge source until the
        header is reached; returns the set of block ids in the loop.
        """
        loop_nodes = {header}
        worklist = deque([back_edge_source])
        while worklist:
            node = worklist.popleft()
            if node not in loop_nodes:
                loop_nodes.add(node)
                # Everything that can reach this node (without passing the
                # header) belongs to the loop.
                for pred in self.basic_blocks[node]['predecessors']:
                    if pred not in loop_nodes:
                        worklist.append(pred)
        return loop_nodes

    def generate_cfg_report(self):
        """Run the full analysis and render the results as Markdown."""
        self.build_control_flow_graph()
        self.compute_dominators()
        loops = self.find_loops()
        report = "# Control Flow Analysis Report\n\n"
        report += f"## Summary\n\n"
        report += f"- **Basic Blocks:** {len(self.basic_blocks)}\n"
        report += f"- **Control Flow Edges:** {sum(len(successors) for successors in self.cfg.values())}\n"
        report += f"- **Loops:** {len(loops)}\n\n"
        # Per-block details.
        report += f"## Basic Blocks\n\n"
        for block in self.basic_blocks:
            report += f"### Block {block['id']}\n"
            report += f"- **Lines:** {block['start_line']}-{block['end_line']}\n"
            report += f"- **Successors:** {block['successors']}\n"
            report += f"- **Predecessors:** {block['predecessors']}\n"
            # Show at most the first three source lines of the block.
            report += f"- **Code:**\n"
            for line_no, line in block['lines'][:3]:
                report += f" {line_no}: {line}\n"
            if len(block['lines']) > 3:
                report += f" ... ({len(block['lines']) - 3} more lines)\n"
            report += "\n"
        # Dominator sets.
        report += f"## Dominance Relations\n\n"
        for node, dominators in self.dominators.items():
            dom_list = sorted(list(dominators))
            report += f"- **Block {node}:** dominated by {dom_list}\n"
        report += "\n"
        # Detected loops.
        if loops:
            report += f"## Loops\n\n"
            for i, loop in enumerate(loops):
                report += f"### Loop {i + 1}\n"
                report += f"- **Header:** Block {loop['header']}\n"
                report += f"- **Back Edge:** Block {loop['back_edge'][0]} → Block {loop['back_edge'][1]}\n"
                report += f"- **Nodes:** {sorted(list(loop['nodes']))}\n\n"
        return report
# Usage example: build and report the control-flow graph of a method that
# mixes if/while/for with an early break.
java_code_with_control_flow = """
public void complexMethod(int n) {
int i = 0;
int sum = 0;
if (n > 0) {
while (i < n) {
sum += i;
i++;
if (sum > 100) {
break;
}
}
} else {
sum = -1;
}
for (int j = 0; j < 5; j++) {
System.out.println(j);
}
return;
}
"""
cfg_analyzer = ControlFlowAnalyzer(java_code_with_control_flow)
cfg_report = cfg_analyzer.generate_cfg_report()
print(cfg_report)
6.4 安全漏洞检测
6.4.1 常见Java安全漏洞
import re
from pathlib import Path
class JavaSecurityAnalyzer:
    """Regex-based static security scanner for Java source files.

    Each vulnerability category maps to a list of regular expressions that
    are matched case-insensitively against individual source lines; every
    hit is recorded as a finding dict in ``self.findings``.  This is a
    heuristic, line-oriented scanner — it has no Java parser, so it can
    produce both false positives and false negatives.
    """

    def __init__(self):
        # Raw pattern strings grouped by vulnerability category.  Kept as
        # plain strings because each finding reports the matching pattern
        # text verbatim.
        self.vulnerability_patterns = {
            'sql_injection': [
                r'Statement\s+\w+\s*=.*\.createStatement\(\)',
                r'\.executeQuery\s*\(\s*["\'].*\+.*["\']',
                r'\.executeUpdate\s*\(\s*["\'].*\+.*["\']',
                r'PreparedStatement.*\+.*\)'
            ],
            'xss': [
                r'response\.getWriter\(\)\.print\w*\([^)]*request\.getParameter',
                r'out\.print\w*\([^)]*request\.getParameter',
                r'response\.getWriter\(\)\.print\w*\([^)]*\+[^)]*\)',
            ],
            'path_traversal': [
                r'new\s+File\s*\([^)]*request\.getParameter',
                r'new\s+FileInputStream\s*\([^)]*request\.getParameter',
                r'new\s+FileOutputStream\s*\([^)]*request\.getParameter',
                r'Files\.read\w*\([^)]*request\.getParameter'
            ],
            'command_injection': [
                r'Runtime\.getRuntime\(\)\.exec\s*\([^)]*request\.getParameter',
                r'ProcessBuilder\s*\([^)]*request\.getParameter',
                r'\.exec\s*\([^)]*\+[^)]*\)'
            ],
            'weak_crypto': [
                r'Cipher\.getInstance\s*\(\s*["\']DES["\']',
                r'Cipher\.getInstance\s*\(\s*["\']MD5["\']',
                r'MessageDigest\.getInstance\s*\(\s*["\']MD5["\']',
                r'new\s+Random\s*\(\)',
                r'Math\.random\s*\(\)'
            ],
            'hardcoded_secrets': [
                r'password\s*=\s*["\'][^"\']{8,}["\']',
                r'secret\s*=\s*["\'][^"\']{8,}["\']',
                r'api[_-]?key\s*=\s*["\'][^"\']{8,}["\']',
                r'token\s*=\s*["\'][^"\']{8,}["\']'
            ],
            'insecure_random': [
                r'new\s+Random\s*\(\s*\)',
                r'Math\.random\s*\(\)',
                r'System\.currentTimeMillis\s*\(\)'
            ],
            'unsafe_reflection': [
                r'Class\.forName\s*\([^)]*request\.getParameter',
                r'\.newInstance\s*\(\s*\)',
                r'Method\.invoke\s*\([^)]*request\.getParameter'
            ]
        }
        # Pre-compile every pattern once: they are applied to every line of
        # every file, so the per-line re.search(pattern_string, ...) cache
        # lookup is hoisted out of the hot loop.  Each entry pairs the
        # compiled object with its raw text for reporting.
        self._compiled_patterns = {
            vuln_type: [(re.compile(p, re.IGNORECASE), p) for p in patterns]
            for vuln_type, patterns in self.vulnerability_patterns.items()
        }
        # Accumulated finding dicts across all analyzed files.
        self.findings = []

    def analyze_file(self, java_file_path):
        """Scan a single Java file line by line, appending to ``findings``.

        Errors (missing file, bad encoding) are reported on stdout and
        swallowed so a directory sweep keeps going — deliberate best-effort
        behavior.
        """
        file_path = Path(java_file_path)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            lines = content.split('\n')
            for line_no, line in enumerate(lines, 1):
                self.analyze_line(file_path, line_no, line)
        except Exception as e:
            print(f"Error analyzing {java_file_path}: {e}")

    def analyze_line(self, file_path, line_no, line):
        """Match one source line against every pattern; record all hits.

        Args:
            file_path: path shown in the finding (stringified).
            line_no: 1-based line number.
            line: raw source line; stripped before matching.
        """
        line = line.strip()
        # Skip blank lines and // line comments.  Block comments (/* */)
        # are NOT skipped — a limitation of line-based scanning.
        if not line or line.startswith('//'):
            return
        for vuln_type, patterns in self._compiled_patterns.items():
            for regex, pattern in patterns:
                if regex.search(line):
                    self.findings.append({
                        'file': str(file_path),
                        'line': line_no,
                        'vulnerability': vuln_type,
                        'pattern': pattern,
                        'code': line,
                        'severity': self.get_severity(vuln_type),
                        'description': self.get_description(vuln_type)
                    })

    def get_severity(self, vuln_type):
        """Return the severity label for a category ('MEDIUM' if unknown)."""
        severity_map = {
            'sql_injection': 'HIGH',
            'xss': 'HIGH',
            'path_traversal': 'HIGH',
            'command_injection': 'CRITICAL',
            'weak_crypto': 'MEDIUM',
            'hardcoded_secrets': 'HIGH',
            'insecure_random': 'MEDIUM',
            'unsafe_reflection': 'HIGH'
        }
        return severity_map.get(vuln_type, 'MEDIUM')

    def get_description(self, vuln_type):
        """Return a human-readable description for a vulnerability category."""
        descriptions = {
            'sql_injection': 'Potential SQL injection vulnerability',
            'xss': 'Potential Cross-Site Scripting (XSS) vulnerability',
            'path_traversal': 'Potential path traversal vulnerability',
            'command_injection': 'Potential command injection vulnerability',
            'weak_crypto': 'Use of weak cryptographic algorithm',
            'hardcoded_secrets': 'Hardcoded secrets or credentials',
            'insecure_random': 'Use of insecure random number generation',
            'unsafe_reflection': 'Unsafe use of reflection'
        }
        return descriptions.get(vuln_type, 'Security vulnerability detected')

    def analyze_directory(self, directory_path):
        """Recursively scan every ``*.java`` file under a directory.

        Returns:
            The accumulated list of finding dicts (same object as
            ``self.findings``).
        """
        directory = Path(directory_path)
        java_files = list(directory.rglob('*.java'))
        print(f"Analyzing {len(java_files)} Java files...")
        for java_file in java_files:
            self.analyze_file(java_file)
        return self.findings

    def generate_security_report(self):
        """Render the accumulated findings as a Markdown report.

        Returns:
            A Markdown string, or a short message when nothing was found.
        """
        if not self.findings:
            return "No security issues found."
        # Group by severity (fixed ordering) and by vulnerability type.
        by_severity = {'CRITICAL': [], 'HIGH': [], 'MEDIUM': [], 'LOW': []}
        for finding in self.findings:
            by_severity[finding['severity']].append(finding)
        by_type = {}
        for finding in self.findings:
            by_type.setdefault(finding['vulnerability'], []).append(finding)
        report = "# Java Security Analysis Report\n\n"
        # Summary: total count plus per-severity counts (non-zero only).
        report += "## Summary\n\n"
        report += f"- **Total Issues:** {len(self.findings)}\n"
        for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']:
            count = len(by_severity[severity])
            if count > 0:
                report += f"- **{severity}:** {count}\n"
        report += "\n"
        # Detailed listing, most severe first.
        for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']:
            issues = by_severity[severity]
            if issues:
                report += f"## {severity} Severity Issues\n\n"
                for issue in issues:
                    report += f"### {issue['description']}\n"
                    report += f"- **File:** {issue['file']}\n"
                    report += f"- **Line:** {issue['line']}\n"
                    report += f"- **Type:** {issue['vulnerability']}\n"
                    report += f"- **Code:** `{issue['code']}`\n\n"
        # Per-type tally.
        report += "## Issues by Type\n\n"
        for vuln_type, issues in sorted(by_type.items()):
            report += f"- **{vuln_type}:** {len(issues)} issues\n"
        return report

    def export_findings_json(self, output_file):
        """Write all findings to ``output_file`` as pretty-printed JSON."""
        import json
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(self.findings, f, indent=2, ensure_ascii=False)
        print(f"Findings exported to: {output_file}")
# Usage example
def analyze_java_security(directory_path):
    """Scan a directory tree of Java sources and report the results.

    Prints the Markdown report and exports a JSON dump when issues are
    found; otherwise prints a short notice.

    Returns:
        The JavaSecurityAnalyzer instance used for the scan.
    """
    scanner = JavaSecurityAnalyzer()
    issues = scanner.analyze_directory(directory_path)
    if not issues:
        print("No security issues found.")
        return scanner
    print(scanner.generate_security_report())
    # Persist machine-readable findings next to the report.
    scanner.export_findings_json("security_findings.json")
    return scanner
# Test fixture: intentionally vulnerable Java source covering several of the
# detector's pattern categories (SQL injection, XSS, path traversal, weak
# crypto, hardcoded secrets).
vulnerable_java_code = """
public class VulnerableExample {
public void sqlInjection(String userInput) {
String query = "SELECT * FROM users WHERE name = '" + userInput + "'";
Statement stmt = connection.createStatement();
ResultSet rs = stmt.executeQuery(query);
}
public void xssVulnerability(HttpServletRequest request, HttpServletResponse response) {
String userInput = request.getParameter("input");
response.getWriter().print("Hello " + userInput);
}
public void pathTraversal(HttpServletRequest request) {
String filename = request.getParameter("file");
File file = new File("/uploads/" + filename);
}
public void weakCrypto() {
Cipher cipher = Cipher.getInstance("DES");
MessageDigest md = MessageDigest.getInstance("MD5");
Random random = new Random();
}
public void hardcodedSecrets() {
String password = "admin123456";
String apiKey = "sk-1234567890abcdef";
}
}
"""
# Write the fixture to disk, run the analyzer on that single file, and print
# the resulting Markdown report.
# NOTE(review): the file is written without an explicit encoding and is not
# removed afterwards — confirm this is acceptable for a demo script.
with open("VulnerableExample.java", "w") as f:
    f.write(vulnerable_java_code)
analyzer = JavaSecurityAnalyzer()
analyzer.analyze_file("VulnerableExample.java")
report = analyzer.generate_security_report()
print(report)
6.4.2 自动化安全扫描
#!/usr/bin/env python3
"""
自动化Java安全扫描工具
"""
import os
import json
import argparse
import subprocess
from pathlib import Path
from datetime import datetime
class AutomatedSecurityScanner:
    """Orchestrates several Java security tools over one target tree.

    Runs the custom regex scanner plus (when installed) SpotBugs, PMD and
    OWASP Dependency Check, collects per-tool outcomes in
    ``self.scan_results`` and renders a consolidated Markdown/JSON report.
    External tools are invoked via ``subprocess``; a missing binary or a
    timeout is recorded in the results and skipped, never treated as fatal.
    """

    def __init__(self, target_path, output_dir):
        """Prepare the output directory and an empty result skeleton.

        Args:
            target_path: directory (or file) to scan.
            output_dir: directory for tool outputs/reports; created if missing.
        """
        self.target_path = Path(target_path)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.scan_results = {
            'timestamp': datetime.now().isoformat(),
            'target': str(self.target_path),
            'tools': {},      # per-tool status/output entries
            'summary': {}     # filled by generate_consolidated_report()
        }

    def run_spotbugs(self):
        """Run SpotBugs over compiled artifacts (.class/.jar), if any exist."""
        print("Running SpotBugs...")
        try:
            # SpotBugs analyzes bytecode, so look for compiled artifacts.
            class_files = list(self.target_path.rglob('*.class'))
            jar_files = list(self.target_path.rglob('*.jar'))
            if not class_files and not jar_files:
                print("No .class or .jar files found for SpotBugs analysis")
                return
            output_file = self.output_dir / "spotbugs_results.xml"
            if jar_files:
                # Prefer a JAR; the first one found is assumed to be the
                # main artifact.
                target = str(jar_files[0])
            else:
                target = str(self.target_path)
            cmd = [
                "spotbugs",
                "-textui",
                "-xml:withMessages",
                "-output", str(output_file),
                target
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                self.scan_results['tools']['spotbugs'] = {
                    'status': 'success',
                    'output_file': str(output_file),
                    'issues_found': self.parse_spotbugs_results(output_file)
                }
                print(f"SpotBugs completed: {output_file}")
            else:
                self.scan_results['tools']['spotbugs'] = {
                    'status': 'error',
                    'error': result.stderr
                }
                print(f"SpotBugs error: {result.stderr}")
        except subprocess.TimeoutExpired:
            # Record the outcome so it appears in the consolidated report
            # instead of silently disappearing.
            self.scan_results['tools']['spotbugs'] = {'status': 'timeout'}
            print("SpotBugs timeout")
        except FileNotFoundError:
            self.scan_results['tools']['spotbugs'] = {'status': 'not_installed'}
            print("SpotBugs not found, skipping")

    def run_pmd(self):
        """Run PMD's security ruleset over the Java sources in the target."""
        print("Running PMD...")
        try:
            java_files = list(self.target_path.rglob('*.java'))
            if not java_files:
                print("No .java files found for PMD analysis")
                return
            output_file = self.output_dir / "pmd_results.xml"
            cmd = [
                "pmd",
                "-d", str(self.target_path),
                "-f", "xml",
                "-r", str(output_file),
                "-R", "category/java/security.xml"
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            # PMD exits non-zero when violations are found, so a non-zero
            # return code is not an error here.
            self.scan_results['tools']['pmd'] = {
                'status': 'success' if result.returncode == 0 else 'completed_with_issues',
                'output_file': str(output_file),
                'issues_found': self.parse_pmd_results(output_file)
            }
            print(f"PMD completed: {output_file}")
        except subprocess.TimeoutExpired:
            self.scan_results['tools']['pmd'] = {'status': 'timeout'}
            print("PMD timeout")
        except FileNotFoundError:
            self.scan_results['tools']['pmd'] = {'status': 'not_installed'}
            print("PMD not found, skipping")

    def run_custom_scanner(self):
        """Run the in-process JavaSecurityAnalyzer over the target tree."""
        print("Running custom security scanner...")
        # JavaSecurityAnalyzer is defined earlier in this module.
        analyzer = JavaSecurityAnalyzer()
        findings = analyzer.analyze_directory(self.target_path)
        output_file = self.output_dir / "custom_scan_results.json"
        analyzer.export_findings_json(output_file)
        self.scan_results['tools']['custom_scanner'] = {
            'status': 'success',
            'output_file': str(output_file),
            'issues_found': len(findings)
        }
        print(f"Custom scanner completed: {len(findings)} issues found")

    def parse_spotbugs_results(self, xml_file):
        """Count BugInstance elements in a SpotBugs XML report (0 on error)."""
        try:
            import xml.etree.ElementTree as ET
            tree = ET.parse(xml_file)
            root = tree.getroot()
            bugs = root.findall('.//BugInstance')
            return len(bugs)
        except Exception as e:
            print(f"Error parsing SpotBugs results: {e}")
            return 0

    def parse_pmd_results(self, xml_file):
        """Count violation elements in a PMD XML report (0 on error)."""
        try:
            import xml.etree.ElementTree as ET
            tree = ET.parse(xml_file)
            root = tree.getroot()
            violations = root.findall('.//violation')
            return len(violations)
        except Exception as e:
            print(f"Error parsing PMD results: {e}")
            return 0

    def run_dependency_check(self):
        """Run OWASP Dependency Check and count reported vulnerabilities."""
        print("Running OWASP Dependency Check...")
        try:
            output_dir = self.output_dir / "dependency-check"
            output_dir.mkdir(exist_ok=True)
            cmd = [
                "dependency-check",
                "--project", "Security Scan",
                "--scan", str(self.target_path),
                "--out", str(output_dir),
                "--format", "JSON"
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
            if result.returncode == 0:
                # The tool names its own report file; take the first JSON.
                json_reports = list(output_dir.glob("*.json"))
                if json_reports:
                    vulnerabilities = self.parse_dependency_check_results(json_reports[0])
                    self.scan_results['tools']['dependency_check'] = {
                        'status': 'success',
                        'output_file': str(json_reports[0]),
                        'vulnerabilities_found': vulnerabilities
                    }
                    print(f"Dependency Check completed: {vulnerabilities} vulnerabilities found")
            else:
                self.scan_results['tools']['dependency_check'] = {
                    'status': 'error',
                    'error': result.stderr
                }
        except subprocess.TimeoutExpired:
            self.scan_results['tools']['dependency_check'] = {'status': 'timeout'}
            print("Dependency Check timeout")
        except FileNotFoundError:
            self.scan_results['tools']['dependency_check'] = {'status': 'not_installed'}
            print("OWASP Dependency Check not found, skipping")

    def parse_dependency_check_results(self, json_file):
        """Sum vulnerability counts across dependencies in the JSON report.

        Returns 0 when the file is missing, unreadable, or lacks the
        expected structure.
        """
        try:
            with open(json_file, 'r') as f:
                data = json.load(f)
            total_vulnerabilities = 0
            if 'dependencies' in data:
                for dep in data['dependencies']:
                    if 'vulnerabilities' in dep:
                        total_vulnerabilities += len(dep['vulnerabilities'])
            return total_vulnerabilities
        except Exception as e:
            print(f"Error parsing Dependency Check results: {e}")
            return 0

    def generate_consolidated_report(self):
        """Aggregate per-tool results into Markdown and JSON reports.

        Returns:
            Path of the generated Markdown report file.
        """
        print("Generating consolidated report...")
        # Only tools that actually completed contribute to the totals.
        total_issues = 0
        tools_used = 0
        for tool_name, tool_result in self.scan_results['tools'].items():
            if tool_result['status'] in ['success', 'completed_with_issues']:
                tools_used += 1
                if 'issues_found' in tool_result:
                    total_issues += tool_result['issues_found']
                elif 'vulnerabilities_found' in tool_result:
                    total_issues += tool_result['vulnerabilities_found']
        self.scan_results['summary'] = {
            'tools_used': tools_used,
            'total_issues': total_issues,
            'scan_duration': 'N/A'  # placeholder; timing not tracked yet
        }
        # Markdown report.
        report_file = self.output_dir / "security_scan_report.md"
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Automated Security Scan Report\n\n")
            f.write(f"**Target:** {self.scan_results['target']}\n")
            f.write(f"**Scan Date:** {self.scan_results['timestamp']}\n")
            f.write(f"**Tools Used:** {self.scan_results['summary']['tools_used']}\n")
            f.write(f"**Total Issues:** {self.scan_results['summary']['total_issues']}\n\n")
            f.write("## Tool Results\n\n")
            for tool_name, tool_result in self.scan_results['tools'].items():
                f.write(f"### {tool_name.upper()}\n")
                f.write(f"- **Status:** {tool_result['status']}\n")
                if 'output_file' in tool_result:
                    f.write(f"- **Output File:** {tool_result['output_file']}\n")
                if 'issues_found' in tool_result:
                    f.write(f"- **Issues Found:** {tool_result['issues_found']}\n")
                elif 'vulnerabilities_found' in tool_result:
                    f.write(f"- **Vulnerabilities Found:** {tool_result['vulnerabilities_found']}\n")
                if 'error' in tool_result:
                    f.write(f"- **Error:** {tool_result['error']}\n")
                f.write("\n")
        # Machine-readable copy of everything.
        json_file = self.output_dir / "scan_results.json"
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(self.scan_results, f, indent=2, ensure_ascii=False)
        print(f"Consolidated report generated: {report_file}")
        print(f"JSON results saved: {json_file}")
        return report_file

    def run_full_scan(self, tools=None):
        """Run the selected scanners and produce the consolidated report.

        Args:
            tools: optional iterable drawn from {'custom', 'spotbugs',
                'pmd', 'dependency-check'}.  Defaults to all of them,
                preserving the original behavior.

        Returns:
            The populated ``self.scan_results`` dict.
        """
        print(f"Starting automated security scan of: {self.target_path}")
        print(f"Output directory: {self.output_dir}")
        selected = set(tools) if tools is not None else {
            'custom', 'spotbugs', 'pmd', 'dependency-check'
        }
        if 'custom' in selected:
            self.run_custom_scanner()
        if 'spotbugs' in selected:
            self.run_spotbugs()
        if 'pmd' in selected:
            self.run_pmd()
        if 'dependency-check' in selected:
            self.run_dependency_check()
        report_file = self.generate_consolidated_report()
        print(f"\nScan completed! Report available at: {report_file}")
        return self.scan_results
def main():
    """CLI entry point: parse arguments and run a full security scan.

    Returns:
        Process exit code: 0 on completion, 1 when the target is missing.
    """
    parser = argparse.ArgumentParser(description='Automated Java Security Scanner')
    parser.add_argument('target', help='Target directory or file to scan')
    parser.add_argument('-o', '--output', required=True, help='Output directory for results')
    # NOTE(review): --tools is parsed but never forwarded to the scanner —
    # this call to run_full_scan() runs every tool regardless of the option.
    parser.add_argument('--tools', nargs='+',
                        choices=['custom', 'spotbugs', 'pmd', 'dependency-check'],
                        default=['custom', 'spotbugs', 'pmd', 'dependency-check'],
                        help='Tools to run')
    args = parser.parse_args()
    if not Path(args.target).exists():
        print(f"Error: Target path not found: {args.target}")
        return 1
    scanner = AutomatedSecurityScanner(args.target, args.output)
    results = scanner.run_full_scan()
    # Show a short summary on stdout in addition to the generated reports.
    print(f"\n=== Scan Summary ===")
    print(f"Tools used: {results['summary']['tools_used']}")
    print(f"Total issues: {results['summary']['total_issues']}")
    return 0


if __name__ == "__main__":
    exit(main())
6.5 本章小结
本章深入介绍了Java层静态分析技术:
- Java字节码基础:理解Java和DEX字节码格式
- 代码反编译:掌握各种反编译工具的使用
- 静态代码分析:学习AST分析、数据流分析、控制流分析
- 安全漏洞检测:识别常见Java安全漏洞和自动化扫描
Java层静态分析是安卓逆向工程的核心技能,通过这些技术可以深入理解应用的业务逻辑、发现安全漏洞、分析恶意行为。
学习检查清单:
- 理解Java字节码和DEX格式的区别
- 熟练使用Jadx等反编译工具
- 掌握AST分析技术
- 了解数据流和控制流分析原理
- 能够识别常见的Java安全漏洞
- 掌握自动化安全扫描技术
- 能够编写自定义静态分析工具
下一章预告:
在下一章中,我们将学习Native层分析技术,包括SO库的结构分析、ARM汇编代码分析、Native代码的逆向技术等。

被折叠的评论
为什么被折叠?



