Chapter 5: APK Structure Analysis and Unpacking
5.1 In-Depth Analysis of the APK File Format
5.1.1 The Nature of an APK File
An APK (Android Package) file is essentially a ZIP archive that follows the ZIP file format specification. Understanding the internal structure of an APK is the foundation of Android reverse engineering.
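This is easy to verify directly: a valid APK normally begins with the ZIP local-file-header magic PK\x03\x04, and Python's standard zipfile module opens it like any other archive. A minimal sketch (example.apk is a placeholder path):
import zipfile

APK_PATH = "example.apk"  # placeholder, point this at a real APK

# An APK normally starts with the ZIP local file header magic "PK\x03\x04"
with open(APK_PATH, "rb") as f:
    magic = f.read(4)
print("ZIP magic:", magic, "->", "ZIP/APK" if magic == b"PK\x03\x04" else "not a ZIP archive")

# Because it is a ZIP archive, zipfile can enumerate its entries directly
with zipfile.ZipFile(APK_PATH) as z:
    print("entries:", len(z.namelist()))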
graph TB
subgraph APK[APK File Structure]
subgraph Root[Root Directory]
Manifest[AndroidManifest.xml]
Classes[classes.dex]
Resources[resources.arsc]
ResDir[res/]
AssetsDir[assets/]
LibDir[lib/]
MetaDir[META-INF/]
end
subgraph ResContent[res/ Directory Contents]
Drawable[drawable/]
Layout[layout/]
Values[values/]
Raw[raw/]
end
subgraph MetaContent[META-INF/ Contents]
Manifest2[MANIFEST.MF]
CertSF[CERT.SF]
CertRSA[CERT.RSA]
end
subgraph LibContent[lib/ Directory Contents]
ARM64[arm64-v8a/]
ARM32[armeabi-v7a/]
X86[x86/]
X86_64[x86_64/]
end
end
ResDir --> ResContent
MetaDir --> MetaContent
LibDir --> LibContent
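The layout in the diagram is straightforward to check programmatically. The sketch below (a hypothetical helper, with example.apk as a placeholder) reports which of the standard top-level entries are present in a given APK:
import zipfile

# Standard top-level entries from the structure diagram above
EXPECTED = ["AndroidManifest.xml", "classes.dex", "resources.arsc",
            "res/", "assets/", "lib/", "META-INF/"]

def check_top_level(apk_path):
    """Print which of the standard top-level APK entries exist in apk_path."""
    with zipfile.ZipFile(apk_path) as z:
        names = z.namelist()
        for entry in EXPECTED:
            if entry.endswith("/"):
                present = any(n.startswith(entry) for n in names)
            else:
                present = entry in names
            print(f"{entry:<22} {'present' if present else 'missing'}")

# check_top_level("example.apk")  # placeholder path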
5.1.2 ZIP File Format Analysis
ZIP file structure:
import struct
import zipfile
from pathlib import Path
class APKAnalyzer:
def __init__(self, apk_path):
self.apk_path = Path(apk_path)
self.zip_file = zipfile.ZipFile(apk_path, 'r')
def analyze_zip_structure(self):
"""分析ZIP文件结构"""
print("=== ZIP File Structure Analysis ===")
# 获取文件列表
file_list = self.zip_file.filelist
print(f"Total files: {len(file_list)}")
# 分析每个文件
for file_info in file_list:
print(f"\nFile: {file_info.filename}")
print(f" Compressed size: {file_info.compress_size}")
print(f" Uncompressed size: {file_info.file_size}")
print(f" Compression type: {file_info.compress_type}")
print(f" CRC32: 0x{file_info.CRC:08x}")
print(f" Date/Time: {file_info.date_time}")
return file_list
def extract_file_signatures(self):
"""提取文件签名"""
signatures = {}
for file_info in self.zip_file.filelist:
if file_info.file_size > 0:
data = self.zip_file.read(file_info.filename)
# 读取文件头部字节作为签名
signature = data[:16] if len(data) >= 16 else data
signatures[file_info.filename] = signature.hex()
return signatures
def analyze_compression_ratios(self):
"""分析压缩比"""
ratios = {}
for file_info in self.zip_file.filelist:
if file_info.file_size > 0:
ratio = file_info.compress_size / file_info.file_size
ratios[file_info.filename] = ratio
# 按压缩比排序
sorted_ratios = sorted(ratios.items(), key=lambda x: x[1])
print("\n=== Compression Ratios ===")
for filename, ratio in sorted_ratios[:10]: # 显示压缩比最低的10个文件
print(f"{filename}: {ratio:.3f}")
return ratios
# 使用示例
analyzer = APKAnalyzer("example.apk")
file_list = analyzer.analyze_zip_structure()
signatures = analyzer.extract_file_signatures()
ratios = analyzer.analyze_compression_ratios()
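The raw signatures returned by extract_file_signatures() become more meaningful once they are matched against known magic values. The mapping below is a partial, illustrative sketch (not an exhaustive list):
# Partial mapping of leading bytes to file kinds commonly found inside an APK
KNOWN_MAGIC = {
    b"dex\n": "DEX bytecode",
    b"\x7fELF": "ELF native library",
    b"\x89PNG": "PNG image",
    b"PK\x03\x04": "nested ZIP/JAR",
    b"\x03\x00\x08\x00": "binary XML (AXML)",
    b"\x02\x00\x0c\x00": "resource table (resources.arsc)",
}

def classify_signature(first_bytes):
    """Return a human-readable label for a file's leading bytes, if known."""
    for magic, label in KNOWN_MAGIC.items():
        if first_bytes.startswith(magic):
            return label
    return "unknown"

# Example: classify every entry analysed above
# for name, hex_sig in signatures.items():
#     print(name, "->", classify_signature(bytes.fromhex(hex_sig)))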
5.1.3 APK Components in Detail
Core file analysis:
class APKComponentAnalyzer:
def __init__(self, apk_path):
self.apk_path = apk_path
self.zip_file = zipfile.ZipFile(apk_path, 'r')
def analyze_manifest(self):
"""分析AndroidManifest.xml"""
try:
manifest_data = self.zip_file.read('AndroidManifest.xml')
print("=== AndroidManifest.xml Analysis ===")
print(f"Size: {len(manifest_data)} bytes")
# 检查是否为二进制XML
if manifest_data.startswith(b'\x03\x00\x08\x00'):
print("Format: Binary XML (AXML)")
return self.parse_binary_xml(manifest_data)
else:
print("Format: Plain text XML")
return manifest_data.decode('utf-8')
except KeyError:
print("AndroidManifest.xml not found")
return None
def analyze_dex_files(self):
"""分析DEX文件"""
dex_files = []
for file_info in self.zip_file.filelist:
if file_info.filename.endswith('.dex'):
dex_data = self.zip_file.read(file_info.filename)
dex_info = self.parse_dex_header(dex_data)
dex_files.append({
'filename': file_info.filename,
'size': len(dex_data),
'header_info': dex_info
})
print(f"\n=== DEX Files Analysis ===")
print(f"Found {len(dex_files)} DEX files")
for dex in dex_files:
print(f"\nDEX File: {dex['filename']}")
print(f" Size: {dex['size']} bytes")
if dex['header_info']:
header = dex['header_info']
print(f" Classes: {header.get('class_defs_size', 'Unknown')}")
print(f" Methods: {header.get('method_ids_size', 'Unknown')}")
print(f" Strings: {header.get('string_ids_size', 'Unknown')}")
return dex_files
def parse_dex_header(self, dex_data):
"""解析DEX文件头"""
if len(dex_data) < 112: # DEX头部最小长度
return None
try:
# DEX header layout: magic(8s), checksum(uint32), signature(20s), then 20 uint32 fields
header = struct.unpack('<8sI20s20I', dex_data[:112])
return {
'magic': header[0],
'checksum': header[1],
'file_size': header[3],
'header_size': header[4],
'string_ids_size': header[9],
'type_ids_size': header[11],
'proto_ids_size': header[13],
'field_ids_size': header[15],
'method_ids_size': header[17],
'class_defs_size': header[19]
}
except struct.error:
return None
def analyze_resources(self):
"""分析资源文件"""
resource_info = {
'resources_arsc': None,
'res_files': [],
'assets_files': []
}
# 分析resources.arsc
try:
arsc_data = self.zip_file.read('resources.arsc')
resource_info['resources_arsc'] = {
'size': len(arsc_data),
'signature': arsc_data[:4].hex()
}
except KeyError:
pass
# 分析res/目录
for file_info in self.zip_file.filelist:
if file_info.filename.startswith('res/'):
resource_info['res_files'].append({
'path': file_info.filename,
'size': file_info.file_size,
'compressed_size': file_info.compress_size
})
elif file_info.filename.startswith('assets/'):
resource_info['assets_files'].append({
'path': file_info.filename,
'size': file_info.file_size,
'compressed_size': file_info.compress_size
})
print(f"\n=== Resources Analysis ===")
if resource_info['resources_arsc']:
print(f"resources.arsc: {resource_info['resources_arsc']['size']} bytes")
print(f"res/ files: {len(resource_info['res_files'])}")
print(f"assets/ files: {len(resource_info['assets_files'])}")
return resource_info
def analyze_native_libraries(self):
"""分析Native库"""
lib_info = {}
for file_info in self.zip_file.filelist:
if file_info.filename.startswith('lib/'):
parts = file_info.filename.split('/')
if len(parts) >= 3: # lib/arch/library.so
arch = parts[1]
lib_name = parts[2]
if arch not in lib_info:
lib_info[arch] = []
# 分析SO文件
so_data = self.zip_file.read(file_info.filename)
lib_info[arch].append({
'name': lib_name,
'size': len(so_data),
'elf_header': self.parse_elf_header(so_data)
})
print(f"\n=== Native Libraries Analysis ===")
for arch, libs in lib_info.items():
print(f"Architecture: {arch}")
for lib in libs:
print(f" {lib['name']}: {lib['size']} bytes")
if lib['elf_header']:
print(f" ELF Type: {lib['elf_header'].get('type', 'Unknown')}")
return lib_info
def parse_elf_header(self, elf_data):
"""解析ELF文件头"""
if len(elf_data) < 52: # ELF头部最小长度
return None
try:
# ELF文件头魔数检查
if elf_data[:4] != b'\x7fELF':
return None
# 解析ELF头部
header = struct.unpack('<16B H H I I I I I H H H H H H', elf_data[:52])
return {
'class': header[4], # 32位或64位
'data': header[5], # 字节序
'type': header[16], # 文件类型
'machine': header[17] # 机器类型
}
except struct.error:
return None
5.2 APK Unpacking Techniques
5.2.1 Basic Unpacking Methods
Using standard ZIP tools:
# Unpack an APK with unzip
unzip -l example.apk # list files
unzip example.apk -d extracted/ # extract to a directory
unzip -j example.apk classes.dex # extract a single file only
# Unpack with 7-Zip
7z l example.apk # list files
7z x example.apk -oextracted/ # extract to a directory
# Inspect the extracted file tree
tree extracted/
Python-based unpacking implementation:
import zipfile
import os
from pathlib import Path
class APKExtractor:
def __init__(self, apk_path, output_dir):
self.apk_path = Path(apk_path)
self.output_dir = Path(output_dir)
self.zip_file = zipfile.ZipFile(apk_path, 'r')
def extract_all(self):
"""提取所有文件"""
print(f"Extracting {self.apk_path} to {self.output_dir}")
# 创建输出目录
self.output_dir.mkdir(parents=True, exist_ok=True)
# 提取所有文件
self.zip_file.extractall(self.output_dir)
print("Extraction completed")
def extract_selective(self, file_patterns):
"""选择性提取文件"""
import fnmatch
extracted_files = []
for file_info in self.zip_file.filelist:
for pattern in file_patterns:
if fnmatch.fnmatch(file_info.filename, pattern):
# 提取匹配的文件
self.zip_file.extract(file_info.filename, self.output_dir)
extracted_files.append(file_info.filename)
break
print(f"Extracted {len(extracted_files)} files")
return extracted_files
def extract_with_metadata(self):
"""提取文件并保留元数据"""
metadata = {}
for file_info in self.zip_file.filelist:
# 提取文件
self.zip_file.extract(file_info.filename, self.output_dir)
# 保存元数据
metadata[file_info.filename] = {
'original_size': file_info.file_size,
'compressed_size': file_info.compress_size,
'crc32': file_info.CRC,
'date_time': file_info.date_time,
'compress_type': file_info.compress_type
}
# 保存元数据到JSON文件
import json
with open(self.output_dir / 'extraction_metadata.json', 'w') as f:
json.dump(metadata, f, indent=2, default=str)
return metadata
# 使用示例
extractor = APKExtractor("example.apk", "extracted")
extractor.extract_all()
# 选择性提取
patterns = ["*.dex", "AndroidManifest.xml", "lib/*/*.so"]
extractor.extract_selective(patterns)
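One caveat when calling extractall() on untrusted APKs: ZIP entries may contain "../" components or absolute paths (the "zip slip" problem), so a malicious archive can write outside the output directory. A simplified sketch of a safer extraction loop (illustrative, not complete hardening):
import zipfile
from pathlib import Path

def safe_extract(apk_path, output_dir):
    """Extract an APK, skipping entries whose paths escape output_dir."""
    out = Path(output_dir).resolve()
    out.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(apk_path) as z:
        for info in z.infolist():
            target = (out / info.filename).resolve()
            try:
                target.relative_to(out)  # raises ValueError if the path escapes output_dir
            except ValueError:
                print(f"Skipping suspicious entry: {info.filename}")
                continue
            z.extract(info, out)

# safe_extract("example.apk", "extracted_safe")  # placeholder paths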
5.2.2 Advanced Use of APKTool
Advanced APKTool options:
# Basic decompilation
apktool d example.apk
# Specify an output directory
apktool d example.apk -o output_directory
# Do not decode resources
apktool d example.apk --no-res
# Do not decode sources (smali)
apktool d example.apk --no-src
# Keep resources that fail to decode instead of aborting
apktool d example.apk --keep-broken-res
# Use a specific framework directory
apktool d example.apk -p framework_directory
# Force overwrite of existing output
apktool d example.apk -f
# Verbose output
apktool d example.apk -v
# Use aapt2 instead of aapt
apktool d example.apk --use-aapt2
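In a larger analysis pipeline APKTool is usually driven from a script rather than by hand. A minimal sketch using subprocess (it assumes apktool is on PATH; the paths are placeholders):
import subprocess
from pathlib import Path

def apktool_decode(apk_path, out_dir, decode_resources=True):
    """Run 'apktool d' and return True on success (assumes apktool is on PATH)."""
    cmd = ["apktool", "d", str(apk_path), "-o", str(out_dir), "-f"]
    if not decode_resources:
        cmd.append("--no-res")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print("apktool failed:", result.stderr)
    return result.returncode == 0

# apktool_decode("example.apk", Path("apktool_out"))  # placeholder paths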
APKTool configuration file:
# Example apktool.yml
version: "2.6.1"
apkFileName: example.apk
isFrameworkApk: false
usesFramework:
ids:
- 1
packageInfo:
forcedPackageId: "127"
versionInfo:
versionCode: "1"
versionName: "1.0"
compressionType: false
sharedLibrary: false
sparseResources: false
unknownFiles: {}
doNotCompress:
- "resources.arsc"
5.2.3 A Custom Unpacking Tool
A complete APK unpacker:
import zipfile
import struct
import xml.etree.ElementTree as ET
from pathlib import Path
import json
import hashlib
class AdvancedAPKExtractor:
def __init__(self, apk_path):
self.apk_path = Path(apk_path)
self.zip_file = zipfile.ZipFile(apk_path, 'r')
self.analysis_results = {}
def full_extraction_analysis(self, output_dir):
"""完整的提取和分析"""
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
print("Starting comprehensive APK analysis...")
# 1. 基本信息分析
self.analyze_basic_info()
# 2. 文件结构分析
self.analyze_file_structure()
# 3. 提取并分析各组件
self.extract_and_analyze_manifest(output_path)
self.extract_and_analyze_dex(output_path)
self.extract_and_analyze_resources(output_path)
self.extract_and_analyze_native_libs(output_path)
self.extract_and_analyze_certificates(output_path)
# 4. 生成分析报告
self.generate_analysis_report(output_path)
print("Analysis completed!")
return self.analysis_results
def analyze_basic_info(self):
"""分析基本信息"""
stat = self.apk_path.stat()
# 计算文件哈希
with open(self.apk_path, 'rb') as f:
content = f.read()
md5_hash = hashlib.md5(content).hexdigest()
sha1_hash = hashlib.sha1(content).hexdigest()
sha256_hash = hashlib.sha256(content).hexdigest()
self.analysis_results['basic_info'] = {
'filename': self.apk_path.name,
'size': stat.st_size,
'md5': md5_hash,
'sha1': sha1_hash,
'sha256': sha256_hash,
'creation_time': stat.st_ctime,
'modification_time': stat.st_mtime
}
def analyze_file_structure(self):
"""分析文件结构"""
file_structure = {
'total_files': len(self.zip_file.filelist),
'directories': set(),
'file_types': {},
'size_distribution': {
'total_uncompressed': 0,
'total_compressed': 0,
'largest_files': []
}
}
files_by_size = []
for file_info in self.zip_file.filelist:
# 目录分析
if '/' in file_info.filename:
dir_path = '/'.join(file_info.filename.split('/')[:-1])
file_structure['directories'].add(dir_path)
# 文件类型分析
if '.' in file_info.filename:
ext = file_info.filename.split('.')[-1].lower()
file_structure['file_types'][ext] = file_structure['file_types'].get(ext, 0) + 1
# 大小统计
file_structure['size_distribution']['total_uncompressed'] += file_info.file_size
file_structure['size_distribution']['total_compressed'] += file_info.compress_size
files_by_size.append((file_info.filename, file_info.file_size))
# 最大的10个文件
files_by_size.sort(key=lambda x: x[1], reverse=True)
file_structure['size_distribution']['largest_files'] = files_by_size[:10]
# 压缩比
if file_structure['size_distribution']['total_uncompressed'] > 0:
compression_ratio = file_structure['size_distribution']['total_compressed'] / file_structure['size_distribution']['total_uncompressed']
file_structure['size_distribution']['compression_ratio'] = compression_ratio
self.analysis_results['file_structure'] = file_structure
def extract_and_analyze_manifest(self, output_path):
"""提取和分析AndroidManifest.xml"""
try:
manifest_data = self.zip_file.read('AndroidManifest.xml')
# 保存原始文件
with open(output_path / 'AndroidManifest.xml', 'wb') as f:
f.write(manifest_data)
# 分析清单文件
manifest_analysis = {
'size': len(manifest_data),
'format': 'binary' if manifest_data.startswith(b'\x03\x00\x08\x00') else 'text',
'encoding': 'unknown'
}
# 如果是二进制XML,尝试解析
if manifest_analysis['format'] == 'binary':
try:
# 这里需要使用专门的AXML解析器
# 简化实现,实际应该使用aapt或专门的库
manifest_analysis['note'] = 'Binary XML format, requires specialized parser'
except Exception as e:
manifest_analysis['parse_error'] = str(e)
self.analysis_results['manifest'] = manifest_analysis
except KeyError:
self.analysis_results['manifest'] = {'error': 'AndroidManifest.xml not found'}
def extract_and_analyze_dex(self, output_path):
"""提取和分析DEX文件"""
dex_analysis = {'files': []}
for file_info in self.zip_file.filelist:
if file_info.filename.endswith('.dex'):
dex_data = self.zip_file.read(file_info.filename)
# 保存DEX文件
with open(output_path / file_info.filename, 'wb') as f:
f.write(dex_data)
# 分析DEX文件
dex_info = self.analyze_dex_file(dex_data)
dex_info['filename'] = file_info.filename
dex_info['size'] = len(dex_data)
dex_analysis['files'].append(dex_info)
self.analysis_results['dex'] = dex_analysis
def analyze_dex_file(self, dex_data):
"""详细分析DEX文件"""
if len(dex_data) < 112:
return {'error': 'Invalid DEX file size'}
try:
# 解析DEX头部
magic = dex_data[:8]
if magic[:4] != b'dex\n':
return {'error': 'Invalid DEX magic'}
# 提取版本信息
version = magic[4:7].decode('ascii')
# Header layout after the 8-byte magic: checksum(uint32), signature(20s), then 20 uint32 fields
header_data = struct.unpack('<I20s20I', dex_data[8:112])
return {
'version': version,
'checksum': header_data[0],
'file_size': header_data[2],
'header_size': header_data[3],
'endian_tag': header_data[4],
'string_ids_size': header_data[8],
'type_ids_size': header_data[10],
'proto_ids_size': header_data[12],
'field_ids_size': header_data[14],
'method_ids_size': header_data[16],
'class_defs_size': header_data[18],
'data_size': header_data[20],
'data_off': header_data[21]
}
except Exception as e:
return {'error': f'DEX parsing error: {str(e)}'}
def extract_and_analyze_resources(self, output_path):
"""提取和分析资源文件"""
resources_dir = output_path / 'resources'
resources_dir.mkdir(exist_ok=True)
resource_analysis = {
'resources_arsc': None,
'res_files': [],
'assets_files': []
}
# 提取resources.arsc
try:
arsc_data = self.zip_file.read('resources.arsc')
with open(resources_dir / 'resources.arsc', 'wb') as f:
f.write(arsc_data)
resource_analysis['resources_arsc'] = {
'size': len(arsc_data),
'signature': arsc_data[:4].hex()
}
except KeyError:
pass
# 提取res/和assets/目录
for file_info in self.zip_file.filelist:
if file_info.filename.startswith('res/') or file_info.filename.startswith('assets/'):
# 创建目录结构
file_path = resources_dir / file_info.filename
file_path.parent.mkdir(parents=True, exist_ok=True)
# 提取文件
with open(file_path, 'wb') as f:
f.write(self.zip_file.read(file_info.filename))
# 记录文件信息
file_record = {
'path': file_info.filename,
'size': file_info.file_size,
'compressed_size': file_info.compress_size
}
if file_info.filename.startswith('res/'):
resource_analysis['res_files'].append(file_record)
else:
resource_analysis['assets_files'].append(file_record)
self.analysis_results['resources'] = resource_analysis
def extract_and_analyze_native_libs(self, output_path):
"""提取和分析Native库"""
lib_dir = output_path / 'lib'
lib_analysis = {}
for file_info in self.zip_file.filelist:
if file_info.filename.startswith('lib/') and file_info.filename.endswith('.so'):
# 创建目录结构
file_path = lib_dir / file_info.filename[4:] # 去掉'lib/'前缀
file_path.parent.mkdir(parents=True, exist_ok=True)
# 提取SO文件
so_data = self.zip_file.read(file_info.filename)
with open(file_path, 'wb') as f:
f.write(so_data)
# 分析SO文件
arch = file_info.filename.split('/')[1]
lib_name = file_info.filename.split('/')[-1]
if arch not in lib_analysis:
lib_analysis[arch] = []
lib_info = {
'name': lib_name,
'size': len(so_data),
'path': file_info.filename
}
# 分析ELF头部
elf_info = self.analyze_elf_file(so_data)
if elf_info:
lib_info['elf_info'] = elf_info
lib_analysis[arch].append(lib_info)
self.analysis_results['native_libs'] = lib_analysis
def analyze_elf_file(self, elf_data):
"""分析ELF文件"""
if len(elf_data) < 52 or elf_data[:4] != b'\x7fELF':
return None
try:
# ELF头部解析
ei_class = elf_data[4] # 32位或64位
ei_data = elf_data[5] # 字节序
ei_version = elf_data[6] # ELF版本
# 解析ELF头部结构(32位)
if ei_class == 1: # 32位
header = struct.unpack('<H H I I I I I H H H H H H', elf_data[16:52])
else: # 64位
if len(elf_data) < 64:
return None
header = struct.unpack('<H H I Q Q Q I H H H H H H', elf_data[16:64])
return {
'class': '64-bit' if ei_class == 2 else '32-bit',
'endianness': 'little' if ei_data == 1 else 'big',
'version': ei_version,
'type': header[0],
'machine': header[1],
'entry_point': header[3]  # e_entry is the fourth unpacked field in both layouts
}
except Exception:
return None
def extract_and_analyze_certificates(self, output_path):
"""提取和分析证书"""
cert_dir = output_path / 'META-INF'
cert_analysis = {}
for file_info in self.zip_file.filelist:
if file_info.filename.startswith('META-INF/'):
# 创建目录
cert_dir.mkdir(exist_ok=True)
# 提取文件
file_path = cert_dir / file_info.filename[9:] # 去掉'META-INF/'前缀
with open(file_path, 'wb') as f:
f.write(self.zip_file.read(file_info.filename))
# 分析证书文件
if file_info.filename.endswith('.RSA') or file_info.filename.endswith('.DSA'):
cert_data = self.zip_file.read(file_info.filename)
cert_analysis[file_info.filename] = {
'size': len(cert_data),
'type': 'certificate',
'format': file_info.filename.split('.')[-1]
}
elif file_info.filename.endswith('.SF'):
sf_data = self.zip_file.read(file_info.filename)
cert_analysis[file_info.filename] = {
'size': len(sf_data),
'type': 'signature_file'
}
elif file_info.filename.endswith('MANIFEST.MF'):
mf_data = self.zip_file.read(file_info.filename)
cert_analysis[file_info.filename] = {
'size': len(mf_data),
'type': 'manifest'
}
self.analysis_results['certificates'] = cert_analysis
def generate_analysis_report(self, output_path):
"""生成分析报告"""
report_path = output_path / 'analysis_report.json'
with open(report_path, 'w', encoding='utf-8') as f:
json.dump(self.analysis_results, f, indent=2, ensure_ascii=False, default=str)
# 生成Markdown报告
self.generate_markdown_report(output_path)
def generate_markdown_report(self, output_path):
"""生成Markdown格式的报告"""
report_path = output_path / 'analysis_report.md'
with open(report_path, 'w', encoding='utf-8') as f:
f.write(f"# APK Analysis Report\n\n")
f.write(f"**File:** {self.analysis_results['basic_info']['filename']}\n")
f.write(f"**Size:** {self.analysis_results['basic_info']['size']:,} bytes\n")
f.write(f"**MD5:** {self.analysis_results['basic_info']['md5']}\n")
f.write(f"**SHA256:** {self.analysis_results['basic_info']['sha256']}\n\n")
# 文件结构
f.write("## File Structure\n\n")
fs = self.analysis_results['file_structure']
f.write(f"- **Total Files:** {fs['total_files']}\n")
f.write(f"- **Directories:** {len(fs['directories'])}\n")
f.write(f"- **Compression Ratio:** {fs['size_distribution'].get('compression_ratio', 'N/A'):.2%}\n\n")
# DEX文件
if 'dex' in self.analysis_results:
f.write("## DEX Files\n\n")
for dex in self.analysis_results['dex']['files']:
f.write(f"### {dex['filename']}\n")
f.write(f"- **Size:** {dex['size']:,} bytes\n")
if 'version' in dex:
f.write(f"- **Version:** {dex['version']}\n")
f.write(f"- **Classes:** {dex.get('class_defs_size', 'Unknown')}\n")
f.write(f"- **Methods:** {dex.get('method_ids_size', 'Unknown')}\n")
f.write(f"- **Strings:** {dex.get('string_ids_size', 'Unknown')}\n")
f.write("\n")
# Native库
if 'native_libs' in self.analysis_results:
f.write("## Native Libraries\n\n")
for arch, libs in self.analysis_results['native_libs'].items():
f.write(f"### {arch}\n")
for lib in libs:
f.write(f"- **{lib['name']}:** {lib['size']:,} bytes\n")
if 'elf_info' in lib:
elf = lib['elf_info']
f.write(f" - Class: {elf.get('class', 'Unknown')}\n")
f.write(f" - Endianness: {elf.get('endianness', 'Unknown')}\n")
f.write("\n")
# 使用示例
if __name__ == "__main__":
extractor = AdvancedAPKExtractor("example.apk")
results = extractor.full_extraction_analysis("extracted_analysis")
5.3 Binary XML Parsing
5.3.1 AXML Format Analysis
Android uses a binary XML format (AXML) to compress and optimize XML files such as AndroidManifest.xml:
import struct
from enum import IntEnum
class AXMLParser:
"""Android Binary XML Parser"""
class ChunkType(IntEnum):
RES_NULL_TYPE = 0x0000
RES_STRING_POOL_TYPE = 0x0001
RES_TABLE_TYPE = 0x0002
RES_XML_TYPE = 0x0003
RES_XML_FIRST_CHUNK_TYPE = 0x0100
RES_XML_START_NAMESPACE_TYPE = 0x0100
RES_XML_END_NAMESPACE_TYPE = 0x0101
RES_XML_START_ELEMENT_TYPE = 0x0102
RES_XML_END_ELEMENT_TYPE = 0x0103
RES_XML_CDATA_TYPE = 0x0104
RES_XML_LAST_CHUNK_TYPE = 0x017f
RES_XML_RESOURCE_MAP_TYPE = 0x0180
def __init__(self, axml_data):
self.data = axml_data
self.offset = 0
self.strings = []
self.resources = []
self.namespaces = {}
def parse(self):
"""解析AXML文件"""
# 检查文件头
if len(self.data) < 8:
raise ValueError("Invalid AXML file: too short")
# 读取文件头
chunk_type, header_size, chunk_size = struct.unpack('<H H I', self.data[:8])
if chunk_type != self.ChunkType.RES_XML_TYPE:
raise ValueError(f"Invalid AXML file: wrong chunk type {chunk_type}")
self.offset = 8
# 解析各个chunk
while self.offset < len(self.data):
chunk_type, header_size, chunk_size = self.read_chunk_header()
if chunk_type == self.ChunkType.RES_STRING_POOL_TYPE:
self.parse_string_pool(chunk_size)
elif chunk_type == self.ChunkType.RES_XML_RESOURCE_MAP_TYPE:
self.parse_resource_map(chunk_size)
elif chunk_type == self.ChunkType.RES_XML_START_NAMESPACE_TYPE:
self.parse_start_namespace()
elif chunk_type == self.ChunkType.RES_XML_END_NAMESPACE_TYPE:
self.parse_end_namespace()
elif chunk_type == self.ChunkType.RES_XML_START_ELEMENT_TYPE:
self.parse_start_element()
elif chunk_type == self.ChunkType.RES_XML_END_ELEMENT_TYPE:
self.parse_end_element()
elif chunk_type == self.ChunkType.RES_XML_CDATA_TYPE:
self.parse_cdata()
else:
# Skip unknown chunks entirely (self.offset is still at the chunk start)
self.offset += chunk_size
def read_chunk_header(self):
"""读取chunk头部"""
if self.offset + 8 > len(self.data):
raise ValueError("Unexpected end of file")
chunk_type, header_size, chunk_size = struct.unpack('<H H I',
self.data[self.offset:self.offset+8])
return chunk_type, header_size, chunk_size
def parse_string_pool(self, chunk_size):
"""解析字符串池"""
start_offset = self.offset
# 读取字符串池头部
string_count, style_count, flags, strings_start, styles_start = struct.unpack(
'<I I I I I', self.data[self.offset+8:self.offset+28])
self.offset += 28
# 读取字符串偏移表
string_offsets = []
for i in range(string_count):
offset = struct.unpack('<I', self.data[self.offset:self.offset+4])[0]
string_offsets.append(offset)
self.offset += 4
# 读取字符串
strings_base = start_offset + strings_start
for offset in string_offsets:
string_pos = strings_base + offset
if flags & 0x100: # UTF8_FLAG: string data is UTF-8 encoded
# Simplified: one byte of character count, one byte of byte length, then UTF-8 data
# (lengths above 0x7F use two bytes each, which this sketch ignores)
byte_len = self.data[string_pos+1]
string_data = self.data[string_pos+2:string_pos+2+byte_len]
string = string_data.decode('utf-8', errors='replace')
else:
# UTF-16 pool: 16-bit character count followed by UTF-16LE data
str_len = struct.unpack('<H', self.data[string_pos:string_pos+2])[0]
string_data = self.data[string_pos+2:string_pos+2+str_len*2]
string = string_data.decode('utf-16le', errors='replace')
self.strings.append(string)
# 移动到chunk结束位置
self.offset = start_offset + chunk_size
def parse_resource_map(self, chunk_size):
"""Parse the resource map chunk (attribute resource IDs)"""
resource_count = (chunk_size - 8) // 4
self.offset += 8 # skip the 8-byte chunk header before the ID array
for i in range(resource_count):
resource_id = struct.unpack('<I', self.data[self.offset:self.offset+4])[0]
self.resources.append(resource_id)
self.offset += 4
def parse_start_namespace(self):
"""Parse a start-namespace chunk"""
# Skip the 8-byte chunk header, then read lineNumber, comment, prefix, uri
line_number, comment, prefix, uri = struct.unpack('<I I I I',
self.data[self.offset+8:self.offset+24])
prefix_str = self.strings[prefix] if prefix != 0xFFFFFFFF else ""
uri_str = self.strings[uri] if uri != 0xFFFFFFFF else ""
self.namespaces[prefix_str] = uri_str
self.offset += 24
print(f"Start Namespace: {prefix_str} = {uri_str}")
def parse_end_namespace(self):
"""Parse an end-namespace chunk"""
line_number, comment, prefix, uri = struct.unpack('<I I I I',
self.data[self.offset+8:self.offset+24])
self.offset += 24
prefix_str = self.strings[prefix] if prefix != 0xFFFFFFFF else ""
print(f"End Namespace: {prefix_str}")
def parse_start_element(self):
"""Parse a start-element chunk and its attributes"""
# Skip the 8-byte chunk header; the element body is 28 bytes:
# lineNumber, comment, ns, name (uint32 each), then six uint16 fields
body = self.offset + 8
(line_number, comment, namespace, name, attribute_start, attribute_size,
attribute_count, id_index, class_index, style_index) = struct.unpack(
'<I I I I H H H H H H', self.data[body:body+28])
self.offset = body + 28
# Resolve the element name
element_name = self.strings[name] if name != 0xFFFFFFFF else ""
namespace_str = self.strings[namespace] if namespace != 0xFFFFFFFF else ""
print(f"Start Element: {namespace_str}:{element_name}" if namespace_str else f"Start Element: {element_name}")
# Each attribute: ns, name, rawValue (uint32), size (uint16), res0, dataType (uint8), data (uint32)
for i in range(attribute_count):
(attr_namespace, attr_name, attr_value_string, attr_size, attr_res0,
attr_type, attr_data) = struct.unpack('<I I I H B B I',
self.data[self.offset:self.offset+20])
self.offset += 20
attr_name_str = self.strings[attr_name] if attr_name != 0xFFFFFFFF else ""
attr_namespace_str = self.strings[attr_namespace] if attr_namespace != 0xFFFFFFFF else ""
# Resolve the attribute value
if attr_value_string != 0xFFFFFFFF:
attr_value = self.strings[attr_value_string]
else:
attr_value = self.parse_attribute_value(attr_type, attr_data)
full_attr_name = f"{attr_namespace_str}:{attr_name_str}" if attr_namespace_str else attr_name_str
print(f" Attribute: {full_attr_name} = {attr_value}")
def parse_end_element(self):
"""Parse an end-element chunk"""
line_number, comment, namespace, name = struct.unpack('<I I I I',
self.data[self.offset+8:self.offset+24])
self.offset += 24
element_name = self.strings[name] if name != 0xFFFFFFFF else ""
namespace_str = self.strings[namespace] if namespace != 0xFFFFFFFF else ""
print(f"End Element: {namespace_str}:{element_name}" if namespace_str else f"End Element: {element_name}")
def parse_cdata(self):
"""Parse a CDATA chunk"""
# Body: lineNumber, comment, string index, then an 8-byte Res_value that is skipped here
line_number, comment, data = struct.unpack('<I I I',
self.data[self.offset+8:self.offset+20])
self.offset += 28
cdata_str = self.strings[data] if data != 0xFFFFFFFF else ""
print(f"CDATA: {cdata_str}")
def parse_attribute_value(self, attr_type, attr_data):
"""解析属性值"""
if attr_type == 0x03: # TYPE_STRING
return self.strings[attr_data] if attr_data < len(self.strings) else f"@string/{attr_data}"
elif attr_type == 0x10: # TYPE_INT_DEC
return str(attr_data)
elif attr_type == 0x11: # TYPE_INT_HEX
return f"0x{attr_data:08x}"
elif attr_type == 0x12: # TYPE_INT_BOOLEAN
return "true" if attr_data else "false"
elif attr_type == 0x01: # TYPE_REFERENCE
return f"@{attr_data:08x}"
else:
return f"(type:{attr_type:02x}){attr_data}"
# 使用示例
def parse_axml_file(axml_path):
"""解析AXML文件"""
with open(axml_path, 'rb') as f:
axml_data = f.read()
parser = AXMLParser(axml_data)
try:
parser.parse()
return parser
except Exception as e:
print(f"AXML parsing error: {e}")
return None
# 测试
if __name__ == "__main__":
# 从APK中提取AndroidManifest.xml并解析
import zipfile
with zipfile.ZipFile("example.apk", 'r') as apk:
manifest_data = apk.read('AndroidManifest.xml')
parser = AXMLParser(manifest_data)
parser.parse()
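Writing an AXML parser by hand is instructive, but for day-to-day work a maintained library is usually more robust. If androguard is installed, its AXMLPrinter can decode a binary manifest back to readable XML; a hedged sketch (the import path differs between androguard versions, and example.apk is a placeholder):
import zipfile

def dump_manifest_with_androguard(apk_path):
    """Decode AndroidManifest.xml via androguard's AXMLPrinter, if available."""
    try:
        from androguard.core.axml import AXMLPrinter            # newer androguard
    except ImportError:
        from androguard.core.bytecodes.axml import AXMLPrinter  # older androguard
    with zipfile.ZipFile(apk_path) as apk:
        raw = apk.read('AndroidManifest.xml')
    return AXMLPrinter(raw).get_xml()  # decoded XML document

# print(dump_manifest_with_androguard("example.apk"))  # placeholder path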
5.3.2 Parsing with aapt
# Dump AndroidManifest.xml with aapt
aapt dump xmltree example.apk AndroidManifest.xml
# Dump the resource table
aapt dump resources example.apk
# Show application information
aapt dump badging example.apk
# List permissions
aapt dump permissions example.apk
# Dump a specific XML file
aapt dump xmltree example.apk res/layout/activity_main.xml
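The badging output is also convenient to consume from scripts. The sketch below shells out to aapt and pulls the package name out of the "package: name='...'" line (it assumes aapt is on PATH; the regex is illustrative):
import re
import subprocess

def get_package_name(apk_path):
    """Return the package name reported by 'aapt dump badging', or None."""
    result = subprocess.run(["aapt", "dump", "badging", apk_path],
                            capture_output=True, text=True)
    match = re.search(r"package: name='([^']+)'", result.stdout)
    return match.group(1) if match else None

# print(get_package_name("example.apk"))  # placeholder path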
5.4 Resource File Analysis
5.4.1 Analyzing resources.arsc
import struct
class ResourcesARSCParser:
"""resources.arsc文件解析器"""
def __init__(self, arsc_data):
self.data = arsc_data
self.offset = 0
self.string_pool = []
self.packages = []
def parse(self):
"""解析resources.arsc文件"""
# 检查文件头
if len(self.data) < 12:
raise ValueError("Invalid ARSC file")
# 读取文件头
chunk_type, header_size, chunk_size, package_count = struct.unpack(
'<H H I I', self.data[:12])
if chunk_type != 0x0002: # RES_TABLE_TYPE
raise ValueError("Invalid ARSC file: wrong chunk type")
self.offset = 12
# 解析全局字符串池
self.parse_global_string_pool()
# 解析包
for i in range(package_count):
self.parse_package()
def parse_global_string_pool(self):
"""解析全局字符串池"""
chunk_type, header_size, chunk_size = struct.unpack(
'<H H I', self.data[self.offset:self.offset+8])
if chunk_type == 0x0001: # RES_STRING_POOL_TYPE
# 解析字符串池(简化实现)
self.offset += chunk_size
def parse_package(self):
"""解析包"""
start_offset = self.offset
# 读取包头
chunk_type, header_size, chunk_size, package_id = struct.unpack(
'<H H I I', self.data[self.offset:self.offset+12])
if chunk_type != 0x0200: # RES_TABLE_PACKAGE_TYPE
raise ValueError("Invalid package chunk type")
# 读取包名
package_name = self.data[self.offset+12:self.offset+12+256].decode('utf-16le').rstrip('\x00')
package_info = {
'id': package_id,
'name': package_name,
'types': []
}
self.offset = start_offset + header_size
# 解析类型规范和类型
while self.offset < start_offset + chunk_size:
chunk_type, chunk_header_size, chunk_chunk_size = struct.unpack(
'<H H I', self.data[self.offset:self.offset+8])
if chunk_type == 0x0202: # RES_TABLE_TYPE_SPEC_TYPE
self.parse_type_spec(package_info)
elif chunk_type == 0x0201: # RES_TABLE_TYPE_TYPE
self.parse_type(package_info)
else:
self.offset += chunk_chunk_size
self.packages.append(package_info)
def parse_type_spec(self, package_info):
"""Parse a RES_TABLE_TYPE_SPEC chunk"""
# Layout: type(H), headerSize(H), size(I), id(B), res0(B), res1(H), entryCount(I)
chunk_type, header_size, chunk_size, type_id, res0, res1, entry_count = struct.unpack(
'<H H I B B H I', self.data[self.offset:self.offset+16])
# Skip the type-spec body (one uint32 of flags per entry)
self.offset += chunk_size
def parse_type(self, package_info):
"""Parse a RES_TABLE_TYPE chunk"""
# Layout: type(H), headerSize(H), size(I), id(B), flags(B), reserved(H), entryCount(I), entriesStart(I)
chunk_type, header_size, chunk_size, type_id, flags, reserved, entry_count, entries_start = struct.unpack(
'<H H I B B H I I', self.data[self.offset:self.offset+20])
# The ResTable_config structure fills the rest of the header
config_size = header_size - 20
self.offset += 20 + config_size
# 读取条目偏移表
entry_offsets = []
for i in range(entry_count):
offset = struct.unpack('<I', self.data[self.offset:self.offset+4])[0]
entry_offsets.append(offset)
self.offset += 4
# 解析条目
entries_base = self.offset - 4 * entry_count - config_size - 20 + entries_start
type_info = {
'id': type_id,
'entries': []
}
for i, offset in enumerate(entry_offsets):
if offset != 0xFFFFFFFF:
entry_pos = entries_base + offset
entry_info = self.parse_entry(entry_pos, i)
type_info['entries'].append(entry_info)
package_info['types'].append(type_info)
# 移动到下一个chunk
self.offset = self.offset - 4 * entry_count - config_size - 20 + chunk_size
def parse_entry(self, entry_pos, entry_id):
"""解析条目"""
# 简化实现,只读取基本信息
entry_size, flags, key_index = struct.unpack(
'<H H I', self.data[entry_pos:entry_pos+8])
return {
'id': entry_id,
'size': entry_size,
'flags': flags,
'key_index': key_index
}
# 使用示例
def analyze_resources_arsc(apk_path):
"""分析APK中的resources.arsc文件"""
import zipfile
with zipfile.ZipFile(apk_path, 'r') as apk:
try:
arsc_data = apk.read('resources.arsc')
parser = ResourcesARSCParser(arsc_data)
parser.parse()
print(f"Found {len(parser.packages)} packages:")
for package in parser.packages:
print(f" Package: {package['name']} (ID: {package['id']})")
print(f" Types: {len(package['types'])}")
return parser
except KeyError:
print("resources.arsc not found in APK")
return None
5.4.2 Resource ID Mapping
class ResourceMapper:
"""资源ID映射器"""
def __init__(self):
self.resource_map = {}
self.type_names = {
0x01: "attr",
0x02: "drawable",
0x03: "mipmap",
0x04: "layout",
0x05: "anim",
0x06: "animator",
0x07: "interpolator",
0x08: "transition",
0x09: "xml",
0x0a: "raw",
0x0b: "color",
0x0c: "menu",
0x0d: "dimen",
0x0e: "string",
0x0f: "style",
0x10: "styleable",
0x11: "array",
0x12: "plurals",
0x13: "integer",
0x14: "bool",
0x15: "fraction",
0x16: "id"
}
def parse_resource_id(self, resource_id):
"""解析资源ID"""
package_id = (resource_id >> 24) & 0xFF
type_id = (resource_id >> 16) & 0xFF
entry_id = resource_id & 0xFFFF
type_name = self.type_names.get(type_id, f"type_{type_id:02x}")
return {
'package_id': package_id,
'type_id': type_id,
'type_name': type_name,
'entry_id': entry_id,
'full_name': f"@{type_name}/{entry_id:04x}"
}
def build_resource_map(self, arsc_parser):
"""构建资源映射表"""
for package in arsc_parser.packages:
package_id = package['id']
for type_info in package['types']:
type_id = type_info['id']
type_name = self.type_names.get(type_id, f"type_{type_id:02x}")
for entry in type_info['entries']:
entry_id = entry['id']
resource_id = (package_id << 24) | (type_id << 16) | entry_id
self.resource_map[resource_id] = {
'package': package['name'],
'type': type_name,
'entry': entry_id,
'name': f"{package['name']}:{type_name}/{entry_id:04x}"
}
def resolve_resource_id(self, resource_id):
"""解析资源ID到名称"""
return self.resource_map.get(resource_id, self.parse_resource_id(resource_id))
# 使用示例
mapper = ResourceMapper()
resource_info = mapper.parse_resource_id(0x7f080001)
print(f"Resource ID 0x7f080001: {resource_info}")
5.5 Hands-On Exercises
Exercise 1: Basic APK Analysis
Goal: analyze the structure of a real APK file
def comprehensive_apk_analysis(apk_path):
"""综合APK分析"""
import zipfile
import os
from pathlib import Path
print(f"=== Analyzing {apk_path} ===\n")
# 基本文件信息
apk_file = Path(apk_path)
file_size = apk_file.stat().st_size
print(f"File size: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)")
with zipfile.ZipFile(apk_path, 'r') as apk:
# 文件列表分析
files = apk.filelist
print(f"Total files: {len(files)}")
# 按类型分类文件
file_types = {}
total_uncompressed = 0
total_compressed = 0
for file_info in files:
# 文件类型统计
if '.' in file_info.filename:
ext = file_info.filename.split('.')[-1].lower()
file_types[ext] = file_types.get(ext, 0) + 1
# 大小统计
total_uncompressed += file_info.file_size
total_compressed += file_info.compress_size
print(f"Uncompressed size: {total_uncompressed:,} bytes")
print(f"Compressed size: {total_compressed:,} bytes")
print(f"Compression ratio: {total_compressed/total_uncompressed:.2%}")
# 文件类型分布
print(f"\nFile types:")
for ext, count in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
print(f" .{ext}: {count}")
# 检查关键文件
key_files = [
'AndroidManifest.xml',
'classes.dex',
'resources.arsc',
'META-INF/MANIFEST.MF'
]
print(f"\nKey files:")
for key_file in key_files:
try:
file_data = apk.read(key_file)
print(f" {key_file}: {len(file_data):,} bytes")
except KeyError:
print(f" {key_file}: NOT FOUND")
# DEX文件分析
dex_files = [f for f in files if f.filename.endswith('.dex')]
print(f"\nDEX files: {len(dex_files)}")
for dex_file in dex_files:
print(f" {dex_file.filename}: {dex_file.file_size:,} bytes")
# Native库分析
lib_files = [f for f in files if f.filename.startswith('lib/')]
if lib_files:
print(f"\nNative libraries: {len(lib_files)}")
architectures = set()
for lib_file in lib_files:
arch = lib_file.filename.split('/')[1]
architectures.add(arch)
for arch in sorted(architectures):
arch_libs = [f for f in lib_files if f.filename.startswith(f'lib/{arch}/')]
print(f" {arch}: {len(arch_libs)} libraries")
for lib in arch_libs[:5]: # 显示前5个
lib_name = lib.filename.split('/')[-1]
print(f" {lib_name}: {lib.file_size:,} bytes")
if len(arch_libs) > 5:
print(f" ... and {len(arch_libs)-5} more")
# 运行分析
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
comprehensive_apk_analysis(sys.argv[1])
else:
print("Usage: python script.py <apk_file>")
Exercise 2: Automated APK Information Extraction
#!/usr/bin/env python3
"""
Automated APK information extraction tool
"""
import zipfile
import json
import hashlib
import argparse
from pathlib import Path
import struct
class APKInfoExtractor:
def __init__(self, apk_path):
self.apk_path = Path(apk_path)
self.info = {}
def extract_all_info(self):
"""提取所有信息"""
print("Extracting APK information...")
# 基本文件信息
self.extract_file_info()
# APK内容分析
with zipfile.ZipFile(self.apk_path, 'r') as apk:
self.extract_structure_info(apk)
self.extract_manifest_info(apk)
self.extract_dex_info(apk)
self.extract_resource_info(apk)
self.extract_native_lib_info(apk)
self.extract_certificate_info(apk)
return self.info
def extract_file_info(self):
"""提取文件基本信息"""
stat = self.apk_path.stat()
# 计算哈希值
with open(self.apk_path, 'rb') as f:
content = f.read()
md5_hash = hashlib.md5(content).hexdigest()
sha1_hash = hashlib.sha1(content).hexdigest()
sha256_hash = hashlib.sha256(content).hexdigest()
self.info['file_info'] = {
'name': self.apk_path.name,
'size': stat.st_size,
'md5': md5_hash,
'sha1': sha1_hash,
'sha256': sha256_hash,
'created': stat.st_ctime,
'modified': stat.st_mtime
}
def extract_structure_info(self, apk):
"""提取结构信息"""
files = apk.filelist
structure = {
'total_files': len(files),
'total_uncompressed_size': sum(f.file_size for f in files),
'total_compressed_size': sum(f.compress_size for f in files),
'file_types': {},
'directories': set(),
'largest_files': []
}
# 文件类型统计
for file_info in files:
if '.' in file_info.filename:
ext = file_info.filename.split('.')[-1].lower()
structure['file_types'][ext] = structure['file_types'].get(ext, 0) + 1
# 目录统计
if '/' in file_info.filename:
dir_path = '/'.join(file_info.filename.split('/')[:-1])
structure['directories'].add(dir_path)
# 最大文件
files_by_size = sorted(files, key=lambda x: x.file_size, reverse=True)
structure['largest_files'] = [
{'name': f.filename, 'size': f.file_size}
for f in files_by_size[:10]
]
structure['directories'] = list(structure['directories'])
structure['compression_ratio'] = structure['total_compressed_size'] / structure['total_uncompressed_size']
self.info['structure'] = structure
def extract_manifest_info(self, apk):
"""提取清单文件信息"""
try:
manifest_data = apk.read('AndroidManifest.xml')
manifest_info = {
'size': len(manifest_data),
'format': 'binary' if manifest_data.startswith(b'\x03\x00\x08\x00') else 'text'
}
# 如果有aapt工具,可以解析更多信息
# 这里简化处理
self.info['manifest'] = manifest_info
except KeyError:
self.info['manifest'] = {'error': 'AndroidManifest.xml not found'}
def extract_dex_info(self, apk):
"""提取DEX文件信息"""
dex_files = []
for file_info in apk.filelist:
if file_info.filename.endswith('.dex'):
dex_data = apk.read(file_info.filename)
dex_info = self.parse_dex_header(dex_data)
dex_info['filename'] = file_info.filename
dex_info['size'] = len(dex_data)
dex_files.append(dex_info)
self.info['dex_files'] = dex_files
def parse_dex_header(self, dex_data):
"""解析DEX文件头"""
if len(dex_data) < 112:
return {'error': 'Invalid DEX file size'}
try:
magic = dex_data[:8]
if magic[:4] != b'dex\n':
return {'error': 'Invalid DEX magic'}
version = magic[4:7].decode('ascii')
# Header layout after the 8-byte magic: checksum(uint32), signature(20s), then 20 uint32 fields
header_data = struct.unpack('<I20s20I', dex_data[8:112])
return {
'version': version,
'checksum': header_data[0],
'file_size': header_data[2],
'header_size': header_data[3],
'string_ids_size': header_data[8],
'type_ids_size': header_data[10],
'proto_ids_size': header_data[12],
'field_ids_size': header_data[14],
'method_ids_size': header_data[16],
'class_defs_size': header_data[18]
}
except Exception as e:
return {'error': f'DEX parsing error: {str(e)}'}
def extract_resource_info(self, apk):
"""提取资源信息"""
resource_info = {
'has_resources_arsc': False,
'res_files': 0,
'assets_files': 0,
'res_directories': set(),
'assets_directories': set()
}
# 检查resources.arsc
try:
arsc_data = apk.read('resources.arsc')
resource_info['has_resources_arsc'] = True
resource_info['resources_arsc_size'] = len(arsc_data)
except KeyError:
pass
# 统计资源文件
for file_info in apk.filelist:
if file_info.filename.startswith('res/'):
resource_info['res_files'] += 1
if '/' in file_info.filename[4:]: # 去掉'res/'前缀
dir_path = file_info.filename.split('/')[1]
resource_info['res_directories'].add(dir_path)
elif file_info.filename.startswith('assets/'):
resource_info['assets_files'] += 1
if '/' in file_info.filename[7:]: # 去掉'assets/'前缀
dir_path = file_info.filename.split('/')[1]
resource_info['assets_directories'].add(dir_path)
resource_info['res_directories'] = list(resource_info['res_directories'])
resource_info['assets_directories'] = list(resource_info['assets_directories'])
self.info['resources'] = resource_info
def extract_native_lib_info(self, apk):
"""提取Native库信息"""
lib_info = {}
for file_info in apk.filelist:
if file_info.filename.startswith('lib/') and file_info.filename.endswith('.so'):
parts = file_info.filename.split('/')
if len(parts) >= 3:
arch = parts[1]
lib_name = parts[2]
if arch not in lib_info:
lib_info[arch] = []
lib_info[arch].append({
'name': lib_name,
'size': file_info.file_size
})
self.info['native_libs'] = lib_info
def extract_certificate_info(self, apk):
"""提取证书信息"""
cert_info = {
'has_manifest_mf': False,
'signature_files': [],
'certificate_files': []
}
for file_info in apk.filelist:
if file_info.filename == 'META-INF/MANIFEST.MF':
cert_info['has_manifest_mf'] = True
elif file_info.filename.startswith('META-INF/') and file_info.filename.endswith('.SF'):
cert_info['signature_files'].append(file_info.filename)
elif file_info.filename.startswith('META-INF/') and (
file_info.filename.endswith('.RSA') or file_info.filename.endswith('.DSA')):
cert_info['certificate_files'].append(file_info.filename)
self.info['certificates'] = cert_info
def save_to_json(self, output_path):
"""保存到JSON文件"""
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(self.info, f, indent=2, ensure_ascii=False, default=str)
def main():
parser = argparse.ArgumentParser(description='APK Information Extractor')
parser.add_argument('apk', help='Path to APK file')
parser.add_argument('-o', '--output', help='Output JSON file')
parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')
args = parser.parse_args()
if not Path(args.apk).exists():
print(f"Error: APK file not found: {args.apk}")
return 1
extractor = APKInfoExtractor(args.apk)
info = extractor.extract_all_info()
if args.verbose:
print(json.dumps(info, indent=2, default=str))
if args.output:
extractor.save_to_json(args.output)
print(f"Information saved to: {args.output}")
# 简要报告
print(f"\n=== APK Analysis Summary ===")
print(f"File: {info['file_info']['name']}")
print(f"Size: {info['file_info']['size']:,} bytes")
print(f"Files: {info['structure']['total_files']}")
print(f"DEX files: {len(info['dex_files'])}")
print(f"Native architectures: {len(info['native_libs'])}")
print(f"Has resources.arsc: {info['resources']['has_resources_arsc']}")
print(f"Certificate files: {len(info['certificates']['certificate_files'])}")
if __name__ == "__main__":
main()
5.6 Common Problems and Solutions
Q1: The APK cannot be rebuilt after unpacking
A1: Common causes and fixes:
# 1. Check that you are using an up-to-date APKTool
apktool --version
# 2. Clear previously installed framework files
rm -rf ~/.local/share/apktool/framework/
# 3. Reinstall the framework resources
apktool if framework-res.apk
# 4. Rebuild with the correct parameters
apktool b modified_apk_dir -o new_app.apk
# 5. If resource errors occur, try building with aapt2
apktool b modified_apk_dir --use-aapt2 -o new_app.apk
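A rebuilt APK also will not install until it is aligned and re-signed, because repacking invalidates the original signature in META-INF/. A typical follow-up (the keystore and file names below are placeholders):
# Align the rebuilt APK (zipalign ships with the Android build-tools)
zipalign -v 4 new_app.apk new_app_aligned.apk
# Sign it with your own key (my-release-key.jks is a placeholder keystore)
apksigner sign --ks my-release-key.jks --out new_app_signed.apk new_app_aligned.apk
# Verify the new signature
apksigner verify new_app_signed.apk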
Q2: Binary XML parsing fails
A2: How to handle it:
def safe_axml_parse(axml_data):
"""安全的AXML解析"""
try:
# 尝试使用自定义解析器
parser = AXMLParser(axml_data)
parser.parse()
return parser
except Exception as e:
print(f"Custom parser failed: {e}")
# Fall back to aapt; aapt expects an APK, so wrap the AXML in a temporary ZIP first
try:
import subprocess
import tempfile
import zipfile
with tempfile.NamedTemporaryFile(suffix='.apk', delete=False) as tmp:
with zipfile.ZipFile(tmp, 'w') as zf:
zf.writestr('AndroidManifest.xml', axml_data)
tmp.flush()
result = subprocess.run(['aapt', 'dump', 'xmltree', tmp.name, 'AndroidManifest.xml'],
capture_output=True, text=True)
if result.returncode == 0:
return result.stdout
else:
return f"aapt error: {result.stderr}"
except Exception as e2:
return f"All parsing methods failed: {e2}"
Q3: Performance issues when processing large APK files
A3: Optimization strategies:
import mmap
import struct
class OptimizedAPKAnalyzer:
"""APK analyzer optimized for very large files"""
def __init__(self, apk_path, chunk_size=1024*1024):
self.apk_path = apk_path
self.chunk_size = chunk_size
def analyze_large_apk(self):
"""分析大型APK文件"""
import mmap
with open(self.apk_path, 'rb') as f:
# 使用内存映射处理大文件
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
return self.analyze_memory_mapped_apk(mm)
def analyze_memory_mapped_apk(self, mm):
"""分析内存映射的APK"""
# 只读取ZIP中央目录,避免加载整个文件
central_dir_offset = self.find_central_directory(mm)
if central_dir_offset:
return self.parse_central_directory(mm, central_dir_offset)
else:
return None
def find_central_directory(self, mm):
"""查找ZIP中央目录"""
# 从文件末尾开始查找EOCD记录
file_size = len(mm)
# 查找End of Central Directory记录
for i in range(file_size - 22, max(file_size - 65536, 0), -1):
if mm[i:i+4] == b'PK\x05\x06':
# 找到EOCD,解析中央目录偏移
central_dir_offset = struct.unpack('<I', mm[i+16:i+20])[0]
return central_dir_offset
return None
def selective_extraction(self, apk, file_patterns):
"""选择性提取,减少内存使用"""
import fnmatch
extracted = {}
for file_info in apk.filelist:
for pattern in file_patterns:
if fnmatch.fnmatch(file_info.filename, pattern):
# 只在需要时读取文件内容
extracted[file_info.filename] = {
'size': file_info.file_size,
'compressed_size': file_info.compress_size,
'extract_func': lambda: apk.read(file_info.filename)
}
break
return extracted
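A brief usage sketch for the class above (example.apk is a placeholder; find_central_directory only returns the central directory offset, and what to parse from there is left to the caller):
import mmap

analyzer = OptimizedAPKAnalyzer("example.apk")  # placeholder path
with open(analyzer.apk_path, 'rb') as f:
    with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        offset = analyzer.find_central_directory(mm)
        print(f"Central directory offset: {offset}")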
5.7 Chapter Summary
This chapter covered APK structure analysis and unpacking techniques in depth:
- APK file format: understanding the nature of an APK as a ZIP archive and its internal structure
- Unpacking techniques: mastering multiple APK unpacking methods and tools
- Binary XML parsing: learning how the AXML format is parsed
- Resource file analysis: understanding resources.arsc and the resource ID mapping mechanism
- Hands-on exercises: practicing APK analysis skills on real projects
APK structure analysis is the foundation of Android reverse engineering; only by understanding how an APK is organized internally can the subsequent static and dynamic analysis be carried out effectively.
Learning checklist:
- Understand that an APK is fundamentally a ZIP archive
- Know the role of each APK component
- Be able to unpack an APK with multiple tools
- Understand the binary XML (AXML) format
- Understand the structure of resources.arsc
- Master the resource ID mapping mechanism
- Be able to write automated APK analysis tools
- Know how to resolve common problems
Next chapter preview:
In the next chapter we will study Java-layer static analysis, including decompiling Java code, static analysis methods, and understanding code structure.