fishhook 源码阅读
准备
最好下载 MachOView 和 Hopper Disassembler 来协助理解 Mach-O 格式,可以对照工具中的显示来验证下面的分析。
基本数据
/* Describes a single symbol to rebind. */
struct rebinding {
    char *name; // name of the original function; matched against the image's symbol name with its first character skipped
    void *replacement; // address of the new function that is written into the symbol pointer slot
    void **replaced; // if non-NULL, receives the pointer that was stored in the slot before it is overwritten
};
/* One node of a singly linked list; each node carries one batch of rebindings. */
struct rebindings_entry {
    struct rebinding *rebindings; // array of rebinding records belonging to this node
    size_t rebindings_nel; // number of elements in the rebindings array
    struct rebindings_entry *next; // next node in the list, or NULL at the tail
};
/* Head of the file-wide list of rebinding batches; new batches are inserted at the front. */
static struct rebindings_entry *_rebindings_head;
prepend_rebindings(…)
/*
 * Build a struct rebindings_entry holding a private copy of rebindings[0..nel)
 * and insert it at the front of the list pointed to by rebindings_head.
 *
 * rebindings_head: address of the list head pointer; updated on success.
 * rebindings:      array of nel rebinding records to copy.
 * nel:             number of records in rebindings.
 *
 * Returns 0 on success, or -1 if the requested size overflows size_t or an
 * allocation fails. On failure the list is left untouched and nothing is leaked.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
    /*
     * Reject counts whose byte size would wrap around size_t. Without this
     * check the buffer would be smaller than rebindings_nel claims and later
     * iteration over the entry would read past its end.
     */
    if (nel > (size_t)-1 / sizeof(struct rebinding)) {
        return -1;
    }
    const size_t payload_size = sizeof(struct rebinding) * nel;

    /* Allocate the list node itself. */
    struct rebindings_entry *new_entry = malloc(sizeof *new_entry);
    if (new_entry == NULL) {
        return -1;
    }

    /* Allocate the array the node owns; release the node if that fails. */
    new_entry->rebindings = malloc(payload_size);
    if (new_entry->rebindings == NULL) {
        free(new_entry);
        return -1;
    }

    /* Copy the caller's records so the list does not depend on the caller's storage. */
    memcpy(new_entry->rebindings, rebindings, payload_size);
    new_entry->rebindings_nel = nel;

    /* Standard head insertion into a singly linked list. */
    new_entry->next = *rebindings_head;
    *rebindings_head = new_entry;
    return 0;
}
rebind_symbols_for_image(…)
/* 根据 mach_header,slide 定位 “符号表实际address”,“字符串表实际address”,“indirect表实际地址”,\
遍历“S_LAZY_SYMBOL_POINTERS S_NON_LAZY_SYMBOL_POINTERS”符号 \
slide 为实际 mach-o 镜像文件加入内存的偏移地址。ASLR 使用的是 slide + vmaddress 来计算符号内存地址的。
*/
static void rebind_symbols_for_image(struct rebindings_entry *rebindings_head,
const struct mach_header *mach_header,
intptr_t slide) {
/* 获取 mach_header 这个符号的信息,将信息放到 info 中。\
info->dli_fbase 为镜像image的基地址,因为mach_header也是指向image的基地址,所以两者相等。\
info->dli_fname 为镜像image的路径
*/
Dl_info info;
if (dladdr(mach_header, &info) == 0) {
return;
}
/* 指向 load_commands 位置。
*/
uintptr_t cur = (uintptr_t)mach_header + sizeof(mach_header_t);
segment_command_t *cur_seg_cmd = NULL;
segment_command_t *linkedit_segment = NULL;
struct symtab_command *symtab_cmd = NULL;
struct dysymtab_command *dysymtab_cmd = NULL;
/* 定位 “符号表 segment command位置”,“动态符号表 segment command位置”,“linkedit segment command 位置”
*/
for (uint i = 0; i < mach_header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
/* 遍历 load_commands,通过 off += segment_cmd->cmdsize 指向不同的 load_command。
*/
cur_seg_cmd = (segment_command_t *)cur;
/* LC_SEGMENT_xxx 下的 sections 是要映射到进程的内存空间的。
*/
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = (segment_command_t *)cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
/* symtab_command 里有:符号表 offset,字符串表 offset。
*/
symtab_cmd = (struct symtab_command *)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
/* dysymtab_command 里有:indirect表 offset,indirectsyms number。
*/
dysymtab_cmd = (struct dysymtab_command *)cur_seg_cmd;
}
}
if (!linkedit_segment || !symtab_cmd || !dysymtab_cmd || !dysymtab_cmd->nindirectsyms) {
return;
}
/* 定位 “符号表”,“字符串表”,“重定向表” 的 实际address
*/
uintptr_t linkedit_base;
nlist_t *symtab = NULL;
char *strtab = NULL;
uint32_t *indirect_symtab = NULL;
/* linkedit_segment->vmaddr - linkedit_segment->fileoff == _PAGEZERO \
所有 table 的 实际 address 可以根据 slide + _PAGEZERO + offset 来计算 \
所有 table 的 虚拟 address 可以根据 _PAGEZERO + offset 来计算 \
segment _PAGEZERO vm size == segment.vmaddress - fileoffset == section.address - offset == 40G == 0x100000000
*/
linkedit_base = slide + (uintptr_t)linkedit_segment->vmaddr - linkedit_segment->fileoff;
symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff); // zxq:地址 -> 指针 可以直接转,等价于 nlist_t *p = &a;
strtab = (char *)(linkedit_base + symtab_cmd->stroff);
indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
/* 上面因为遍历 cmd,cur 指向不是 load_commands 了,这里再指向这里。
*/
cur = (uintptr_t)mach_header + sizeof(mach_header_t);
// 遍历 cmd
for (uint i = 0; i < mach_header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
/* S_LAZY_SYMBOL_POINTERS 是在 __DATA __la_symbol_ptr中,S_NON_LAZY_SYMBOL_POINTERS 在 __DATA __nl_symbol_ptr中。
*/
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0
&& strcmp(cur_seg_cmd->segname, SEG_DATA_CONST)) {
continue;
}
section_t *section0 = (section_t *)(cur + sizeof(segment_command_t));
// 遍历 sections
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
/* 每个 section_t 的大小是一样的,可以用 (section_t *)p++ 来计算下一个 section 的位置。\
每个 segment_command_t 的大小不一样,需要用 cmd 来计算下一个 command 的位置。\
cmdsize = sizeof(segment_command_t) + nsects * sizeof(section_t) + ...;
*/
section_t *sect = section0 + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS
|| (sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(sect, rebindings_head, slide, symtab, strtab, indirect_symtab);
}
}
}
}
perform_rebinding_with_section(…)
/*
 * Rebind the symbol pointers stored in one section.
 *
 * Slot i of the section corresponds to entry (section->reserved1 + i) of the
 * indirect symbol table; that entry is an index into symtab, whose n_strx
 * field is an offset into strtab where the symbol name starts. When the name
 * (with its first character skipped) equals a rebinding's name, the slot is
 * overwritten with that rebinding's replacement.
 *
 * section:          section whose pointer slots are examined.
 * rebindings_head:  head of the list of rebinding batches.
 * slide:            offset added to section->addr to get the runtime address.
 * symtab:           runtime address of the symbol table.
 * strtab:           runtime address of the string table.
 * indirect_symtab:  runtime address of the indirect symbol table.
 */
static void perform_rebinding_with_section(section_t *section,
                                           struct rebindings_entry *rebindings_head,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
    /* Indirect table entries that belong to this section. */
    uint32_t *section_indices = indirect_symtab + section->reserved1;
    /* The section body viewed as an array of pointer-sized slots. */
    void **slots = (void **)(slide + section->addr);

    for (uint slot = 0; slot < section->size / sizeof(void *); slot++) {
        uint32_t sym_index = section_indices[slot];

        /* These marker values are not symbol table indices, so there is no name to look up. */
        if (sym_index == INDIRECT_SYMBOL_ABS
            || sym_index == INDIRECT_SYMBOL_LOCAL
            || sym_index == (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL)) {
            continue;
        }

        /* indirect table -> symbol table -> string table. */
        uint32_t name_offset = symtab[sym_index].n_un.n_strx;
        char *raw_name = strtab + name_offset;

        /* Names shorter than two characters have nothing left after the first character is skipped. */
        if (strnlen(raw_name, 2) < 2) {
            continue;
        }
        /* Per the original note, symbol names carry a leading '_' (e.g. _open, _close). */
        char *bare_name = raw_name + 1;

        /* Search every batch, newest first; stop at the first matching rebinding. */
        int rebound = 0;
        struct rebindings_entry *entry = rebindings_head;
        while (entry != NULL && !rebound) {
            for (uint k = 0; k < entry->rebindings_nel; k++) {
                struct rebinding *candidate = &entry->rebindings[k];
                if (strcmp(bare_name, candidate->name) != 0) {
                    continue;
                }
                /*
                 * Hand the current slot value back to the caller, unless the
                 * slot already holds the replacement.
                 */
                if (candidate->replaced != NULL && slots[slot] != candidate->replacement) {
                    *(candidate->replaced) = slots[slot];
                }
                /* Install the new implementation. */
                slots[slot] = candidate->replacement;
                rebound = 1;
                break;
            }
            entry = entry->next;
        }
    }
}
_rebind_symbols_for_image(…)
/*
 * Per-image callback: it is the function registered through
 * _dyld_register_func_for_add_image() and is also called directly for each
 * image by rebind_symbols(). Its shape matches
 * void (*callback)(const struct mach_header *mach_header, intptr_t slide).
 *
 * Addresses recorded in a Mach-O image are virtual addresses; slide is the
 * ASLR offset added to them to obtain the addresses in the running process.
 */
static void _rebind_symbols_for_image(const struct mach_header *mach_header,
                                      intptr_t slide) {
    // Apply every registered rebinding batch to this image.
    rebind_symbols_for_image(_rebindings_head, mach_header, slide);
    // Print the image's file name.
    print_dyld_image_fname(mach_header, slide);
}
rebind_symbols(…)
/*
 * Public entry point: register a batch of rebindings and apply it.
 *
 * rebindings:      array of rebinding records.
 * rebindings_nel:  number of records in the array.
 *
 * Returns the result of prepend_rebindings(): a negative value on failure,
 * in which case nothing else is done.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
    /* Copy the batch into a new node at the front of the global list. */
    int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
    if (status < 0) {
        return status;
    }

    /* A head with no successor means this is the first batch ever registered. */
    if (_rebindings_head->next == NULL) {
        _dyld_register_func_for_add_image(_rebind_symbols_for_image);
        return status;
    }

    /*
     * The callback is already registered, so walk the images dyld currently
     * reports and run the callback on each one by hand to pick up the new batch.
     */
    uint32_t image_count = _dyld_image_count();
    uint32_t image_index = 0;
    while (image_index < image_count) {
        _rebind_symbols_for_image(_dyld_get_image_header(image_index),
                                  _dyld_get_image_vmaddr_slide(image_index));
        image_index++;
    }
    return status;
}
print_dyld_image_fname(…)
/*
 * Print the file name of the image that mach_header belongs to.
 *
 * mach_header: header address passed to dladdr() for the lookup.
 * slide:       unused here.
 *
 * Prints nothing when dladdr() reports failure (returns 0).
 */
static void print_dyld_image_fname(const struct mach_header *mach_header, intptr_t slide) {
    Dl_info image_info;
    if (dladdr(mach_header, &image_info) != 0) {
        printf("dyld image file name:%s\n", image_info.dli_fname);
    }
}
理解一下 _dyld_register_func_for_add_image(…) 和 _dyld_register_func_for_remove_image(…)
The following functions allow you to install callbacks which will be called by dyld whenever an image is loaded or unloaded. During a call to _dyld_register_func_for_add_image() the callback func is called for every existing image. Later, it is called as each new image is loaded and bound (but initializers not yet run). The callback registered with _dyld_register_func_for_remove_image() is called after any terminators in an image are run and before the image is un-memory-mapped.
大致意思:调用 _dyld_register_func_for_add_image(callback) 时,dyld 会立即对每一个已经加载的 image 调用一次 callback;此后每当有新的 image 被加载并完成绑定(但其初始化函数尚未运行)时,callback 也会被调用。同理,通过 _dyld_register_func_for_remove_image(…) 注册的 callback,会在 image 的终止函数(terminators)运行之后、image 被解除内存映射之前被调用。
参考资源:
https://juejin.im/post/5a97b8e851882555666f144b