文末附完整代码链接
1.什么是libbfd
二进制文件描述符(Binary File Descriptor,BFD)库,即libbfd,为读取和解析多种二进制格式提供了一个统一的公共接口,并且支持为各种体系结构编译的二进制文件,包括x86和x86-64的ELF和PE文件。基于libbfd构建的二进制加载器,可以同时支持所有这些格式,而无须针对每种格式单独编写解析代码。
2.设计二进制加载器的步骤
- 整理加载的二进制信息:ELF头部的信息+section
- 打开二进制文件:使用基于libbfd封装的辅助函数open_bfd()
- 加载二进制文件信息:load_binary_bfd
- 卸载二进制文件信息:unload_binary
- 测试二进制加载器
3.实现二进制加载器
以下是二进制加载器的接口(loader.h)。该API公开了一系列表示二进制文件不同组件的类,共包括3个类和2个函数:Binary类是对整个二进制文件的抽象;Section类和Symbol类分别表示二进制文件中包含的节和符号(对ELF格式不清楚的,可以翻看该专栏的文章2);另外两个函数是load_binary和unload_binary,下文介绍。
#ifndef LOADER_H
#define LOADER_H
#include <stdint.h>
#include <string>
#include <vector>
class Binary;
class Section;
class Symbol;
/*
 * One symbol collected from a binary: a name, an address and a coarse type.
 * A freshly constructed Symbol has an empty name, address 0 and type
 * SYM_TYPE_UKN.
 */
class Symbol {
public:
  enum SymbolType {
    SYM_TYPE_UKN  = 0,  /* default type of a new Symbol */
    SYM_TYPE_FUNC = 1   /* reported as "FUNC" by the demo program */
  };

  /* Kept user-provided so the construction rules of the type do not change. */
  Symbol() {}

  SymbolType  type = SYM_TYPE_UKN;
  std::string name;
  uint64_t    addr = 0;
};
/*
 * One section of a binary: its name, coarse type, start address (vma),
 * size in bytes and a pointer to its contents.
 * A freshly constructed Section has null pointers, type SEC_TYPE_NONE and
 * zero vma/size.
 */
class Section {
public:
  enum SectionType {
    SEC_TYPE_NONE = 0,  /* default type of a new Section */
    SEC_TYPE_CODE = 1,  /* reported as "CODE" by the demo program */
    SEC_TYPE_DATA = 2
  };

  Section() : binary(nullptr), type(SEC_TYPE_NONE), vma(0), size(0), bytes(nullptr) {}

  /*
   * Returns true when addr lies in the half-open range [vma, vma + size).
   * The test is written as (addr - vma < size) rather than
   * (addr < vma + size) so that it cannot wrap around for sections that end
   * near the top of the 64-bit address space.
   * const-qualified so it can be called through a const Section.
   */
  bool contains (uint64_t addr) const { return (addr >= vma) && (addr - vma < size); }

  /* Presumably the Binary this section belongs to; it is not set by this
   * class -- confirm against the loader. The elaborated type specifier keeps
   * this declaration valid regardless of where Binary is declared. */
  class Binary *binary;
  std::string   name;
  SectionType   type;
  uint64_t      vma;
  uint64_t      size;
  /* NOTE(review): ownership of this buffer is not visible here; presumably it
   * is allocated by the loader and released by unload_binary -- confirm. */
  uint8_t      *bytes;
};
/*
 * Description of a whole loaded binary: file name, format and architecture
 * (as enum values and as printable strings), bit width, entry point address,
 * and the lists of sections and symbols.
 * A freshly constructed Binary has type BIN_TYPE_AUTO, arch ARCH_NONE,
 * bits 0, entry 0 and empty strings/lists.
 */
class Binary {
public:
  enum BinaryType {
    BIN_TYPE_AUTO = 0,
    BIN_TYPE_ELF  = 1,
    BIN_TYPE_PE   = 2
  };
  enum BinaryArch {
    ARCH_NONE = 0,
    ARCH_X86  = 1
  };

  /* Kept user-provided so the construction rules of the type do not change. */
  Binary() {}

  /*
   * Returns a pointer to the first section named ".text", or a null pointer
   * when there is none. The pointer refers to an element of `sections`, so
   * it is invalidated by anything that reallocates that vector.
   */
  Section *get_text_section()
  {
    for(std::vector<Section>::iterator it = sections.begin(); it != sections.end(); ++it) {
      if(it->name == ".text") {
        return &*it;
      }
    }
    return nullptr;
  }

  std::string          filename;
  BinaryType           type = BIN_TYPE_AUTO;
  std::string          type_str;
  BinaryArch           arch = ARCH_NONE;
  std::string          arch_str;
  unsigned             bits = 0;
  uint64_t             entry = 0;
  std::vector<Section> sections;
  std::vector<Symbol>  symbols;
};
/*
 * Loads the file named by fname into *bin.
 * Returns 0 on success and -1 on failure (callers test for < 0).
 * The `type` argument is forwarded to the libbfd-based implementation, which
 * currently does not consult it.
 */
int load_binary (std::string &fname, Binary *bin, Binary::BinaryType type);
/*
 * Counterpart of load_binary; call it once the Binary is no longer needed.
 * NOTE(review): its definition is not part of this listing -- presumably it
 * releases per-section buffers; confirm against the implementation.
 */
void unload_binary (Binary *bin);
#endif /* LOADER_H */
load_binary.cc:解析由文件名指定的二进制文件,并将其加载到指定的Binary对象中,这个过程在load_binary_bfd中完成:
- 使用辅助函数open_bfd(对libbfd的封装)打开fname参数指定的二进制文件;
- 使用库函数bfd_get_start_address获得二进制文件入口点地址;
- 收集二进制类型信息:ELF格式、PE格式还是其他格式,bfd_h->xvec提供了一个指向bfd_target结构的指针,在该结构中包含目标类型名称的字符串,即bfd_h->xvec->name;
- 使用switch语句检查bfd_h->xvec->flavour并设置相应的Binary类型;
- 获取二进制文件的体系结构信息:库函数bfd_get_arch_info()
- 使用switch语句检查bfd_info->mach,据此设置体系结构及其位宽:32位或64位;
- 加载二进制文件中包含的符号:load_symbols_bfd、load_dynsym_bfd(具体代码没有在文中展示,大家可以下载链接中的代码测试);
- 加载二进制文件中包含的节:load_sections_bfd(具体代码没有在文中展示,大家可以下载链接中的代码测试);
- 至此libbfd的使用已经完成,不再需要bfd句柄,用bfd_close将其关闭;
static int
load_binary_bfd(std::string &fname, Binary *bin, Binary::BinaryType type)
{
int ret;
bfd *bfd_h;
const bfd_arch_info_type *bfd_info;
bfd_h = NULL;
bfd_h = open_bfd(fname);
if(!bfd_h) {
goto fail;
}
bin->filename = std::string(fname);
bin->entry = bfd_get_start_address(bfd_h);
bin->type_str = std::string(bfd_h->xvec->name);
switch(bfd_h->xvec->flavour) {
case bfd_target_elf_flavour:
bin->type = Binary::BIN_TYPE_ELF;
break;
case bfd_target_coff_flavour:
bin->type = Binary::BIN_TYPE_PE;
break;
case bfd_target_unknown_flavour:
default:
fprintf(stderr, "unsupported binary type (%s)\n", bfd_h->xvec->name);
goto fail;
}
bfd_info = bfd_get_arch_info(bfd_h);
bin->arch_str = std::string(bfd_info->printable_name);
switch(bfd_info->mach) {
case bfd_mach_i386_i386:
bin->arch = Binary::ARCH_X86;
bin->bits = 32;
break;
case bfd_mach_x86_64:
bin->arch = Binary::ARCH_X86;
bin->bits = 64;
break;
default:
fprintf(stderr, "unsupported architecture (%s)\n",
bfd_info->printable_name);
goto fail;
}
/* Symbol handling is best-effort only (they may not even be present) */
load_symbols_bfd(bfd_h, bin);
load_dynsym_bfd(bfd_h, bin);
if(load_sections_bfd(bfd_h, bin) < 0) goto fail;
ret = 0;
goto cleanup;
fail:
ret = -1;
cleanup:
if(bfd_h) bfd_close(bfd_h);
return ret;
}
/*
 * Public entry point of the loader: hands all three arguments to the
 * libbfd-based implementation and returns its result unchanged.
 */
int
load_binary(std::string &fname, Binary *bin, Binary::BinaryType type)
{
  const int status = load_binary_bfd(fname, bin, type);
  return status;
}
4.测试二进制加载器
load-demo.cc
#include <stdio.h>
#include <stdint.h>
#include <string>
#include "../inc/loader.h"
int
main(int argc, char *argv[])
{
size_t i;
Binary bin;
Section *sec;
Symbol *sym;
std::string fname;
if(argc < 2) {
printf("Usage: %s <binary>\n", argv[0]);
return 1;
}
fname.assign(argv[1]);
if(load_binary(fname, &bin, Binary::BIN_TYPE_AUTO) < 0) {
return 1;
}
printf("loaded binary '%s' %s/%s (%u bits) entry@0x%016jx\n",
bin.filename.c_str(),
bin.type_str.c_str(), bin.arch_str.c_str(),
bin.bits, bin.entry);
for(i = 0; i < bin.sections.size(); i++) {
sec = &bin.sections[i];
printf(" 0x%016jx %-8ju %-20s %s\n",
sec->vma, sec->size, sec->name.c_str(),
sec->type == Section::SEC_TYPE_CODE ? "CODE" : "DATA");
}
if(bin.symbols.size() > 0) {
printf("scanned symbol tables\n");
for(i = 0; i < bin.symbols.size(); i++) {
sym = &bin.symbols[i];
printf(" %-40s 0x%016jx %s\n",
sym->name.c_str(), sym->addr,
(sym->type & Symbol::SYM_TYPE_FUNC) ? "FUNC" : "");
}
}
unload_binary(&bin);
return 0;
}
测试结果:
完整代码链接:https://pan.baidu.com/s/1fZBW257Nu9ysO_KkxiRstw
提取码:bdz7