elf文件解析器

本文介绍了一个用C语言编写的ELF文件解析器,用于打印出所有segment(段)和section(节)及其映射关系。程序首先检查文件是否为ELF格式,然后读取并解析ELF头(ELF header)、节头表(section header table)和程序头表(program header table)。最后遍历解析出的数据,展示每个segment及其所包含的section的相关属性,如名称、偏移量、大小等。

前两天网上投递了简历,面试了一家C++公司,然后对面负责人给我发了一份笔试题,题目是:

请写出一个ELF文件解析器,需要能打印出所有segments和sections,并列出每个section与segment的映射关系。

首先了解elf是什么,它的结构是怎么样的,然后去读一下别人的源码,读懂之后,自己开始编码。

源码如下("elf.h" 头文件见后文),它会报warning,但是貌似不太影响最后结果:

#include<stdlib.h>
#include<stdio.h>
#include "elf.h"

/*
 * ELF64 segment/section mapper.
 *
 * Usage: <program> <elf-file>
 *
 * Reads the ELF header, the section header table, the section-name string
 * table and the program header table of argv[1], then prints every segment
 * followed by the sections that lie inside it.
 *
 * Returns EXIT_SUCCESS when the file was parsed and printed, EXIT_FAILURE on
 * a usage error, an I/O error, an allocation failure or a malformed file.
 */
int main(int argc, char* argv[])
{
    /* Literal values from the ELF specification, kept local so that this
     * function does not depend on which macros the bundled elf.h provides. */
    enum
    {
        IDENT_CLASS_INDEX = 4,   /* e_ident[] slot holding the file class */
        IDENT_CLASS_64    = 2,   /* class value for 64-bit objects */
        SEC_TYPE_NULL     = 0,   /* inactive section header entry */
        SEC_TYPE_NOBITS   = 8,   /* section occupies no bytes in the file */
        SEC_FLAG_ALLOC    = 0x2  /* section occupies memory at run time */
    };

    int status = EXIT_FAILURE;
    FILE *fp = NULL;
    Elf64_Shdr *shdr = NULL;
    Elf64_Phdr *phdr = NULL;
    char *shstrtab = NULL;
    size_t shstrtab_size = 0;
    Elf64_Ehdr elf_head;

    if (argc < 2)
    {
        printf("invalid arguments\n");
        return EXIT_FAILURE;
    }

    /* Binary mode: the file is raw data, not text. */
    fp = fopen(argv[1], "rb");
    if (NULL == fp)
    {
        printf("fail to open the file\n");
        return EXIT_FAILURE;
    }

    if (fread(&elf_head, sizeof(elf_head), 1, fp) != 1)
    {
        printf("fail to read head\n");
        goto cleanup;
    }

    /* Magic number check. */
    if (elf_head.e_ident[0] != 0x7F ||
        elf_head.e_ident[1] != 'E' ||
        elf_head.e_ident[2] != 'L' ||
        elf_head.e_ident[3] != 'F')
    {
        printf("Not a ELF file\n");
        goto cleanup;
    }

    /* Everything below uses the Elf64_* layouts; a 32-bit file would be
     * misinterpreted rather than rejected without this check. */
    if (elf_head.e_ident[IDENT_CLASS_INDEX] != IDENT_CLASS_64)
    {
        printf("Not a 64-bit ELF file\n");
        goto cleanup;
    }

    /* Section header table (may legitimately be absent). */
    if (elf_head.e_shnum > 0)
    {
        if (elf_head.e_shentsize != sizeof(Elf64_Shdr))
        {
            printf("unexpected section header size\n");
            goto cleanup;
        }

        shdr = calloc(elf_head.e_shnum, sizeof(*shdr));
        if (NULL == shdr)
        {
            printf("shdr malloc failed\n");
            goto cleanup;
        }

        if (0 != fseek(fp, (long)elf_head.e_shoff, SEEK_SET))
        {
            printf("\nfaile to fseek\n");
            goto cleanup;
        }

        if (fread(shdr, sizeof(*shdr), elf_head.e_shnum, fp) != elf_head.e_shnum)
        {
            printf("\nfail to read section\n");
            goto cleanup;
        }
    }

    /* Section-name string table. The index comes from the file, so it is
     * validated before use; the buffer lives on the heap because its size is
     * file-controlled, and one extra byte guarantees NUL termination. */
    if (elf_head.e_shstrndx < elf_head.e_shnum)
    {
        const Elf64_Shdr *strsec = &shdr[elf_head.e_shstrndx];

        if (strsec->sh_size > 0 && strsec->sh_type != SEC_TYPE_NOBITS)
        {
            if (strsec->sh_size >= (size_t)-1)
            {
                printf("string table too large\n");
                goto cleanup;
            }

            shstrtab_size = (size_t)strsec->sh_size;
            shstrtab = malloc(shstrtab_size + 1);
            if (NULL == shstrtab)
            {
                printf("shstrtab malloc failed\n");
                goto cleanup;
            }

            if (0 != fseek(fp, (long)strsec->sh_offset, SEEK_SET) ||
                fread(shstrtab, shstrtab_size, 1, fp) != 1)
            {
                printf("\nfaile to read\n");
                goto cleanup;
            }
            shstrtab[shstrtab_size] = '\0';
        }
    }

    /* Program header table (may legitimately be absent). */
    if (elf_head.e_phnum > 0)
    {
        if (elf_head.e_phentsize != sizeof(Elf64_Phdr))
        {
            printf("unexpected program header size\n");
            goto cleanup;
        }

        phdr = calloc(elf_head.e_phnum, sizeof(*phdr));
        if (NULL == phdr)
        {
            printf("phdr malloc failed\n");
            goto cleanup;
        }

        if (0 != fseek(fp, (long)elf_head.e_phoff, SEEK_SET))
        {
            printf("\nfaile to fseek\n");
            goto cleanup;
        }

        if (fread(phdr, sizeof(*phdr), elf_head.e_phnum, fp) != elf_head.e_phnum)
        {
            printf("\nfail to read segment\n");
            goto cleanup;
        }
    }

    printf("\n\n段的信息:\n");

    for (int i = 0; i < elf_head.e_phnum; i++)
    {
        const Elf64_Phdr *seg = &phdr[i];

        /* The header fields are 64-bit; cast to match the %llx specifier. */
        printf("%d: \n", i);
        printf(" 该段首相对偏移: %llx \n", (unsigned long long)seg->p_offset);
        printf(" 该段的大小: %llx \n", (unsigned long long)seg->p_memsz);
        printf(" 该段尾相对偏移: %llx \n",
               (unsigned long long)(seg->p_memsz + seg->p_offset));
        printf(" *该段包含的节有:\n");

        for (int j = 0; j < elf_head.e_shnum; j++)
        {
            const Elf64_Shdr *sec = &shdr[j];
            const char *name = "<unknown>";
            int has_file_bytes;
            int has_memory;

            if (sec->sh_type == SEC_TYPE_NULL)
            {
                continue;
            }

            has_file_bytes = (sec->sh_type != SEC_TYPE_NOBITS);
            has_memory = ((sec->sh_flags & SEC_FLAG_ALLOC) != 0);
            if (!has_file_bytes && !has_memory)
            {
                continue;
            }

            /* Simplified form of the usual section-in-segment rule:
             * a section with file contents must sit inside the segment's
             * file image (p_filesz), and an allocated section must sit
             * inside its memory image (p_memsz). Bounds are inclusive and
             * written so that the unsigned arithmetic cannot wrap. */
            if (has_file_bytes &&
                !(sec->sh_offset >= seg->p_offset &&
                  sec->sh_size <= seg->p_filesz &&
                  sec->sh_offset - seg->p_offset <= seg->p_filesz - sec->sh_size))
            {
                continue;
            }
            if (has_memory &&
                !(sec->sh_addr >= seg->p_vaddr &&
                  sec->sh_size <= seg->p_memsz &&
                  sec->sh_addr - seg->p_vaddr <= seg->p_memsz - sec->sh_size))
            {
                continue;
            }

            /* Only follow sh_name when it points inside the loaded table. */
            if (NULL != shstrtab && sec->sh_name < shstrtab_size)
            {
                name = shstrtab + sec->sh_name;
            }

            printf(" 节的名称: %s\n", name);
            printf(" 节首的偏移: %llx\n", (unsigned long long)sec->sh_offset);
            printf(" 节的大小: %llx\n", (unsigned long long)sec->sh_size);
            printf(" 节尾的地址: %llx\n",
                   (unsigned long long)(sec->sh_offset + sec->sh_size));
            printf("\n");
        }
        printf("\n");
    }

    printf("\n");
    status = EXIT_SUCCESS;

cleanup:
    free(shstrtab);
    free(phdr);
    free(shdr);
    fclose(fp);
    return status;
}

#include<stdlib.h>

#include<stdio.h>

#include "elf.h"

 

/*
 * ELF64 segment/section mapper.
 *
 * Usage: <program> <elf-file>
 *
 * Reads the ELF header, the section header table, the section-name string
 * table and the program header table of argv[1], then prints every segment
 * followed by the sections that lie inside it.
 *
 * Returns EXIT_SUCCESS when the file was parsed and printed, EXIT_FAILURE on
 * a usage error, an I/O error, an allocation failure or a malformed file.
 */
int main(int argc, char* argv[])
{
    /* Literal values from the ELF specification, kept local so that this
     * function does not depend on which macros the bundled elf.h provides. */
    enum
    {
        IDENT_CLASS_INDEX = 4,   /* e_ident[] slot holding the file class */
        IDENT_CLASS_64    = 2,   /* class value for 64-bit objects */
        SEC_TYPE_NULL     = 0,   /* inactive section header entry */
        SEC_TYPE_NOBITS   = 8,   /* section occupies no bytes in the file */
        SEC_FLAG_ALLOC    = 0x2  /* section occupies memory at run time */
    };

    int status = EXIT_FAILURE;
    FILE *fp = NULL;
    Elf64_Shdr *shdr = NULL;
    Elf64_Phdr *phdr = NULL;
    char *shstrtab = NULL;
    size_t shstrtab_size = 0;
    Elf64_Ehdr elf_head;

    if (argc < 2)
    {
        printf("invalid arguments\n");
        return EXIT_FAILURE;
    }

    /* Binary mode: the file is raw data, not text. */
    fp = fopen(argv[1], "rb");
    if (NULL == fp)
    {
        printf("fail to open the file\n");
        return EXIT_FAILURE;
    }

    if (fread(&elf_head, sizeof(elf_head), 1, fp) != 1)
    {
        printf("fail to read head\n");
        goto cleanup;
    }

    /* Magic number check. */
    if (elf_head.e_ident[0] != 0x7F ||
        elf_head.e_ident[1] != 'E' ||
        elf_head.e_ident[2] != 'L' ||
        elf_head.e_ident[3] != 'F')
    {
        printf("Not a ELF file\n");
        goto cleanup;
    }

    /* Everything below uses the Elf64_* layouts; a 32-bit file would be
     * misinterpreted rather than rejected without this check. */
    if (elf_head.e_ident[IDENT_CLASS_INDEX] != IDENT_CLASS_64)
    {
        printf("Not a 64-bit ELF file\n");
        goto cleanup;
    }

    /* Section header table (may legitimately be absent). */
    if (elf_head.e_shnum > 0)
    {
        if (elf_head.e_shentsize != sizeof(Elf64_Shdr))
        {
            printf("unexpected section header size\n");
            goto cleanup;
        }

        shdr = calloc(elf_head.e_shnum, sizeof(*shdr));
        if (NULL == shdr)
        {
            printf("shdr malloc failed\n");
            goto cleanup;
        }

        if (0 != fseek(fp, (long)elf_head.e_shoff, SEEK_SET))
        {
            printf("\nfaile to fseek\n");
            goto cleanup;
        }

        if (fread(shdr, sizeof(*shdr), elf_head.e_shnum, fp) != elf_head.e_shnum)
        {
            printf("\nfail to read section\n");
            goto cleanup;
        }
    }

    /* Section-name string table. The index comes from the file, so it is
     * validated before use; the buffer lives on the heap because its size is
     * file-controlled, and one extra byte guarantees NUL termination. */
    if (elf_head.e_shstrndx < elf_head.e_shnum)
    {
        const Elf64_Shdr *strsec = &shdr[elf_head.e_shstrndx];

        if (strsec->sh_size > 0 && strsec->sh_type != SEC_TYPE_NOBITS)
        {
            if (strsec->sh_size >= (size_t)-1)
            {
                printf("string table too large\n");
                goto cleanup;
            }

            shstrtab_size = (size_t)strsec->sh_size;
            shstrtab = malloc(shstrtab_size + 1);
            if (NULL == shstrtab)
            {
                printf("shstrtab malloc failed\n");
                goto cleanup;
            }

            if (0 != fseek(fp, (long)strsec->sh_offset, SEEK_SET) ||
                fread(shstrtab, shstrtab_size, 1, fp) != 1)
            {
                printf("\nfaile to read\n");
                goto cleanup;
            }
            shstrtab[shstrtab_size] = '\0';
        }
    }

    /* Program header table (may legitimately be absent). */
    if (elf_head.e_phnum > 0)
    {
        if (elf_head.e_phentsize != sizeof(Elf64_Phdr))
        {
            printf("unexpected program header size\n");
            goto cleanup;
        }

        phdr = calloc(elf_head.e_phnum, sizeof(*phdr));
        if (NULL == phdr)
        {
            printf("phdr malloc failed\n");
            goto cleanup;
        }

        if (0 != fseek(fp, (long)elf_head.e_phoff, SEEK_SET))
        {
            printf("\nfaile to fseek\n");
            goto cleanup;
        }

        if (fread(phdr, sizeof(*phdr), elf_head.e_phnum, fp) != elf_head.e_phnum)
        {
            printf("\nfail to read segment\n");
            goto cleanup;
        }
    }

    printf("\n\n段的信息:\n");

    for (int i = 0; i < elf_head.e_phnum; i++)
    {
        const Elf64_Phdr *seg = &phdr[i];

        /* The header fields are 64-bit; cast to match the %llx specifier. */
        printf("%d: \n", i);
        printf(" 该段首相对偏移: %llx \n", (unsigned long long)seg->p_offset);
        printf(" 该段的大小: %llx \n", (unsigned long long)seg->p_memsz);
        printf(" 该段尾相对偏移: %llx \n",
               (unsigned long long)(seg->p_memsz + seg->p_offset));
        printf(" *该段包含的节有:\n");

        for (int j = 0; j < elf_head.e_shnum; j++)
        {
            const Elf64_Shdr *sec = &shdr[j];
            const char *name = "<unknown>";
            int has_file_bytes;
            int has_memory;

            if (sec->sh_type == SEC_TYPE_NULL)
            {
                continue;
            }

            has_file_bytes = (sec->sh_type != SEC_TYPE_NOBITS);
            has_memory = ((sec->sh_flags & SEC_FLAG_ALLOC) != 0);
            if (!has_file_bytes && !has_memory)
            {
                continue;
            }

            /* Simplified form of the usual section-in-segment rule:
             * a section with file contents must sit inside the segment's
             * file image (p_filesz), and an allocated section must sit
             * inside its memory image (p_memsz). Bounds are inclusive and
             * written so that the unsigned arithmetic cannot wrap. */
            if (has_file_bytes &&
                !(sec->sh_offset >= seg->p_offset &&
                  sec->sh_size <= seg->p_filesz &&
                  sec->sh_offset - seg->p_offset <= seg->p_filesz - sec->sh_size))
            {
                continue;
            }
            if (has_memory &&
                !(sec->sh_addr >= seg->p_vaddr &&
                  sec->sh_size <= seg->p_memsz &&
                  sec->sh_addr - seg->p_vaddr <= seg->p_memsz - sec->sh_size))
            {
                continue;
            }

            /* Only follow sh_name when it points inside the loaded table. */
            if (NULL != shstrtab && sec->sh_name < shstrtab_size)
            {
                name = shstrtab + sec->sh_name;
            }

            printf(" 节的名称: %s\n", name);
            printf(" 节首的偏移: %llx\n", (unsigned long long)sec->sh_offset);
            printf(" 节的大小: %llx\n", (unsigned long long)sec->sh_size);
            printf(" 节尾的地址: %llx\n",
                   (unsigned long long)(sec->sh_offset + sec->sh_size));
            printf("\n");
        }
        printf("\n");
    }

    printf("\n");
    status = EXIT_SUCCESS;

cleanup:
    free(shstrtab);
    free(phdr);
    free(shdr);
    fclose(fp);
    return status;
}

 

 

这里用到了一个头文件 "elf.h" ,里面定义了elf文件结构的各种数据结构,能够使解析elf的过程中更加方便:

#ifndef _QEMU_ELF_H

#define _QEMU_ELF_H

#include <inttypes.h>

/* 32-bit ELF base types. */

 

/* Widths of the fixed-size integer types used by the typedefs below:

1 byte  -> uint8_t

2 bytes -> uint16_t

4 bytes -> uint32_t

8 bytes -> uint64_t */

typedef uint32_t Elf32_Addr;

typedef uint16_t Elf32_Half;

typedef uint32_t Elf32_Off;

typedef int32_t Elf32_Sword;

typedef uint32_t Elf32_Word;

/* 64-bit ELF base types. */

typedef uint64_t Elf64_Addr;

typedef uint16_t Elf64_Half;

typedef int16_t  Elf64_SHalf;

typedef uint64_t Elf64_Off;

typedef int32_t  Elf64_Sword;

typedef uint32_t Elf64_Word;

typedef uint64_t Elf64_Xword;

typedef int64_t Elf64_Sxword;

/* These constants are for the segment types stored in the image headers */

#define PT_NULL 0

#define PT_LOAD 1

#define PT_DYNAMIC 2

#define PT_INTERP 3

#define PT_NOTE 4

#define PT_SHLIB 5

#define PT_PHDR 6

#define PT_LOPROC 0x70000000

#define PT_HIPROC 0x7fffffff

#define PT_MIPS_REGINFO 0x70000000

#define PT_MIPS_OPTIONS 0x70000001

/* Flags in the e_flags field of the header */

/* MIPS architecture level. */

#define EF_MIPS_ARCH_1  0x00000000  /* -mips1 code. */

#define EF_MIPS_ARCH_2  0x10000000  /* -mips2 code. */

#define EF_MIPS_ARCH_3  0x20000000  /* -mips3 code. */

#define EF_MIPS_ARCH_4  0x30000000  /* -mips4 code. */

#define EF_MIPS_ARCH_5  0x40000000  /* -mips5 code. */

#define EF_MIPS_ARCH_32 0x50000000  /* MIPS32 code. */

#define EF_MIPS_ARCH_64 0x60000000  /* MIPS64 code. */

/* The ABI of a file. */

#define EF_MIPS_ABI_O32 0x00001000  /* O32 ABI. */

#define EF_MIPS_ABI_O64 0x00002000  /* O32 extended for 64 bit. */

#define EF_MIPS_NOREORDER 0x00000001

#define EF_MIPS_PIC 0x00000002

#define EF_MIPS_CPIC 0x00000004

#define EF_MIPS_ABI2  0x00000020

#define EF_MIPS_OPTIONS_FIRST 0x00000080

#define EF_MIPS_32BITMODE 0x00000100

#define EF_MIPS_ABI 0x0000f000

#define EF_MIPS_ARCH 0xf0000000

/* These constants define the different elf file types */

#define ET_NONE 0

#define ET_REL 1

#define ET_EXEC 2

#define ET_DYN 3

#define ET_CORE 4

#define ET_LOPROC 0xff00

#define ET_HIPROC 0xffff

/* These constants define the various ELF target machines */

#define EM_NONE 0

#define EM_M32 1

#define EM_SPARC 2

#define EM_386 3

#define EM_68K 4

#define EM_88K 5

#define EM_486 6 /* Perhaps disused */

#define EM_860 7

#define EM_MIPS 8 /* MIPS R3000 (officially, big-endian only) */

#define EM_MIPS_RS4_BE 10 /* MIPS R4000 big-endian */

#define EM_PARISC 15 /* HPPA */

#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */

#define EM_PPC   20 /* PowerPC */

#define EM_PPC64 21 /* PowerPC64 */

#define EM_ARM  40  /* ARM */

#define EM_SH  42 /* SuperH */

#define EM_SPARCV9 43 /* SPARC v9 64-bit */

#define EM_IA_64  50  /* HP/Intel IA-64 */

#define EM_X86_64 62  /* AMD x86-64 */

#define EM_S390 22  /* IBM S/390 */

#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */

#define EM_V850 87  /* NEC v850 */

#define EM_H8_300H 47 /* Hitachi H8/300H */

#define EM_H8S 48 /* Hitachi H8S */

/*

* This is an interim value that we will use until the committee comes

* up with a final number.

*/

#define EM_ALPHA  0x9026

/* Bogus old v850 magic number, used by old tools. */

#define EM_CYGNUS_V850  0x9080

/*

* This is the old interim value for S/390 architecture

*/

#define EM_S390_OLD 0xA390

/* This is the info that is needed to parse the dynamic section of the file */

#define DT_NULL 0

#define DT_NEEDED 1

#define DT_PLTRELSZ 2

#define DT_PLTGOT 3

#define DT_HASH 4

#define DT_STRTAB 5

#define DT_SYMTAB 6

#define DT_RELA 7

#define DT_RELASZ 8

#define DT_RELAENT  9

#defin

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值