gcc对C语言的编译分为四个步骤:
预处理
预处理阶段会处理以#开头的预处理指令:#include的头文件内容和宏定义会被展开并直接插入到代码中,而#if这样的条件编译指令则会根据条件是否成立,只保留满足条件的那部分代码。
#include <stdio.h> //预处理阶段此处会展开
#include "io.h" //预处理阶段会在此处展开
/*
 * Sample record type used only to demonstrate sizeof in main().
 * It holds an int, a char and a short; the assembly listing later in this
 * article shows the compiler folding sizeof(ST_HELLO) to the constant 8
 * for that x86-64 build.
 */
typedef struct
{
int a;
char b;
short c;
}ST_HELLO;
/*
 * Function-like macro that expands to the header of a while loop which
 * keeps running as long as (value) < 10. The argument is parenthesized,
 * so an expression such as i+1 expands to while((i+1) < 10).
 */
#define WHILE(value) while((value) < 10) // expanded during the preprocessing stage
/*
 * Demo entry point used to walk through the gcc build stages.
 * It calls show() (declared in io.h), prints sizeof(ST_HELLO), prints
 * "hello" in a loop driven by the WHILE macro, and finally prints
 * whichever branch of the #if/#else block survives preprocessing.
 * There is no explicit return statement; the generated assembly shown
 * later in this article returns 0.
 */
int main()
{
int i = 0;
show("hello");
// NOTE(review): sizeof yields a size_t, whose matching printf conversion is %zu;
// %lu is kept because the listings below were generated from this exact source.
printf("%lu\n", sizeof(ST_HELLO));
WHILE(i+1) // expands to while((i+1) < 10): the body runs for i = 0..8, i.e. 9 times
{
printf("hello\n");
i++;
}
#if 0 // the preprocessor keeps only the branch whose condition holds
printf("Yes\n");
#else
printf("NO\n");
#endif
}
接下来做预处理:gcc -E hello.c -o hello.i ; -E选项表示只做预处理,-o用于指定输出文件。由于stdio.h展开后内容太多,这里只截取一部分
extern char *ctermid (char *__s) __attribute__ ((__nothrow__ , __leaf__));
# 912 "/usr/include/stdio.h" 3 4
extern void flockfile (FILE *__stream) __attribute__ ((__nothrow__ , __leaf__));
extern int ftrylockfile (FILE *__stream) __attribute__ ((__nothrow__ , __leaf__)) ;
extern void funlockfile (FILE *__stream) __attribute__ ((__nothrow__ , __leaf__));
# 942 "/usr/include/stdio.h" 3 4
# 2 "hello.c" 2
# 1 "io.h" 1
# 1 "io.h"
extern void show(char *s);
# 3 "hello.c" 2
typedef struct
{
int a;
char b;
short c;
}ST_HELLO;
int main()
{
int i = 0;
show("hello");
printf("%lu\n", sizeof(ST_HELLO));
while((i+1) < 10)
{
printf("hello\n");
i++;
}
printf("NO\n");
}
很明显,头文件和宏定义被展开,#if 0分支中的语句也被去除,其他没有变化,甚至多打的换行也依然存在,接下来就是编译阶段了。
编译
编译阶段会把预处理后的hello.i翻译成汇编语言,生成hello.s:gcc -Og -S hello.c
.file "hello.c"
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "hello"
.LC1:
.string "%lu\n"
.LC2:
.string "NO"
.text
.globl main
.type main, @function
main:
.LFB23:
.cfi_startproc
pushq %rbx //保存被调用者保存寄存器rbx,后面用ebx作循环计数(对应变量i),函数返回前再popq恢复
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movl $.LC0, %edi
call show //调用show函数
movl $8, %edx //这里编译器已经将sizeof(ST_HELLO)的大小算出来了=8
movl $.LC1, %esi
movl $1, %edi
movl $0, %eax
call __printf_chk
movl $0, %ebx
jmp .L2
.L3:
movl $.LC0, %edi
call puts
.L2:
addl $1, %ebx
cmpl $9, %ebx
jle .L3
movl $.LC2, %edi
call puts
movl $0, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE23:
.size main, .-main
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits
有意思的是,这里编译器已经将sizeof(ST_HELLO)的值算出来了(sizeof在编译期求值),也就是说后面程序运行的时候就不需要再计算了。
这里的汇编代码比较难以理解,可以对带调试信息(编译时加-g)的可执行文件做反汇编,与源码逐行对比:objdump -d -S -l hello
汇编
汇编阶段将hello.s翻译成机器语言,生成一个可重定位目标文件hello.o
gcc -c hello.c io.c 可以使用readelf命令(例如readelf -a hello.o)查看hello.o的内容
ELF头 | 编译及系统信息 |
.text | 已经编译程序的机器代码 |
.rodata | 只读数据 |
.data | 已经初始化的全局和静态变量 |
.bss | 未初始化(初始化为0)的全局和静态变量 |
.symtab | 符号表:全局变量 函数 |
.rel.text | .text节的重定位表,链接时用(x86-64上节名为.rela.text) |
.rel.data | .data节的重定位表,链接时用(x86-64上节名为.rela.data) |
.debug | 调试符号表 |
.line | 代码行信息 |
.strtab | 字符串表 |
ELF Header:
Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
Class: ELF64
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: REL (Relocatable file)
Machine: Advanced Micro Devices X86-64
Version: 0x1
Entry point address: 0x0
Start of program headers: 0 (bytes into file)
Start of section headers: 2192 (bytes into file)
Flags: 0x0
Size of this header: 64 (bytes)
Size of program headers: 0 (bytes)
Number of program headers: 0
Size of section headers: 64 (bytes)
Number of section headers: 21
Section header string table index: 18
Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .text PROGBITS 0000000000000000 00000040
0000000000000059 0000000000000000 AX 0 0 1
[ 2] .rela.text RELA 0000000000000000 00000528
00000000000000c0 0000000000000018 I 19 1 8
[ 3] .data PROGBITS 0000000000000000 00000099
0000000000000000 0000000000000000 WA 0 0 1
[ 4] .bss NOBITS 0000000000000000 00000099
0000000000000000 0000000000000000 WA 0 0 1
[ 5] .rodata PROGBITS 0000000000000000 00000099
000000000000000e 0000000000000000 A 0 0 1
[ 6] .debug_info PROGBITS 0000000000000000 000000a7
000000000000009e 0000000000000000 0 0 1
[ 7] .rela.debug_info RELA 0000000000000000 000005e8
0000000000000198 0000000000000018 I 19 6 8
[ 8] .debug_abbrev PROGBITS 0000000000000000 00000145
0000000000000051 0000000000000000 0 0 1
[ 9] .debug_aranges PROGBITS 0000000000000000 00000196
0000000000000030 0000000000000000 0 0 1
[10] .rela.debug_arang RELA 0000000000000000 00000780
0000000000000030 0000000000000018 I 19 9 8
[11] .debug_line PROGBITS 0000000000000000 000001c6
0000000000000045 0000000000000000 0 0 1
[12] .rela.debug_line RELA 0000000000000000 000007b0
0000000000000018 0000000000000018 I 19 11 8
[13] .debug_str PROGBITS 0000000000000000 0000020b
00000000000000db 0000000000000001 MS 0 0 1
[14] .comment PROGBITS 0000000000000000 000002e6
0000000000000036 0000000000000001 MS 0 0 1
[15] .note.GNU-stack PROGBITS 0000000000000000 0000031c
0000000000000000 0000000000000000 0 0 1
[16] .eh_frame PROGBITS 0000000000000000 00000320
0000000000000038 0000000000000000 A 0 0 8
[17] .rela.eh_frame RELA 0000000000000000 000007c8
0000000000000018 0000000000000018 I 19 16 8
[18] .shstrtab STRTAB 0000000000000000 000007e0
00000000000000b0 0000000000000000 0 0 1
[19] .symtab SYMTAB 0000000000000000 00000358
00000000000001b0 0000000000000018 20 14 8
[20] .strtab STRTAB 0000000000000000 00000508
000000000000001f 0000000000000000 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings), l (large)
I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
There are no section groups in this file.
There are no program headers in this file.
Relocation section '.rela.text' at offset 0x528 contains 8 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000010 00050000000a R_X86_64_32 0000000000000000 .rodata + 0
000000000015 000f00000002 R_X86_64_PC32 0000000000000000 show - 4
00000000001f 00050000000a R_X86_64_32 0000000000000000 .rodata + 6
000000000029 001000000002 R_X86_64_PC32 0000000000000000 printf - 4
000000000030 00050000000a R_X86_64_32 0000000000000000 .rodata + 0
000000000035 001100000002 R_X86_64_PC32 0000000000000000 puts - 4
000000000049 00050000000a R_X86_64_32 0000000000000000 .rodata + b
00000000004e 001100000002 R_X86_64_PC32 0000000000000000 puts - 4
Relocation section '.rela.debug_info' at offset 0x5e8 contains 17 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000006 00070000000a R_X86_64_32 0000000000000000 .debug_abbrev + 0
00000000000c 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 8b
000000000011 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 5b
000000000015 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 63
000000000019 000200000001 R_X86_64_64 0000000000000000 .text + 0
000000000029 00090000000a R_X86_64_32 0000000000000000 .debug_line + 0
000000000030 000a0000000a R_X86_64_32 0000000000000000 .debug_str + d
000000000037 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 1f
00000000003e 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 39
000000000045 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 0
00000000004c 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 2d
000000000053 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 4c
000000000061 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 74
000000000068 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 7d
00000000006f 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 56
000000000074 000a0000000a R_X86_64_32 0000000000000000 .debug_str + 86
00000000007e 000200000001 R_X86_64_64 0000000000000000 .text + 0
Relocation section '.rela.debug_aranges' at offset 0x780 contains 2 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000006 00060000000a R_X86_64_32 0000000000000000 .debug_info + 0
000000000010 000200000001 R_X86_64_64 0000000000000000 .text + 0
Relocation section '.rela.debug_line' at offset 0x7b0 contains 1 entries:
Offset Info Type Sym. Value Sym. Name + Addend
00000000002b 000200000001 R_X86_64_64 0000000000000000 .text + 0
Relocation section '.rela.eh_frame' at offset 0x7c8 contains 1 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000020 000200000002 R_X86_64_PC32 0000000000000000 .text + 0
The decoding of unwind sections for machine type Advanced Micro Devices X86-64 is not currently supported.
Symbol table '.symtab' contains 18 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
1: 0000000000000000 0 FILE LOCAL DEFAULT ABS hello.c
2: 0000000000000000 0 SECTION LOCAL DEFAULT 1
3: 0000000000000000 0 SECTION LOCAL DEFAULT 3
4: 0000000000000000 0 SECTION LOCAL DEFAULT 4
5: 0000000000000000 0 SECTION LOCAL DEFAULT 5
6: 0000000000000000 0 SECTION LOCAL DEFAULT 6
7: 0000000000000000 0 SECTION LOCAL DEFAULT 8
8: 0000000000000000 0 SECTION LOCAL DEFAULT 9
9: 0000000000000000 0 SECTION LOCAL DEFAULT 11
10: 0000000000000000 0 SECTION LOCAL DEFAULT 13
11: 0000000000000000 0 SECTION LOCAL DEFAULT 15
12: 0000000000000000 0 SECTION LOCAL DEFAULT 16
13: 0000000000000000 0 SECTION LOCAL DEFAULT 14
14: 0000000000000000 89 FUNC GLOBAL DEFAULT 1 main
15: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND show
16: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND printf
17: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND puts
No version information found in this file.
链接
链接阶段将多个可重定位文件合并为可执行文件,可执行文件也同样是ELF格式。这个阶段会将每个可重定位文件中的同名节合并在一起,并完成符号解析和重定位。