通过sizeof和strlen认识字符串常量
字符串型常量
字符串常量是双引号中的字符序列(可能是空的)。可以用字符常量所用的转义机制表示字符串中的字符。标准C语言允许在字符串型常量前面加上L前缀来指定宽字符串常量。
对每个n字符的非宽字符串常量,运行时静态分配n+1个字符的内存块,其中前n个字符是字符串中的字符,最后一个字符是null字符'\0'。这个内存块是字符串常量的值,类型为char[n+1]。同样,宽字符串常量变成n个宽字符加上一个宽null字符,类型为wchar_t [n+1]。
-
#include <stdio.h>
-
#include <string.h>
-
-
/*
 * Contrast strlen() with sizeof for string constants.
 *
 * strlen() counts the characters that precede the terminating null
 * character, while sizeof yields the size of the whole array object,
 * terminating null included.  One line is printed for each of: the array
 * str, the literal "hello", and the empty literal "".
 *
 * argc and argv are unused.  Always returns 0.
 */
int
main ( int argc, char *argv [ ] )
{
    /* An array initialised from a literal: five characters plus the null. */
    char str [ ] = "hello";

    /*
     * strlen() and sizeof both yield size_t; the casts make the arguments
     * match the %d conversions in the format string.
     */
    printf ( "strlen(\"%s\") = %d sizeof(\"%s\") = %d\n",
             str, ( int ) strlen ( str ), str, ( int ) sizeof ( str ) );

    printf ( "strlen(\"%s\") = %d sizeof(\"%s\") = %d\n",
             "hello", ( int ) strlen ( "hello" ), "hello", ( int ) sizeof ( "hello" ) );

    /* The empty literal still occupies one byte: its terminating null. */
    printf ( "strlen(\"%s\") = %d sizeof(\"%s\") = %d\n",
             "", ( int ) strlen ( "" ), "", ( int ) sizeof ( "" ) );

    return 0;
}
结果:
strlen("hello") = 5 sizeof("hello") = 6
strlen("") = 0 sizeof("") = 1
分析:
sizeof操作符返回操作数所占的字节数(对字符串常量而言,包括结尾的null字符),而strlen函数返回字符串中null字符之前的字符数。因此,sizeof("hello")的返回值是6而不是5,sizeof("")的返回值是1而不是0;而strlen("hello")的返回值是5,strlen("")的返回值是0。
-
# GCC 4.1.2 output for a.c (32-bit x86, AT&T syntax).
# main() prints strlen/sizeof for a stack array, for "hello" and for "".
# All printf arguments are passed on the stack, stored relative to %esp.
        .file   "a.c"
        .section        .rodata                 # read-only data segment
        .align 4
.LC1:                                           # printf format string (read-only)
        .string "strlen(\"%s\") = %d sizeof(\"%s\") = %d\n"
.LC0:                                           # the literal "hello" (read-only)
        .string "hello"
.LC2:                                           # the empty literal: only the null byte
        .string ""
        .text
.globl main
        .type   main, @function
main:
        # Prologue: round %esp down to a multiple of 16, re-push the return
        # address, then set up the frame pointer.
        leal    4(%esp), %ecx                   # %ecx = entry %esp + 4
        andl    $-16, %esp
        pushl   -4(%ecx)                        # copy of the word at entry %esp
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi                            # %edi is used by scasb below
        pushl   %ecx
        subl    $48, %esp

        # char str[] = "hello": copy 6 bytes (4 + 2) from .LC0 to -14(%ebp).
        movl    .LC0, %eax
        movl    %eax, -14(%ebp)
        movzwl  .LC0+4, %eax
        movw    %ax, -10(%ebp)

        # Inline strlen(str): scan from str for a zero byte.
        leal    -14(%ebp), %eax                 # %eax = &str[0]
        movl    $-1, %ecx                       # no practical limit on the scan
        movl    %eax, -28(%ebp)
        movl    $0, %eax                        # %al = byte to search for
        cld                                     # scan towards higher addresses
        movl    -28(%ebp), %edi
        repnz
        scasb
        movl    %ecx, %eax
        notl    %eax                            # bytes scanned, null included
        leal    -1(%eax), %edx                  # %edx = strlen(str)

        # printf(.LC1, str, strlen(str), str, sizeof(str))
        movl    $6, 16(%esp)                    # sizeof(str) is a constant
        leal    -14(%ebp), %eax
        movl    %eax, 12(%esp)
        movl    %edx, 8(%esp)
        leal    -14(%ebp), %eax
        movl    %eax, 4(%esp)
        movl    $.LC1, (%esp)
        call    printf

        # printf(.LC1, "hello", 5, "hello", 6): both values are immediates,
        # no scan is performed for the literal.
        movl    $6, 16(%esp)
        movl    $.LC0, 12(%esp)
        movl    $5, 8(%esp)
        movl    $.LC0, 4(%esp)
        movl    $.LC1, (%esp)
        call    printf

        # printf(.LC1, "", 0, "", 1)
        movl    $1, 16(%esp)
        movl    $.LC2, 12(%esp)
        movl    $0, 8(%esp)
        movl    $.LC2, 4(%esp)
        movl    $.LC1, (%esp)
        call    printf

        # return 0; undo the prologue in reverse order.
        movl    $0, %eax
        addl    $48, %esp
        popl    %ecx
        popl    %edi
        popl    %ebp
        leal    -4(%ecx), %esp                  # back to the entry %esp
        ret
        .size   main, .-main
        .ident  "GCC: (GNU) 4.1.2 (Ubuntu 4.1.2-0ubuntu4)"
        .section        .note.GNU-stack,"",@progbits
存储字符串型常量
不能修改保存字符串型常量字符的内存,因为这个内存可能是只读的,即物理上是防止修改的。有些函数(如mktemp)要接受就地修改的字符串指针,此时不要向这些函数传递字符串型常量,而要将这个字符串型常量的内存初始化到一个非const字符数组中,然后传递数组第一个元素的地址。
-
#include <stdio.h>
-
-
/*
 * Shows three ways of holding a string and how writable each one is.
 * The last assignment is intentionally invalid, so this program is not
 * expected to compile as written.
 */
int main ( int argc, char* argv [ ] )
-
{
-
char p1 [ ] = "Always writable"; /* non-const array initialised from the literal */
-
char *p2 = "Possibly not writable"; /* pointer to the literal itself */
-
const char p3 [ ] = "Never writable"; /* Standard C only */
-
p1 [ 0 ] = 'a'; /* always allowed: p1 is an ordinary char array */
-
p2 [ 0 ] = 'p'; /* run-time error: segmentation fault if the literal is in read-only memory */
-
p3 [ 0 ] = 'n'; /* compile error: writing to a read-only (const) location */
-
return 0;
-
}
p1、p2与p3的值都是字符数组的指针,但其可写性不同。赋值语句p1[0] = 'a'总是可行的,p2[0] = 'p'可能会造成运行时错误,而p3[0] = 'n'总是会造成编译错误,这是由const的含义决定的。
-
#include <stdio.h>
-
-
/*
 * Same three declarations as the writability example, with every write
 * commented out so that the program compiles and only the initialisations
 * remain.
 */
int main ( int argc, char* argv [ ] )
-
{
-
char p1 [ ] = "Always writable"; /* non-const array initialised from the literal */
-
char *p2 = "Possibly not writable"; /* pointer to the literal itself */
-
const char p3 [ ] = "Never writable"; /* Standard C only */
-
//p1[0] = 'a';
-
//p2[0] = 'p'; /* run-time error: segmentation fault if the literal is in read-only memory */
-
//p3[0] = 'n'; /* compile error: writing to a read-only (const) location */
-
return 0;
-
}
-
-
# GCC 4.1.2 output for b.c (32-bit x86, AT&T syntax).
# All three string literals live in .rodata. The two arrays are filled by
# copying the literal bytes into the stack frame; for the pointer only the
# address of the literal is stored.
.file "b.c"
-
.section .rodata
-
.LC1:
-
.string "Possibly not writable"
-
.LC0:
-
.string "Always writable"
-
.LC2:
-
.string "Never writable"
-
.text
-
.globl main
-
.type main, @function
-
main:
-
leal 4(%esp), %ecx # %ecx = entry %esp + 4
-
andl $-16, %esp # round %esp down to a multiple of 16
-
pushl -4(%ecx) # copy of the word at entry %esp
-
pushl %ebp
-
movl %esp, %ebp
-
pushl %ecx
-
subl $52, %esp
-
movl 4(%ecx), %eax # presumably argv (second cdecl argument) - TODO confirm
-
movl %eax, -56(%ebp)
-
movl %gs:20, %eax # load the guard value from %gs:20 ...
-
movl %eax, -8(%ebp) # ... and keep a copy in the frame, checked before return
-
xorl %eax, %eax
-
movl .LC0, %eax # p1: copy 16 bytes (15 characters + null) in four 4-byte moves
-
movl %eax, -39(%ebp)
-
movl .LC0+4, %eax
-
movl %eax, -35(%ebp)
-
movl .LC0+8, %eax
-
movl %eax, -31(%ebp)
-
movl .LC0+12, %eax
-
movl %eax, -27(%ebp)
-
movl $.LC1, -44(%ebp) # p2: store only the address of the literal in .rodata
-
movl .LC2, %eax # p3: copy 15 bytes (14 characters + null) as 4 + 4 + 4 + 2 + 1
-
movl %eax, -23(%ebp)
-
movl .LC2+4, %eax
-
movl %eax, -19(%ebp)
-
movl .LC2+8, %eax
-
movl %eax, -15(%ebp)
-
movzwl .LC2+12, %eax
-
movw %ax, -11(%ebp)
-
movzbl .LC2+14, %eax
-
movb %al, -9(%ebp)
-
movl $0, %eax # return value 0
-
movl -8(%ebp), %edx # reload the saved guard copy
-
xorl %gs:20, %edx # zero (ZF set) only if it still equals %gs:20
-
je .L3 # unchanged: skip the failure call
-
call __stack_chk_fail # saved copy was modified
-
.L3:
-
addl $52, %esp
-
popl %ecx
-
popl %ebp
-
leal -4(%ecx), %esp # back to the entry %esp
-
ret
-
.size main, .-main
-
.ident "GCC: (GNU) 4.1.2 (Ubuntu 4.1.2-0ubuntu4)"
-
.section .note.GNU-stack,"",@progbits
__stack_chk_fail干什么用的???困惑中......
===========================================================================================
到linuxquestions.org上询问了一下,老外真是热心肠啊,很快就得到了答案。在bbs@ustc和linuxforum.net上问了都没有人回答啊,这两个地方不行啊,很少能遇到技术好的并且热心肠的人。以后到国外的论坛上混了。
from : http://www.linuxquestions.org/questions/showthread.php?t=584863
hello there,
In the process of dissecting an example program from C: A Reference Manual (Fifth Edition) as follows:
=======================================
#include <stdio.h>
int main(int argc, char* argv[])
{
char p1[] = "Always writable";
char *p2 = "Possibly not writable";
const char p3[] = "Never writable"; /* Standard C only */
return 0;
}
=======================================
A puzzle shocked me after I got the assembly of above program:
=======================================
.file "b.c"
.section .rodata
.LC1:
.string "Possibly not writable"
.LC0:
.string "Always writable"
.LC2:
.string "Never writable"
.text
.globl main
.type main, @function
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
movl %esp, %ebp
pushl %ecx
subl $52, %esp
movl 4(%ecx), %eax
movl %eax, -56(%ebp)
movl %gs:20, %eax
movl %eax, -8(%ebp)
xorl %eax, %eax
movl .LC0, %eax
movl %eax, -39(%ebp)
movl .LC0+4, %eax
movl %eax, -35(%ebp)
movl .LC0+8, %eax
movl %eax, -31(%ebp)
movl .LC0+12, %eax
movl %eax, -27(%ebp)
movl $.LC1, -44(%ebp)
movl .LC2, %eax
movl %eax, -23(%ebp)
movl .LC2+4, %eax
movl %eax, -19(%ebp)
movl .LC2+8, %eax
movl %eax, -15(%ebp)
movzwl .LC2+12, %eax
movw %ax, -11(%ebp)
movzbl .LC2+14, %eax
movb %al, -9(%ebp)
movl $0, %eax
movl -8(%ebp), %edx
xorl %gs:20, %edx
je .L3
call __stack_chk_fail
.L3:
addl $52, %esp
popl %ecx
popl %ebp
leal -4(%ecx), %esp
ret
.size main, .-main
.ident "GCC: (GNU) 4.1.2 (Ubuntu 4.1.2-0ubuntu4)"
.section .note.GNU-stack,"",@progbits
=============================================
what is "__stack_chk_fail" for in the assembly?
replied by paulsm4
Hi -
This code is simply doing a "sanity check" of the stack before it pops the stack and does a "return":
...
movl -8(%ebp), %edx
xorl %gs:20, %edx
je .L3
call __stack_chk_fail
.L3:
...
If everything's OK, then the CPU's zero flag will be set, we'll jump to .L3, and we will not call "__stack_chk_fail".
Here's a link describing the GS register (%gs) and "protected mode" memory addressing issues in a bit more detail:
http://my.execpc.com/~geezer/johnfine/segments.htm