February 28th Wednesday （二月　二十八日　水曜日）-CSDN博客

本文链接：https://blog.csdn.net/Lu_ming/article/details/1518498

作者在生日当天用一整天的时间编写了一个汇编语言程序，该程序能够将文本文件中的字符转换为大写或小写。在开发过程中遇到了一些bug，如意外修改寄存器%eax的内容和将常量误设为指针等问题。通过调试发现了更隐蔽的bug，即两个功能函数之间的跳转错误。最后分享了一些汇编语言开发的经验教训。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

今日は私の誕生日です。

　　Today is my birthday. I took a whole work day to write a small program in assembly language. This program can convert uppercase characters
to lowercase, or lowercase characters to uppercase. The source of this program is more than 270 lines long.

During development I met some bugs. One of them was changing the content of register %eax unintentionally. Another was treating a constant as
a pointer; that is, I left off the constant's prefix -- $. Those bugs are easy to find because a "segmentation fault" often occurred.

However, there was one bug that was hard to catch. I wrote two functions: one is used to convert characters to upper case, the other is used to
convert them to lower case. While debugging I found that my program could not convert characters from a text file to lower case. After a fight I found the root of
the problem. When writing the function "convert_to_lower" I had just modified a copy of the function "convert_to_upper". Although I changed
most of the labels and macros to avoid conflicts, I overlooked the labels used as targets of the jump instructions. So the flow jumped
from "convert_to_lower" back into "convert_to_upper", and the function "convert_to_upper" converted the characters to upper case.

In the C language, the compiler does not allow a "goto" statement to jump between functions. But in assembly language there
is no such mechanism. So you had better separate functions into different files rather than putting them together.

Today is the last day of my 29 years. I have basically finished studying software development based on the x86 architecture. In actual fact,
this time was just a concise review of assembly development. In other words, I have basically finished the courses of software development from top
to bottom. Of course, what I still need to study is immense; what I know is still nothing.

As the next step, I will study one branch of computer science. I will not spend more time learning a new programming language unless
it is necessary.

Finally, I copied the source of "ud" here. It's convenient to review. This is also a good example! ^_^

If you are reading my blog, you can download it and assemble it using "as" on Linux.

The command is "as ud.s -o ud.o", "ld ud.o -o ud -lc -dynamic-linker /lib/ld-linux.so.2". Try it!!!

#############################
# ud < u | d > filename | - #
#############################
#
# Syntax: GNU as, AT&T operand order (source, destination).
# Target: 32-bit x86 Linux; system calls are issued with "int $0x80".

.equ TRUE, 1
.equ FALSE, 0

.equ LINUX_SYSCALL, 0x80        # software interrupt used to enter the kernel

## standard file descriptors ##
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2

## file operation ##
## System call numbers loaded into %eax before "int $LINUX_SYSCALL".
.equ OPEN_FILE, 5
.equ CLOSE_FILE, 6
.equ READ_FILE, 3
.equ WRITE_FILE, 4
.equ O_RDONLY, 0                # open flag: read-only
.equ EOF, 0                     # a read that returns 0 bytes marks end of file

.equ SYS_EXIT, 1

## arguments ##
## Byte offsets from the stack pointer as it is on entry to _start.
.equ ARGC, 0
.equ ARGV0, 4
.equ ARGV1, 8
.equ ARGV2, 12

## stack positions ##
## Local variables of _start, addressed relative to %ebp.
.equ ST_SIZE_RESERVE, 12        # bytes reserved below %ebp
.equ ST_CLOSE_FLAG, -8          # TRUE when ST_FD_IN must be closed on exit
.equ ST_FD_IN, -4               # descriptor the input is read from

.section .data
## error messages ##
## These are printf format strings, so each one must end with a NUL byte;
## .asciz appends it.  (The earlier "/n/0" spelling emitted the four literal
## characters '/', 'n', '/', '0' and left the strings unterminated.)
err_open_msg:
.asciz "Can't open file %s \n"
err_opt_msg:
.asciz "The option is error.\n"

.section .bss
## I/O buffer: one block is read, converted in place, and written out.
.equ BUFFER_SIZE, 500
.lcomm BUFFER_DATA, BUFFER_SIZE

.section .text

## Extra definitions private to _start.
.equ ST_CONVERT_FN, -12         # local slot: address of the chosen converter
.equ EXIT_SUCCESS, 0
.equ EXIT_FAILURE, 1

##----------------------------------------------------------------------
## _start -- program entry point:  ud <u | d> <filename | ->
##
## Copies the input to standard output one BUFFER_SIZE block at a time,
## converting each block in place to upper case (argv[1] starts with 'u')
## or lower case (argv[1] starts with 'd').  The input is the file named
## by argv[2], or standard input when argv[2] starts with '-'.
##
## Exit status: 0 after the input has been copied; 1 for a wrong
## argument count, an unknown option, or a file that cannot be opened.
##
## Locals (relative to %ebp): ST_FD_IN, ST_CLOSE_FLAG, ST_CONVERT_FN.
##----------------------------------------------------------------------
.globl _start
_start:
    movl    %esp, %ebp                      # %ebp -> argc, argv[] follows
    subl    $ST_SIZE_RESERVE, %esp          # room for the three locals

    ## Exactly two arguments are required (argc == 3 counts the program name).
    cmpl    $3, ARGC(%ebp)
    jne     show_usage

    ## Choose the converter once, before touching the input, so that a bad
    ## option is reported without opening or reading anything.
    movl    ARGV1(%ebp), %eax
    movb    (%eax), %cl                     # first character of argv[1]
    movl    $convert_to_lower, ST_CONVERT_FN(%ebp)
    cmpb    $'d', %cl
    je      select_input
    movl    $convert_to_upper, ST_CONVERT_FN(%ebp)
    cmpb    $'u', %cl
    jne     opt_err

select_input:
    ## Only the first character of argv[2] is examined: '-' means stdin.
    movl    ARGV2(%ebp), %eax
    cmpb    $'-', (%eax)
    jne     openfile
    movl    $STDIN, ST_FD_IN(%ebp)          # read from standard input
    movl    $FALSE, ST_CLOSE_FLAG(%ebp)     # stdin is not ours to close
    jmp     main_loop

openfile:
    ## open file ##
    movl    $OPEN_FILE, %eax
    movl    ARGV2(%ebp), %ebx               # path
    movl    $O_RDONLY, %ecx                 # flags
    movl    $0666, %edx                     # mode
    int     $LINUX_SYSCALL
    testl   %eax, %eax
    js      error_open_handler              # negative result: open failed
    movl    %eax, ST_FD_IN(%ebp)            # store file descriptor
    movl    $TRUE, ST_CLOSE_FLAG(%ebp)

main_loop:
    ## read in a block of data from the input ##
    movl    $READ_FILE, %eax
    movl    ST_FD_IN(%ebp), %ebx
    movl    $BUFFER_DATA, %ecx
    movl    $BUFFER_SIZE, %edx
    int     $LINUX_SYSCALL

    ## Stop at end of file (0) or on a read error (negative).
    cmpl    $EOF, %eax
    jle     end_up

    ## Convert the %eax bytes just read.  The count is pushed last so it
    ## can be popped back into %eax once the converter returns.
    pushl   $BUFFER_DATA
    pushl   %eax
    call    *ST_CONVERT_FN(%ebp)
    popl    %eax                            # %eax = byte count again
    addl    $4, %esp                        # drop the buffer argument

write_data:
    ## write the block out to stdout ##
    movl    %eax, %edx                      # number of bytes to write
    movl    $WRITE_FILE, %eax
    movl    $STDOUT, %ebx
    movl    $BUFFER_DATA, %ecx
    int     $LINUX_SYSCALL

    jmp     main_loop

end_up:
    cmpl    $FALSE, ST_CLOSE_FLAG(%ebp)
    je      finish

    ## close file ##
    movl    $CLOSE_FILE, %eax
    movl    ST_FD_IN(%ebp), %ebx
    int     $LINUX_SYSCALL

finish:
    pushl   $EXIT_SUCCESS
    call    exit

    ## Error exits.  None of them has an open input file, so they bypass
    ## end_up and never look at the (possibly unset) close flag.
show_usage:
    call    print_usage
    jmp     fail

opt_err:
    pushl   $err_opt_msg
    call    printf
    jmp     fail

error_open_handler:
    pushl   ARGV2(%ebp)                     # file name for the %s conversion
    pushl   $err_open_msg
    call    printf

fail:
    pushl   $EXIT_FAILURE
    call    exit

## print usage ##
.section .data
## printf format string; .asciz supplies the terminating NUL byte.
usage:
.asciz "ud <u | d> <filename | -> \n"

## Switch back to .text: without this the function below (and everything
## after it in the file) would be assembled into the data section.
.section .text

##----------------------------------------------------------------------
## print_usage -- print the command-line synopsis with printf.
## Takes no arguments.  Whatever printf leaves in the caller-saved
## registers is passed through unchanged.
##----------------------------------------------------------------------
.globl print_usage
.type print_usage, @function
print_usage:
    pushl   %ebp
    movl    %esp, %ebp

    pushl   $usage
    call    printf

    leave                                   # also discards the pushed argument
    ret
.size print_usage, .-print_usage

## convert to upper case ##
.section .text                  # make sure the routine lands in executable code

.equ UPPER_CONVERSION, 'A' - 'a'    # amount added to turn 'a'..'z' into 'A'..'Z'
.equ ST_BUFFER_LEN, 8               # first stack argument: byte count
.equ ST_BUFFER, 12                  # second stack argument: buffer address

##----------------------------------------------------------------------
## convert_to_upper -- upper-case the ASCII letters of a buffer in place.
## C equivalent:  void convert_to_upper(int len, char *buf);
##
## In (stack):  ST_BUFFER_LEN(%ebp) = number of bytes to process
##              ST_BUFFER(%ebp)     = address of the buffer
## Out:         %eax = address of the buffer
## Clobbers:    %cl, %edx, flags.  %ebx and %edi are left untouched.
##
## A length of zero or less returns without touching the buffer.
## All labels are function-specific ".L" locals, so a copy of this
## routine cannot accidentally jump back into it.
##----------------------------------------------------------------------
.globl convert_to_upper
.type convert_to_upper, @function
convert_to_upper:
    pushl   %ebp
    movl    %esp, %ebp

    movl    ST_BUFFER(%ebp), %eax           # %eax = buffer base
    xorl    %edx, %edx                      # %edx = index of the current byte

    cmpl    $0, ST_BUFFER_LEN(%ebp)
    jle     .Lupper_done                    # nothing to convert

.Lupper_loop:
    movb    (%eax, %edx, 1), %cl            # fetch the current byte

    ## Only 'a'..'z' is rewritten.  The comparisons are signed, so bytes
    ## of 0x80 and above compare below 'a' and are skipped as well.
    cmpb    $'a', %cl
    jl      .Lupper_next
    cmpb    $'z', %cl
    jg      .Lupper_next

    addb    $UPPER_CONVERSION, %cl
    movb    %cl, (%eax, %edx, 1)            # store the converted byte

.Lupper_next:
    incl    %edx
    cmpl    ST_BUFFER_LEN(%ebp), %edx
    jl      .Lupper_loop                    # loop while index < length

.Lupper_done:
    movl    %ebp, %esp
    popl    %ebp
    ret
.size convert_to_upper, .-convert_to_upper

## convert to lower case ##
.section .text                  # make sure the routine lands in executable code

.equ DOWN_CONVERSION, 'a' - 'A'     # amount added to turn 'A'..'Z' into 'a'..'z'
.equ BUFFER_LEN, 8                  # first stack argument: byte count
.equ BUFFER, 12                     # second stack argument: buffer address

##----------------------------------------------------------------------
## convert_to_lower -- lower-case the ASCII letters of a buffer in place.
## C equivalent:  void convert_to_lower(int len, char *buf);
##
## In (stack):  BUFFER_LEN(%ebp) = number of bytes to process
##              BUFFER(%ebp)     = address of the buffer
## Out:         %eax = address of the buffer
## Clobbers:    %cl, %edx, flags.  %ebx and %edi are left untouched.
##
## A length of zero or less returns without touching the buffer.
## All labels are function-specific ".L" locals, so no jump in this
## routine can end up inside another function.
##----------------------------------------------------------------------
.globl convert_to_lower
.type convert_to_lower, @function
convert_to_lower:
    pushl   %ebp
    movl    %esp, %ebp

    movl    BUFFER(%ebp), %eax              # %eax = buffer base
    xorl    %edx, %edx                      # %edx = index of the current byte

    cmpl    $0, BUFFER_LEN(%ebp)
    jle     .Llower_done                    # nothing to convert

.Llower_loop:
    movb    (%eax, %edx, 1), %cl            # fetch the current byte

    ## Only 'A'..'Z' is rewritten.  The comparisons are signed, so bytes
    ## of 0x80 and above compare below 'A' and are skipped as well.
    cmpb    $'A', %cl
    jl      .Llower_next
    cmpb    $'Z', %cl
    jg      .Llower_next

    addb    $DOWN_CONVERSION, %cl
    movb    %cl, (%eax, %edx, 1)            # store the converted byte

.Llower_next:
    incl    %edx
    cmpl    BUFFER_LEN(%ebp), %edx
    jl      .Llower_loop                    # loop while index < length

.Llower_done:
    movl    %ebp, %esp
    popl    %ebp
    ret
.size convert_to_lower, .-convert_to_lower