今日は私の誕生日です。
Today is my birthday. I took a whole working day to write a small program in assembly language. This program can convert upper-case characters
to lower case, or lower-case characters to upper case. The source of this program is more than 270 lines.
During development I ran into some bugs. One of them was changing the content of register %eax unintentionally. Another was using a constant as
a pointer because I had forgotten the prefix of the constant -- $. Those bugs were easy to find because a "segmentation fault" often occurred.
However, there was one bug that was hard to catch. I wrote two functions: one is used to convert characters to upper case, the other is used to
convert them to lower case. During debugging I found that my program could not convert the characters of a text file to lower case. After a fight I found the root of
the problem. When writing the function "convert_to_lower" I had simply modified a copy of the function "convert_to_upper". Although I changed
most of the labels and macros to avoid conflicts, I overlooked the labels that are the targets of the jump instructions. So the flow jumped
from "convert_to_lower" back into "convert_to_upper", and the function "convert_to_upper" converted the characters to upper case.
In the C language, the compiler usually prevents a "goto" statement from jumping between functions. But in assembly language there
is no such mechanism. So you had better separate functions into different files instead of putting them together.
Today is the last day of my 29th year. I have basically finished studying software development for the x86 architecture. In actual fact,
this time was just a concise review of assembly development. In other words, I have basically finished the courses of software development from top
to bottom. Of course, what I still need to study is immense, and what I know is still next to nothing.
As the next step, I will study one branch of computer science. I will not spend more time learning a new programming language unless
it is necessary.
Finally, I have copied the source of "ud" here so that it is convenient to review. This is also a good example! ^_^
If you are reading my blog, you can download it and assemble it with "as" on Linux.
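The commands are "as ud.s -o ud.o" and "ld ud.o -o ud -lc -dynamic-linker /lib/ld-linux.so.2" (the code is 32-bit, so on a 64-bit system you will probably also need "--32" for as and "-m elf_i386" for ld). Try it!!!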
#############################
# ud < u | d > filename | - #
#############################
.equ TRUE, 1
.equ FALSE, 0
.equ LINUX_SYSCALL, 0x80
## standard file descriptors ##
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2
## file operation ##
.equ OPEN_FILE, 5
.equ CLOSE_FILE, 6
.equ READ_FILE, 3
.equ WRITE_FILE, 4
.equ O_RDONLY, 0
.equ EOF, 0
.equ SYS_EXIT, 1
## arguments ##
.equ ARGC, 0
.equ ARGV0, 4
.equ ARGV1, 8
.equ ARGV2, 12
## stack positions ##
.equ ST_SIZE_RESERVE, 12
.equ ST_CLOSE_FLAG, -8
.equ ST_FD_IN, -4
.section .data
## error messages ##
err_open_msg:
.ascii "Can't open file %s /n/0"
err_opt_msg:
.ascii "The option is error./n/0"
.section .bss
.equ BUFFER_SIZE, 500
.lcomm BUFFER_DATA, BUFFER_SIZE
.section .text
.globl _start
_start:
movl %esp, %ebp
subl $ST_SIZE_RESERVE, %esp # local variables
## check the number of arguments.
movl ARGC(%ebp), %eax
cmpl $3, %eax
jne show_usage
## check arg2
xorl %edi, %edi # clear the %edi register
movl ARGV2(%ebp), %eax
movb (%eax, %edi, 1), %cl
cmpb $45, %cl # 45 is the ascii code of '-'
jne openfile
movl $STDOUT, ST_FD_IN(%ebp)
movl $FALSE, ST_CLOSE_FLAG(%ebp) # set CLOSE_FLAG to FALSE
jmp main_loop
openfile:
## open file ##
movl $OPEN_FILE, %eax
movl ARGV2(%ebp), %ebx
movl $O_RDONLY, %ecx
movl $0666, %edx
int $LINUX_SYSCALL
movl %eax, ST_FD_IN(%ebp) # store file descriptor.
cmpl $0, %eax
jl error_open_handler
movl $TRUE, ST_CLOSE_FLAG(%ebp)
jmp main_loop
error_open_handler:
pushl ARGV2(%ebp)
pushl $err_open_msg
call printf
jmp finish
main_loop:
## read in a block data from the input file ##
movl $READ_FILE, %eax
movl ST_FD_IN(%ebp), %ebx
movl $BUFFER_DATA, %ecx
movl $BUFFER_SIZE, %edx
int $LINUX_SYSCALL
## check for the end of file marker
cmpl $EOF, %eax
jle end_up
## check the case flag.
## check arg1
pushl %eax
xorl %edi, %edi # clear the %edi register.
movl ARGV1(%ebp), %eax
movb (%eax, %edi, 1), %cl
popl %eax
#check 'd' option
cmpb $'d', %cl
je down_case
# check 'u' option
cmpb $'u', %cl
je upper_case
opt_err:
pushl $err_opt_msg
call printf
jmp finish
down_case:
pushl $BUFFER_DATA
pushl %eax
call convert_to_lower
popl %eax
addl $4, %esp
jmp write_data
upper_case:
pushl $BUFFER_DATA
pushl %eax
call convert_to_upper
popl %eax
addl $4, %esp
write_data:
## write the block out to the stdout
movl %eax, %edx
movl $WRITE_FILE, %eax
movl $STDOUT, %ebx
movl $BUFFER_DATA, %ecx
int $LINUX_SYSCALL
## continue the loop.
jmp main_loop
show_usage:
call print_usage
end_up:
movl ST_CLOSE_FLAG(%ebp), %eax
andl %eax, %eax
jz finish
## close file ##
movl $CLOSE_FILE, %eax
movl ST_FD_IN(%ebp), %ebx
int $LINUX_SYSCALL
finish:
pushl $0
call exit
## print usage ##
.section .data
usage:
.ascii "ud <u | d> <filename | -> /n/0"
.globl print_usage
.type print_udage, @function
print_usage:
pushl %ebp
movl %esp, %ebp
pushl $usage
call printf
leave
ret
## convert to upper case ##
.equ UPPER_CONVERSION, 'A' - 'a'
.equ ST_BUFFER_LEN, 8
.equ ST_BUFFER, 12
.globl convert_to_upper
.type convert_to_upper, @function
convert_to_upper:
pushl %ebp
movl %esp, %ebp
## set up variables
movl ST_BUFFER(%ebp), %eax
movl ST_BUFFER_LEN(%ebp), %ebx
xorl %edi, %edi
# if a buffer with zero length was given
# to us, just leave
cmpl $0, %ebx
je end_convert_loop
convert_loop:
# get the current byte
movb (%eax, %edi, 1), %cl
# go to the next byte unless it is between
# 'a' and 'z'
cmpb $'a', %cl
jl next_byte
cmpb $'z', %cl
jg next_byte
addb $UPPER_CONVERSION, %cl
movb %cl, (%eax, %edi, 1)
next_byte:
incl %edi
cmpl %edi, %ebx # continue unless reached the end
jne convert_loop
end_convert_loop:
movl %ebp, %esp
popl %ebp
ret
## convert to lower case ##
.equ DOWN_CONVERSION, 'a' - 'A'
.equ BUFFER_LEN, 8
.equ BUFFER, 12
.globl convert_to_lower
.type convert_to_lower, @function
convert_to_lower:
pushl %ebp
movl %esp, %ebp
## set up variables
movl BUFFER(%ebp), %eax
movl BUFFER_LEN(%ebp), %ebx
xorl %edi, %edi
# if a buffer with zero length was given
# to us, just leave
cmpl $0, %ebx
je end_convert
convert_down_loop:
# get the current byte
movb (%eax, %edi, 1), %cl
# go to the next byte unless it is between
# 'A' and 'Z'
cmpb $'A', %cl
jl next_char
cmpb $'Z', %cl
jg next_char
addb $DOWN_CONVERSION, %cl
movb %cl, (%eax, %edi, 1)
next_char:
incl %edi
cmpl %edi, %ebx # continue unless reached the end
jne convert_down_loop
end_convert:
movl %ebp, %esp
popl %ebp
ret