上学期在编译实习课上,我在 Windows 环境中用 Lex/Yacc 完成了一个不大不小的 Cm(C minus,呵呵,比 C 小多了)编译器。而今天做体系实习2,要求设计一个新的指令系统,然后写汇编,再翻译成二进制,最后在 SimpleScalar 上执行。汇编到二进制要是手工翻译就很麻烦了。
由于汇编到二进制基本上是直接翻译即可,所以可以使用 awk 来完成。但我花了一个通宵的时间(也不算,还做了别的事情),在 Linux 环境下使用 GNU 的 Lex(flex)和 Yacc(bison)写了一个简单的汇编器,贴出来让初学者看看;如果你是高手,就找找毛病啦。我的程序没有进行错误处理,默认输入来自 stdin,并且假定输入是正确的。
首先先看一个写好的简单汇编代码.
















































呵呵,是一个Insert Sort排序,对上面的十个数排序.
要生成二进制代码和二进制文件.我的Lex/Yacc的程序是在控制台里运行,标准输入输出.文件只需使用管道就好.
下面首先是lex文件:
%{
/*
 * Scanner for the toy assembler.  Token codes and the YYSTYPE union
 * (yylval.ival / yylval.imm) come from the bison-generated header.
 */
#include <stdlib.h>
#include <string.h>
#include "myp.tab.h"
extern "C" {
int yywrap(void);
int yylex(void);
}
%}
/* Keep yylineno up to date; yyerror() in the parser prints it. */
%option yylineno
/* Exclusive start condition used to skip '#' comments. */
%x cc
%%
"0x"        { /* hex prefix; the digits that follow arrive as a NUMBER token,
                 so only the decimal digits 0-9 are recognised after it */
              return HEX; }
"CODE SEG"  { return CODEBEG; }
"CODE END"  { return CODEEND; }
"DATA SEG"  { return DATABEG; }
"DATA END"  { return DATAEND; }
"lw"        { return LW; }
"sw"        { return SW; }
"add"       { return ADD; }
"addi"      { return ADDI; }
"sub"       { return SUB; }
"subi"      { return SUBI; }
"mult"      { return MULT; }
"multi"     { return MULTI; }
"swi"       { return SWI; }
"jmp"       { return JMP; }
"slt"       { return SLT; }
"beq"       { return BEQ; }
"bne"       { return BNE; }
"swp"       { return SWP; }
[0-9]+      {
              yylval.ival = atoi(yytext);
              return NUMBER;
            }
[a-zA-Z_][0-9a-zA-Z_]* {
              /* Bounded copy: an over-long identifier is truncated instead
                 of overflowing the fixed-size union member. */
              strncpy(yylval.imm, yytext, sizeof(yylval.imm) - 1);
              yylval.imm[sizeof(yylval.imm) - 1] = '\0';
              return ID;
            }
"("         { return '('; }
")"         { return ')'; }
":"         { return ':'; }
","         { return ','; }
"$"         { return '$'; }
"-"         { return '-'; }
[ \t\n]     { /* skip whitespace */ }
"#"         { /* a comment runs to the end of the line */ BEGIN(cc); }
<cc>.*      { /* discard the comment text */ }
<cc>\n      { BEGIN(INITIAL); }
%%
/* Tell flex there is no further input once stdin is exhausted. */
int yywrap(void)
{
    return 1;
}
呵呵,下面是Yacc文件
%{
#include <cstring>
#include <iostream>
#include <string>
#include <fstream>
#include "src/func.h"
using namespace std;
extern "C" {
void yyerror(const char *s);
extern int yylex(void);
}

/* NOTE(review): MAX, max and the op* opcode constants are presumably
   provided by src/func.h, which is not visible here -- confirm. */

char bin[MAX][33];      // generated output, one '0'/'1' string per 32-bit word
int binNum = 0;         // number of rows of bin[] in use
int data[MAX];          // values collected from the data segment
int dataNum = 0;        // number of data-segment values
int curPC = 0;          // PC of the instruction being processed (advances by 4)
int dataInd = 0;        // not referenced by the visible grammar actions

char pindex[max][33];   // label names
int indPo[max];         // PC value recorded for each label (PC is 4x the instruction index)
int indexNum = 0;       // number of labels defined

// Conditional branches whose label operand must be backfilled.
char backId[max][33];   // label name referenced by the branch
int backPo[max];        // row of bin[] holding the branch instruction
int backPC[max];        // PC of the branch instruction
int backNum = 0;        // number of pending branch backfills

// Unconditional jumps.  The original author notes that the InsertSort sample
// does not use them, but the design includes them.
char jbackId[max][33];  // label name referenced by the jump
int jbackPo[max];       // row of bin[] holding the jump instruction
int jbackNum = 0;       // number of pending jump backfills
%}
/* Semantic values: label text, opcode bit-string, or an integer. */
%union {
    char imm[33];
    char opName[7];   /* 6 opcode characters plus the terminating NUL */
    int ival;
}
%start PROGRAM
%token ADDI ADD SUB SUBI LW SWI SW MULT MULTI DATABEG DATAEND CODEBEG CODEEND BEQ BNE SLT JMP SWP HEX
%token <imm> ID
%type <opName> I_OP R_OP J_OP
%token <ival> NUMBER
%type <ival> IMM
%%
/* A program is a data segment followed by a code segment. */
PROGRAM: DATASEG CODESEG {
        /* Resolve label references, then print one output word per line. */
        fillback();
        for (int i = 0; i < binNum; i++)
            cout << bin[i] << endl;
    }
    ;

DATASEG: DATABEG ':' DATA DATAEND {
        /* Data words are stored from row 2 onwards; rows 0 and 1 are left
           free for the two header words. */
        char temp[33];
        for (int i = 0; i < dataNum; i++) {
            int2bin32(data[i], temp);
            strcpy(bin[i + 2], temp);
        }
        binNum = dataNum + 2;
    }
    ;

/* Zero or more immediates, collected in source order. */
DATA: IMM { data[dataNum++] = $1; } DATA
    | /* empty */
    ;

CODESEG: CODEBEG ':' CODE CODEEND
    ;

CODE: STATE CODE
    | /* empty */
    ;

/* One instruction, optionally preceded by "label:". */
STATE: ID {
        /* Record the label against the PC of the instruction that follows. */
        strcpy(pindex[indexNum], $1);
        indPo[indexNum++] = curPC;
    } ':' COMMAND { curPC += 4; binNum++; }
    | COMMAND { curPC += 4; binNum++; }
    ;

COMMAND: I_COM
    | R_COM
    | J_COM
    ;
/* I-type instructions: opcode(6) + register(5) + register(5) + immediate(16). */
I_COM: I_OP '$' NUMBER ',' IMM '(' '$' NUMBER ')'
    {
        /* op $r, imm($s) */
        char temp[17];
        strcpy(bin[binNum], $1);
        int2bin5($3, temp);
        strcat(bin[binNum], temp);
        int2bin5($8, temp);
        strcat(bin[binNum], temp);
        int2bin16($5, temp);
        strcat(bin[binNum], temp);
    }
    | I_OP '$' NUMBER ',' '$' NUMBER ',' IMM
    {
        /* op $r, $s, imm */
        char temp[17];
        strcpy(bin[binNum], $1);
        int2bin5($3, temp);
        strcat(bin[binNum], temp);
        int2bin5($6, temp);
        strcat(bin[binNum], temp);
        int2bin16($8, temp);
        strcat(bin[binNum], temp);
    }
    | I_OP '$' NUMBER ',' '$' NUMBER ',' ID
    {
        /* op $r, $s, label -- the 16-bit field is appended later by
           fillback(), so only remember where it belongs. */
        char temp[17];
        strcpy(bin[binNum], $1);
        int2bin5($3, temp);
        strcat(bin[binNum], temp);
        int2bin5($6, temp);
        strcat(bin[binNum], temp);
        strcpy(backId[backNum], $8);
        backPC[backNum] = curPC;
        backPo[backNum++] = binNum;
    }
    | I_OP '$' NUMBER ',' '$' NUMBER
    {
        /* op $r, $s -- the immediate field is all zeros */
        char temp[17];
        strcpy(bin[binNum], $1);
        int2bin5($3, temp);
        strcat(bin[binNum], temp);
        int2bin5($6, temp);
        strcat(bin[binNum], temp);
        strcat(bin[binNum], "0000000000000000");
    }
    ;
/* R-type instruction: opcode(6) + three registers(5 each) + 11 zero bits. */
R_COM: R_OP '$' NUMBER ',' '$' NUMBER ',' '$' NUMBER
    {
        char temp[6];
        strcpy(bin[binNum], $1);
        int2bin5($3, temp);
        strcat(bin[binNum], temp);
        int2bin5($6, temp);
        strcat(bin[binNum], temp);
        int2bin5($9, temp);
        strcat(bin[binNum], temp);
        strcat(bin[binNum], "00000000000");
    }
    ;
/* J-type instruction: opcode(6) + 26-bit target. */
J_COM: J_OP ID {
        /* Label target: the 26-bit field is appended later by fillback(). */
        strcpy(bin[binNum], $1);
        strcpy(jbackId[jbackNum], $2);
        /* Advance the counter so the entry is actually kept for backfilling. */
        jbackPo[jbackNum++] = binNum;
    }
    | J_OP NUMBER {
        /* Numeric target: encode it immediately. */
        char temp[27];
        strcpy(bin[binNum], $1);
        if (!int2bin26($2, temp))
            cout << " J_op Number:failed " << endl;
        strcat(bin[binNum], temp);
    }
    ;
/* Each mnemonic token yields its opcode as a 6-character bit string. */
I_OP: ADDI  { int2bin6(opAddi, $$); }
    | SUBI  { int2bin6(opSubi, $$); }
    | MULTI { int2bin6(opMulti, $$); }
    | LW    { int2bin6(opLw, $$); }
    | SW    { int2bin6(opSw, $$); }
    | BEQ   { int2bin6(opBeq, $$); }
    | BNE   { int2bin6(opBne, $$); }
    | SWP   { int2bin6(opSwp, $$); }
    ;

R_OP: ADD   { int2bin6(opAdd, $$); }
    | SUB   { int2bin6(opSub, $$); }
    | MULT  { int2bin6(opMult, $$); }
    | SLT   { int2bin6(opSlt, $$); }
    ;

J_OP: JMP   { int2bin6(opJmp, $$); }
    | SWI   { int2bin6(opSwi, $$); }
    ;

/* Immediate: decimal, "0x"-prefixed, or negated decimal. */
IMM: NUMBER       { $$ = $1; }
    | HEX NUMBER  { $$ = hex2dec($2); }
    | '-' NUMBER  { $$ = 0 - $2; }
    ;
%%
void yyerror( const char * s)
{
extern int yylineno;
cout << " error: " << s << " : line " << yylineno << endl;
}
// Entry point: assembles the program read from stdin.
// The command-line arguments are accepted but not used.
// Returns the status of yyparse(), so a failed parse gives a non-zero
// exit code.
int main(int args, char *argv[])
{
    (void)args;
    (void)argv;
    return yyparse();
}
然后是func.cpp和func.h文件,两个文件放在src子目录里面
func.h
































func.cpp

#include<cstring>
#include<iostream>
using namespace std;
#define MAX 1000
#define max 100
/*
 * Write the low 5 bits of `a` (a register number) into `b` as a
 * NUL-terminated string of '0'/'1' characters, most significant bit first.
 * `b` must have room for at least 6 bytes.  Bits above the fifth are
 * ignored.  Always returns true.
 */
bool int2bin5(int a, char *b)
{
    const unsigned int bits = (unsigned int)a;
    for (int pos = 0; pos < 5; pos++) {
        b[pos] = ((bits >> (4 - pos)) & 1u) ? '1' : '0';
    }
    b[5] = '\0';
    return true;
}
/*
 * Write the low 6 bits of `a` (an opcode) into `b` as a NUL-terminated
 * string of '0'/'1' characters, most significant bit first.
 * `b` must have room for at least 7 bytes.  Bits above the sixth are
 * ignored.  Always returns true.
 */
bool int2bin6(int a, char *b)
{
    const unsigned int bits = (unsigned int)a;
    for (int pos = 0; pos < 6; pos++) {
        b[pos] = ((bits >> (5 - pos)) & 1u) ? '1' : '0';
    }
    b[6] = '\0';
    return true;
}
/*
 * Encode `a` as the 26-bit two's-complement immediate of a J-type
 * instruction.
 *
 * `b` receives 26 '0'/'1' characters (most significant bit first) plus a
 * terminating NUL, so it must have room for at least 27 bytes.  The buffer
 * is always filled, even when the value does not fit, so a caller that
 * ignores the result still gets a well-formed string of the low 26 bits.
 *
 * Returns true when `a` lies in [-2^25, 2^25 - 1], false otherwise.
 */
bool int2bin26(int a, char *b)
{
    const unsigned int bits = (unsigned int)a;
    for (int pos = 0; pos < 26; pos++) {
        b[pos] = ((bits >> (25 - pos)) & 1u) ? '1' : '0';
    }
    b[26] = '\0';
    return a >= -(1 << 25) && a <= (1 << 25) - 1;
}
/*
 * Encode `a` as the 16-bit two's-complement immediate of an I-type
 * instruction.
 *
 * `b` receives 16 '0'/'1' characters (most significant bit first) plus a
 * terminating NUL, so it must have room for at least 17 bytes.  The buffer
 * is always filled, even when the value does not fit, so a caller that
 * ignores the result still gets a well-formed string of the low 16 bits.
 *
 * Returns true when `a` lies in [-32768, 32767], false otherwise.
 */
bool int2bin16(int a, char *b)
{
    const unsigned int bits = (unsigned int)a;
    for (int pos = 0; pos < 16; pos++) {
        b[pos] = ((bits >> (15 - pos)) & 1u) ? '1' : '0';
    }
    b[16] = '\0';
    return a >= -32768 && a <= 32767;
}
/*
 * Write `a` (a data-segment value) into `b` as 32 '0'/'1' characters, most
 * significant bit first, followed by a terminating NUL.  `b` must have room
 * for at least 33 bytes.  Negative values come out in two's complement.
 *
 * Only the low 32 bits of the value are rendered.  Always returns true.
 */
bool int2bin32(int a, char *b)
{
    /* Work on an unsigned copy: shifting 1 into the sign bit of a signed
       int is undefined behaviour. */
    const unsigned int bits = (unsigned int)a;
    for (int pos = 0; pos < 32; pos++) {
        b[pos] = ((bits >> (31 - pos)) & 1u) ? '1' : '0';
    }
    b[32] = '\0';
    return true;
}
// Return the index of label `name` in pindex[], or -1 when no such label
// was recorded.
static int findLabel(const char *name)
{
    extern char pindex[max][33];
    extern int indexNum;
    for (int k = 0; k < indexNum; k++) {
        if (strcmp(name, pindex[k]) == 0)
            return k;
    }
    return -1;
}

// Backfill pass, run once after the whole input has been parsed.
//
// 1. For every recorded conditional branch, append the 16-bit offset
//    (label PC - branch PC) / 4 to the branch's row of bin[].
// 2. For every recorded jump, append the label's PC as a 26-bit field.
//    Each jump is patched exactly once; appending the field a second time
//    would overflow the 33-byte row.
// 3. Fill the two header rows: bin[0] = binNum - 2 and
//    bin[1] = 0x200 + dataNum * 4.  (The original comment calls these the
//    instruction count and the start address.)
//
// Returns false, after printing the label name, as soon as a referenced
// label is not defined; in that case the header rows are left untouched.
// Returns true otherwise.
bool fillback()
{
    extern char bin[MAX][33], backId[max][33], jbackId[max][33];
    extern int backPo[max], backNum, backPC[max];
    extern int jbackPo[max], jbackNum;
    extern int indPo[max];
    extern int dataNum, binNum;
    char temp[33];

    // Conditional branches: PC-relative offset counted in instructions.
    for (int i = 0; i < backNum; i++) {
        const int label = findLabel(backId[i]);
        if (label < 0) {
            cout << backId[i] << " not found" << endl;
            return false;
        }
        const int offset = (indPo[label] - backPC[i]) / 4;
        int2bin16(offset, temp);
        strcat(bin[backPo[i]], temp);
    }

    // Unconditional jumps: the label's recorded PC value.
    for (int i = 0; i < jbackNum; i++) {
        const int label = findLabel(jbackId[i]);
        if (label < 0) {
            cout << jbackId[i] << "not found" << endl;
            return false;
        }
        int2bin26(indPo[label], temp);
        strcat(bin[jbackPo[i]], temp);
    }

    // Header rows at the start of the output.
    int2bin32(binNum - 2, temp);
    strcpy(bin[0], temp);
    int2bin32(0x200 + dataNum * 4, temp);
    strcpy(bin[1], temp);
    return true;
}
/*
 * Reinterpret the decimal digits of `a` as hexadecimal digits.
 *
 * The scanner delivers the text after a "0x" prefix as a decimal NUMBER,
 * so "0x200" arrives here as 200 and is turned into 0x200 (512).
 * Only the digits 0-9 can be represented this way.
 *
 * Returns the converted value; 0 maps to 0.
 */
int hex2dec(int a)
{
    int value = 0;
    for (int weight = 1; a != 0; a /= 10, weight *= 16) {
        value += (a % 10) * weight;
    }
    return value;
}
接着就是Makefile了
# Build the assembler (myasm) from the flex scanner, the bison parser and
# the helper functions in src/.
LEX  = flex
YACC = bison
CPP  = g++
CC   = gcc

.PHONY: all clean

all: myasm

myasm: lex.yy.o myp.tab.o func.o
	$(CPP) lex.yy.o myp.tab.o func.o -o myasm

func.o: src/func.cpp src/func.h
	$(CPP) -c src/func.cpp -o func.o

# The generated scanner and parser are compiled as C++.
lex.yy.o: lex.yy.c myp.tab.h
	$(CPP) -c lex.yy.c

myp.tab.o: myp.tab.c src/func.h
	$(CPP) -c myp.tab.c

# bison -d writes both the parser source and the token header.
myp.tab.c myp.tab.h: myp.y
	$(YACC) -d myp.y

lex.yy.c: myassm.l myp.tab.h
	$(LEX) myassm.l

# Remove only generated files; a blanket "*.c *.h" would also delete any
# hand-written sources placed in this directory.
clean:
	rm -f *.o lex.yy.c myp.tab.c myp.tab.h myasm
	rm -f *.bin *.txt
嘿嘿,这就是整个代码,在debian 3.1r5,gcc g++版本为4.1.2,flex,bison 环境下运行很正常.