#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <locale.h> // 添加locale头文件支持中文
/*
 * Kinds of tokens the lexer can produce.
 * The enumerators keep their declaration order, so their numeric values
 * (starting at 0) are unchanged.
 */
typedef enum {
    /* end of input */
    TOKEN_EOF = 0,

    /* identifiers and integer literals */
    TOKEN_ID,
    TOKEN_INT,

    /* assignment operator */
    TOKEN_ASSIGN,

    /* reserved words */
    TOKEN_IF,
    TOKEN_THEN,
    TOKEN_ELSE,
    TOKEN_WHILE,
    TOKEN_DO,
    TOKEN_BEGIN,
    TOKEN_END,
    TOKEN_PROGRAM,

    /* '.' */
    TOKEN_PERIOD,

    /* relational operators: == != < > <= >= */
    TOKEN_EQ,
    TOKEN_NE,
    TOKEN_LT,
    TOKEN_GT,
    TOKEN_LE,
    TOKEN_GE,

    /* arithmetic operators: + - * / */
    TOKEN_PLUS,
    TOKEN_MINUS,
    TOKEN_MUL,
    TOKEN_DIV,

    /* punctuation: ( ) ; */
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_SEMICOLON
} TokenType;
/*
 * One lexical token together with the source position where it starts.
 */
typedef struct {
    TokenType type;   /* token category */
    char lexeme[32];  /* NUL-terminated token text */
    int line;         /* line number recorded when the token was started */
    int column;       /* column number recorded when the token was started */
} Token;
/*
 * Reserved words and their token types. The two arrays are parallel:
 * keywords[i] is the spelling of the token whose type is keyword_types[i].
 */
char *keywords[] = {
    "program", "begin", "end", "if", "then", "else", "while", "do"
};
TokenType keyword_types[] = {
    TOKEN_PROGRAM, TOKEN_BEGIN, TOKEN_END, TOKEN_IF,
    TOKEN_THEN, TOKEN_ELSE, TOKEN_WHILE, TOKEN_DO
};

/* Lookahead token shared by the parsing routines. */
Token current_token;

/* Source stream the lexer reads from. */
FILE *input;

/* Most recent result of fgetc(input); an int so that it can hold EOF. */
int ch;

/* Position bookkeeping maintained by next_char(). */
int current_line = 1;
int current_column = 0;
/*
 * Read the next character from `input` into `ch` and update the position
 * counters: a newline starts a new line and resets the column to 0, any
 * other result (including EOF) advances the column by one.
 */
void next_char() {
    ch = fgetc(input);

    if (ch == '\n') {
        current_line += 1;
        current_column = 0;
    } else {
        current_column += 1;
    }
}
/*
 * Advance past consecutive whitespace characters (as classified by
 * isspace), leaving `ch` on the first non-whitespace character or EOF.
 */
void skip_whitespace() {
    for (;;) {
        if (!isspace(ch)) {
            break;
        }
        next_char();
    }
}
/*
 * Scan an identifier or reserved word starting at the current character.
 *
 * Consumes every following letter, digit or underscore. The token position
 * is taken from the current line/column before anything is consumed.
 * At most sizeof(token->lexeme) - 1 characters are stored; the rest of an
 * over-long identifier is still consumed but dropped, so the lexeme buffer
 * can never be overrun.
 *
 * token: receives line, column, lexeme and type (a keyword type when the
 *        lexeme equals an entry of `keywords`, otherwise TOKEN_ID).
 */
void read_identifier_or_keyword(Token *token) {
    const size_t capacity = sizeof token->lexeme - 1; /* keep room for NUL */
    const size_t keyword_count = sizeof(keywords) / sizeof(keywords[0]);
    size_t length = 0;

    token->line = current_line;
    token->column = current_column;

    while (isalnum(ch) || ch == '_') {
        if (length < capacity) {
            token->lexeme[length++] = (char)ch;
        }
        next_char();
    }
    token->lexeme[length] = '\0';

    for (size_t k = 0; k < keyword_count; k++) {
        if (strcmp(token->lexeme, keywords[k]) == 0) {
            token->type = keyword_types[k];
            return;
        }
    }
    token->type = TOKEN_ID;
}
/*
 * Scan an unsigned integer literal starting at the current character.
 *
 * Consumes every following decimal digit. The token position is taken from
 * the current line/column before anything is consumed. At most
 * sizeof(token->lexeme) - 1 digits are stored; further digits are consumed
 * but dropped, so the lexeme buffer can never be overrun.
 *
 * token: receives line, column, lexeme and type TOKEN_INT.
 */
void read_number(Token *token) {
    const size_t capacity = sizeof token->lexeme - 1; /* keep room for NUL */
    size_t length = 0;

    token->line = current_line;
    token->column = current_column;

    while (isdigit(ch)) {
        if (length < capacity) {
            token->lexeme[length++] = (char)ch;
        }
        next_char();
    }
    token->lexeme[length] = '\0';
    token->type = TOKEN_INT;
}
void get_next_token(Token *token) {
skip_whitespace();
token->line = current_line;
token->column = current_column;
switch (ch) {
case EOF:
token->type = TOKEN_EOF;
strcpy(token->lexeme, "EOF");
break;
case ':':
next_char();
if (ch == '=') {
token->type = TOKEN_ASSIGN;
strcpy(token->lexeme, ":=");
next_char();
} else {
token->type = TOKEN_ASSIGN;
strcpy(token->lexeme, ":");
}
break;
case '=':
next_char();
if (ch == '=') {
token->type = TOKEN_EQ;
strcpy(token->lexeme, "==");
next_char();
} else {
token->type = TOKEN_ASSIGN;
strcpy(token->lexeme, "=");
}
break;
case '!':
next_char();
if (ch == '=') {
token->type = TOKEN_NE;
strcpy(token->lexeme, "!=");
next_char();
} else {
token->type = TOKEN_NE;
strcpy(token->lexeme, "!");
}
break;
case '<':
next_char();
if (ch == '=') {
token->type = TOKEN_LE;
strcpy(token->lexeme, "<=");
next_char();
} else {
token->type = TOKEN_LT;
strcpy(token->lexeme, "<");
}
break;
case '>':
next_char();
if (ch == '=') {
token->type = TOKEN_GE;
strcpy(token->lexeme, ">=");
next_char();
} else {
token->type = TOKEN_GT;
strcpy(token->lexeme, ">");
}
break;
case '+':
token->type = TOKEN_PLUS;
strcpy(token->lexeme, "+");
next_char();
break;
case '-':
token->type = TOKEN_MINUS;
strcpy(token->lexeme, "-");
next_char();
break;
case '*':
token->type = TOKEN_MUL;
strcpy(token->lexeme, "*");
next_char();
break;
case '/':
token->type = TOKEN_DIV;
strcpy(token->lexeme, "/");
next_char();
break;
case '(':
token->type = TOKEN_LPAREN;
strcpy(token->lexeme, "(");
next_char();
break;
case ')':
token->type = TOKEN_RPAREN;
strcpy(token->lexeme, ")");
next_char();
break;
case ';':
token->type = TOKEN_SEMICOLON;
strcpy(token->lexeme, ";");
next_char();
break;
case '.':
token->type = TOKEN_PERIOD;
strcpy(token->lexeme, ".");
next_char();
break;
default:
if (isalpha(ch) || ch == '_') {
read_identifier_or_keyword(token);
} else if (isdigit(ch)) {
read_number(token);
} else {
token->line = current_line;
token->column = current_column;
sprintf(token->lexeme, "非法字符 '%c'", ch);
token->type = TOKEN_EOF;
next_char();
}
}
}
/*
 * Prototypes for the recursive-descent parsing routines. They are declared
 * up front because the routines call each other recursively (for example a
 * statement may contain a compound statement, which contains statements).
 * `(void)` makes each declaration a real prototype, so a call with
 * arguments is rejected by the compiler.
 */
void parse_program(void);
void parse_statement(void);
void parse_assignment(void);
void parse_if_stmt(void);
void parse_while_stmt(void);
void parse_compound_stmt(void);
void parse_expression(void);
void parse_logical_expr(void);
void parse_term(void);
void parse_factor(void);
/*
 * Return the text used in "expected ..." diagnostics for a token type, or
 * NULL when the type has no dedicated text.
 */
static const char *expected_token_text(TokenType type) {
    switch (type) {
    case TOKEN_PROGRAM:   return "'program'";
    case TOKEN_BEGIN:     return "'begin'";
    case TOKEN_END:       return "'end'";
    case TOKEN_IF:        return "'if'";
    case TOKEN_THEN:      return "'then'";
    case TOKEN_ELSE:      return "'else'";
    case TOKEN_WHILE:     return "'while'";
    case TOKEN_DO:        return "'do'";
    case TOKEN_ID:        return "标识符";
    case TOKEN_INT:       return "整数";
    case TOKEN_ASSIGN:    return "':='";
    case TOKEN_EQ:        return "'=='";
    case TOKEN_NE:        return "'!='";
    case TOKEN_LT:        return "'<'";
    case TOKEN_GT:        return "'>'";
    case TOKEN_LE:        return "'<='";
    case TOKEN_GE:        return "'>='";
    case TOKEN_PLUS:      return "'+'";
    case TOKEN_MINUS:     return "'-'";
    case TOKEN_MUL:       return "'*'";
    case TOKEN_DIV:       return "'/'";
    case TOKEN_LPAREN:    return "'('";
    case TOKEN_RPAREN:    return "')'";
    case TOKEN_SEMICOLON: return "';'";
    case TOKEN_PERIOD:    return "'.'";
    default:              return NULL;
    }
}

/*
 * Consume the lookahead token if it has the expected type.
 *
 * On a match, the next token is read into current_token. Otherwise a syntax
 * error naming the expected token and the actual lexeme is printed (with the
 * lookahead's line:column) and the process exits with status 1.
 *
 * expected_type: token type required at this point of the grammar.
 */
void match(TokenType expected_type) {
    if (current_token.type == expected_type) {
        get_next_token(&current_token);
        return;
    }

    /* Diagnostics are emitted in Chinese, like the rest of the program. */
    printf("语法错误 [%d:%d] 预期: ", current_token.line, current_token.column);

    const char *expected_text = expected_token_text(expected_type);
    if (expected_text != NULL) {
        printf("%s", expected_text);
    } else {
        /* Cast: the underlying type of an enum need not be int. */
        printf("token %d", (int)expected_type);
    }

    printf(", 实际: '%s'\n", current_token.lexeme);
    exit(1);
}
/*
 * program := 'program' ID ';' compound_stmt '.'
 *
 * After the final period the lookahead must be TOKEN_EOF; if it is, the
 * success message is printed, otherwise a syntax error is reported and the
 * process exits with status 1.
 */
void parse_program() {
    match(TOKEN_PROGRAM);
    match(TOKEN_ID);
    match(TOKEN_SEMICOLON);
    parse_compound_stmt();
    match(TOKEN_PERIOD);

    if (current_token.type == TOKEN_EOF) {
        printf("该程序是正确的。\n");
        return;
    }

    printf("语法错误 [%d:%d] 预期结束符, 实际: '%s'\n",
           current_token.line, current_token.column, current_token.lexeme);
    exit(1);
}
/*
 * compound_stmt := 'begin' statement { ';' statement } 'end'
 *
 * Every ';' must be followed by another statement.
 */
void parse_compound_stmt() {
    match(TOKEN_BEGIN);

    for (;;) {
        parse_statement();
        if (current_token.type != TOKEN_SEMICOLON) {
            break;
        }
        match(TOKEN_SEMICOLON);
    }

    match(TOKEN_END);
}
void parse_statement() {
switch (current_token.type) {
case TOKEN_ID:
parse_assignment();
break;
case TOKEN_IF:
parse_if_stmt();
break;
case TOKEN_WHILE:
parse_while_stmt();
break;
case TOKEN_BEGIN:
parse_compound_stmt();
break;
default:
printf("语法错误 [%d:%d] 非法的语句开始: '%s'\n",
current_token.line, current_token.column, current_token.lexeme);
exit(1);
}
}
/*
 * assignment := ID ASSIGN expression
 */
void parse_assignment() {
    match(TOKEN_ID);      /* target name */
    match(TOKEN_ASSIGN);  /* assignment operator */
    parse_expression();   /* assigned value */
}
/*
 * if_stmt := 'if' logical_expr 'then' statement [ 'else' statement ]
 *
 * An 'else' that follows the then-branch is always consumed here, so it
 * attaches to the nearest 'if'.
 */
void parse_if_stmt() {
    match(TOKEN_IF);
    parse_logical_expr();
    match(TOKEN_THEN);
    parse_statement();

    if (current_token.type != TOKEN_ELSE) {
        return; /* no else-branch */
    }
    match(TOKEN_ELSE);
    parse_statement();
}
/*
 * while_stmt := 'while' logical_expr 'do' statement
 */
void parse_while_stmt() {
    match(TOKEN_WHILE);
    parse_logical_expr();  /* loop condition */
    match(TOKEN_DO);
    parse_statement();     /* loop body */
}
void parse_logical_expr() {
parse_expression();
switch (current_token.type) {
case TOKEN_EQ:
case TOKEN_NE:
case TOKEN_LT:
case TOKEN_GT:
case TOKEN_LE:
case TOKEN_GE:
match(current_token.type);
parse_expression();
break;
default:
printf("语法错误 [%d:%d] 预期关系运算符, 实际: '%s'\n",
current_token.line, current_token.column, current_token.lexeme);
exit(1);
}
}
void parse_expression() {
parse_term();
while (current_token.type == TOKEN_PLUS || current_token.type == TOKEN_MINUS) {
match(current_token.type);
parse_term();
}
}
void parse_term() {
parse_factor();
while (current_token.type == TOKEN_MUL || current_token.type == TOKEN_DIV) {
match(current_token.type);
parse_factor();
}
}
void parse_factor() {
switch (current_token.type) {
case TOKEN_INT:
match(TOKEN_INT);
break;
case TOKEN_ID:
match(TOKEN_ID);
break;
case TOKEN_LPAREN:
match(TOKEN_LPAREN);
parse_expression();
match(TOKEN_RPAREN);
break;
default:
printf("语法错误 [%d:%d] 非法的表达式因子: '%s'\n",
current_token.line, current_token.column, current_token.lexeme);
exit(1);
}
}
int main(int argc, char *argv[]) {
// 设置本地化环境支持中文
setlocale(LC_ALL, "zh_CN.UTF-8");
if (argc < 2) {
printf("用法: %s <文件名>\n", argv[0]);
return 1;
}
input = fopen(argv[1], "r");
if (!input) {
perror("打开文件失败");
return 1;
}
// 初始化词法分析器
next_char();
get_next_token(¤t_token);
// 开始语法分析
parse_program();
fclose(input);
return 0;
}