编译原理实验
词法分析器
//实验一
#include<iostream>
#include<fstream>
#include<string>
#include<algorithm>
#include<map>
#include<vector>
#include<iterator>
#include<regex>
using namespace std;
string KeyWord[11] = { "begin","end","if","then","else","for","while","do","and","or","not" }; // reserved keywords of the CP language
string Operator[13] = { "+","-","*","/",">","<","=",":=",">=","<=","<>","++","--" };// operators (one or two characters long)
string Delimiter[3] = { "(",")",";" }; // delimiters
string L; // global: text of the CP program being analysed
map<string, int> T; // global: symbol table mapping a lexeme (or "ID"/"NUM") to its numeric code
vector<pair<string, int>> P;// global: lexical analysis result as (lexeme, code) pairs
// Fill the global symbol table T. Keywords, operators and delimiters receive
// consecutive codes in that order; every identifier shares the "ID" code and
// every unsigned integer shares the "NUM" code.
void CreateSymbolTable() {
    int next = 0;
    for (const string& word : KeyWord) {
        T[word] = next++;
    }
    for (const string& op : Operator) {
        T[op] = next++;
    }
    for (const string& mark : Delimiter) {
        T[mark] = next++;
    }
    T["ID"] = next++;
    T["NUM"] = next++;
}
// Print the symbol table ordered by code, three entries per output line.
void ShowSymbolTable() {
    // Copy the map into a vector so it can be ordered by code instead of by lexeme.
    vector<pair<string, int>> entries(T.begin(), T.end());
    sort(entries.begin(), entries.end(),
         [](const pair<string, int>& lhs, const pair<string, int>& rhs) { return lhs.second < rhs.second; });
    cout << "————————符号表——————————"<<endl;
    int shown = 0;
    for (const auto& entry : entries) {
        cout << entry.first << "\t" << entry.second << "\t";
        if (++shown % 3 == 0) cout << endl;
    }
    cout << endl << endl;
}
// Ask for a file name until a readable file is given, load its whole content
// into the global L and echo it. If standard input ends before a valid name
// is supplied, L is left empty and the function returns.
void GetLanguage() {
    string FileName;   // std::string: no fixed-size buffer to overflow
    ifstream fin;
    cout << "请输入文件名和后缀:" << endl;
    while (true) {
        if (!(cin >> FileName)) {   // EOF or stream error: stop asking
            L.clear();
            return;
        }
        fin.open(FileName);         // default mode is ios::in, so read-only files work too
        if (fin.is_open()) break;
        fin.clear();                // reset failbit before the next attempt
        cout << "未找到文件,请重新输入!" << endl;
    }
    L.assign(istreambuf_iterator<char>(fin), istreambuf_iterator<char>());
    fin.close();
    cout << endl << L << endl<< endl;
}
// Strip '#' comments from the global program text L. Everything from '#' up
// to (but not including) the end of the line is removed, so line numbers stay
// intact and a comment on a final line without trailing newline is handled.
void RemoveComments() {
    static const regex comment("#[^\n]*");
    L = regex_replace(L, comment, "");
}
// True when c is the first character of any entry in the Operator table.
bool IsOperatorChar(char c) {
    for (const string& op : Operator) {
        if (op[0] == c) return true;
    }
    return false;
}
// True when c is one of the delimiter characters '(' , ')' or ';'.
bool IsDelimiter(char c) {
    switch (c) {
    case '(':
    case ')':
    case ';':
        return true;
    default:
        return false;
    }
}
// True when c is an ASCII decimal digit.
bool IsDigit(char c) {
    if (c < '0') return false;
    if (c > '9') return false;
    return true;
}
// True when c is the underscore character.
bool IsDownLine(char c) {
    const char underscore = '_';
    return underscore == c;
}
// True when c is an ASCII letter (either case).
bool IsLetter(char c) {
    if (c >= 'a' && c <= 'z') return true;
    if (c >= 'A' && c <= 'Z') return true;
    return false;
}
// True when c is whitespace that separates tokens: space, tab, line feed or
// carriage return. '\r' is included so files with CRLF line endings do not
// produce spurious "undefined symbol" errors.
bool IsBlank(char c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
// True when c ends a keyword, identifier or number: whitespace, a delimiter,
// or a character that can start an operator.
bool IsParticiple(char c) {
    if (IsBlank(c)) return true;
    if (IsDelimiter(c)) return true;
    return IsOperatorChar(c);
}
// True when s is exactly one of the reserved keywords.
bool IsKeyWord(string s) {
    return find(begin(KeyWord), end(KeyWord), s) != end(KeyWord);
}
// True when s is exactly one of the entries of the Operator table.
bool IsOperator(string s) {
    return find(begin(Operator), end(Operator), s) != end(Operator);
}
// True when s is an unsigned integer: one or more decimal digits and nothing else.
bool IsNum(const string& s) {
    // Compiled once; building a std::regex on every call is expensive.
    static const regex num("\\d+");
    return regex_match(s, num);
}
// True when s is an identifier: a letter or underscore followed by any number
// of letters, digits or underscores.
bool IsID(const string& s) {
    // Compiled once; building a std::regex on every call is expensive.
    static const regex ID("[a-zA-Z_]\\w*");
    return regex_match(s, ID);
}
// Lexical analysis: split the global program text L into tokens and store
// them in P as (lexeme, code) pairs. A lexeme that is not a keyword,
// identifier, unsigned integer, operator or delimiter is stored with code -1
// and reported on stdout together with its line number.
void LexicalAnalysis() {
    const size_t len = L.length();
    size_t i = 0;
    int line = 1;   // current line, for error messages
    P.clear();

    // Extend tok with the following characters up to the next separator.
    // The bound is tested before L[i] is read.
    const auto scanWord = [&](string& tok) {
        while (i < len && !IsParticiple(L[i])) tok += L[i++];
    };

    while (i < len) {
        const char c = L[i++];
        if (IsBlank(c)) {
            if (c == '\n') ++line;
            continue;
        }
        string s(1, c);   // lexeme under construction
        int code = -1;    // stays -1 when the lexeme is not recognised
        if (IsDelimiter(c)) {
            code = T[s];
        }
        else if (IsOperatorChar(c)) {
            // An operator is at most two characters; take the second one
            // whenever it is an operator character as well.
            if (i < len && IsOperatorChar(L[i])) s += L[i++];
            if (IsOperator(s)) code = T[s];
        }
        else if (IsDigit(c)) {
            scanWord(s);
            if (IsNum(s)) code = T["NUM"];
        }
        else if (IsLetter(c) || IsDownLine(c)) {
            scanWord(s);
            if (IsKeyWord(s)) code = T[s];
            else if (IsID(s)) code = T["ID"];
        }
        else {
            // Unknown leading character: swallow the whole word so it is
            // reported once.
            scanWord(s);
        }
        P.push_back(make_pair(s, code));
        if (code == -1) {
            cout << "line " << line << ":未定义符号" << '"' << s << '"' << endl;
        }
    }
}
// Print every (lexeme, code) pair collected in P, one per line.
void Result() {
    cout << "————————词法分析结果——————————"<<endl;
    for (const auto& token : P) {
        cout << "<" << token.first << "," << token.second << ">" << endl;
    }
    cout << endl;
}
// Ask for an output file name and write the analysis result P to it, one
// "<lexeme,code>" entry per line. Nothing is written (and success is not
// reported) when no name can be read or the file cannot be opened.
void Save() {
    string FileName;   // std::string: no fixed-size buffer to overflow
    cout << "请输入结果保存文件名字" << endl;
    if (!(cin >> FileName)) return;
    ofstream fout(FileName);
    if (!fout) {
        cout << "无法打开文件,结果未保存" << endl << endl;
        return;
    }
    for (const auto& token : P) {
        fout << "<" << token.first << "," << token.second << ">" << '\n';
    }
    cout << "结果已经保存"<< endl << endl;
}
// Lexer driver: build the symbol table once, then repeatedly read a source
// file, analyse it, print and save the result. The loop ends when standard
// input is exhausted instead of spinning forever.
int main() {
    CreateSymbolTable();
    while (cin) {
        ShowSymbolTable();
        GetLanguage();
        if (!cin) break;   // no file name could be read
        RemoveComments();
        LexicalAnalysis();
        Result();
        Save();
    }
    return 0;
}
测试文件
while(a==b)begin
a:=a+1;#zhushi
b:=b-1;
c=c*d;
d=c/d;
if(a>b) then c=C else C=c;
end
for(int i=1;i<=10;i++) begin
a_b++;#zszszszszs
b_C--;#zszszszszs
B123:=1234567;
a=@;
123a=0;
a.b;
end
int i;
int a=10;
int b=5;
for(i=0;i<a;++i)
begin
if(b<a)b=b+i;
else b=b-i;
end
结果示例
LL(1)文法判断及分析
#include<algorithm>
#include<cstdlib>
#include<cstring>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<iterator>
#include<map>
#include<regex>
#include<set>
#include<stack>
#include<string>
#include<vector>
using namespace std;
vector<string> N; // non-terminal symbols, in the order they were read or created
vector<string> T; // terminal symbols (filled by GetTable)
map<string, vector<string>> E; // productions: non-terminal -> list of right-hand sides
map<string, set<string>> FirstN;// First set of each non-terminal
map<string, set<string>> FirstE;// First set of each right-hand side
map<string, set<string>> Follow;// Follow set of each non-terminal
map<string, map<string, set<string>>> Select;// Select set of each production: Select[left][right]
map<string, map<string, string>> Table; // predictive parsing table: Table[non-terminal][terminal]
map<string, bool> Record; // memo: whether a symbol string can derive the empty string
string p = "’"; // suffix appended to a name to build a fresh non-terminal
string q = "ε"; // the empty string symbol
string d = "→"; // separator between the left and right side of a production
string o = "|"; // separator between alternatives of one production
bool DerivationEmptyN(string N); // can the non-terminal derive the empty string?
bool DerivationEmptyE(string e); // can the symbol string derive the empty string?
set<string> GetFirstSetN(string n, int flag); // First set of a non-terminal
set<string> GetFirstSetE(string e, int flag); // First set of a symbol string
// Stream operator for a production map: prints every production as
// "left->right", one per line, following the order of the global
// non-terminal list N, and ends with a blank line. Everything goes to os.
ostream& operator <<(ostream& os, map<string, vector<string>>& m) {
    for (const string& n : N) {
        const auto entry = m.find(n);
        if (entry == m.end()) continue;   // non-terminal without productions
        for (const string& body : entry->second) {
            os << n + "->" + body << endl;
        }
    }
    os << endl;
    return os;
}
// Stream operator for First/Follow style maps: one line per key, the key
// followed by a tab and its set members separated by spaces; a blank line
// terminates the listing.
ostream& operator <<(ostream& os, map<string, set<string>>& m) {
    for (const auto& entry : m) {
        os << entry.first << "\t";
        for (const string& member : entry.second) {
            os << member << " ";
        }
        os << endl;
    }
    os << endl;
    return os;
}
// Stream operator for the Select sets: one line per production, the
// production left-aligned in a 16 character column followed by the members of
// its Select set; a blank line terminates the listing.
ostream& operator <<(ostream& os, map<string,map<string, set<string>>>& m) {
    for (auto& byLeft : m) {
        for (auto& byBody : byLeft.second) {
            os << left << setw(16) << byLeft.first + d + byBody.first;
            for (const string& symbol : byBody.second) {
                os << symbol << " ";
            }
            os << endl;
        }
    }
    os << endl;
    return os;
}
// Stream operator for the predictive parsing table. Columns follow the global
// terminal list T, rows follow the global non-terminal list N. A filled cell
// is shown as the full production, "synch" cells are shown as is, missing
// cells are blank. Output goes to os and the table is not modified.
ostream& operator <<(ostream& os, map<string, map<string,string>>& m) {
    os << "\t";
    for (const string& terminal : T) os << left << setw(10) << terminal;
    os << endl;
    for (const string& n : N) {
        os << n << "\t";
        const auto row = m.find(n);
        for (const string& terminal : T) {
            string cell;
            if (row != m.end()) {
                const auto hit = row->second.find(terminal);
                if (hit != row->second.end()) cell = hit->second;
            }
            if (!cell.empty() && cell != "synch") cell = n + d + cell;
            os << left << setw(10) << cell;
        }
        os << endl << endl;
    }
    return os;
}
// Stream operator for the nullable record: one "key<TAB>flag" line per entry
// followed by a blank line.
ostream& operator <<(ostream& os, map<string, bool>& m) {
    for (const auto& entry : m) {
        os << entry.first << "\t" << entry.second << endl;
    }
    os << endl;
    return os;
}
// Derive an unused non-terminal name from n by appending the suffix p until
// the name no longer occurs in N. Returns n unchanged when it is not in N.
string GetNewN(string n) {
    while (find(N.begin(), N.end(), n) != N.end()) {
        n += p;
    }
    return n;
}
// Split the right-hand side s at every occurrence of the alternative
// separator o and append the pieces to v. A string without separator yields
// a single piece.
void GetVector(string s,vector<string>& v) {
    // size_t is required: an `unsigned` position never compares equal to
    // string::npos on 64-bit targets.
    size_t start = 0;
    size_t end;
    while ((end = s.find(o, start)) != string::npos) {
        v.push_back(s.substr(start, end - start));
        start = end + o.size();
    }
    v.push_back(s.substr(start));   // last (or only) alternative
}
// Ask for the grammar file, then read one production per line in the form
// "Left→alt1|alt2|...". Whitespace inside a line is ignored. Non-terminals
// are appended to N and their alternatives stored in E; when the same
// left-hand side appears on several lines the alternatives are merged.
// Blank lines and lines without the arrow d are skipped. The program exits
// when standard input ends before a readable file is named.
void GetExpression() {
    const regex blank("\\s");
    string FileName;   // std::string: no fixed-size buffer to overflow
    ifstream fin;
    cout << "请输入保存文法的文件:" << endl;
    while (true) {
        if (!(cin >> FileName)) exit(EXIT_FAILURE);   // nothing more to read
        fin.open(FileName);   // default mode ios::in
        if (fin.is_open()) break;
        fin.clear();          // reset failbit before the next attempt
        cout << "文件未找到,请重新输入:" << endl;
    }
    string s;
    while (getline(fin, s))
    {
        s = regex_replace(s, blank, "");   // drop spaces, tabs and stray '\r'
        const size_t arrow = s.find(d);
        if (arrow == string::npos || arrow == 0) continue;   // blank or malformed line
        const string n = s.substr(0, arrow);
        vector<string> e;
        GetVector(s.substr(arrow + d.length()), e);   // arrow width taken from d, not hard-coded
        if (E.find(n) == E.end()) {
            N.push_back(n);
            E[n] = e;
        }
        else {
            vector<string>& known = E[n];
            known.insert(known.end(), e.begin(), e.end());
        }
    }
}
// Compute the longest common prefix of x and y (byte-wise).
// The prefix is stored in r and its length is returned.
int GetSameLength(string x,string y,string& r) {
    const size_t limit = x.length() < y.length() ? x.length() : y.length();
    size_t i = 0;
    // Bound first, then compare: never index past the shorter string.
    while (i < limit && x[i] == y[i]) {
        ++i;
    }
    r = x.substr(0, i);
    return static_cast<int>(i);
}
//提取公共左因子
void ExtractCommonLeftFactor() {
map<string, vector<string>>::iterator it; //迭代器
string LastN,NextN,NewN,s1,s2,s3;
vector<string> Lastv,Nextv;
unsigned i,j,k,n;
int m;
unsigned size;
int flag;
int* f = new int[100]; //标志数组,标识某一条产生式是否进行过左递归
int* l = new int[100]; //每一条产生式与后面产生式左边最长相同字符串的长度
int* index = new int[100]; //对上述长度排行后其对应下标,若无相同字符串,则为-1,提取左部公因子会跳过
string* s = new string[100]; //保留上述索引下对应左边最长相同字符串
for (it = E.begin(); it != E.end(); ++it) { //遍历文法
flag = 0;
LastN = it->first; //左边原非终结符号
Lastv = it->second; //右部原产生式集合
size = Lastv.size(); //产生式数量
memset(f, 0, sizeof(int) * size); //初始化数组
memset(l, 0, sizeof(int) * size);
memset(index, -1, sizeof(int) * size);
for (i = 0; i < size; ++i) {
for (j = i + 1; j < size; ++j) { //
if (Lastv[i][0] == Lastv[j][0]) { //如果第一个字符相同,则获得产生式左边最长的相同字符串及其长度
m = GetSameLength(Lastv[i], Lastv[j], s1); //m保存长度,s1保存字符串
if (m > l[i]) { //相同长度更长,跟新长度和字符串
l[i] = m;
s[i] = s1;
}
flag = 1; //能提取公因子
}
}
}
if (!flag) continue; //无需提取,则跳过
k = 0;
for (i = 0; i < size - 1; ++i) { //对左部相同长度排序,减少提取公因子步骤数
if (l[i] <= 0)continue;
m = l[i];
n = i;
for (j = i; j < size; ++j) {
if (l[j] > m) { //获取最大
m = l[j];
n = j;
}
}
l[n] = l[i]; //交换位置
l[i] = m;
index[k++] = n; //保存排序后索引
}
for (i = 0; i < size; ++i) { //提取左公因子
if (f[i])continue; //已经提取过
if (index[i] == -1) { //没有公因子的产生式
break;
}
NewN = GetNewN(LastN); //新的非终结符号
m=index[i]; //获取产生式在vector的位置
s1 = s[m]; //获取相同部分字符串
Nextv.push_back(s1 + NewN);
vector<string> Newv; //新非终结符号的产生式集合
vector<string> Oldv; //原非终结符号的产生式集合
for (j = m; j < size; ++j) { //往后遍历,将所有具有公因子的产生式找出来
if (f[j])continue; //已经提取过,则跳过
s2 = Lastv[j]; //s2,可能是待提取的产生式
k = s2.find(s1);
if (k == 0) { //是目标产生式
f[j] = 1; //标记为已经提取
s3 = s2.substr(s1.length(), s2.length()); //后半部分字符
if (s3 == "")Newv.push_back(q); //如果减后为空,则加入空字符
else Newv.push_back(s3);
}
else {
Nextv.push_back(s2);
}
}
N.push_back(NewN);
E[NewN] = Newv;
Newv.clear();
}
E[LastN] = Nextv;
Nextv.clear();
}
}
// Remove direct left recursion from the productions v of non-terminal n and
// store the result in E:
//     n → nα | β   becomes   n → βn’ ,  n’ → αn’ | ε
// When no alternative is left recursive, E[n] is simply set to v.
void EliminateLeftRecursionDirect(string n, vector<string> v) {
    const size_t len = n.length();
    // recursive[i] != 0: alternative i starts with n itself (and not with a
    // longer name such as n followed by the suffix p).
    vector<char> recursive(v.size(), 0);
    bool any = false;
    for (size_t i = 0; i < v.size(); ++i) {
        const string& e = v[i];
        if (e.compare(0, len, n) == 0 && e.compare(len, p.length(), p) != 0) {
            recursive[i] = 1;
            any = true;
        }
    }
    if (!any) {
        E[n] = v;
        return;
    }
    const string NewN = GetNewN(n);
    vector<string> Newv;    // alternatives of the new non-terminal
    vector<string> Nextv;   // rewritten alternatives of n
    for (size_t i = 0; i < v.size(); ++i) {
        if (recursive[i]) {
            // Drop the leading n and let the new non-terminal continue.
            Newv.push_back(v[i].substr(len) + NewN);
        }
        else {
            Nextv.push_back(v[i] + NewN);
        }
    }
    Newv.push_back(q);   // the new non-terminal may also end the repetition
    N.push_back(NewN);
    E[n] = Nextv;
    E[NewN] = Newv;
}
//消除左递归
void EliminateLeftRecursion(){
unsigned i,j,k,l;
unsigned idx, len;
string UseN,LastN, NextN,s1,s2,s;
vector<string> Usev ,Lastv, Nextv;
unsigned size = N.size();
for (i = 0; i < size; ++i) { //先消除一遍直接左递归
LastN = N[i];
Nextv = E[LastN];
EliminateLeftRecursionDirect(LastN, Nextv);
}
for (i = 1; i < size; ++i) { //消除间接左递归
if (N[i] == "B") {
cout << 1;
}
Nextv.clear();
LastN = N[i]; //被替换对应字符
Lastv = E[LastN]; //其对应产生式
copy(begin(Lastv), end(Lastv), back_inserter(Nextv));
for (j = 0; j < i; ++j) {
UseN = N[j]; //替换使用非终结字符
Usev = E[UseN]; //其对应产生式
for (k = 0; k < Lastv.size(); ++k) {
s = Lastv[k];
len = UseN.length();
idx = s.find(UseN); //产生式寻找替换使用字符
if (idx != 0)continue; //左边第一个不是替换使用字符,跳过
else {
s1 = s;
Nextv.erase(Nextv.begin() + k); //删除能被替换的产生式
}
for (l = 0; l < Usev.size(); ++l) { //替换产生式
s2 = Usev[l];
Nextv.push_back(s1.replace(idx, len, s2));
s1 = s;
}
}
Lastv = Nextv;
}
Nextv.erase(unique(Nextv.begin(), Nextv.end()), Nextv.end()); //去重
EliminateLeftRecursionDirect(LastN, Nextv); //消除直接左递归
}
}
// Decide whether non-terminal n can derive the empty string: true as soon as
// one of its alternatives can. Results for alternatives are taken from Record
// when present; the verdict for n is stored in Record as well.
bool DerivationEmptyN(string n) {
    const vector<string> bodies = E[n];
    bool nullable = false;
    for (size_t k = 0; k < bodies.size() && !nullable; ++k) {
        const string& body = bodies[k];
        const auto cached = Record.find(body);
        nullable = (cached != Record.end()) ? cached->second : DerivationEmptyE(body);
    }
    Record[n] = nullable;
    return nullable;
}
// Decide whether the symbol string e can derive the empty string: every
// symbol must be the empty symbol q or a non-terminal that can derive the
// empty string. The verdict is stored in Record[e].
bool DerivationEmptyE(string e) {
    size_t i = 0;
    bool nullable = true;
    while (nullable && i < e.length()) {
        if (e[i] >= 'A' && e[i] <= 'Z') {
            // Non-terminal: the letter at position i plus any p suffixes.
            string n(1, e[i++]);
            while (e.compare(i, p.length(), p) == 0) {
                n += p;
                i += p.length();
            }
            const auto known = Record.find(n);
            nullable = (known != Record.end()) ? known->second : DerivationEmptyN(n);
        }
        else if (e.compare(i, q.length(), q) == 0) {
            i += q.length();   // explicit empty symbol
        }
        else {
            nullable = false;  // a terminal can never vanish
        }
    }
    Record[e] = nullable;
    return nullable;
}
// Compute the First set of the symbol string e. Symbols are read left to
// right; the First set of each non-terminal (without q) is added until a
// symbol that cannot derive the empty string is met. q is part of the result
// only when the whole string can derive the empty string.
// When flag is non-zero the result is also stored in FirstE[e].
set<string> GetFirstSetE(string e,int flag) {
    set<string> s;
    size_t i = 0;
    bool allNullable = true;
    while (i < e.size()) {
        if (e[i] >= 'A' && e[i] <= 'Z') {
            // Non-terminal: the letter plus any p suffixes. The name is
            // rebuilt for every symbol.
            string n(1, e[i++]);
            while (e.compare(i, p.length(), p) == 0) {
                n += p;
                i += p.length();
            }
            set<string> s1 = GetFirstSetN(n, flag);
            s1.erase(q);
            s.insert(s1.begin(), s1.end());
            if (!DerivationEmptyN(n)) {
                allNullable = false;
                break;
            }
        }
        else if (e.compare(i, q.length(), q) == 0) {
            i += q.length();   // explicit empty symbol, keep scanning
        }
        else {
            s.insert(string(1,e[i]));   // terminal: ends the scan
            allNullable = false;
            break;
        }
    }
    if (allNullable) s.insert(q);
    if (flag)FirstE[e] = s;
    return s;
}
// Compute the First set of non-terminal n as the union of the First sets of
// all its alternatives. When flag is non-zero the result is also stored in
// FirstN[n].
set<string> GetFirstSetN(string n,int flag) {
    const vector<string> bodies = E[n];
    set<string> result;
    for (const string& body : bodies) {
        const set<string> part = GetFirstSetE(body, flag);
        result.insert(part.begin(), part.end());
    }
    if (flag) {
        FirstN[n] = result;
    }
    return result;
}
// Compute and store the First set of every non-terminal in N.
void GetFirst() {
    for (const string& n : N) {
        FirstN[n] = GetFirstSetN(n, 1);
    }
}
// Compute the Follow set of every non-terminal. "$" is put into the Follow
// set of the start symbol N[0]; then, for every occurrence of a non-terminal
// B in a production A → αBβ:
//   - First(β) without q is added to Follow(B);
//   - if β is empty or can derive the empty string, Follow(A) is added too.
// The passes are repeated until no set grows any more. Does nothing for an
// empty grammar.
void GetFollow() {
    if (N.empty()) return;
    Follow[N[0]].insert("$");
    bool changed = true;
    while (changed) {
        changed = false;
        for (const string& nl : N) {             // left-hand side
            const vector<string> bodies = E[nl];
            for (const string& e : bodies) {     // one right-hand side
                size_t k = 0;
                while (k < e.length()) {
                    if (!(e[k] >= 'A' && e[k] <= 'Z')) {
                        ++k;
                        continue;
                    }
                    // Non-terminal: the letter plus any p suffixes.
                    string nr(1, e[k++]);
                    while (e.compare(k, p.length(), p) == 0) {
                        nr += p;
                        k += p.length();
                    }
                    const string nt = e.substr(k);   // what follows nr
                    set<string> add;
                    if (nt.empty() || DerivationEmptyE(nt)) {
                        add = Follow[nl];            // copy: nr may equal nl
                    }
                    if (!nt.empty()) {
                        set<string> sf = GetFirstSetE(nt, 0);
                        sf.erase(q);
                        add.insert(sf.begin(), sf.end());
                    }
                    set<string>& target = Follow[nr];
                    const size_t before = target.size();
                    target.insert(add.begin(), add.end());
                    if (target.size() != before) changed = true;
                }
            }
        }
    }
}
//获得Select集
void GetSelect() {
unsigned i,j;
string n,e;
vector<string> v;
set<string> fi,fo;
for (i = 0; i < N.size(); ++i) {
n = N[i];
v = E[n];
for (j = 0; j < v.size(); ++j) {
e = v[j];
fi = FirstE[e];
fi.erase(q);
if (DerivationEmptyE(e)) {
fo = Follow[n];
fi.insert(fo.begin(), fo.end());
Select[n][e] = fi;
}
else {
Select[n][e] = fi;
}
}
}
}
// Check the LL(1) condition: the Select sets of any two productions of the
// same non-terminal must be disjoint. Every conflicting pair is reported on
// stdout. Returns true when no conflict exists.
bool Test() {
    bool isLL1 = true;
    for (const string& n : N) {
        const map<string, set<string>> m = Select[n];
        for (auto it1 = m.begin(); it1 != m.end(); ++it1) {
            auto it2 = it1;
            for (++it2; it2 != m.end(); ++it2) {
                // Fresh set for every pair, otherwise an earlier conflict
                // would be reported for all later pairs as well.
                set<string> s;
                set_intersection(it1->second.begin(), it1->second.end(),
                                 it2->second.begin(), it2->second.end(),
                                 inserter(s, s.begin()));
                if (s.empty()) continue;
                isLL1 = false;
                cout << setiosflags(ios::left) << setw(12)<<n + d + it1->first << "和" <<setw(12)<<n + d + it2->first << "的Select集有交集:";
                for (const string& x : s) cout << x << " ";
                cout << endl;
            }
        }
    }
    return isLL1;
}
// Build the predictive parsing table from the Select sets: for every symbol
// in Select(A → α) the cell Table[A][symbol] is set to α. All symbols met are
// appended (in sorted order) to the global terminal list T.
void GetTable() {
    set<string> terminals;
    for (const auto& byLeft : Select) {
        for (const auto& byBody : byLeft.second) {
            for (const string& symbol : byBody.second) {
                terminals.insert(symbol);
                Table[byLeft.first][symbol] = byBody.first;
            }
        }
    }
    T.insert(T.end(), terminals.begin(), terminals.end());
}
//为预测分析表增加同步化入口
void AddSynch() {
unsigned i;
string n;
set<string> s;
set<string>::iterator it;
for (i = 0; i < N.size(); ++i) {
n = N[i];
s = Follow[n];
for (it = s.begin(); it != s.end(); ++it) {
if (Table[n][*it] == "")Table[n][*it] = "synch";
}
}
}
// Concatenate the content of a stack into one string, top element first.
// The stack is taken by value, so the caller's stack is untouched.
string GetString(stack<string> s) {
    string joined;
    for (; !s.empty(); s.pop()) {
        joined.append(s.top());
    }
    return joined;
}
// Table-driven top-down (predictive) analysis. Sentences are read from stdin
// until input ends; for each one every step (matched prefix, stack, remaining
// input, action) is printed, followed by a verdict. Errors are recovered by
// skipping input (empty table cell), popping the non-terminal ("synch") or
// popping an unmatched terminal. Does nothing for an empty grammar.
void TopToDownAnalysis() {
    if (N.empty()) return;   // no start symbol to push
    while (1) {
        unsigned i;
        int j;
        unsigned size;
        int step = 1;
        string sl;           // matched part of the input
        stack<string> st;    // parse stack
        string si;           // remaining input
        string c;            // next input symbol
        string e;            // table entry
        string s;            // symbol on top of the stack
        string k;
        int flag = 0;        // set to 1 as soon as any error is met
        st.push("$");
        st.push(N[0]);
        cout << "请输入句子:" << endl;
        if (!(cin >> si))break;
        si = si + "$";
        size = si.length();
        if (size < 16)size = 16;   // column width for the trace
        cout << "预测分析结果:" << endl;
        cout << setiosflags(ios::left) << setw(6) << "步骤" << setw(size) << "已匹配" << setw(size) << "栈" << setw(size) << "输入串" << setw(size) << "动作" << endl;
        // Print one trace row for the current state with the given action text.
        const auto report = [&](const string& action) {
            cout << setiosflags(ios::left) << setw(6) << step++ << setw(size) << sl << setw(size) << GetString(st) << setw(size) << si << setw(size) << action << endl;
        };
        while (st.size() != 1) {
            s = st.top();
            c = "";
            for (i = 0; i < si.length(); ++i) {   // shortest input prefix that is a known terminal
                c += si[i];
                if (find(T.begin(), T.end(), c) != T.end())break;
            }
            if (i == si.length()) {   // remaining input starts with no known terminal
                flag = 1;
                break;
            }
            if (find(N.begin(), N.end(), s) != N.end()) {   // non-terminal on top: expand
                e = Table[s][c];
                if (e == "") {
                    flag = 1;
                    report("错误,忽略" + c + ",继续分析");
                    si = si.substr(c.length(), si.length());
                }
                else if (e == "synch") {
                    flag = 1;
                    report("错误,弹出" + s + ",继续分析");
                    st.pop();
                }
                else {
                    report("推导:" + s + d + e);
                    st.pop();
                    k = "";
                    for (j = e.size() - 1; j >= 0; j--) {   // push the right-hand side, rightmost symbol first
                        k = e[j] + k;
                        if (k == q) {   // the empty symbol is never pushed
                            k = "";
                            continue;
                        }
                        if (find(T.begin(), T.end(), k) != T.end() || find(N.begin(), N.end(), k) != N.end()) {
                            st.push(k);
                            k = "";
                        }
                    }
                }
            }
            else {   // terminal on top: match
                if (s == c) {
                    report("匹配:" + s);
                    si = si.substr(s.length(), si.length());
                    sl += c;
                }
                else {
                    flag = 1;
                    report("无法匹配,弹出" + s + ",继续分析");
                }
                st.pop();
            }
        }
        cout << setiosflags(ios::left) << setw(6) << step++ << setw(size) << sl << setw(size) << GetString(st) << setw(size) << si << setw(size) << endl;
        // Only "$" should remain; anything in front of it was not consumed.
        if (si.length() > 1)cout << "输入串有多余字符:" <<si.substr(0,si.length()-1)<< endl;
        if (flag == 0) {
            cout << "句子合法" << endl << endl;
        }
        else {
            cout << "句子非法" << endl << endl;
        }
    }
}
// LL(1) driver: read the grammar, normalise it (left recursion, left
// factors), compute First/Follow/Select, and - when the grammar is LL(1) -
// build the parsing table and analyse sentences typed by the user.
int main() {
    GetExpression();
    cout <<"读取文法:" <<endl<< E;

    EliminateLeftRecursion();
    cout << "消除左递归:" << endl << E;

    ExtractCommonLeftFactor();
    cout << "提取左公因子:" << endl << E;

    GetFirst();
    cout << "非终结字符First集:" << endl << FirstN;

    GetFollow();
    cout << "非终结字符Follow集:" << endl << Follow;

    GetSelect();
    cout << "产生式Select集:" << endl << Select;

    if (!Test()) {
        cout << "判断:该文法不是LL(1)文法" << endl<<endl;
        return 0;
    }
    cout << "判断:该文法是LL(1)文法" << endl<<endl;
    GetTable();
    cout << "预测分析表:" << endl << Table;
    AddSynch();
    cout << "含同步化入口的预测分析表:" << endl << Table;
    TopToDownAnalysis();
    return 0;
}
测试文件(文法),字符串自行根据文法输入。以下依次给出5个相互独立的文法(左部符号重复处即为新文法的开始),每个文法需单独保存为一个文件后再作为程序输入:
E→TE’
E’→ATE’|ε
T→FT’
T’→MFT’ |ε
F→(E) | i
A → + | -
M → * | /
E→E + T|T
T→T*F|F
F→(E)|i
E→TE’
E’→+TE’|ε
T→FT’
T’→*FT’|ε
F→(E)|i
S→Ac|c
A→Bb|b
B→Sa|a
S→iEtS|iEtSeS|a
E→b
结果示例
SLR(1)分析与中间代码生成
所用文法(程序中分析表为手动填入)
S’ → S
S → i := E
E → E+T
E → E-T
E → T
T → T*F
T → T/F
T →F
F→ (E)
F→ i
#include<iostream>
#include<vector>
#include<map>
#include<stack>
#include<iomanip>
#include<string>
using namespace std;
map<string,map<string, string>> T; // SLR(1) table: T[state][symbol] holds shift/goto, reduce or accept entries
map<string, int>TP; // names and their offset addresses
map<string,string> E; // productions, keyed by their number as a string
map<string, string> R; // syntax-directed definitions, keyed by production
vector<string> newtemp; // temporary variables
int Count; // number of temporary variables created so far
vector<string> save; // generated three-address code
vector<string> N; // non-terminal names already handed out by GetNewN
map<string, string> place; // place attribute of each non-terminal instance
string O[14] = {"i",":","=","(","+","-","*","/",")","$","S","E","T","F"};
// Concatenate the content of a stack into one string, bottom element first.
// The stack is taken by value, so the caller's stack is untouched.
string GetString(stack<string> s) {
    string text;
    for (; !s.empty(); s.pop()) {
        text.insert(0, s.top());
    }
    return text;
}
// Fill the global tables of the SLR(1) parser:
//   E - the ten productions, keyed "0".."9";
//   R - the syntax-directed definition attached to each production;
//   T - the hand-written SLR(1) table. "Sk" means shift / go to state k,
//       "Rk" means reduce by production k, "Acc" means accept.
void CreateTable() {
    // Grammar
    E["0"] = "S'→S";
    E["1"] = "S→i:=E";
    E["2"] = "E→E+T";
    E["3"] = "E→E-T";
    E["4"] = "E→T";
    E["5"] = "T→T*F";
    E["6"] = "T→T/F";
    E["7"] = "T→F";
    E["8"] = "F→(E)";
    E["9"] = "F→i";
    // Syntax-directed definitions (one per production)
    R["S'→S"] = "";
    R["S→i:=E"] = "i.lexval=E.val";
    R["E→E+T"] = "E.val=E.val+T.val";
    R["E→E-T"] = "E.val=E.val-T.val";
    R["E→T"] = "E.val=T.val";
    R["T→T*F"] = "T.val=T.val*F.val";
    R["T→T/F"] = "T.val=T.val/F.val";
    R["T→F"] = "T.val=F.val";
    R["F→(E)"] = "F.val=E.val";
    R["F→i"] = "F.val=i.lexval";

    // States that expect an operand shift "i" to 9 and "(" to 8.
    const auto shiftOperand = [](const string& state) {
        T[state]["i"] = "S9";
        T[state]["("] = "S8";
    };
    // States that reduce on every operator, ")" and "$".
    const auto reduceAlways = [](const string& state, const string& action) {
        for (const char* symbol : { "+", "-", "*", "/", ")", "$" }) T[state][symbol] = action;
    };
    // States that reduce on "+", "-", ")" and "$" but shift "*" and "/".
    const auto reduceUnlessMul = [](const string& state, const string& action) {
        for (const char* symbol : { "+", "-", ")", "$" }) T[state][symbol] = action;
        T[state]["*"] = "S12";
        T[state]["/"] = "S13";
    };

    // SLR(1) table
    T["0"]["i"] = "S2";
    T["0"]["S"] = "S1";
    T["1"]["$"] = "Acc";
    T["2"][":"] = "S3";
    T["3"]["="] = "S4";
    shiftOperand("4");
    T["4"]["E"] = "S5";
    T["4"]["T"] = "S6";
    T["4"]["F"] = "S7";
    T["5"]["+"] = "S10";
    T["5"]["-"] = "S11";
    T["5"]["$"] = "R1";
    reduceUnlessMul("6", "R4");
    reduceAlways("7", "R7");
    shiftOperand("8");
    T["8"]["E"] = "S14";
    T["8"]["T"] = "S6";
    T["8"]["F"] = "S7";
    reduceAlways("9", "R9");
    shiftOperand("10");
    T["10"]["T"] = "S15";
    T["10"]["F"] = "S7";
    shiftOperand("11");
    T["11"]["T"] = "S16";
    T["11"]["F"] = "S7";
    shiftOperand("12");
    T["12"]["F"] = "S17";
    shiftOperand("13");
    T["13"]["F"] = "S18";
    T["14"]["+"] = "S10";
    T["14"]["-"] = "S11";
    T["14"][")"] = "S19";
    reduceUnlessMul("15", "R2");
    reduceUnlessMul("16", "R3");
    reduceAlways("17", "R5");
    reduceAlways("18", "R6");
    reduceAlways("19", "R8");
}
// Print the ten numbered productions followed by the SLR(1) table (20 states,
// one column per symbol listed in O).
void Show() {
    cout << "文法:" << endl;
    for (int number = 0; number < 10; ++number) {
        cout << number << "\t" << E[to_string(number)] << endl;
    }
    cout << endl << endl;

    cout << "SLR(1)分析表:" << endl;
    cout << "\t";
    for (const string& symbol : O) {
        cout << symbol <<"\t";
    }
    cout << endl;
    for (int state = 0; state < 20; ++state) {
        map<string, string>& row = T[to_string(state)];
        cout << state <<"\t";
        for (const string& symbol : O) {
            cout << row[symbol] <<"\t";
        }
        cout << endl;
    }
    cout << endl << endl;
}
// Derive an unused name from l by appending "'" until the name no longer
// occurs in N. Returns l unchanged when it is not in N.
string GetNewN(string l) {
    while (find(N.begin(), N.end(), l) != N.end()) {
        l += "'";
    }
    return l;
}
// Semantic action executed on a reduction.
//   l - left-hand non-terminal of the production being reduced
//   I - production number ("1".."9")
//   m - the concrete right-hand side taken from the parse stack, in which
//       non-terminals carry the unique names handed out earlier
// A fresh unique name is created for l, its place attribute is recorded,
// three-address code is appended to save where the production requires it,
// and the new name is returned.
string IntermediateCode(string l,string I,string m) {
    l = GetNewN(l);
    if (I == "1") {
        // S → i := E : emit the assignment
        const string target = m.substr(0, m.find(":"));
        const string source = m.substr(m.find("=") + 1);
        save.push_back(target + " := " + place[source]);
    }
    else if (I == "2" || I == "3" || I == "5" || I == "6") {
        // Binary operation: compute into a new temporary
        const string op = (I == "2") ? "+" : (I == "3") ? "-" : (I == "5") ? "*" : "/";
        const size_t at = m.find(op);
        const string lhs = m.substr(0, at);
        const string rhs = m.substr(at + 1);
        const string temp = "t" + to_string(Count++);
        save.push_back(temp + " := " + place[lhs] + " " + op + " " + place[rhs]);
        place[l] = temp;
    }
    else if (I == "4" || I == "7") {
        // Unit productions E → T and T → F pass the place on
        place[l] = place[m];
    }
    else if (I == "8") {
        // F → (E): strip the parentheses
        place[l] = place[m.substr(1,m.length()-2)];
    }
    else if (I == "9") {
        // F → i: the identifier is its own place
        place[l] = m;
    }
    N.push_back(l);
    return l;
}
// Print the generated three-address code, one instruction per line, followed
// by a blank line.
void ShowCode() {
    for (const string& instruction : save) {
        cout << instruction << endl;
    }
    cout << endl;
}
// SLR(1) analysis of the sentence si. Every shift/reduce step is printed;
// reductions trigger IntermediateCode. When the sentence is accepted the
// generated three-address code is printed, otherwise the sentence is
// reported as illegal.
void PredictiveAnalytics(string si) {
    static const string arrow = "→";   // separator used in the production strings of E
    N.clear();
    place.clear();
    save.clear();
    TP["i"] = 100;    // initial offset
    Count = 0;        // no temporaries yet
    newtemp.clear();
    string n, c, action, go,f, e,m;
    int step = 1;
    stack<string> st;
    st.push("0");
    si = si + "$";
    int size = si.length();   // column width for the trace
    if (size < 10)size = 30;
    else size = size*2 ;
    cout << setiosflags(ios::left) << setw(6) << "步骤" << setw(size) << "栈" << setw(size) << "输入串" << setw(8) << "action" << setw(6) << "go" << endl;
    while (action != "Acc") {
        n = st.top();            // current state
        c = si.substr(0, 1);     // next input symbol
        action = T[n][c];
        if (!action.empty() && action[0] == 'S') {   // shift
            go = "";
            cout << setiosflags(ios::left) << setw(6) << step++ << setw(size) << GetString(st) << setw(size) << si << setw(8) << action << setw(6) << go << endl;
            st.push(c);
            st.push(action.substr(1, action.length()));
            si = si.substr(1, si.length());
        }
        else if (!action.empty() && action[0] == 'R') {   // reduce
            string I = action.substr(1, action.length());   // production number
            e = E[I];
            string l = e.substr(0, 1);                      // left-hand non-terminal
            // Right-hand side: located via the arrow, so the byte width of
            // the arrow in the source encoding does not matter.
            const size_t cut = e.find(arrow);
            string r = (cut == string::npos) ? string() : e.substr(cut + arrow.length());
            f = "";   // popped stack content, for the trace
            m = "";   // concrete right-hand side (symbols only)
            for (unsigned i = 0; i < r.length(); ++i) {
                f =st.top()+f;   // state
                st.pop();
                m = st.top() + m;
                f =st.top()+f;   // symbol
                st.pop();
            }
            n = st.top();
            string t = T[n][l];   // goto entry for the reduced non-terminal
            if (t.empty()) {      // incomplete table: report the sentence as illegal
                go = "";
                break;
            }
            go = t.substr(1, t.length());
            cout << setiosflags(ios::left) << setw(6) << step++ << setw(size) << GetString(st) + f << setw(size) << si << setw(8) << action << setw(6) << go << endl;
            l = IntermediateCode(l, I, m);
            st.push(l);
            st.push(go);
        }
        else break;   // accept or error
    }
    cout << setiosflags(ios::left) << setw(6) << step++ << setw(size) << GetString(st) << setw(size) << si << setw(8) << action << setw(6) << go << endl;
    if (action == "Acc") {
        cout << "句子合法" << endl;
        cout << "中间代码:" << endl;
        ShowCode();
    }
    else cout << "句子非法" << endl;
}
// SLR(1) driver: build and show the tables, then analyse every sentence typed
// by the user until input ends.
int main() {
    CreateTable();
    Show();
    string si;
    for (;;) {
        cout << "请输入字符串:"<<endl;
        if (!(cin >> si)) break;
        PredictiveAnalytics(si);
    }
}
结果示例