package com;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Hand-written lexical analyser for a small PL/0-style language.
 *
 * <p>The scanner is entirely static: {@link #analysis(char[], int)} recognises one token
 * starting at a given offset, stores its text in {@code id}, leaves {@link #length}
 * pointing just past it and records identifiers, numbers and reserved words in the
 * three symbol tables below. Because all state is static and unsynchronised, the class
 * is not safe for concurrent use.
 */
public class WordAnalysis {
    /** Scan cursor: after {@link #analysis(char[], int)} it is the index just past the token. */
    public static int length = 0;
    /** Type of the most recently recognised number or operator token. */
    static WordsType sym = WordsType.SYM_IDENTIFIER;
    /** Text of the most recently recognised token ("" when only whitespace was left). */
    static String id = "";
    /** Position that will be assigned to the next new identifier. */
    static Integer countID = 1;
    /** Position that will be assigned to the next new number. */
    static Integer countNUM = 1;
    /** Reserved words seen so far; the values are always null. */
    static Map<String, Integer> comMap = new HashMap<String, Integer>();
    /** Identifier table: identifier text to its 1-based position of first appearance. */
    static Map<String, Integer> idMap = new HashMap<String, Integer>();
    /** Number table: literal text to its 1-based position of first appearance. */
    static Map<String, Integer> numMap = new HashMap<String, Integer>();

    /** Token categories produced by the scanner. */
    public enum WordsType {
        SYM_IDENTIFIER, // identifier
        SYM_NUMBER, // numeric constant
        SYM_PLUS, // +
        SYM_MINUS, // -
        SYM_TIMES, // *
        SYM_SLASH, // /
        SYM_ODD, // odd
        SYM_EQU, // =
        SYM_NEQ, // <>
        SYM_LES, // <
        SYM_LEQ, // <=
        SYM_GTR, // >
        SYM_GEQ, // >=
        SYM_LPAREN, // (
        SYM_RPAREN, // )
        SYM_COMMA, // ,
        SYM_SEMICOLON, // ;
        SYM_PERIOD, // .
        SYM_BECOMES, // :=
        SYM_BEGIN, // begin
        SYM_END, // end
        SYM_IF, // if
        SYM_THEN, // then
        SYM_WHILE, // while
        SYM_DO, // do
        SYM_CONST, // const
        SYM_VAR, // var
        SYM_CALL, // call
        SYM_PROCEDURE; // procedure

        /**
         * Returns the short spelling of an expression token ("id", "num", an operator or a
         * parenthesis). This is an overload taking the type as an argument; it does not
         * override {@link Object#toString()}.
         *
         * @param wt token type to spell
         * @return the spelling, or "" for every type without one
         */
        public String toString(WordsType wt) {
            String word = "";
            switch (wt) {
            case SYM_IDENTIFIER:
                word = "id";
                break;
            case SYM_PLUS:
                word = "+";
                break;
            case SYM_TIMES:
                word = "*";
                break;
            case SYM_MINUS:
                word = "-";
                break;
            case SYM_SLASH:
                word = "/";
                break;
            case SYM_LPAREN:
                word = "(";
                break;
            case SYM_RPAREN:
                word = ")";
                break;
            case SYM_NUMBER:
                word = "num";
                break;
            default:
                break;
            }
            return word;
        }
    }

    /** Reserved words and the token type each one maps to. */
    private static final Map<String, WordsType> KEYWORDS = new HashMap<String, WordsType>();
    static {
        KEYWORDS.put("begin", WordsType.SYM_BEGIN);
        KEYWORDS.put("end", WordsType.SYM_END);
        KEYWORDS.put("const", WordsType.SYM_CONST);
        KEYWORDS.put("if", WordsType.SYM_IF);
        KEYWORDS.put("then", WordsType.SYM_THEN);
        KEYWORDS.put("while", WordsType.SYM_WHILE);
        KEYWORDS.put("do", WordsType.SYM_DO);
        KEYWORDS.put("var", WordsType.SYM_VAR);
        KEYWORDS.put("procedure", WordsType.SYM_PROCEDURE);
        KEYWORDS.put("odd", WordsType.SYM_ODD);
        KEYWORDS.put("call", WordsType.SYM_CALL);
    }

    /**
     * Recognises the next token of {@code temp}, starting the scan at index {@code len}.
     *
     * <p>Leading blanks and tabs are skipped. On return {@link #length} is the index just
     * past the token and {@code id} holds its text.
     *
     * @param temp characters of the line being scanned
     * @param len  index at which scanning starts
     * @return the token type; {@code null} for an unrecognised character, and also when
     *         nothing but whitespace is left (in that case {@code id} is set to "")
     */
    public static WordsType analysis(char[] temp, int len) {
        length = len;
        // Skip blanks and tabs, but never run past the end of the buffer.
        while (length < temp.length && (temp[length] == '\t' || temp[length] == ' ')) {
            length++;
        }
        if (length >= temp.length) {
            id = ""; // marks "no token" so callers do not see the previous token's text
            return null;
        }
        char first = temp[length];
        if (isLetter(first)) {
            return scanWord(temp);
        }
        if (isDigit(first)) {
            return scanNumber(temp);
        }
        return caculator(temp);
    }

    /** Scans a run of letters at the cursor and files it as a reserved word or identifier. */
    private static WordsType scanWord(char[] temp) {
        StringBuilder word = new StringBuilder();
        while (length < temp.length && isLetter(temp[length])) {
            word.append(temp[length]);
            length++;
        }
        id = word.toString();
        WordsType wt = reserve(id);
        if (wt == WordsType.SYM_IDENTIFIER) {
            // Register the identifier once, whether or not it is the last token of the line.
            if (!idMap.containsKey(id)) {
                idMap.put(id, countID);
                countID++;
            }
        } else {
            comMap.put(id, null);
        }
        return wt;
    }

    /** Scans a run of digits at the cursor and registers the literal in the number table. */
    private static WordsType scanNumber(char[] temp) {
        StringBuilder digits = new StringBuilder();
        while (length < temp.length && isDigit(temp[length])) {
            digits.append(temp[length]);
            length++;
        }
        id = digits.toString();
        sym = WordsType.SYM_NUMBER;
        if (!numMap.containsKey(id)) {
            numMap.put(id, countNUM);
            countNUM++;
        }
        return sym;
    }

    /**
     * Recognises the operator or punctuation token at the current cursor {@link #length}.
     *
     * <p>The cursor is advanced past the token (one or two characters) and {@code id} is
     * set to its text. The two-character tokens are {@code >=}, {@code <=}, {@code <>}
     * and {@code :=}.
     *
     * @param temp characters of the line being scanned; the cursor must be a valid index
     * @return the token type, or {@code null} when the character is not a known operator
     *         (the character is still consumed)
     */
    public static WordsType caculator(char[] temp) {
        sym = null;
        int len = length;
        length++;
        char c = temp[len];
        id = String.valueOf(c);
        switch (c) {
        case '+':
            sym = WordsType.SYM_PLUS;
            break;
        case '-':
            sym = WordsType.SYM_MINUS;
            break;
        case '*':
            sym = WordsType.SYM_TIMES;
            break;
        case '/':
            sym = WordsType.SYM_SLASH;
            break;
        case '(':
            sym = WordsType.SYM_LPAREN;
            break;
        case ')':
            sym = WordsType.SYM_RPAREN;
            break;
        case ',':
            sym = WordsType.SYM_COMMA;
            break;
        case ';':
            sym = WordsType.SYM_SEMICOLON;
            break;
        case '.':
            sym = WordsType.SYM_PERIOD;
            break;
        case '=':
            sym = WordsType.SYM_EQU;
            break;
        case ':':
            // A lone ':' is not a token; only ":=" is.
            if (nextCharIs(temp, len, '=')) {
                sym = twoCharToken(temp, len, WordsType.SYM_BECOMES);
            }
            break;
        case '>':
            if (nextCharIs(temp, len, '=')) {
                sym = twoCharToken(temp, len, WordsType.SYM_GEQ);
            } else {
                sym = WordsType.SYM_GTR; // also covers '>' as the last character
            }
            break;
        case '<':
            if (nextCharIs(temp, len, '=')) {
                sym = twoCharToken(temp, len, WordsType.SYM_LEQ);
            } else if (nextCharIs(temp, len, '>')) {
                sym = twoCharToken(temp, len, WordsType.SYM_NEQ);
            } else {
                sym = WordsType.SYM_LES; // also covers '<' as the last character
            }
            break;
        default:
            break;
        }
        return sym;
    }

    /** Tells whether the character after {@code index} exists and equals {@code expected}. */
    private static boolean nextCharIs(char[] temp, int index, char expected) {
        return index + 1 < temp.length && temp[index + 1] == expected;
    }

    /** Consumes the second character of a two-character token and returns {@code type}. */
    private static WordsType twoCharToken(char[] temp, int index, WordsType type) {
        length++;
        id += temp[index + 1];
        return type;
    }

    /**
     * Tells whether {@code a} is an ASCII letter.
     *
     * @param a character to test
     * @return {@code true} for 'a'..'z' and 'A'..'Z'
     */
    public static boolean isLetter(char a) {
        return (a >= 'a' && a <= 'z') || (a >= 'A' && a <= 'Z');
    }

    /**
     * Tells whether {@code a} is an ASCII decimal digit.
     *
     * @param a character to test
     * @return {@code true} for '0'..'9'
     */
    public static boolean isDigit(char a) {
        return a >= '0' && a <= '9';
    }

    /**
     * Classifies a word as a reserved word or an identifier. Matching is case-sensitive.
     *
     * @param str word to classify
     * @return the reserved word's token type, or {@code SYM_IDENTIFIER} for anything else
     *         (including {@code null})
     */
    public static WordsType reserve(String str) {
        WordsType keyword = KEYWORDS.get(str);
        return keyword != null ? keyword : WordsType.SYM_IDENTIFIER;
    }

    /**
     * Reads a text file into a list of lines using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the lines of the file, in order, without line terminators
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> getInputStream(String filename)
            throws IOException {
        List<String> list = new ArrayList<String>();
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)));
        try {
            String buffer;
            while ((buffer = br.readLine()) != null) {
                list.add(buffer);
            }
        } finally {
            br.close(); // release the file handle even when reading fails
        }
        return list;
    }

    /**
     * Tokenises one line and prints each token with its type, text and table position.
     *
     * @param str the line to scan
     */
    public static void output(String str) {
        length = 0; // restart the cursor: every line is scanned from its first character
        char[] chs = str.toCharArray();
        while (length < chs.length) {
            WordsType wt = analysis(chs, length);
            if (wt == null && id.length() == 0) {
                break; // only trailing whitespace was left on the line
            }
            Map<String, Integer> table;
            if (wt == WordsType.SYM_NUMBER) {
                table = numMap;
            } else if (wt == WordsType.SYM_IDENTIFIER) {
                table = idMap;
            } else {
                table = comMap;
            }
            System.out.println(wt + ":" + id + ",\t位置:" + table.get(id));
        }
    }

    /*
     * Example driver, kept disabled:
     *
     * public static void main(String[] args) throws IOException {
     *     List<String> list = getInputStream("ypf.txt");
     *     for (int i = 0; i < list.size(); i++) {
     *         System.out.println(list.get(i));
     *         output(list.get(i));
     *         System.out.println("*************");
     *     }
     * }
     */
}
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Hand-written lexical analyser for a small PL/0-style language.
 *
 * <p>The scanner is entirely static: {@link #analysis(char[], int)} recognises one token
 * starting at a given offset, stores its text in {@code id}, leaves {@link #length}
 * pointing just past it and records identifiers, numbers and reserved words in the
 * three symbol tables below. Because all state is static and unsynchronised, the class
 * is not safe for concurrent use.
 */
public class WordAnalysis {
    /** Scan cursor: after {@link #analysis(char[], int)} it is the index just past the token. */
    public static int length = 0;
    /** Type of the most recently recognised number or operator token. */
    static WordsType sym = WordsType.SYM_IDENTIFIER;
    /** Text of the most recently recognised token ("" when only whitespace was left). */
    static String id = "";
    /** Position that will be assigned to the next new identifier. */
    static Integer countID = 1;
    /** Position that will be assigned to the next new number. */
    static Integer countNUM = 1;
    /** Reserved words seen so far; the values are always null. */
    static Map<String, Integer> comMap = new HashMap<String, Integer>();
    /** Identifier table: identifier text to its 1-based position of first appearance. */
    static Map<String, Integer> idMap = new HashMap<String, Integer>();
    /** Number table: literal text to its 1-based position of first appearance. */
    static Map<String, Integer> numMap = new HashMap<String, Integer>();

    /** Token categories produced by the scanner. */
    public enum WordsType {
        SYM_IDENTIFIER, // identifier
        SYM_NUMBER, // numeric constant
        SYM_PLUS, // +
        SYM_MINUS, // -
        SYM_TIMES, // *
        SYM_SLASH, // /
        SYM_ODD, // odd
        SYM_EQU, // =
        SYM_NEQ, // <>
        SYM_LES, // <
        SYM_LEQ, // <=
        SYM_GTR, // >
        SYM_GEQ, // >=
        SYM_LPAREN, // (
        SYM_RPAREN, // )
        SYM_COMMA, // ,
        SYM_SEMICOLON, // ;
        SYM_PERIOD, // .
        SYM_BECOMES, // :=
        SYM_BEGIN, // begin
        SYM_END, // end
        SYM_IF, // if
        SYM_THEN, // then
        SYM_WHILE, // while
        SYM_DO, // do
        SYM_CONST, // const
        SYM_VAR, // var
        SYM_CALL, // call
        SYM_PROCEDURE; // procedure

        /**
         * Returns the short spelling of an expression token ("id", "num", an operator or a
         * parenthesis). This is an overload taking the type as an argument; it does not
         * override {@link Object#toString()}.
         *
         * @param wt token type to spell
         * @return the spelling, or "" for every type without one
         */
        public String toString(WordsType wt) {
            String word = "";
            switch (wt) {
            case SYM_IDENTIFIER:
                word = "id";
                break;
            case SYM_PLUS:
                word = "+";
                break;
            case SYM_TIMES:
                word = "*";
                break;
            case SYM_MINUS:
                word = "-";
                break;
            case SYM_SLASH:
                word = "/";
                break;
            case SYM_LPAREN:
                word = "(";
                break;
            case SYM_RPAREN:
                word = ")";
                break;
            case SYM_NUMBER:
                word = "num";
                break;
            default:
                break;
            }
            return word;
        }
    }

    /** Reserved words and the token type each one maps to. */
    private static final Map<String, WordsType> KEYWORDS = new HashMap<String, WordsType>();
    static {
        KEYWORDS.put("begin", WordsType.SYM_BEGIN);
        KEYWORDS.put("end", WordsType.SYM_END);
        KEYWORDS.put("const", WordsType.SYM_CONST);
        KEYWORDS.put("if", WordsType.SYM_IF);
        KEYWORDS.put("then", WordsType.SYM_THEN);
        KEYWORDS.put("while", WordsType.SYM_WHILE);
        KEYWORDS.put("do", WordsType.SYM_DO);
        KEYWORDS.put("var", WordsType.SYM_VAR);
        KEYWORDS.put("procedure", WordsType.SYM_PROCEDURE);
        KEYWORDS.put("odd", WordsType.SYM_ODD);
        KEYWORDS.put("call", WordsType.SYM_CALL);
    }

    /**
     * Recognises the next token of {@code temp}, starting the scan at index {@code len}.
     *
     * <p>Leading blanks and tabs are skipped. On return {@link #length} is the index just
     * past the token and {@code id} holds its text.
     *
     * @param temp characters of the line being scanned
     * @param len  index at which scanning starts
     * @return the token type; {@code null} for an unrecognised character, and also when
     *         nothing but whitespace is left (in that case {@code id} is set to "")
     */
    public static WordsType analysis(char[] temp, int len) {
        length = len;
        // Skip blanks and tabs, but never run past the end of the buffer.
        while (length < temp.length && (temp[length] == '\t' || temp[length] == ' ')) {
            length++;
        }
        if (length >= temp.length) {
            id = ""; // marks "no token" so callers do not see the previous token's text
            return null;
        }
        char first = temp[length];
        if (isLetter(first)) {
            return scanWord(temp);
        }
        if (isDigit(first)) {
            return scanNumber(temp);
        }
        return caculator(temp);
    }

    /** Scans a run of letters at the cursor and files it as a reserved word or identifier. */
    private static WordsType scanWord(char[] temp) {
        StringBuilder word = new StringBuilder();
        while (length < temp.length && isLetter(temp[length])) {
            word.append(temp[length]);
            length++;
        }
        id = word.toString();
        WordsType wt = reserve(id);
        if (wt == WordsType.SYM_IDENTIFIER) {
            // Register the identifier once, whether or not it is the last token of the line.
            if (!idMap.containsKey(id)) {
                idMap.put(id, countID);
                countID++;
            }
        } else {
            comMap.put(id, null);
        }
        return wt;
    }

    /** Scans a run of digits at the cursor and registers the literal in the number table. */
    private static WordsType scanNumber(char[] temp) {
        StringBuilder digits = new StringBuilder();
        while (length < temp.length && isDigit(temp[length])) {
            digits.append(temp[length]);
            length++;
        }
        id = digits.toString();
        sym = WordsType.SYM_NUMBER;
        if (!numMap.containsKey(id)) {
            numMap.put(id, countNUM);
            countNUM++;
        }
        return sym;
    }

    /**
     * Recognises the operator or punctuation token at the current cursor {@link #length}.
     *
     * <p>The cursor is advanced past the token (one or two characters) and {@code id} is
     * set to its text. The two-character tokens are {@code >=}, {@code <=}, {@code <>}
     * and {@code :=}.
     *
     * @param temp characters of the line being scanned; the cursor must be a valid index
     * @return the token type, or {@code null} when the character is not a known operator
     *         (the character is still consumed)
     */
    public static WordsType caculator(char[] temp) {
        sym = null;
        int len = length;
        length++;
        char c = temp[len];
        id = String.valueOf(c);
        switch (c) {
        case '+':
            sym = WordsType.SYM_PLUS;
            break;
        case '-':
            sym = WordsType.SYM_MINUS;
            break;
        case '*':
            sym = WordsType.SYM_TIMES;
            break;
        case '/':
            sym = WordsType.SYM_SLASH;
            break;
        case '(':
            sym = WordsType.SYM_LPAREN;
            break;
        case ')':
            sym = WordsType.SYM_RPAREN;
            break;
        case ',':
            sym = WordsType.SYM_COMMA;
            break;
        case ';':
            sym = WordsType.SYM_SEMICOLON;
            break;
        case '.':
            sym = WordsType.SYM_PERIOD;
            break;
        case '=':
            sym = WordsType.SYM_EQU;
            break;
        case ':':
            // A lone ':' is not a token; only ":=" is.
            if (nextCharIs(temp, len, '=')) {
                sym = twoCharToken(temp, len, WordsType.SYM_BECOMES);
            }
            break;
        case '>':
            if (nextCharIs(temp, len, '=')) {
                sym = twoCharToken(temp, len, WordsType.SYM_GEQ);
            } else {
                sym = WordsType.SYM_GTR; // also covers '>' as the last character
            }
            break;
        case '<':
            if (nextCharIs(temp, len, '=')) {
                sym = twoCharToken(temp, len, WordsType.SYM_LEQ);
            } else if (nextCharIs(temp, len, '>')) {
                sym = twoCharToken(temp, len, WordsType.SYM_NEQ);
            } else {
                sym = WordsType.SYM_LES; // also covers '<' as the last character
            }
            break;
        default:
            break;
        }
        return sym;
    }

    /** Tells whether the character after {@code index} exists and equals {@code expected}. */
    private static boolean nextCharIs(char[] temp, int index, char expected) {
        return index + 1 < temp.length && temp[index + 1] == expected;
    }

    /** Consumes the second character of a two-character token and returns {@code type}. */
    private static WordsType twoCharToken(char[] temp, int index, WordsType type) {
        length++;
        id += temp[index + 1];
        return type;
    }

    /**
     * Tells whether {@code a} is an ASCII letter.
     *
     * @param a character to test
     * @return {@code true} for 'a'..'z' and 'A'..'Z'
     */
    public static boolean isLetter(char a) {
        return (a >= 'a' && a <= 'z') || (a >= 'A' && a <= 'Z');
    }

    /**
     * Tells whether {@code a} is an ASCII decimal digit.
     *
     * @param a character to test
     * @return {@code true} for '0'..'9'
     */
    public static boolean isDigit(char a) {
        return a >= '0' && a <= '9';
    }

    /**
     * Classifies a word as a reserved word or an identifier. Matching is case-sensitive.
     *
     * @param str word to classify
     * @return the reserved word's token type, or {@code SYM_IDENTIFIER} for anything else
     *         (including {@code null})
     */
    public static WordsType reserve(String str) {
        WordsType keyword = KEYWORDS.get(str);
        return keyword != null ? keyword : WordsType.SYM_IDENTIFIER;
    }

    /**
     * Reads a text file into a list of lines using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the lines of the file, in order, without line terminators
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> getInputStream(String filename)
            throws IOException {
        List<String> list = new ArrayList<String>();
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)));
        try {
            String buffer;
            while ((buffer = br.readLine()) != null) {
                list.add(buffer);
            }
        } finally {
            br.close(); // release the file handle even when reading fails
        }
        return list;
    }

    /**
     * Tokenises one line and prints each token with its type, text and table position.
     *
     * @param str the line to scan
     */
    public static void output(String str) {
        length = 0; // restart the cursor: every line is scanned from its first character
        char[] chs = str.toCharArray();
        while (length < chs.length) {
            WordsType wt = analysis(chs, length);
            if (wt == null && id.length() == 0) {
                break; // only trailing whitespace was left on the line
            }
            Map<String, Integer> table;
            if (wt == WordsType.SYM_NUMBER) {
                table = numMap;
            } else if (wt == WordsType.SYM_IDENTIFIER) {
                table = idMap;
            } else {
                table = comMap;
            }
            System.out.println(wt + ":" + id + ",\t位置:" + table.get(id));
        }
    }

    /*
     * Example driver, kept disabled:
     *
     * public static void main(String[] args) throws IOException {
     *     List<String> list = getInputStream("ypf.txt");
     *     for (int i = 0; i < list.size(); i++) {
     *         System.out.println(list.get(i));
     *         output(list.get(i));
     *         System.out.println("*************");
     *     }
     * }
     */
}