需求:
使用java代码,将mysql建表语句转成hive建表语句。
先上正确的代码
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class ExcelToHive {
public static void main(String[] args) {
try {
FileInputStream file = new FileInputStream("input.xlsx");
Workbook workbook = new XSSFWorkbook(file);
Sheet sheet = workbook.getSheetAt(0);
for (Row row : sheet) {
Cell mysqlCell = row.getCell(0);
Cell hiveCell = row.createCell(1);
if (mysqlCell != null) {
String mysqlStatement = mysqlCell.getStringCellValue();
String hiveStatement = convertToHiveStatement(mysqlStatement);
hiveCell.setCellValue(hiveStatement);
}
}
FileOutputStream outFile = new FileOutputStream("output.xlsx");
workbook.write(outFile);
workbook.close();
file.close();
outFile.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static String convertToHiveStatement(String mysqlStatement) {
String hiveStatement = mysqlStatement;
// 替换数据类型
hiveStatement = hiveStatement.replaceAll("INT\\s*\\(\\s*\\d+\\s*\\)", "INT");
hiveStatement = hiveStatement.replaceAll("VARCHAR\\s*\\(\\s*\\d+\\s*\\)", "STRING");
hiveStatement = hiveStatement.replaceAll("DOUBLE", "DOUBLE");
hiveStatement = hiveStatement.replaceAll("TIMESTAMP\\s*NOT\\s*NULL\\s*DEFAULT\\s*CURRENT_TIMESTAMP", "TIMESTAMP");
// 替换其他语法
hiveStatement = hiveStatement.replace("`", "");
hiveStatement = hiveStatement.replace("DEFAULT CHARSET = utf8", "");
hiveStatement = hiveStatement.replace("ENGINE = INNODB", "");
hiveStatement = hiveStatement.replace("COMMENT = '", "COMMENT '");
// 添加 Hive 分区语句
hiveStatement += "\nPARTITIONED BY (part STRING, intime STRING);";
return hiveStatement;
}
}
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.FileInputStream;
import java.io.FileOutputStream;
public class ExcelToHive {
public static void main(String[] args) {
try {
// 读取Excel文件
FileInputStream file = new FileInputStream("input.xlsx");
Workbook workbook = new XSSFWorkbook(file);
Sheet sheet = workbook.getSheetAt(0);
// 循环遍历每一行
for (Row row : sheet) {
Cell mysqlCell = row.getCell(0);
Cell hiveCell = row.createCell(1);
if (mysqlCell != null) {
// 获取MySQL建表语句
String mysqlStatement = mysqlCell.getStringCellValue();
// 转换为Hive建表语句
String hiveStatement = convertToHiveStatement(mysqlStatement);
// 在Hive单元格中写入建表语句
hiveCell.setCellValue(hiveStatement);
}
}
// 保存修改后的Excel文件
FileOutputStream outFile = new FileOutputStream("output.xlsx");
workbook.write(outFile);
workbook.close();
file.close();
outFile.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static String convertToHiveStatement(String mysqlStatement) {
// 在此处实现将MySQL建表语句转换为Hive建表语句的逻辑
// 添加分区等操作
// 示例中直接将MySQL建表语句复制到Hive建表语句
return mysqlStatement;
}
}
可以发现转换的代码没有成功转换
后续多次测试发现是正则出了问题,容错率很低,需要给出示例。