Java百万级数据现在找到比较好的办法就是通过JDBC批量去处理,当达到某一个量级的时候统一提交。mysql和oracle的最佳量级有所差异:mysql可以10万条提交一次,但是oracle单次批量不能超过65000,否则就会出现数据丢失,实际插入的要比预期的少。如果将oracle的量级设置为10万的话,插入100万数据最终进库的只有344640条左右(不会超过35万)
引文:oracle量级借鉴
下面分别介绍MYSQL和ORACLE的处理方式
一、mysql插入百万级数据
1、实现类
public class BigDataDispose implements Runnable {

    /** Number of transactions committed; total rows = COMMIT_TIMES * ROWS_PER_COMMIT. */
    private static final int COMMIT_TIMES = 10;
    /** Rows concatenated into one multi-row INSERT and committed together. */
    private static final int ROWS_PER_COMMIT = 100000;

    /**
     * Inserts one million rows into MySQL by building a single multi-row
     * {@code INSERT ... VALUES (...),(...),...} statement per batch and
     * committing each batch inside a manually managed transaction.
     * Connection and statement are released via try-with-resources.
     */
    @Override
    public void run() {
        String url = "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8";
        String driver = "com.mysql.jdbc.Driver";
        String userName = "root";
        String password = "root";
        // Shared SQL prefix; each batch appends its own VALUES list.
        String prefix = "INSERT INTO finance(id, bill, money_num, amount_in_words, pay_time, remark, create_user, create_time) VALUES";
        long begin = System.currentTimeMillis();
        try {
            Class.forName(driver);
        } catch (ClassNotFoundException e) {
            // Fail fast: without the driver no connection can be obtained,
            // so continuing would only produce an NPE later.
            e.printStackTrace();
            return;
        }
        // try-with-resources guarantees the statement and connection are
        // closed even when an insert fails (the original leaked both).
        try (Connection conn = DriverManager.getConnection(url, userName, password);
             Statement st = conn.createStatement()) {
            // Manual transaction control: commit once per batch.
            conn.setAutoCommit(false);
            for (int i = 1; i <= COMMIT_TIMES; i++) {
                StringBuilder suffix = new StringBuilder();
                for (int j = 1; j <= ROWS_PER_COMMIT; j++) {
                    // Bill number: "DJ" + (i*j) left-padded with zeros to 7 digits.
                    String num = "DJ" + String.format("%07d", i * j);
                    suffix.append("('").append(UUID.randomUUID().toString())
                          .append("', '").append(num)
                          .append("', 1000, '1000', '2017-10-30', '订单,哈哈', '李四', '2017-10-31 21:38:30'),");
                }
                // Drop the trailing comma and run the complete multi-row INSERT.
                // A plain Statement is used here: PreparedStatement.addBatch(String)
                // is illegal per the JDBC spec, and there are no ? parameters.
                String sql = prefix + suffix.substring(0, suffix.length() - 1);
                st.execute(sql);
                conn.commit();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("100万条数据插入花费时间 : " + (end - begin) / 1000 + " s"+" 插入完成");
    }
}
2、测试
public class Test {
public static void main(String[] args) {
BigDataDispose bdd = new BigDataDispose();
Thread thread = new Thread(bdd);
thread.start();
}
}
二、oracle插入百万级数据
1、实现类(驱动使用jdbc14)
public class BigDataDispose implements Runnable {

    /** Number of transactions committed; total rows = COMMIT_TIMES * ROWS_PER_COMMIT. */
    private static final int COMMIT_TIMES = 10;
    /** Rows accumulated per transaction. */
    private static final int ROWS_PER_COMMIT = 100000;
    /**
     * Rows per executeBatch() call. The article itself observes that Oracle
     * silently drops rows when a single batch exceeds ~65000, so the batch is
     * flushed in chunks safely below that limit instead of all 100000 at once.
     */
    private static final int BATCH_FLUSH_SIZE = 50000;

    /**
     * Inserts one million rows into Oracle through a parameterized
     * PreparedStatement batch, flushing the batch below Oracle's safe size
     * and committing every ROWS_PER_COMMIT rows.
     */
    @Override
    public void run() {
        String url = "jdbc:oracle:thin:@127.0.0.1:1521:ORCL";
        String driver = "oracle.jdbc.driver.OracleDriver";
        String userName = "testInfo";
        String password = "root";
        String prefix = "INSERT INTO finance(id, bill, money_num, amount_in_words, pay_time, remark, create_user, create_time) VALUES(?, ?, ?, ?,?, ?, ?, ?)";
        long begin = System.currentTimeMillis();
        try {
            Class.forName(driver);
        } catch (ClassNotFoundException e) {
            // Fail fast: without the driver no connection can be obtained.
            e.printStackTrace();
            return;
        }
        // try-with-resources guarantees the statement and connection are
        // closed even when an insert fails (the original leaked both).
        try (Connection conn = DriverManager.getConnection(url, userName, password);
             PreparedStatement pst = conn.prepareStatement(prefix)) {
            // Manual transaction control: commit once per ROWS_PER_COMMIT rows.
            conn.setAutoCommit(false);
            for (int i = 1; i <= COMMIT_TIMES; i++) {
                for (int j = 1; j <= ROWS_PER_COMMIT; j++) {
                    // Bill number: "DJ" + (i*j) left-padded with zeros to 7 digits.
                    String num = "DJ" + String.format("%07d", i * j);
                    pst.setString(1, UUID.randomUUID().toString());
                    pst.setString(2, num);
                    pst.setDouble(3, 1000.5);
                    pst.setString(4, "一千元五角");
                    pst.setDate(5, new java.sql.Date(System.currentTimeMillis()));
                    pst.setString(6, "批量插入数据,哈哈");
                    pst.setString(7, "你大爷");
                    pst.setTimestamp(8, new java.sql.Timestamp(System.currentTimeMillis()));
                    pst.addBatch();
                    // Flush before the batch grows past Oracle's safe size.
                    if (j % BATCH_FLUSH_SIZE == 0) {
                        pst.executeBatch();
                    }
                }
                // Flush any remainder, then commit the transaction.
                pst.executeBatch();
                conn.commit();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("100万条数据插入花费时间 : " + (end - begin) / 1000 + " s"+" 插入完成");
    }
}
2、测试
public class Test {
public static void main(String[] args) {
BigDataDispose bdd = new BigDataDispose();
Thread thread = new Thread(bdd);
thread.start();
}
}
mysql和oracle的不同之处就在于拼接SQL的方式,因为oracle不支持mysql的那种方式
哪位大神若是有更好的优化方式,欢迎在下面评论