/*
 * Scenario: periodically read data files (CSV format) that are updated in real
 * time on a file server.
 *   1. Read every file under the configured root path, recursing into all
 *      subfolders.
 *   2. Parse the CSV data.
 *   3. Insert the rows into the database using multiple threads
 *      (roughly 500,000 rows per minute on an ordinary desktop PC).
 */
import com.univocity.parsers.common.processor.RowListProcessor;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import java.io.File;
import java.sql.*;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.*;
import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
public class ReadCSVFile {

    /** Root folder to scan; all subfolders are scanned recursively. */
    public static String root = "";
    /** Database host and port, e.g. 127.0.0.1:3306. */
    public static String db_ip_port = "";
    /** Database (schema) name. */
    public static String db_name = "";
    /** Database user name. */
    public static String db_user = "";
    /** Database password. */
    public static String db_pwd = "";
    /** Interval between scheduled runs, in milliseconds. */
    public static int time = 0;

    /** Number of rows handed to each worker thread (also the chunk size). */
    private static final int THREAD_COUNT = 2000;
    /** Rows accumulated per JDBC batch before executeBatch()/commit(). */
    private static final int BATCH_SIZE = 1000;

    /**
     * Entry point. Reads all configuration from -D system properties
     * (root, db_ip_port, db_name, db_user, db_pwd, time) and schedules
     * {@link #dojob()} to run immediately and then every {@code time} ms.
     *
     * @throws SQLException declared for signature compatibility; dojob()
     *                      exceptions are caught and logged inside the task
     */
    public static void main(String[] args) throws SQLException {
        root = System.getProperty("root");
        db_ip_port = System.getProperty("db_ip_port");
        db_name = System.getProperty("db_name");
        db_user = System.getProperty("db_user");
        db_pwd = System.getProperty("db_pwd");
        time = Integer.parseInt(System.getProperty("time"));

        // The task that each scheduled tick executes.
        Runnable runnable = new Runnable() {
            @Override
            public void run() {
                System.out.println("-----开始读取-----");
                try {
                    dojob();
                } catch (SQLException throwables) {
                    throwables.printStackTrace();
                }
            }
        };
        ScheduledExecutorService ses = newSingleThreadScheduledExecutor();
        // Run immediately, then repeat every `time` milliseconds
        // (the original comment claimed "every 5 seconds", which was wrong).
        ses.scheduleAtFixedRate(runnable, 0, time, TimeUnit.MILLISECONDS);
    }

    /**
     * Opens a new MySQL connection to the given schema.
     *
     * @param data schema name to connect to
     * @param user database user
     * @param pwd  database password
     * @return an open connection, or {@code null} if the driver is missing or
     *         the connection attempt fails (the error is printed)
     */
    public static Connection getConnection(String data, String user, String pwd) {
        Connection conn = null;
        try {
            Class.forName("com.mysql.cj.jdbc.Driver");
            conn = DriverManager.getConnection(
                    "jdbc:mysql://" + db_ip_port + "/" + data
                            + "?serverTimezone=UTC&characterEncoding=UTF-8",
                    user, pwd);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Batch-inserts parsed CSV rows into the table selected by {@code tb}.
     * The connection is expected to have autoCommit disabled; a commit is
     * issued after every flushed batch.
     *
     * @param conn    open connection with autoCommit off
     * @param strList rows to insert; each array maps positionally to the
     *                15 placeholders of the INSERT statement
     * @param list    unused; kept only for signature compatibility
     * @param tb      logical table key; only "jc" is currently handled
     * @return true on success, false if any error occurred (printed)
     */
    public static boolean insertInto(Connection conn, List<String[]> strList, List<String[]> list, String tb)
            throws NumberFormatException, SQLException {
        try {
            if ("jc".equals(tb)) {
                String sql = "INSERT INTO cd_jc (`aa`, `bb`, `cc`, `dd`, `ee`, `ff`, `gg`, `hh`, `ii`, `jj`, `kk`, `ll`, `nn`, `mm`, `oo`) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);";
                // try-with-resources: the original leaked the PreparedStatement.
                try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
                    for (int j = 0; j < strList.size(); j++) {
                        String[] str = strList.get(j);
                        for (int i = 0; i < str.length; i++) {
                            pstmt.setString(i + 1, str[i]);
                        }
                        pstmt.addBatch();
                        // Flush every BATCH_SIZE rows and after the final row.
                        // BUG FIX: the original tested j == str.length-1 (the
                        // COLUMN count of the current row, 15) instead of the
                        // row count, so a final partial batch smaller than
                        // 1000 rows was never executed or committed.
                        if ((j + 1) % BATCH_SIZE == 0 || j == strList.size() - 1) {
                            pstmt.executeBatch();
                            conn.commit();
                            pstmt.clearBatch();
                        }
                    }
                }
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Returns the file names already recorded in chengdu_insert_time, i.e.
     * files imported by a previous run.
     *
     * @return the list of recorded names; empty (never null) on error
     */
    public static List<String> queryInfo() throws NumberFormatException, SQLException {
        List<String> list = new ArrayList<>();
        // BUG FIX: the original called rs.next() a second time inside the loop
        // body, silently skipping every other result row — so half the files
        // were re-imported on each pass. try-with-resources also replaces a
        // finally block that NPE'd on conn.close() when getConnection failed.
        try (Connection conn = getConnection(db_name, db_user, db_pwd);
             PreparedStatement pstmt = conn.prepareStatement("SELECT name from chengdu_insert_time");
             ResultSet rs = pstmt.executeQuery()) {
            while (rs.next()) {
                list.add(rs.getString(1));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * One scheduled pass: scan the root folder, parse every matching CSV file
     * that has not been imported yet, insert its rows via the thread pool, and
     * record the file in chengdu_insert_time so it is skipped next time.
     */
    public static void dojob() throws SQLException {
        List<String> file_list = new ArrayList<>();
        ergodic(new File(root), file_list);
        List<String> idList = queryInfo();
        for (String file_str : file_list) {
            if (idList.contains(file_str)) {
                continue; // already imported on a previous pass
            }
            if (file_str.contains("chengdu_jc_")) {
                // BUG FIX: the original instantiated an undefined class
                // UnivocityReadCsv; parseCSV is declared on this class.
                List<String[]> list_jc = new ReadCSVFile().parseCSV(file_str);
                // Insert the parsed rows on worker threads.
                executeThreadPool(list_jc, "jc");
            }
            // Mark the file as processed (even non-"jc" files, matching the
            // original behavior) so it is not revisited.
            try (Connection conn = getConnection(db_name, db_user, db_pwd)) {
                conn.setAutoCommit(false);
                try (PreparedStatement pstmt = conn.prepareStatement(
                        "INSERT INTO chengdu_insert_time (name,time) VALUES (?,?)")) {
                    pstmt.setString(1, file_str);
                    pstmt.setString(2, new Date().toString());
                    pstmt.executeUpdate();
                    conn.commit();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        System.out.println("终于插入完了");
    }

    /**
     * Parses one CSV file with univocity-parsers. The first line is consumed
     * as the header row and is NOT included in the returned data rows.
     *
     * @param fileName path of the CSV file to parse
     * @return the data rows, one String[] per line
     */
    public List<String[]> parseCSV(String fileName) {
        CsvParserSettings parserSettings = new CsvParserSettings();
        // Auto-detect the line separator used by the input file.
        // (The original enabled this twice; once is enough.)
        parserSettings.setLineSeparatorDetectionEnabled(true);
        // Collect every parsed row into an in-memory list.
        RowListProcessor rowListProcessor = new RowListProcessor();
        parserSettings.setProcessor(rowListProcessor);
        // Treat the first parsed line as the column headers.
        parserSettings.setHeaderExtractionEnabled(true);
        CsvParser parser = new CsvParser(parserSettings);
        parser.parse(new File(fileName));
        return rowListProcessor.getRows();
    }

    /**
     * Recursively collects the paths of all regular files under {@code file}
     * into {@code resultFileName}.
     *
     * @param file           directory (or file) to walk
     * @param resultFileName accumulator; file paths are appended to it
     * @return the same {@code resultFileName} list, for chaining
     */
    static List<String> ergodic(File file, List<String> resultFileName) {
        File[] files = file.listFiles();
        if (files == null) {
            // Not a directory, or the directory is unreadable.
            return resultFileName;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                ergodic(f, resultFileName); // recurse into subdirectory
            } else {
                resultFileName.add(f.getPath());
            }
        }
        return resultFileName;
    }

    /**
     * Splits the parsed rows into chunks of {@link #THREAD_COUNT} and inserts
     * each chunk on a worker thread, blocking until every chunk has finished.
     *
     * BUG FIXES vs. the original:
     * - One shared java.sql.Connection was used by all workers concurrently;
     *   Connection is not thread-safe, so each worker now opens (and closes)
     *   its own connection. The shared connection was also never closed.
     * - The pool created one thread per chunk (unbounded for large files);
     *   it is now capped relative to available processors.
     * - count.countDown() now runs in a finally block, so a worker failure can
     *   no longer deadlock count.await().
     *
     * @param pmsProductList rows to insert; null/empty is a no-op
     * @param type           logical table key forwarded to insertInto ("jc")
     */
    public static void executeThreadPool(List<String[]> pmsProductList, String type) throws SQLException {
        if (pmsProductList == null || pmsProductList.isEmpty()) {
            return;
        }
        long start = System.currentTimeMillis();
        int round = pmsProductList.size() / THREAD_COUNT + 1;
        final CountDownLatch count = new CountDownLatch(round);
        ExecutorService executor = Executors.newFixedThreadPool(
                Math.min(round, Runtime.getRuntime().availableProcessors() * 2));
        for (int i = 0; i < round; i++) {
            int startLen = i * THREAD_COUNT;
            int endLen = Math.min((i + 1) * THREAD_COUNT, pmsProductList.size());
            // Copy the chunk: subList is only a view of the backing list.
            final List<String[]> threadList =
                    new ArrayList<>(pmsProductList.subList(startLen, endLen));
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    // Per-thread connection; autoCommit off so insertInto
                    // controls commits per batch.
                    try (Connection conn = getConnection(db_name, db_user, db_pwd)) {
                        conn.setAutoCommit(false);
                        if ("jc".equals(type)) {
                            insertInto(conn, threadList, null, "jc");
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    } finally {
                        count.countDown();
                    }
                }
            });
        }
        try {
            // Wait for every chunk to finish before returning to the caller.
            count.await();
            long end = System.currentTimeMillis();
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing it.
            Thread.currentThread().interrupt();
        } finally {
            // Orderly shutdown: previously submitted tasks complete.
            executor.shutdown();
        }
    }
}