public class AttFromDateFilter {
/**
 * Shared SQL query helper: runs the given query and maps every returned row
 * to an {@link AttachmentBean}.
 *
 * <p>Each row is read through the column labels {@code attName},
 * {@code file_download_url} and {@code emailDate}, all as strings.
 *
 * <p>The result set is closed before this method returns, including when
 * reading a row fails.
 * NOTE(review): the statement/connection behind the result set are managed by
 * {@code JDBCHelper}; whether they are released is not visible here - confirm.
 *
 * @param sql complete SQL text to execute; it is handed to
 *            {@code JDBCHelper.executeQuery} with {@code null} as second argument
 * @return one bean per row, in result-set order; empty when the query yields no rows
 * @throws SQLException if reading from the result set fails
 */
public List<AttachmentBean> commonLst(String sql) throws SQLException{
    JDBCHelper jdbcHelper = JDBCHelper.getInstance();
    List<AttachmentBean> paramsList = new ArrayList<AttachmentBean>();
    // try-with-resources guarantees the ResultSet is closed on every exit path.
    try (ResultSet rs = jdbcHelper.executeQuery(sql, null)) {
        while (rs.next()) {
            AttachmentBean attBean = new AttachmentBean();
            attBean.setAttname(rs.getString("attName"));
            attBean.setFile_download_url(rs.getString("file_download_url"));
            attBean.setEmailDate(rs.getString("emailDate"));
            paramsList.add(attBean);
        }
    }
    return paramsList;
}
/**
 * Runs the local Spark job: records the Spark application id in the
 * {@code jobinfo} table, parses the mail files under the configured directory
 * into {@code EmailBean}s, and then, for each look-back window of 30, 90, 180
 * and 360 days, queries the {@code attachment_2} table for attachments whose
 * {@code emailDate} lies between (mail date - window) and the mail date,
 * printing the date and download URL of every match.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
            .setMaster("local[2]")
            .setAppName("AttFromDateFilter")
            .set("spark.local.dir","D://sparkDir")
            .set("spark.shuffle.consolidateFiles", "true");
    @SuppressWarnings("resource")
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        final String caseID ="14";
        final String caseName ="缉私";
        final String evID = "162";
        // Read the local mail files (replace this path with the real mail location in production).
        JavaPairRDD<String, PortableDataStream> lines = sc.binaryFiles("file:///D:/emaildata/6/6c7cdaf9e4f/*");

        // At program start, store the Spark application id in the jobinfo table.
        String applicationId = sc.sc().applicationId();
        List<Object[]> appInfo = new ArrayList<>();
        try {
            appInfo.add(new Object[]{evID,caseID,applicationId});
            EmailInfoDao.insertJobInfo(appInfo,"jobinfo");
        } catch (Exception e1) {
            // Best effort: a failed bookkeeping insert must not abort the job.
            e1.printStackTrace();
        }

        // Read the mail content and parse every mail of a partition.
        JavaRDD<EmailBean> beansFirst = lines.mapPartitions(new FlatMapFunction<Iterator<Tuple2<String,PortableDataStream>>, EmailBean>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Iterable<EmailBean> call(
                    Iterator<Tuple2<String, PortableDataStream>> arg0)
                    throws Exception {
                List<EmailBean> list = new LinkedList<EmailBean>();
                RXParseEml parse = new RXParseEml();
                while (arg0.hasNext()) {
                    Tuple2<String,PortableDataStream> tuple = arg0.next();
                    byte[] bytes = tuple._2.toArray();
                    // The parser works on a File, so spill the mail bytes to a temporary .eml file.
                    File a = File.createTempFile("temp", ".eml");
                    try {
                        try (FileOutputStream out = new FileOutputStream(a)) {
                            out.write(bytes);
                        }
                        // Parse the mail.
                        EmailBean emaiBean = parse.parseBean(a, tuple._1,caseID,caseName,evID);
                        // Keep only mails that have both a sender and a recipient.
                        if (emaiBean != null
                                && emaiBean.getFromWho() != null
                                && !emaiBean.getFromWho().equals("")
                                && emaiBean.getToWho() != null
                                && !emaiBean.getToWho().equals("")) {
                            list.add(emaiBean);
                        }
                    } finally {
                        // Always remove the temp file, also for rejected mails and parse failures.
                        a.delete();
                    }
                }
                return list;
            }
        });

        // Persist the parsed mails: they are traversed once per look-back window below.
        JavaRDD<EmailBean> beans = beansFirst.persist(StorageLevel.MEMORY_AND_DISK());

        // Look-back windows in days: one month, three months, half a year, one year.
        // NOTE(review): "one year" is 360 days (12 x 30), kept as in the original code.
        final int[] lookBackDays = {30, 90, 180, 360};
        for (final int days : lookBackDays) {
            beans.foreach(new VoidFunction<EmailBean>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void call(EmailBean arg0) throws Exception {
                    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
                    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    Date mailDate = format.parse(arg0.getDate());
                    // Use the re-formatted date, not the raw mail string, in the SQL text:
                    // parse() tolerates trailing characters, which would otherwise be
                    // concatenated into the statement unchecked.
                    String attDate = format.format(mailDate);
                    Calendar c = Calendar.getInstance();
                    c.setTime(mailDate);
                    c.add(Calendar.DATE, -days);
                    String fromDate = format.format(c.getTime());
                    AttFromDateFilter attfdf = new AttFromDateFilter();
                    String tableName = "attachment_2"; // attachment table in MySQL
                    String sql = "SELECT attname,emailDate,file_download_url FROM "+ tableName+" WHERE emailDate BETWEEN '"+fromDate+"' and '"+attDate+"'";
                    List<AttachmentBean> attBeanList = attfdf.commonLst(sql);
                    if (attBeanList != null) {
                        for (AttachmentBean aa : attBeanList) {
                            System.out.println(aa.getEmailDate()+" "+aa.getFile_download_url());
                        }
                    }
                }
            });
        }
    } finally {
        // Release the Spark context even when the job fails.
        sc.stop();
    }
}
/**
 * Shared SQL query helper: runs the given query and maps every returned row
 * to an {@link AttachmentBean}.
 *
 * <p>Each row is read through the column labels {@code attName},
 * {@code file_download_url} and {@code emailDate}, all as strings.
 *
 * <p>The result set is closed before this method returns, including when
 * reading a row fails.
 * NOTE(review): the statement/connection behind the result set are managed by
 * {@code JDBCHelper}; whether they are released is not visible here - confirm.
 *
 * @param sql complete SQL text to execute; it is handed to
 *            {@code JDBCHelper.executeQuery} with {@code null} as second argument
 * @return one bean per row, in result-set order; empty when the query yields no rows
 * @throws SQLException if reading from the result set fails
 */
public List<AttachmentBean> commonLst(String sql) throws SQLException{
    JDBCHelper jdbcHelper = JDBCHelper.getInstance();
    List<AttachmentBean> paramsList = new ArrayList<AttachmentBean>();
    // try-with-resources guarantees the ResultSet is closed on every exit path.
    try (ResultSet rs = jdbcHelper.executeQuery(sql, null)) {
        while (rs.next()) {
            AttachmentBean attBean = new AttachmentBean();
            attBean.setAttname(rs.getString("attName"));
            attBean.setFile_download_url(rs.getString("file_download_url"));
            attBean.setEmailDate(rs.getString("emailDate"));
            paramsList.add(attBean);
        }
    }
    return paramsList;
}
/**
 * Runs the local Spark job: records the Spark application id in the
 * {@code jobinfo} table, parses the mail files under the configured directory
 * into {@code EmailBean}s, and then, for each look-back window of 30, 90, 180
 * and 360 days, queries the {@code attachment_2} table for attachments whose
 * {@code emailDate} lies between (mail date - window) and the mail date,
 * printing the date and download URL of every match.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
            .setMaster("local[2]")
            .setAppName("AttFromDateFilter")
            .set("spark.local.dir","D://sparkDir")
            .set("spark.shuffle.consolidateFiles", "true");
    @SuppressWarnings("resource")
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        final String caseID ="14";
        final String caseName ="缉私";
        final String evID = "162";
        // Read the local mail files (replace this path with the real mail location in production).
        JavaPairRDD<String, PortableDataStream> lines = sc.binaryFiles("file:///D:/emaildata/6/6c7cdaf9e4f/*");

        // At program start, store the Spark application id in the jobinfo table.
        String applicationId = sc.sc().applicationId();
        List<Object[]> appInfo = new ArrayList<>();
        try {
            appInfo.add(new Object[]{evID,caseID,applicationId});
            EmailInfoDao.insertJobInfo(appInfo,"jobinfo");
        } catch (Exception e1) {
            // Best effort: a failed bookkeeping insert must not abort the job.
            e1.printStackTrace();
        }

        // Read the mail content and parse every mail of a partition.
        JavaRDD<EmailBean> beansFirst = lines.mapPartitions(new FlatMapFunction<Iterator<Tuple2<String,PortableDataStream>>, EmailBean>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Iterable<EmailBean> call(
                    Iterator<Tuple2<String, PortableDataStream>> arg0)
                    throws Exception {
                List<EmailBean> list = new LinkedList<EmailBean>();
                RXParseEml parse = new RXParseEml();
                while (arg0.hasNext()) {
                    Tuple2<String,PortableDataStream> tuple = arg0.next();
                    byte[] bytes = tuple._2.toArray();
                    // The parser works on a File, so spill the mail bytes to a temporary .eml file.
                    File a = File.createTempFile("temp", ".eml");
                    try {
                        try (FileOutputStream out = new FileOutputStream(a)) {
                            out.write(bytes);
                        }
                        // Parse the mail.
                        EmailBean emaiBean = parse.parseBean(a, tuple._1,caseID,caseName,evID);
                        // Keep only mails that have both a sender and a recipient.
                        if (emaiBean != null
                                && emaiBean.getFromWho() != null
                                && !emaiBean.getFromWho().equals("")
                                && emaiBean.getToWho() != null
                                && !emaiBean.getToWho().equals("")) {
                            list.add(emaiBean);
                        }
                    } finally {
                        // Always remove the temp file, also for rejected mails and parse failures.
                        a.delete();
                    }
                }
                return list;
            }
        });

        // Persist the parsed mails: they are traversed once per look-back window below.
        JavaRDD<EmailBean> beans = beansFirst.persist(StorageLevel.MEMORY_AND_DISK());

        // Look-back windows in days: one month, three months, half a year, one year.
        // NOTE(review): "one year" is 360 days (12 x 30), kept as in the original code.
        final int[] lookBackDays = {30, 90, 180, 360};
        for (final int days : lookBackDays) {
            beans.foreach(new VoidFunction<EmailBean>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void call(EmailBean arg0) throws Exception {
                    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
                    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    Date mailDate = format.parse(arg0.getDate());
                    // Use the re-formatted date, not the raw mail string, in the SQL text:
                    // parse() tolerates trailing characters, which would otherwise be
                    // concatenated into the statement unchecked.
                    String attDate = format.format(mailDate);
                    Calendar c = Calendar.getInstance();
                    c.setTime(mailDate);
                    c.add(Calendar.DATE, -days);
                    String fromDate = format.format(c.getTime());
                    AttFromDateFilter attfdf = new AttFromDateFilter();
                    String tableName = "attachment_2"; // attachment table in MySQL
                    String sql = "SELECT attname,emailDate,file_download_url FROM "+ tableName+" WHERE emailDate BETWEEN '"+fromDate+"' and '"+attDate+"'";
                    List<AttachmentBean> attBeanList = attfdf.commonLst(sql);
                    if (attBeanList != null) {
                        for (AttachmentBean aa : attBeanList) {
                            System.out.println(aa.getEmailDate()+" "+aa.getFile_download_url());
                        }
                    }
                }
            });
        }
    } finally {
        // Release the Spark context even when the job fails.
        sc.stop();
    }
}