一些常用处理的小方法(Java 后端代码及前端 JavaScript 代码)
1、去html文本的标签,换行等特殊字符,转为纯文本格式
// Reduce an HTML fragment to plain text by deleting character references and tags.
// Nothing is decoded: entities are removed, not translated into the characters they stand for.
String claimspath_result = claimspath
        // Drop character references: named (&lt; &gt; &nbsp;), decimal (&#160;) and hex (&#xA0;).
        .replaceAll("&(?:[a-zA-Z]{1,10}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});", "")
        // Drop opening tags, including tags that never get a closing counterpart.
        .replaceAll("<[a-zA-Z]+[1-9]?[^><]*>", "")
        // Drop closing tags.
        .replaceAll("</[a-zA-Z]+[1-9]?>", "");
2、计算某一日期与当前日期的差额
// Today's date taken from the current local date-time; the time-of-day part is discarded.
LocalDate endDate = LocalDateTime.now().toLocalDate();

// appDate is split on '.' and its first three pieces are read as year, month and day.
String appDate = resultKey.getString("appDate");
String[] dateParts = appDate.split("\\.");
LocalDate startDate = LocalDate.of(
        Integer.parseInt(dateParts[0]),
        Integer.parseInt(dateParts[1]),
        Integer.parseInt(dateParts[2]));

// Whole days from startDate up to endDate, widened to double.
double days = ChronoUnit.DAYS.between(startDate, endDate);
3、计算文章相似度(海明距离)
参考博客《海明距离》:https://blog.youkuaiyun.com/sinat_37239798/article/details/122893346
4、Java获取li标签内容(正则表达式+Jsoup)
(1)正则表达式只适合处理结构简单的标签,主要用到 Pattern 和 Matcher 两个类,下面用它获取 li 标签内的数据;但不推荐这种方式,建议都改成 Jsoup
// Placeholder input; substitute the HTML that should be scanned.
final String text = "获取的样式";
// Non-greedy capture of everything between an <li> and the next </li>.
final String regex = "<li>(.*?)</li>";
// Group 1 is the text captured inside each <li> element.
final int liContentGroup = 1;
List<String> liListNews = getContentByRegex(text, regex, liContentGroup);
/**
 * Collects one capture group from every match of {@code regex} in {@code html}.
 *
 * <p>The pattern is compiled with {@link Pattern#DOTALL}, so {@code .} also matches line
 * terminators and a match may span several lines.
 *
 * @param html  text to search; {@code null} is treated as "no content"
 * @param regex regular expression containing at least {@code index} capture groups
 * @param index capture group to extract from each match (0 is the whole match)
 * @return a new mutable list with the group's value for each match, in order of appearance;
 *         empty when nothing matches or {@code html} is {@code null}. An element is
 *         {@code null} when the group did not participate in that match.
 * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
 * @throws IndexOutOfBoundsException if a match is found and the pattern has no group {@code index}
 */
public static List<String> getContentByRegex(String html, String regex, int index) {
    List<String> list = new ArrayList<>();
    if (html == null) {
        // Nothing to scan; previously this surfaced as a NullPointerException from matcher().
        return list;
    }
    Matcher match = Pattern.compile(regex, Pattern.DOTALL).matcher(html);
    while (match.find()) {
        list.add(match.group(index));
    }
    return list;
}
(2)结构复杂的样式用 Jsoup 处理,下面获取 a 标签及 span 标签的内容
// Extracted values; each stays "" when the markup has no matching element.
String title = "";
String href = "";
String date = "";

// Placeholder input; substitute the HTML fragment to parse.
String domNodeObj = "获取的样式";
Document doc = Jsoup.parse(domNodeObj);

// When several elements match a selector, the value of the last one is kept.

// Link target from the last <a> that carries an href attribute.
// NOTE(review): "abs:href" asks for an absolute URL, but no base URI is passed to
// Jsoup.parse here - confirm that relative links come out as intended.
Element lastLinkWithHref = doc.select("a[href]").last();
if (lastLinkWithHref != null) {
    href = lastLinkWithHref.attr("abs:href");
}

// Text content of the last <a> element.
Element lastAnchor = doc.select("a").last();
if (lastAnchor != null) {
    title = lastAnchor.text();
}

// Text content of the last <span> element.
Element lastSpan = doc.select("span").last();
if (lastSpan != null) {
    date = lastSpan.text();
}

// After extraction, inspect title in a debugger; the following trim may be needed:
// title = title.substring(0, title.length() - date.length() - 1);
5、打包下载
downLoadZip(){
window.location.href=httpurl+'expertsdatabase2/downLoadZipFile?PID='+this.EXPERTSDATABASE_ID;
},
/**
 * Writes the files returned by {@code is_filesService.listAll} to the response as one zip archive.
 *
 * <p>The query is built from the current request's page data plus {@code DEL=0} and
 * {@code F_TYPE=2}. Each file's content is read from MinIO and stored in the archive as
 * {@code PRE_NAME.SUF_NAME}. When that name is already taken, {@code _d} is appended to the
 * stem until the name is unique, because a zip archive cannot hold two entries with the same
 * name. If the query yields no files, the response is left untouched.
 *
 * @param response servlet response that receives the zip stream
 * @throws Exception if the file lookup, the MinIO read or writing the archive fails
 */
@RequestMapping(value="/downLoadZipFile")
public void downLoadZipFile(HttpServletResponse response) throws Exception {
    PageData pd = this.getPageData();
    // Narrow the lookup (presumably "not deleted" and a specific attachment type - confirm).
    pd.put("DEL","0");
    pd.put("F_TYPE","2");
    List<PageData> files = is_filesService.listAll(pd);
    if (files == null || files.isEmpty()) {
        return;
    }

    response.setContentType("application/zip");
    response.setHeader("Content-Disposition", "attachment; filename=" + this.get32UUID() + ".zip");

    // Entry names already written to the archive.
    List<String> filenams = new ArrayList<>();
    byte[] buffer = new byte[8192];

    // try-with-resources closes the zip stream (and the response stream under it) on failure too.
    try (OutputStream outputStream = response.getOutputStream();
         ZipOutputStream zos = new ZipOutputStream(outputStream)) {
        for (PageData file : files) {
            // The object name is FILE_PATH with everything up to the first '/' removed.
            String f_path = file.getString("FILE_PATH");
            String objectName = f_path.substring(f_path.indexOf("/") + 1);

            String pre_name = file.getString("PRE_NAME");
            String suf_name = file.getString("SUF_NAME");

            // Keep extending the stem until the name is unused. Plain concatenation is used
            // because the file name must not be interpreted as a regular expression.
            String stem = pre_name;
            String or_name = stem + "." + suf_name;
            while (filenams.contains(or_name)) {
                stem = stem + "_d";
                or_name = stem + "." + suf_name;
            }
            filenams.add(or_name);

            try (InputStream inputStream = MinioUtil.getObject(minioDefaultBucket, objectName)) {
                zos.putNextEntry(new ZipEntry(or_name));
                int read;
                while ((read = inputStream.read(buffer)) != -1) {
                    zos.write(buffer, 0, read);
                }
                zos.closeEntry();
            }
        }
        zos.flush();
    }
}
6、打印功能
goPrint(){
//打印前将不想打印的部分隐藏
$('#files_div').hide();
$('#print_btn').hide();
window.print();
//再显示
$('#files_div').show();
$('#print_btn').show();
},
7、下载(视频)
downLoadFile(path,name){
let fielurl = minioUrlTcm+path;
// window.location.href = fielurl;
fetch(fielurl)
.then(res => res.blob())
.then(blob => {
const a = document.createElement("a");
const objectUrl = window.URL.createObjectURL(blob);
a.download = name;
a.href = objectUrl;
a.click();
window.URL.revokeObjectURL(objectUrl);
a.remove();
})
},
8、处理长篇详情文章(富文本)中的图片、段落等标签样式,保证规范展示
formatRichText(html){
let newContent= html.replace(/<img[^>]*>/gi,function(match,capture){
match = match.replace(/style\s*?=\s*?([‘"])[\s\S]*?\1/ig, '').replace(/style\s*?=\s*?([‘"])[\s\S]*?\1/ig, '');
return match;
});
newContent = newContent.replace(/style="[^"]+"/gi,function(match,capture){
match = match.replace(/width:[^;]+;/gi, 'max-width:100%;').replace(/width:[^;]+;/gi, 'max-width:100%;');
return match;
});
newContent = newContent.replace(/\<strong/gi, '<strong style="padding-bottom:12px;display:block"');
newContent = newContent.replace(/\<p/gi, '<p style="padding-bottom:12px;"');
newContent = newContent.replace(/<br[^>]*\/>/gi, '');
newContent = newContent.replace(/\<img/gi, '<img style="max-width:100%;height:auto;display:block;margin:10px 0;"');
return newContent;
},