html表格判断,java判断html表格是不是标准表格

[Java] 纯文本查看 复制代码
package test.isEquals;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStreamReader;

import java.io.UnsupportedEncodingException;

import org.apache.commons.lang.StringUtils;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

/**
 * Command-line utility that walks a folder tree, parses every HTML file it
 * finds and reports the files containing a table whose rows do not all span
 * the same number of columns (a "non-standard" table).
 */
public class ReadHtml {

    /** Folder scanned when no command-line argument is supplied. */
    private static final String DEFAULT_FOLDER = "C:/Users/cylyc/Desktop/test/newhtm";

    /**
     * Largest colspan honoured. The HTML Standard clamps colspan to 1000, and
     * the cap also keeps a hostile attribute value from exhausting memory.
     */
    private static final int MAX_COL_SPAN = 1000;

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the folder to scan. When absent
     *             the built-in default folder is used.
     */
    public static void main(String[] args) {
        System.out.println("start!");
        String folder = (args != null && args.length > 0) ? args[0] : DEFAULT_FOLDER;
        traverseFolder2(folder);
        System.out.println("end!");
    }

    /**
     * Recursively visits {@code path} and prints the absolute path of every
     * {@code .html}/{@code .htm} file (extension matched case-insensitively)
     * for which {@link #isContainsTable(String)} returns {@code false}.
     *
     * @param path folder to scan; a message is printed if it does not exist
     */
    public static void traverseFolder2(String path) {
        File file = new File(path);
        if (!file.exists()) {
            System.out.println("文件路径不存在!");
            return;
        }
        File[] files = file.listFiles();
        if (files == null) {
            // Not a directory, or the directory could not be listed.
            return;
        }
        for (File child : files) {
            if (child.isDirectory()) {
                traverseFolder2(child.getAbsolutePath());
                continue;
            }
            String absolutePath = child.getAbsolutePath();
            if (!hasHtmlExtension(absolutePath)) {
                continue;
            }
            if (!isContainsTable(txt2String(absolutePath))) {
                System.out.println("该文件名为:"+absolutePath);
            }
        }
    }

    /** Returns whether {@code path} ends in ".html" or ".htm", ignoring case. */
    private static boolean hasHtmlExtension(String path) {
        return endsWithIgnoreCase(path, ".html") || endsWithIgnoreCase(path, ".htm");
    }

    /** Case-insensitive {@link String#endsWith(String)}. */
    private static boolean endsWithIgnoreCase(String text, String suffix) {
        int start = text.length() - suffix.length();
        return start >= 0 && text.regionMatches(true, start, suffix, 0, suffix.length());
    }

    /**
     * Reads a UTF-8 text file into a string.
     *
     * <p>Line breaks are kept as {@code '\n'}; gluing lines together would fuse
     * a tag name with an attribute that starts on the next line
     * ({@code <td} + {@code colspan=...}) and corrupt the markup.
     *
     * @param fileName path of the file to read
     * @return the file content; the empty string if the file cannot be opened,
     *         or whatever was read so far if reading fails part-way
     */
    private static String txt2String(String fileName) {
        StringBuilder sb = new StringBuilder();
        FileInputStream in = null;
        BufferedReader br = null;
        try {
            in = new FileInputStream(fileName);
            br = new BufferedReader(new InputStreamReader(in, "UTF8"));
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            System.out.println("文件编码错误:" + fileName);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.out.println("文件找不到:" + fileName);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("IO异常" + fileName);
        } finally {
            // Closing the reader also closes the stream; fall back to the raw
            // stream when the reader was never constructed.
            try {
                if (br != null) {
                    br.close();
                } else if (in != null) {
                    in.close();
                }
            } catch (IOException ignored) {
                // Nothing useful can be done if close() itself fails.
            }
        }
        return sb.toString();
    }

    /**
     * Checks that every table in the given HTML is "standard", i.e. that after
     * applying colspan/rowspan each of its rows covers every column of the
     * table. A message naming the 1-based table index is printed for each
     * table that fails.
     *
     * <p>Despite its name, the method also returns {@code true} when the
     * content holds no table at all.
     *
     * @param content HTML source; {@code null} is treated as "no tables"
     * @return {@code false} if at least one table is not standard
     */
    public static boolean isContainsTable(String content) {
        if (content == null) {
            return true;
        }
        boolean allStandard = true;
        Document doc = Jsoup.parse(content);
        Elements tables = doc.getElementsByTag("table");
        // Table number, used only in the report message.
        int tableNum = 0;
        for (Element table : tables) {
            tableNum++;
            if (!isStandardTable(table)) {
                System.out.println("第"+tableNum+"个表格不是标准表格");
                allStandard = false;
            }
        }
        return allStandard;
    }

    /**
     * Lays the cells of one table out on an occupancy grid and verifies that
     * no row has an uncovered column.
     */
    private static boolean isStandardTable(Element table) {
        Elements rows = ownRows(table);
        int rowCount = rows.size();
        // occupied[r][c] == true once some cell covers row r, column c.
        // Rows grow on demand, so spans can never index out of bounds.
        boolean[][] occupied = new boolean[rowCount][0];
        int width = 0;

        for (int r = 0; r < rowCount; r++) {
            // Everything left of 'column' is already covered, so the scan
            // never has to restart from zero for the next cell.
            int column = 0;
            for (Element cell : rows.get(r).children()) {
                if (!isCell(cell)) {
                    continue;
                }
                int colSpan = Math.min(parseSpan(cell.attr("colspan")), MAX_COL_SPAN);
                // A rowspan reaching past the last row is cut off there.
                int rowSpan = Math.min(parseSpan(cell.attr("rowspan")), rowCount - r);
                for (int k = 0; k < colSpan; k++, column++) {
                    // Skip columns taken by rowspan cells from earlier rows.
                    while (column < occupied[r].length && occupied[r][column]) {
                        column++;
                    }
                    for (int dr = 0; dr < rowSpan; dr++) {
                        occupied[r + dr] = ensureColumn(occupied[r + dr], column);
                        occupied[r + dr][column] = true;
                    }
                }
                width = Math.max(width, column);
            }
        }

        // Every column of every row must be covered; checking only the last
        // column would miss a gap left of a cell carried down by rowspan.
        for (boolean[] row : occupied) {
            for (int c = 0; c < width; c++) {
                if (c >= row.length || !row[c]) {
                    return false;
                }
            }
        }
        return true;
    }

    /** Returns the {@code tr} elements of {@code table}, excluding rows of nested tables. */
    private static Elements ownRows(Element table) {
        Elements own = new Elements();
        for (Element tr : table.getElementsByTag("tr")) {
            if (nearestTable(tr) == table) {
                own.add(tr);
            }
        }
        return own;
    }

    /** Returns the closest {@code table} ancestor of {@code element}, or {@code null}. */
    private static Element nearestTable(Element element) {
        Element ancestor = element.parent();
        while (ancestor != null && !"table".equalsIgnoreCase(ancestor.tagName())) {
            ancestor = ancestor.parent();
        }
        return ancestor;
    }

    /** Returns whether {@code element} is a table cell ({@code td} or {@code th}). */
    private static boolean isCell(Element element) {
        String tag = element.tagName();
        return "td".equalsIgnoreCase(tag) || "th".equalsIgnoreCase(tag);
    }

    /**
     * Parses a colspan/rowspan attribute value. Blank, non-numeric and
     * non-positive values all fall back to 1 (a simplification: a rowspan of 0
     * is treated like 1 here).
     */
    private static int parseSpan(String raw) {
        if (StringUtils.isBlank(raw)) {
            return 1;
        }
        try {
            int value = Integer.parseInt(raw.trim());
            return value < 1 ? 1 : value;
        } catch (NumberFormatException e) {
            return 1;
        }
    }

    /** Returns {@code row}, enlarged if necessary so that index {@code column} is valid. */
    private static boolean[] ensureColumn(boolean[] row, int column) {
        if (column < row.length) {
            return row;
        }
        boolean[] grown = new boolean[Math.max(column + 1, row.length * 2)];
        System.arraycopy(row, 0, grown, 0, row.length);
        return grown;
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值