import java.io.File;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Command-line utility that parses a well-formed (XML-compatible) HTML file
 * with the JAXP DOM parser and prints the {@code name} and {@code value}
 * attributes of every {@code input} element that is a direct child of the
 * first {@code form} element in the document.
 *
 * <p>The file is read with an XML parser, so it must be well-formed XML
 * (e.g. XHTML); tag names are matched case-sensitively.
 */
public class ParserHTML {

    /** File parsed when no path is given on the command line. */
    private static final String DEFAULT_FILE_PATH = "d:\\xx.html";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to parse.
     *             When absent, {@link #DEFAULT_FILE_PATH} is used.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        parseHTML(filePath);
    }

    /**
     * Parses the file and prints one {@code name=... value=...} line per
     * {@code input} element found directly under the first {@code form}.
     *
     * <p>A missing {@code name} or {@code value} attribute is printed as an
     * empty string instead of aborting the whole listing. Any failure
     * (unreadable file, malformed markup, no form) is reported as a single
     * {@code errMsg: ...} line on standard output.
     *
     * @param filePath path of the file to parse
     */
    private static void parseHTML(String filePath) {
        try {
            Document document = getDocumentInstance(filePath);
            Element root = document.getDocumentElement();

            // item(0) returns null when the list is empty, so check before dereferencing.
            Node form = root.getElementsByTagName("form").item(0);
            if (form == null) {
                System.out.println("errMsg: no <form> element found in " + filePath);
                return;
            }

            NodeList children = form.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                Node child = children.item(i);
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue; // skip text, comments, etc.
                }
                Element element = (Element) child;
                if (!"input".equals(element.getTagName())) {
                    continue;
                }
                // Element.getAttribute yields "" for an absent attribute, unlike
                // getAttributes().getNamedItem(...), which yields null.
                String name = element.getAttribute("name");
                String value = element.getAttribute("value");
                System.out.println("name=" + name + " value=" + value);
            }
        } catch (Exception e) {
            // Print the exception itself: getMessage() alone can be null
            // (e.g. for NullPointerException), which hides the failure type.
            System.out.println("errMsg: " + e);
        }
    }

    /**
     * Builds a DOM {@link Document} from the given file using a parser that is
     * configured not to resolve external entities or fetch external DTDs.
     *
     * @param filePath path of the file to parse
     * @return the parsed document
     * @throws Exception if the parser cannot be configured, the file cannot be
     *                   read, or its content is not well-formed XML
     */
    private static Document getDocumentInstance(String filePath)
            throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        // XXE hardening. DOCTYPE declarations stay allowed because XHTML files
        // commonly carry one, but nothing external is resolved or downloaded.
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        factory.setXIncludeAware(false);

        DocumentBuilder db = factory.newDocumentBuilder();
        return db.parse(new File(filePath));
    }
}