今天由于项目需要，读取一个几百兆的 XML 文件时直接内存溢出了，主要原因是我一次性把整个 XML 文件的内容读入了内存。对于大的 XML 文件，应该每次只读取一部分到内存，处理完后从内存中移除，再继续读取。这里用的是 dom4j 的 SAXReader。
- package test;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.InputStream;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.text.DateFormat;
- import java.text.SimpleDateFormat;
- import java.util.Date;
- import org.apache.commons.codec.binary.Base64;
- import org.dom4j.Element;
- import org.dom4j.ElementHandler;
- import org.dom4j.ElementPath;
- import org.dom4j.io.SAXReader;
- public class MySaxHandler implements ElementHandler {
- SAXReader reader;
- static String jdbcURL = "jdbc:sybase:Tds:172.25.130.1:4100/center?charset=cp936";
- static String jdbcDriver = "com.sybase.jdbc3.jdbc.SybDriver";
- static String userName = "xxx";
- static String password = "xxx";
- ThreeYearBlackEnt threeYearBlackEnt = new ThreeYearBlackEnt();
- public static Connection con = null;
- public static Connection getConnection() throws Exception {
- Class.forName(jdbcDriver);
- return DriverManager.getConnection(jdbcURL, userName, password);
- }
- public static java.sql.Date stringToDate(String str) {
- DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
- Date date = null;
- try {
- // Fri Feb 24 00:00:00 CST 2012
- date = format.parse(str);
- } catch (Exception e) {
- e.printStackTrace();
- }
- // 2012-02-24
- //date = java.sql.Date.valueOf(str);
- return java.sql.Date.valueOf(str);
- }
- public static void addDataBackEnt(Connection con,ThreeYearBlackEnt threeYearBlackEnt) throws SQLException{
- String sql = "insert into J_QYDXB_QG (QYMC, ZCH,REGORG, FDDBR,FDDBR_ZJHM,FDDBR_ZJMC,DXRQ,BZSM) values(?, ?, ?, ?, ?,?,?,?)";
- PreparedStatement pst = con.prepareStatement(sql);
- pst.setString(1, threeYearBlackEnt.ENTNAME);
- pst.setString(2, threeYearBlackEnt.REGNO);
- pst.setString(3, threeYearBlackEnt.REGORG);
- pst.setString(4, threeYearBlackEnt.NAME);
- pst.setString(5, threeYearBlackEnt.CERNO);
- pst.setString(6, threeYearBlackEnt.CERTYPE);
- pst.setDate(7, stringToDate(threeYearBlackEnt.REVDATE));
- pst.setString(8, threeYearBlackEnt.ILLEGACT);
- pst.execute();
- pst.close();
- }
- public MySaxHandler() {
- // test.xml文件跟类放在同一目录下
- try {
- con = getConnection();
- InputStream is = MySaxHandler.class.getResourceAsStream("threeYearBlackEnt_data.xml");
- //InputStream is = new FileInputStream("D:\\programfiles\\workspace\\Counter\\threeYearBlackEnt_data1.xml");
- reader = new SAXReader();
- reader.setDefaultHandler(this);
- reader.read(is);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- public void onEnd(ElementPath ep) {
- Element element = ep.getCurrent(); //获得当前节点
- if(element.getName().equals("ENTNAME")) {
- threeYearBlackEnt.ENTNAME = new String(Base64.decodeBase64(element.getText()));
- p("ENTNAME="+ threeYearBlackEnt.ENTNAME );
- }else if(element.getName().equals("REGNO")) {
- threeYearBlackEnt.REGNO = new String(Base64.decodeBase64(element.getText()));
- p("REGNO="+ threeYearBlackEnt.REGNO);
- }else if(element.getName().equals("REGORG")) {
- threeYearBlackEnt.REGORG = new String(Base64.decodeBase64(element.getText()));
- p("REGORG="+ threeYearBlackEnt.REGORG);
- }else if(element.equals("NAME")) {
- threeYearBlackEnt.NAME = new String(Base64.decodeBase64(element.getText()));
- p("NAME="+ threeYearBlackEnt.NAME);
- }else if(element.getClass().equals("CERNO")) {
- threeYearBlackEnt.CERNO = new String(Base64.decodeBase64(element.getText()));
- p("CERNO="+ threeYearBlackEnt.CERNO);
- }else if(element.getName().equals("CERTYPE")){
- threeYearBlackEnt.CERTYPE = new String(Base64.decodeBase64(element.getText()));
- p("CERTYPE="+ threeYearBlackEnt.CERTYPE);
- }else if(element.getName().equals("REVDATE")) {
- threeYearBlackEnt.REVDATE = new String(Base64.decodeBase64(element.getText()));
- p("REVDATE="+ threeYearBlackEnt.REVDATE);
- }else if(element.getName().equals("ILLEGACT")) {
- threeYearBlackEnt.ILLEGACT =new String(Base64.decodeBase64(element.getText()));
- p("ILLEGACT="+ threeYearBlackEnt.ILLEGACT);
- try {
- addDataBackEnt(con, threeYearBlackEnt);
- } catch (SQLException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- element.detach(); //记得从内存中移去
- }
- public void onStart(ElementPath ep) {
- }
- public static void main(String[] args){
- new MySaxHandler();
- }
- public static void p(Object o) {
- if(o != null) System.out.println(o.toString());
- }
- }
本文介绍了一种使用DOM4J的SAXReader解析大型XML文件的方法,通过逐步读取并处理XML数据避免了内存溢出的问题。同时展示了如何将解析的数据存储到数据库中。
587

被折叠的 条评论
为什么被折叠?



