Hibernate 插入海量数据时的性能与 Jdbc 的比较

最新推荐文章于 2025-05-23 21:29:07 发布

yjailj

最新推荐文章于 2025-05-23 21:29:07 发布

阅读量335

点赞数

文章标签： JDBC Hibernate log4j DAO SQL

[size=medium]系统环境:[/size]

[img]http://dl.iteye.com/upload/attachment/220711/9bef669a-3c62-33fd-badc-95465bdc8e0e.jpg[/img]

[size=medium]MySQL 数据库环境: [/size]

mysql> select version();
--------------
select version()
--------------

+----------------------+
| version()            |
+----------------------+
| 5.1.41-community-log |
+----------------------+
1 row in set (0.00 sec)

[size=medium]eclipse 运行时参数: [/size]

--launcher.XXMaxPermSize
256M
-showsplash
org.eclipse.platform
--launcher.XXMaxPermSize
256m
-vm
C:/Program Files/Java/jdk1.6.0_18/bin/javaw
-vmargs
-Dosgi.requiredJavaVersion=1.5
-Xms40m
-Xmx512m

[size=medium]使用 Spring 2.5、Hibernate3.3

Entity: [/size]

package com.model;

import java.io.Serializable;
import java.sql.Timestamp;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;

/**
 * @author <a href="liu.anxin13@gmail.com">Tony</a>
 */
@Entity
@Table(name = "T_USERINFO")
@org.hibernate.annotations.Entity(selectBeforeUpdate = true, dynamicInsert = true, dynamicUpdate = true)
public class UserInfo implements Serializable {

	private static final long serialVersionUID = -4855456169220894250L;

	@Id
	@Column(name = "ID", length = 32)
	private String id = java.util.UUID.randomUUID().toString().replaceAll("-", "");

	@Column(name = "CREATE_TIME", updatable = false)
	private Timestamp createTime = new Timestamp(System.currentTimeMillis());

	@Column(name = "UPDATE_TIME", insertable = false)
	private Timestamp updateTime = new Timestamp(System.currentTimeMillis());
	// setter/getter...
}

[size=medium]applicationContext.xml: [/size]

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xmlns="http://www.springframework.org/schema/beans"
	xmlns:context="http://www.springframework.org/schema/context"
	xmlns:tx="http://www.springframework.org/schema/tx"
	xmlns:aop="http://www.springframework.org/schema/aop"
	xsi:schemaLocation="http://www.springframework.org/schema/beans 
		http://www.springframework.org/schema/beans/spring-beans-2.5.xsd 
		http://www.springframework.org/schema/context 
		http://www.springframework.org/schema/context/spring-context-2.5.xsd 
		http://www.springframework.org/schema/tx 
		http://www.springframework.org/schema/tx/spring-tx-2.5.xsd
		http://www.springframework.org/schema/aop
		http://www.springframework.org/schema/aop/spring-aop-2.5.xsd">

	<context:component-scan base-package="com.dao,com.service" />

	<context:property-placeholder location="classpath:jdbc.properties" />

	<bean id="dataSource" class="com.mchange.v2.c3p0.ComboPooledDataSource"
		destroy-method="close">
		<property name="driverClass" value="${jdbc.driver}" />
		<property name="jdbcUrl" value="${jdbc.url}" />
		<property name="user" value="${jdbc.username}" />
		<property name="password" value="${jdbc.password}" />

		<property name="maxPoolSize" value="50" />
		<property name="minPoolSize" value="2" />
		<property name="initialPoolSize" value="5" />
		<property name="acquireIncrement" value="5" />
		<property name="maxIdleTime" value="1800" />
		<property name="idleConnectionTestPeriod" value="1800" />
		<property name="maxStatements" value="1000"/>
		<property name="breakAfterAcquireFailure" value="true" />
		<property name="testConnectionOnCheckin" value="true" />
		<property name="testConnectionOnCheckout" value="false" />
	</bean>

	<bean id="sessionFactory" class="org.springframework.orm.hibernate3.annotation.AnnotationSessionFactoryBean">
		<property name="dataSource" ref="dataSource"/>
		<property name="configLocation" value="classpath:hibernate.cfg.xml" />
	</bean>

	<bean id="hibernateTemplate" class="org.springframework.orm.hibernate3.HibernateTemplate">
		<property name="sessionFactory" ref="sessionFactory" />
	</bean>

	<bean id="transactionManager" class="org.springframework.orm.hibernate3.HibernateTransactionManager">
		<property name="sessionFactory" ref="sessionFactory" />
	</bean>

	<tx:advice id="txAdvice" transaction-manager="transactionManager">
		<tx:attributes>
			<tx:method name="*" isolation="READ_COMMITTED" rollback-for="Throwable" />
		</tx:attributes>
	</tx:advice>

	<aop:config>
		<aop:pointcut id="services" expression="execution(* com.service.*.*.*.*(..))" />
		<aop:advisor advice-ref="txAdvice" pointcut-ref="services" />
	</aop:config>

</beans>

[size=medium]hibernate.cfg.xml: [/size]

<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE hibernate-configuration PUBLIC
          "-//Hibernate/Hibernate Configuration DTD 3.0//EN"
          "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd">

<hibernate-configuration>

	<session-factory>

		<property name="hibernate.dialect">
			org.hibernate.dialect.MySQLDialect
		</property>

		<!--
			默认 15, 这是我个人不能完全理解的参数, 不明白这个参数的实际意义
		-->
		<!-- <property name="hibernate.jdbc.batch_size">50</property> -->

		<!-- 排序插入和更新, 避免出现 锁si -->
		<property name="hibernate.order_inserts">true</property>
		<property name="hibernate.order_updates">true</property>

		<property name="hibernate.hbm2ddl.auto">update</property>

		<property name="hibernate.show_sql">false</property>
		<property name="hibernate.format_sql">false</property>

		<property name="hibernate.current_session_context_class">
			org.hibernate.context.JTASessionContext
		</property>

		<mapping class="com.model.UserInfo" />

	</session-factory>

</hibernate-configuration>

[size=medium]log4j.properties:[/size]

log4j.rootLogger=INFO, CONS
log4j.appender.CONS=org.apache.log4j.ConsoleAppender
log4j.appender.CONS.layout=org.apache.log4j.PatternLayout
log4j.appender.CONS.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss sss} [%p] %r %l [%t] %x - %m%n

# log4j.logger.org.springframework=WARN
# log4j.logger.org.hibernate=WARN
# log4j.logger.com.mchange=WARN

[size=medium]HibernateDAO.java:[/size]

package com.dao;

import java.io.Serializable;

import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.HibernateTemplate;

@Repository
public class HibernateDAO {

	private static final Logger log = Logger.getLogger(HibernateDAO.class);

	@Autowired
	private HibernateTemplate template;

	public Serializable save(Object entity) {
		try {
			return template.save(entity);
		} catch (Exception e) {
			log.info("save exception : " + e.getMessage());
			// 异常的目的只是为了记录日志
			throw new RuntimeException(e);
		}
	}

}

[size=medium]CommonServiceImpl.java: [/size]

package com.service;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.dao.HibernateDAO;
import com.model.UserInfo;

@Service("commonService")
public class CommonService {

	private static final Logger log = Logger.getLogger(CommonService.class);

	@Autowired
	private HibernateDAO dao;

	public void testOcean(long num) {
		for (int i = 0; i < num; i++) {
			// 在循环中 new 大量对象. 此为 outOfMemory 的根源
			// 将 user 申明在 循环外面, 循环体内部才将其指向具体的对象, 这样可以提高一点效率
			UserInfo user = new UserInfo();
			dao.save(user);

			user = null;
			// 实际意义不大, 就算手动运行, gc 也是异步的, 没有办法控制
			if (i % 100000 == 0)
				System.gc();
			/*
			if (i % 30 == 0) {
				dao.flush();
				dao.clear();
			}
			*/
		}
	}

}

[size=medium]Test.java: [/size]

package com;

import org.apache.log4j.Logger;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import com.service.CommonService;

@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "/applicationContext.xml" })
public class TestHibernate {

	@Autowired
	@Qualifier("commonService")
	private CommonService service;

	private static final Logger log = Logger.getLogger(TestHibernate.class);

	@Test
	public void testHibernate() {
		// 海量数据的条数. 分别以 1W, 10W, 50W, 100W 为数值
		// 虽然都算不上海量, 但做为测试也应该算够了吧
		long num = 10000;

		log.info("开始");
		long begin = System.currentTimeMillis();
		service.testOcean(num);

		long end = System.currentTimeMillis();
		log.info("耗费时间: " + num + " 条. " + (end - begin) / 1000.00000000 + " 秒");
		System.out.println("赶紧查看内存");
		try {
			Thread.sleep(10000);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

}

[size=medium]结果:

SQL 生成语句如下: [/size]

Hibernate: 
    insert 
    into
        T_USERINFO
        (CREATE_TIME,  ID) 
    values
        (?, ?)

[size=medium]不使用批量

1W: 耗费时间: 3.65 秒内存占用见下图[/size]
[img]http://dl.iteye.com/upload/attachment/220732/c571641a-62f5-31e1-90b1-6ac30c4f1a07.jpg[/img]

[size=medium]10W: 耗费时间: 33.844 秒[/size]
[img]http://dl.iteye.com/upload/attachment/220735/751ee474-bbc3-3710-abac-391a29ce058c.jpg[/img]

[size=medium]50W : outOfMemory

30条为一个周期, 也就是将 if (i % 30 == 0) 这段注释解开

1W: 耗费时间: 3.797 秒. 查看内存, 这个时间就明显的感觉到运行时的压力已经有一些转换到数据库上面来了(运行时看得更清楚一些)[/size]
[img]http://dl.iteye.com/upload/attachment/220738/374eca1a-7904-340e-a6a0-84a3a3957135.jpg[/img]

[size=medium]MySQL 的进程没能抓到图, 而且其是在运行时耗的资源. 也并不是很大, 比上图要小

10W: 耗费时间: 34.922 秒[/size]
[img]http://dl.iteye.com/upload/attachment/220741/9baef6cb-da78-3f38-9fd9-4103042827e7.jpg[/img]

[size=medium]50W: 耗费时间: 182.531 秒[/size]
[img]http://dl.iteye.com/upload/attachment/220743/63a0015b-2735-3f4f-9608-af2ed0cdf5e5.jpg[/img]

[size=medium]100W outOfMemory

50条为一个周期, 即将上面的 30 改为 50

1W: 耗费时间: 3.625 秒[/size]
[img]http://dl.iteye.com/upload/attachment/220749/e464e5c7-279d-3fdf-843e-fa6ac1f6d702.jpg[/img]

[size=medium]10W: 耗费时间: 32.031 秒[/size]
[img]http://dl.iteye.com/upload/attachment/220751/7d567d59-1d6e-3487-a191-e50a30358eeb.jpg[/img]

[size=medium]50W: 耗费时间: 161.391 秒[/size]
[img]http://dl.iteye.com/upload/attachment/220754/7c3b3ee7-b5a3-3b67-842c-b76dee8d3dc7.jpg[/img]

[size=medium]100W 同样 outOfMemory .

一路看下来, 我并不觉得 Hibernate 有多慢. 4秒以内处理 1W 条数据 . 至于内存溢出, 还是因为在循环里面 new 了太多的对象, 尽管处理完我就将其引用指向 null 并显式调用 gc , 但 JVM 内部运行 gc 是异步的...

当然性能上使用 50批量的策略会好很多, 可以将压力让服务器和数据库同时去承担 .

保存下来再去测试 jdbc 批量看看...

使用 jdbc 测试:

在 applicationContext.xml 中添加: [/size]

<bean id="jdbcTemplate" class="org.springframework.jdbc.core.JdbcTemplate">
	<property name="dataSource" ref="dataSource" />
</bean>

[size=medium]JdbcDAO.java:[/size]

package com.dao;

import java.sql.Connection;

import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;

@Repository
public class JdbcDAO {

	private static final Logger log = Logger.getLogger(JdbcDAO.class);

	@Autowired
	private JdbcTemplate template;

	public Connection getConn() {
		try {
			return template.getDataSource().getConnection();
		} catch (Exception e) {
			log.info("获取连接时异常: " + e.getMessage());
			throw new RuntimeException(e);
		}
	}

}

[size=medium]JdbcService.java: [/size]

package com.service;

import java.sql.Connection;
import java.sql.PreparedStatement;

import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.dao.JdbcDAO;
import com.model.UserInfo;

@Service("jdbcService")
public class JdbcService {

	private static final Logger log = Logger.getLogger(JdbcService.class);

	@Autowired
	private JdbcDAO dao;

	public void testOcean(long num) {
		Connection conn = dao.getConn();
		try {
			conn.setAutoCommit(false);
			String sql = "insert into T_USERINFO(CREATE_TIME, ID) values(?, ?)";
			PreparedStatement pstm = conn.prepareStatement(sql);
			for (int i = 0; i < num; i++) {
				// 要保证公平, 也在循环中 new 对象
				UserInfo user = new UserInfo();
				pstm.setTimestamp(1, user.getCreateTime());
				pstm.setString(2, user.getId());
				pstm.execute();

				user = null;
				if (i % 10000 == 0)
					System.gc();
				// 批处理
				/*
				if (i % 30 == 0) {
					pstm.executeBatch();
					conn.commit();
					pstm.clearBatch();
				}
				*/

			}
			// 将循环里面的批处理解开后, 就要将此处的commit注释, 并将下面注释的语句解开
			conn.commit();
			// pstm.executeBatch();
		} catch (Exception e) {
			log.info("异常: " + e.getMessage());
		} finally {
			try {
				conn.close();
			} catch (Exception e) {
				conn = null;
			}
		}
	}

}

[size=medium]TestJdbc.java: (这里很郁闷, 我用的 Junit 4.6 版本, 在上面的测试中正常. 在这里异常, 换成 4.4 就好了) [/size]

package com;

import org.apache.log4j.Logger;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import com.service.JdbcService;

@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "/applicationContext.xml" })
public class TestJdbc {

	private static final Logger log = Logger.getLogger(TestJdbc.class);

	@Autowired
	@Qualifier("jdbcService")
	private JdbcService service;

	@Test
	public void testJdbc() {
		long num = 500000;

		log.info("开始");
		long begin = System.currentTimeMillis();
		service.testOcean(num);
		long end = System.currentTimeMillis();
		log.info("耗费时间: " + num + " 条. " + (end - begin) / 1000.00000000 + " 秒");

		System.out.println("赶紧查看内存");
		try {
			Thread.sleep(10000);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

}

[size=medium]结果如下:

不使用批量:

1W: 耗费时间: 3.359 秒(这个效率确实比 Hibernate 要高)[/size]
[img]http://dl.iteye.com/upload/attachment/220890/f03f53bd-942d-3fda-a9f3-1b657ac34caf.jpg[/img]

[size=medium]10W: 耗费时间: 212.812 秒 (我不确定是不是我 jdbc的代码写得不够好, 反正这个效率让我很是郁闷)[/size]
[img]http://dl.iteye.com/upload/attachment/220896/3ef3cbed-c363-3a62-92cc-79db6fecd2f1.jpg[/img]

[size=medium]50W 的时候我已经没有耐心再等下去了. 11 分钟过去了还没看到结果

使用30条一次批量

说实话, 这个时候的性能已经让我到了一种发狂的地步.
接近 [color=red]3 秒运行 100 条[/color] . 1W 条数据花了 288.266 秒的时间
是的, 你没有看错, 就是 3 秒运行 100 条, 到后面的 50 我也就没有再继续测下去.

也可能是我 jdbc 代码写得不对吧. 毕竟 hibernate 底层也是用的 jdbc, 其再快也是不可能快过 Native SQL, 我这个贴的目的也只是想了解下差距的多少, 只是这个效率...

如果哪位有过 jdbc 方面的操作, 给个提醒...
谢谢. [/size]