Recently I needed to build a feature that imports data from a large file (7 million+ rows) into a database. Since the data file and the DB sit on different servers, the brute-force BULK INSERT statement wasn't a convenient option. So what to do: call a stored procedure once per row, or concatenate INSERT statements to load many rows at a time? The former is clearly impractical, and the latter tested terribly (45+ minutes per batch of 20,000 rows). What now, my god? I went digging through System.Data.SqlClient for something suitably over-the-top, and sure enough ran into "Bulk" again, only this time as SqlBulkCopy. Excited, I hit MSDN, and soon had the implementation below:
// Runs in an ASP.NET code-behind (hence Server.MapPath and this.Page).
// Required namespaces: System, System.Configuration, System.Data,
// System.Data.SqlClient, System.IO, System.Text.
try
{
    string beg = DateTime.Now.ToLongTimeString();
    // DiffDay is a day-offset field defined elsewhere on the page.
    string fileName = string.Format("Userinfo-{0}.txt", DateTime.Today.AddDays(DiffDay).ToString("yyyy-MM-dd")); // e.g. "Userinfo-2010-07-19.txt"
    FileInfo file = new FileInfo(Server.MapPath("../Download/" + fileName));
    if (!file.Exists)
    {
        //JScriptHelper.Alert(this.Page, string.Format("File to import [{0}] does not exist; please FTP it down first!", fileName));
        return;
    }
    int nRowPer; // cap on rows buffered in memory before each flush to the server
    if (!int.TryParse(ConfigurationManager.AppSettings["ULROWPER"] as string, out nRowPer))
    {
        nRowPer = 4000000;
    }
    using (SqlConnection con = new SqlConnection(ConfigurationManager.ConnectionStrings["SQLCONN"].ToString()))
    using (SqlBulkCopy bulkCtrl = new SqlBulkCopy(con))
    {
        // Staging table whose columns mirror the destination table.
        DataTable dt = new DataTable();
        dt.Columns.Add(new DataColumn("PTAccount", typeof(string)));
        dt.Columns.Add(new DataColumn("GameID", typeof(string)));
        dt.Columns.Add(new DataColumn("Area", typeof(string)));
        dt.Columns.Add(new DataColumn("Groups", typeof(string)));
        dt.Columns.Add(new DataColumn("GameLevel", typeof(string)));
        bulkCtrl.DestinationTableName = "Sp_GameLevel";
        bulkCtrl.ColumnMappings.Add(0, "PTAccount");
        bulkCtrl.ColumnMappings.Add(1, "GameID");
        bulkCtrl.ColumnMappings.Add(2, "Area");
        bulkCtrl.ColumnMappings.Add(3, "Groups");
        bulkCtrl.ColumnMappings.Add(4, "GameLevel");
        bulkCtrl.BulkCopyTimeout = int.MaxValue;
        UserLevel userCtrl = new UserLevel();
        userCtrl.TruncateUserLevel(); // empty the destination table before the fresh load
        con.Open();
        int nRow = 0;
        int nCnt = 0; // running total of rows written to the server
        using (StreamReader read = new StreamReader(Server.MapPath("../Download/" + fileName), Encoding.Default))
        {
            string line;
            while ((line = read.ReadLine()) != null)
            {
                nRow = dt.Rows.Count;
                if (nRow > nRowPer) // flush in chunks so a huge file doesn't exhaust memory
                {
                    bulkCtrl.BatchSize = nRow;
                    bulkCtrl.WriteToServer(dt);
                    dt.Rows.Clear();
                    nCnt += nRow;
                }
                string[] str = line.Split('\t'); // fields are tab-separated
                if (str.Length == 5)
                {
                    DataRow dr = dt.NewRow();
                    dr[0] = str[0];
                    dr[1] = str[1];
                    dr[2] = str[2];
                    dr[3] = str[3];
                    dr[4] = str[4];
                    dt.Rows.Add(dr);
                }
            }
        }
        // Flush whatever is left over after the last full chunk.
        nRow = dt.Rows.Count;
        if (nRow > 0)
        {
            nCnt += nRow;
            bulkCtrl.BatchSize = nRow;
            if (con.State != ConnectionState.Open) con.Open();
            bulkCtrl.WriteToServer(dt);
        }
        //LogHelper.Info(string.Format("Import finished Beg:{0} End:{1} [Rows]{2}", beg, DateTime.Now.ToLongTimeString(), nCnt));
        //JScriptHelper.Alert(this.Page, string.Format("Imported {0} records successfully!", nCnt));
    }
}
catch (Exception ex)
{
    //LogHelper.Error(string.Format("UserLevelMain btnInsert_Click [StackTrace]{0}[Message]{1}", ex.StackTrace, ex.Message));
}
After running it, the log read: Import finished Beg:16:42:42 End:16:46:04 [Rows]5452484
Under four minutes for 5 million+ rows, versus the 30+ minutes it used to take. How many times faster is that!
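To double-check a run like this, the row count in the log can be compared against the table itself. A minimal sketch (my addition, again assuming the SQLCONN connection string, Sp_GameLevel table, and LogHelper class from above):

// Sketch only: verify the load by counting rows in the destination table.
using (SqlConnection con = new SqlConnection(ConfigurationManager.ConnectionStrings["SQLCONN"].ToString()))
using (SqlCommand cmd = new SqlCommand("SELECT COUNT_BIG(*) FROM Sp_GameLevel", con))
{
    con.Open();
    long rows = (long)cmd.ExecuteScalar();
    // Should equal the nCnt total from the import log (5452484 in this run).
    LogHelper.Info(string.Format("Sp_GameLevel now holds {0} rows.", rows));
}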
Keywords: large file, large data volume, importing into a SQL database