import os
import csv
import shutil
import pyodbc
import logging
# Logging configuration: write to csv_import.log and to the console.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('csv_import.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
def get_db_connection():
    """Create a database connection."""
    try:
        # NOTE: credentials are hard-coded; see the environment-variable
        # sketch below this function.
        conn = pyodbc.connect(
            "DRIVER={ODBC Driver 17 for SQL Server};"
            "SERVER=192.168.10.116;"
            "DATABASE=现场DB;"
            "UID=sa;"
            "PWD=gtec_6600"
        )
        logger.info("Database connection established")
        return conn
    except pyodbc.Error as e:
        logger.error(f"Database connection failed: {e}")
        raise
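# A minimal alternative sketch for loading the credentials, assuming they are
# supplied via environment variables (the variable names are illustrative,
# not part of the original script):
#
#     conn = pyodbc.connect(
#         "DRIVER={ODBC Driver 17 for SQL Server};"
#         f"SERVER={os.environ['DB_SERVER']};"
#         f"DATABASE={os.environ['DB_NAME']};"
#         f"UID={os.environ['DB_USER']};"
#         f"PWD={os.environ['DB_PASSWORD']}"
#     )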
def validate_data_row(row):
    """Validate a data row (custom business rules)."""
    return (
        len(row) >= 15 and
        row[0].strip() != ""  # production line must not be empty
    )
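# For illustration: validate_data_row(["Line1"] + [""] * 14) passes, while a
# row whose first field is blank, or that has fewer than 15 fields, is rejected.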
def safe_float_convert(value, default=0.0):
    """Safely convert a value to float, falling back to a default."""
    try:
        return float(value)
    except (ValueError, TypeError):
        return default
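# For illustration: safe_float_convert("1.5") returns 1.5, while both
# safe_float_convert("") and safe_float_convert(None) return the 0.0 default.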
def process_csv(file_path):
    """Process a CSV file and extract the required columns."""
    try:
        processed_data = []
        filename = os.path.basename(file_path)
        # Try candidate encodings in order. 'ANSI' is not a valid Python codec
        # name; 'mbcs' (the Windows ANSI code page) replaces it. 'latin1'
        # accepts any byte sequence, so it serves as the last resort.
        encodings = ['mbcs', 'utf-8-sig', 'gbk', 'latin1']
        for encoding in encodings:
            try:
                # newline='' lets the csv module handle line endings itself.
                with open(file_path, 'r', encoding=encoding, newline='') as f:
                    reader = csv.reader(f)
                    logger.info(f"Reading {filename} with encoding {encoding}")
                    for row in reader:
                        # Skip blank lines and the header row (if present).
                        if not row or "生产线" in row[0]:
                            continue
                        # Extract the required columns (0-14).
                        if len(row) < 15:
                            logger.warning(f"Not enough columns: expected 15, got {len(row)}")
                            continue
                        filtered_row = [
                            row[0].strip(),   # production line
                            row[1].strip(),   # part
                            row[2].strip(),   # equipment number
                            row[3].strip(),   # operation number
                            row[4].strip(),   # workpiece number
                            row[5].strip(),   # engine type
                            row[6].strip(),   # file name
                            row[7].strip(),   # judgment time
                            row[8].strip(),   # machining content
                            row[9].strip(),   # data type
                            row[10].strip(),  # judgment content
                            row[11].upper(),  # judgment result (uppercased)
                            safe_float_convert(row[12]),  # measured value
                            safe_float_convert(row[13]),  # lower limit
                            safe_float_convert(row[14])   # upper limit
                        ]
                        if validate_data_row(filtered_row):
                            processed_data.append(filtered_row)
                        else:
                            logger.warning(f"Invalid row: {row}")
                logger.info(f"Processed {len(processed_data)}/{reader.line_num} rows")
                return processed_data
            except UnicodeDecodeError:
                logger.warning(f"Decoding with {encoding} failed, trying the next encoding")
                continue
        raise ValueError("All encodings failed; the file could not be read")
    except Exception as e:
        logger.error(f"Failed to process CSV {os.path.basename(file_path)}: {e}")
        raise
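# An alternative sketch for the encoding trial above, assuming the third-party
# chardet package is available (it is not used by the original script):
#
#     import chardet
#     with open(file_path, 'rb') as f:
#         encoding = chardet.detect(f.read())['encoding']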
def save_to_db(data):
    """Save the extracted rows to the database."""
    conn = None
    try:
        conn = get_db_connection()
        cursor = conn.cursor()
        # Create the table if it does not exist (table name kept consistent
        # with the INSERT statement below).
        cursor.execute("""
            IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='T_各部位判定结果' AND xtype='U')
            CREATE TABLE T_各部位判定结果 (
                id UNIQUEIDENTIFIER PRIMARY KEY DEFAULT NEWID(),  -- UUID primary key
                line VARCHAR(50) NOT NULL,
                part VARCHAR(50) NOT NULL,
                equipment VARCHAR(50) NOT NULL,
                OP VARCHAR(20) NOT NULL,
                part_no VARCHAR(20) NOT NULL,
                engine_type VARCHAR(30) NOT NULL,
                file_name VARCHAR(255),           -- source file name
                time_stamp VARCHAR(30) NOT NULL,  -- judgment time
                content VARCHAR(255) NOT NULL,
                content_type VARCHAR(50) NOT NULL,
                content_jdg VARCHAR(50) NOT NULL,
                result VARCHAR(10) NOT NULL,
                data FLOAT NOT NULL,
                low_limit FLOAT NOT NULL,
                up_limit FLOAT NOT NULL,
                create_time DATETIME DEFAULT GETDATE()
            )
        """)
        conn.commit()
        # Batch-insert the data.
        insert_sql = """
            INSERT INTO T_各部位判定结果 (line, part, equipment, OP, part_no, engine_type,
                file_name, time_stamp, content, content_type, content_jdg, result,
                data, low_limit, up_limit)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
        # Build the parameter tuples for executemany (each row already holds
        # the 15 values in insert order).
        params = [tuple(row) for row in data]
        # Insert in batches of 500 rows.
        batch_size = 500
        for i in range(0, len(params), batch_size):
            batch = params[i:i + batch_size]
            cursor.executemany(insert_sql, batch)
            conn.commit()
            logger.info(f"Inserted batch {i // batch_size + 1}: {len(batch)} rows")
        logger.info(f"Inserted {len(params)} rows in total")
    except pyodbc.Error as e:
        logger.error(f"Database error: {e}")
        if conn:
            conn.rollback()
        raise
    finally:
        if conn:
            conn.close()
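# Note: pyodbc also offers cursor.fast_executemany = True, which can speed up
# the batched executemany() calls above considerably against SQL Server;
# whether it helps depends on the driver version, so it is left as an option
# rather than enabled here.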
def process_files():
    """Process every CSV file in the input directory."""
    input_dir = r"D:\数字化项目\工控机\#1\缸体"
    output_dir = r"D:\数字化项目\工控机\#1\缸体\output"
    error_dir = r"D:\数字化项目\工控机\#1\缸体\err"
    for dir_path in [output_dir, error_dir]:
        os.makedirs(dir_path, exist_ok=True)
    processed_count = 0
    error_count = 0
    for filename in os.listdir(input_dir):
        if filename.lower().endswith('.csv'):
            input_path = os.path.join(input_dir, filename)
            logger.info(f"{'=' * 50}")
            logger.info(f"Processing: {filename}")
            try:
                data = process_csv(input_path)
                if data:
                    save_to_db(data)
                    shutil.move(input_path, os.path.join(output_dir, filename))
                    processed_count += 1
                    logger.info(f"Processed successfully: {filename}")
                else:
                    logger.warning(f"No valid data: {filename}")
                    shutil.move(input_path, os.path.join(error_dir, "nodata_" + filename))
            except Exception as e:
                error_path = os.path.join(error_dir, filename)
                shutil.move(input_path, error_path)
                error_count += 1
                logger.error(f"Failed to process {filename}: {e}")
    logger.info(f"Done: {processed_count} succeeded, {error_count} failed")
if __name__ == "__main__":
    try:
        process_files()
    except Exception as e:
        logger.critical(f"Fatal error, aborting: {e}")
        raise