BulkInsert fails: parameter value could not be converted from String to DateTime

When doing a BulkInsert through the EF framework, the insert kept failing with "parameter value could not be converted from String to DateTime". For a long time I assumed the problem was the data in the DateTime column itself.
Solution: the entity's properties must be declared in the same order as the columns of the database table; once the order matches, the bulk insert works.
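A minimal sketch of what "same order" means. The names here (SampleRecord, CreatedAt, and the DDL in the comment) are illustrative only, not from the original post. Bulk-insert helpers that sit on SqlBulkCopy-style APIs generally map columns by ordinal position unless explicit column mappings are configured, so a swapped property order can push a string value into a DATETIME column:

```csharp
using System;

// Hypothetical table; column order matters for ordinal mapping:
//   CREATE TABLE SampleRecord (
//       Id        INT PRIMARY KEY,
//       Name      NVARCHAR(50),
//       CreatedAt DATETIME
//   );

// The entity declares its properties in the same order as the table columns.
// If Name and CreatedAt were swapped here, an ordinal-mapped bulk insert would
// try to write the string Name into the DATETIME column and fail with the
// String-to-DateTime conversion error described above.
public class SampleRecord
{
    public int Id { get; set; }             // column 1: Id
    public string Name { get; set; }        // column 2: Name
    public DateTime CreatedAt { get; set; } // column 3: CreatedAt
}
```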

```
Traceback (most recent call last):
  File "D:\dev\EquipmentRegistryDataPlatform\venv\lib\site-packages\iso8601\iso8601.py", line 122, in parse_date
    m = ISO8601_REGEX.match(datestring)
TypeError: expected string or bytes-like object

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "d:\dev\EquipmentRegistryDataPlatform\app\tasks\async_zt_flow_hcproblem.py", line 62, in <module>
    run_async(insert_zt_flow_hcproblem())
  File "D:\dev\EquipmentRegistryDataPlatform\venv\lib\site-packages\tortoise\__init__.py", line 643, in run_async
    loop.run_until_complete(coro)
  File "C:\Program Files\Python310\lib\asyncio\base_events.py", line 649, in run_until_complete
    return future.result()
  File "d:\dev\EquipmentRegistryDataPlatform\app\tasks\async_zt_flow_hcproblem.py", line 57, in insert_zt_flow_hcproblem
    bulk_data.append(ZtFlowHcProblem(**data))
  File "D:\dev\EquipmentRegistryDataPlatform\venv\lib\site-packages\tortoise\models.py", line 698, in __init__
    for key in meta.fields.difference(self._set_kwargs(kwargs)):
  File "D:\dev\EquipmentRegistryDataPlatform\venv\lib\site-packages\tortoise\models.py", line 736, in _set_kwargs
    setattr(self, key, field_object.to_python_value(value))
  File "D:\dev\EquipmentRegistryDataPlatform\venv\lib\site-packages\tortoise\fields\data.py", line 358, in to_python_value
    value = parse_datetime(value)
  File "D:\dev\EquipmentRegistryDataPlatform\venv\lib\site-packages\iso8601\iso8601.py", line 124, in parse_date
    raise ParseError(e)
iso8601.iso8601.ParseError: expected string or bytes-like object
```
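The traceback shows tortoise-orm's datetime field handing a non-string value to iso8601's parser, so whatever sits in `data` for that datetime field is neither a `str` nor a `datetime`. A minimal, hypothetical workaround is to coerce such values before constructing the model; the field name `created_at` and the epoch-seconds assumption below are illustrative only and need adjusting to the real source data:

```python
from datetime import datetime, timezone

def coerce_datetime(value):
    """Return a value the DatetimeField can accept: datetime, ISO string, or None.

    Hypothetical helper: assumes non-string numeric values are epoch seconds;
    adapt this to whatever the source system actually delivers.
    """
    if value is None or isinstance(value, datetime):
        return value
    if isinstance(value, (int, float)):
        return datetime.fromtimestamp(value, tz=timezone.utc)
    return str(value)  # real strings fall through to tortoise's ISO-8601 parser

# before: bulk_data.append(ZtFlowHcProblem(**data))
data["created_at"] = coerce_datetime(data.get("created_at"))  # "created_at" is illustrative
bulk_data.append(ZtFlowHcProblem(**data))
```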
Latest post (08-06):
```csharp
private void button2_Click_2(object sender, EventArgs e)
{
    // Disable the button to prevent duplicate clicks
    button1.Enabled = false;
    progressBar1.Value = 0;
    label1.Text = "Preparing...";

    // Create and configure the BackgroundWorker
    BackgroundWorker worker = new BackgroundWorker();
    worker.WorkerReportsProgress = true;
    worker.WorkerSupportsCancellation = true;

    worker.DoWork += (s, args) =>
    {
        string connectionString = "Server=10.50.32.244;Port=9080;Database=weiwantian_db;Uid=weiwantian;Pwd=weiwantian123...;CharSet=utf8mb4;";
        using (MySqlConnection connection = new MySqlConnection(connectionString))
        {
            try
            {
                connection.Open();
                MySqlTransaction transaction = connection.BeginTransaction();

                // Current year and month
                DateTime currentDate = DateTime.Now;
                int year = currentDate.Year;
                int month = currentDate.Month;
                int daysInMonth = DateTime.DaysInMonth(year, month);

                // Query active ('在职') personnel into a DataTable
                string selectQuery = @"
                    SELECT `14ID`, ID_Card_Number, Name, Type, Department, Section, Workshop,
                           Large_Group, Team, Section_Leader, Workshop_Leader,
                           Large_Group_Leader, Team_Leader
                    FROM zsg_personnel_list
                    WHERE Resignation_Type = '在职'";
                DataTable personnelTable = new DataTable();
                using (MySqlCommand selectCommand = new MySqlCommand(selectQuery, connection, transaction))
                {
                    using (MySqlDataAdapter adapter = new MySqlDataAdapter(selectCommand))
                    {
                        adapter.Fill(personnelTable);
                    }
                }

                // Total number of tasks
                int totalTasks = personnelTable.Rows.Count * daysInMonth;
                int completedTasks = 0;

                // Existence-check SQL
                string existsQuery = @"
                    SELECT COUNT(1) FROM ri_gongshi
                    WHERE `14ID` = @ID AND data = @Data";

                // Insert SQL
                string insertQuery = @"
                    INSERT INTO ri_gongshi
                        (`14ID`, ID_Card_Number, Name, Type, Department, Section, Workshop,
                         Large_Group, Team, Section_Leader, Workshop_Leader,
                         Large_Group_Leader, Team_Leader, data)
                    VALUES
                        (@ID, @IDCard, @Name, @Type, @Department, @Section, @Workshop,
                         @LargeGroup, @Team, @SectionLeader, @WorkshopLeader,
                         @LargeGroupLeader, @TeamLeader, @Data)";

                using (MySqlCommand existsCommand = new MySqlCommand(existsQuery, connection, transaction))
                using (MySqlCommand insertCommand = new MySqlCommand(insertQuery, connection, transaction))
                {
                    // Parameters for the existence check
                    existsCommand.Parameters.Add("@ID", MySqlDbType.VarChar);
                    existsCommand.Parameters.Add("@Data", MySqlDbType.DateTime);

                    // Parameters for the insert
                    insertCommand.Parameters.Add("@ID", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@IDCard", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Name", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Type", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Department", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Section", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Workshop", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@LargeGroup", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Team", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@SectionLeader", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@WorkshopLeader", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@LargeGroupLeader", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@TeamLeader", MySqlDbType.VarChar);
                    insertCommand.Parameters.Add("@Data", MySqlDbType.DateTime);

                    int insertCount = 0;
                    int skipCount = 0;

                    // Loop over every row in the DataTable
                    for (int rowIndex = 0; rowIndex < personnelTable.Rows.Count; rowIndex++)
                    {
                        DataRow row = personnelTable.Rows[rowIndex];
                        for (int day = 1; day <= daysInMonth; day++)
                        {
                            // Check for a cancellation request
                            if (worker.CancellationPending)
                            {
                                args.Cancel = true;
                                transaction.Rollback();
                                return;
                            }

                            completedTasks++;
                            int progressPercentage = (int)((double)completedTasks / totalTasks * 100);

                            // Report progress
                            worker.ReportProgress(progressPercentage, new
                            {
                                CurrentRow = rowIndex + 1,
                                TotalRows = personnelTable.Rows.Count,
                                Day = day,
                                DaysInMonth = daysInMonth,
                                InsertCount = insertCount,
                                SkipCount = skipCount
                            });

                            DateTime recordDate = new DateTime(year, month, day);

                            // Check whether the record already exists
                            existsCommand.Parameters["@ID"].Value = row["14ID"];
                            existsCommand.Parameters["@Data"].Value = recordDate;
                            int count = Convert.ToInt32(existsCommand.ExecuteScalar());
                            if (count > 0)
                            {
                                skipCount++;
                                continue; // record already exists, skip the insert
                            }

                            // Set the insert parameter values
                            insertCommand.Parameters["@ID"].Value = row["14ID"];
                            insertCommand.Parameters["@IDCard"].Value = row["ID_Card_Number"];
                            insertCommand.Parameters["@Name"].Value = row["Name"];
                            insertCommand.Parameters["@Type"].Value = row["Type"];
                            insertCommand.Parameters["@Department"].Value = row["Department"];
                            insertCommand.Parameters["@Section"].Value = row["Section"];
                            insertCommand.Parameters["@Workshop"].Value = row["Workshop"];
                            insertCommand.Parameters["@LargeGroup"].Value = row["Large_Group"];
                            insertCommand.Parameters["@Team"].Value = row["Team"];
                            insertCommand.Parameters["@SectionLeader"].Value = row["Section_Leader"];
                            insertCommand.Parameters["@WorkshopLeader"].Value = row["Workshop_Leader"];
                            insertCommand.Parameters["@LargeGroupLeader"].Value = row["Large_Group_Leader"];
                            insertCommand.Parameters["@TeamLeader"].Value = row["Team_Leader"];
                            insertCommand.Parameters["@Data"].Value = recordDate;

                            insertCommand.ExecuteNonQuery();
                            insertCount++;

                            // Report progress every 100 records to reduce UI updates
                            if (completedTasks % 100 == 0)
                            {
                                worker.ReportProgress(progressPercentage, new
                                {
                                    CurrentRow = rowIndex + 1,
                                    TotalRows = personnelTable.Rows.Count,
                                    Day = day,
                                    DaysInMonth = daysInMonth,
                                    InsertCount = insertCount,
                                    SkipCount = skipCount
                                });
                            }
                        }
                    }

                    transaction.Commit();
                    args.Result = new { InsertCount = insertCount, SkipCount = skipCount, Total = totalTasks };
                }
            }
            catch (Exception ex)
            {
                args.Result = ex;
            }
        }
    };

    worker.ProgressChanged += (s, args) =>
    {
        dynamic progressInfo = args.UserState;
        progressBar1.Value = args.ProgressPercentage;
        label1.Text = $"Processing: {args.ProgressPercentage}% - " +
                      $"Person: {progressInfo.CurrentRow}/{progressInfo.TotalRows}, " +
                      $"Day: {progressInfo.Day}/{progressInfo.DaysInMonth}, " +
                      $"Inserted: {progressInfo.InsertCount}, Skipped: {progressInfo.SkipCount}";
    };

    worker.RunWorkerCompleted += (s, args) =>
    {
        button1.Enabled = true;
        if (args.Cancelled)
        {
            MessageBox.Show("Operation cancelled");
            label1.Text = "Cancelled";
        }
        else if (args.Result is Exception ex)
        {
            MessageBox.Show($"Operation failed: {ex.Message}");
            label1.Text = "Operation failed";
        }
        else
        {
            dynamic result = args.Result;
            MessageBox.Show($"Data processing complete!\n" +
                            $"Total records: {result.Total}\n" +
                            $"Inserted: {result.InsertCount}\n" +
                            $"Already existed: {result.SkipCount}");
            label1.Text = "Done";
        }
    };

    // Start the background operation
    worker.RunWorkerAsync();
}
```

This runs too slowly and takes a long time; please optimize it.
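A sketch of one way to speed this up, not the asker's code: the main cost is the two round trips per (person, day) pair plus a ReportProgress call on every iteration. The sketch assumes a unique key can be added to ri_gongshi on (`14ID`, data); with that in place the per-row COUNT(1) check becomes unnecessary and rows can be sent as multi-row INSERT IGNORE batches, turning tens of thousands of round trips into a few dozen.

```csharp
using System;
using System.Collections.Generic;
using System.Data;
using MySql.Data.MySqlClient;

// Sketch only. Assumed prerequisite so the server skips duplicates itself:
//   ALTER TABLE ri_gongshi ADD UNIQUE KEY uk_id_data (`14ID`, data);
static class RiGongshiBulkFill
{
    public static void FillMonth(MySqlConnection connection, MySqlTransaction transaction,
                                 DataTable personnelTable, int year, int month)
    {
        string[] cols =
        {
            "14ID", "ID_Card_Number", "Name", "Type", "Department", "Section", "Workshop",
            "Large_Group", "Team", "Section_Leader", "Workshop_Leader",
            "Large_Group_Leader", "Team_Leader"
        };
        int daysInMonth = DateTime.DaysInMonth(year, month);
        const int batchSize = 500;          // rows per multi-row INSERT
        var valueClauses = new List<string>();
        var cmd = new MySqlCommand { Connection = connection, Transaction = transaction };
        int p = 0;                          // running parameter index

        void Flush()
        {
            if (valueClauses.Count == 0) return;
            cmd.CommandText =
                "INSERT IGNORE INTO ri_gongshi (`14ID`, ID_Card_Number, Name, Type, Department, " +
                "Section, Workshop, Large_Group, Team, Section_Leader, Workshop_Leader, " +
                "Large_Group_Leader, Team_Leader, data) VALUES " + string.Join(",", valueClauses);
            cmd.ExecuteNonQuery();          // one round trip for up to batchSize rows
            cmd.Parameters.Clear();
            valueClauses.Clear();
            p = 0;
        }

        foreach (DataRow row in personnelTable.Rows)
        {
            for (int day = 1; day <= daysInMonth; day++)
            {
                var names = new List<string>();
                foreach (string col in cols)
                {
                    string name = "@p" + p++;
                    names.Add(name);
                    cmd.Parameters.AddWithValue(name, row[col]);
                }
                string dateName = "@p" + p++;
                names.Add(dateName);
                cmd.Parameters.AddWithValue(dateName, new DateTime(year, month, day));
                valueClauses.Add("(" + string.Join(",", names) + ")");

                if (valueClauses.Count >= batchSize) Flush();
            }
        }
        Flush();                            // send any remaining rows
    }
}
```

If batched INSERTs are still not fast enough, MySqlBulkLoader (LOAD DATA) is the next step up; either way the BackgroundWorker wrapper and progress reporting from the question can stay, reporting once per flushed batch instead of once per row.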
Posted 06-25:
```python
from pyspark.sql import SparkSession, functions as F
from elasticsearch import Elasticsearch, helpers
from elasticsearch.exceptions import ConnectionError, RequestError, NotFoundError
from datetime import datetime
import base64
import hashlib
import sys
from pyspark import TaskContext
import uuid

# ====================== Spark initialization ======================
spark = SparkSession.builder \
    .appName("BigDataToES") \
    .config("spark.sql.shuffle.partitions", 800) \
    .config("spark.default.parallelism", 800) \
    .config("spark.driver.memory", "30g") \
    .config("spark.speculation", "false") \
    .getOrCreate()

success_count = spark.sparkContext.accumulator(0)
failed_count = spark.sparkContext.accumulator(0)

# ====================== Data extraction ======================
def get_spark_sql_data(sql_query):
    try:
        spark.sql(f"use ${mainDb}")  # ${mainDb} must be defined/substituted
        df = spark.sql(sql_query)
        print(f"[INFO] Row count: {df.count()}")

        # Identify date/timestamp columns (optional logic)
        date_columns = [
            col for col, datatype in df.dtypes
            if datatype in ('date', 'timestamp', 'string')
        ]
        columns = df.columns
        print(f"[INFO] Columns: {columns}")

        # Repartition for parallel writes
        rows_rdd = df.rdd.map(lambda row: tuple(row)).repartition(800)
        print(f"[INFO] RDD partitions: {rows_rdd.getNumPartitions()}")
        return columns, rows_rdd
    except Exception as e:
        print(f"[ERROR] Spark SQL failed: {str(e)}")
        sys.exit(1)

# ====================== ES write ======================
def es_insert_data(es_host, es_user, es_pass, index_name, columns, rows_rdd,
                   mode='append', es_shards=5, es_replicas=1,
                   failure_threshold=1000000000):
    """
    Improved ES writer: overwrite/append modes, error handling,
    automatic type mapping, and batched bulk writes.
    """
    try:
        # 1. Build ES connection settings
        es_kwargs = {
            "hosts": [es_host],
            "basic_auth": (es_user, es_pass),
            "request_timeout": 300,  # timeout
            "max_retries": 3,
            "retry_on_timeout": True
        }
        # TLS settings (HTTPS only)
        if es_host.startswith("https://"):
            es_kwargs.update({
                "verify_certs": False,
                "ssl_assert_hostname": False
            })

        # 2. Create the ES client
        es = Elasticsearch(**es_kwargs)

        # 3. Infer field types automatically
        def get_es_type(col_name, value):
            if isinstance(value, datetime):
                return {"type": "date", "format": "yyyy-MM-dd"}
            elif isinstance(value, (int, float)):
                return "double" if isinstance(value, float) else "long"
            elif isinstance(value, bool):
                return "boolean"
            else:
                return "text"

        # 4. Handle the index (overwrite/append logic)
        if mode == 'overwrite':
            # Try to delete the old index
            try:
                es.indices.delete(index=index_name)
                print(f"[INFO] overwrite mode: old index {index_name} deleted")
            except NotFoundError:
                print(f"[INFO] overwrite mode: index {index_name} does not exist, creating it")
            except RequestError as e:
                print(f"[ERROR] failed to delete index: {str(e)}, aborting overwrite")
                sys.exit(1)

            # Sample a row to infer field types
            if rows_rdd:
                sample_row = rows_rdd.take(1)[0]
                properties = {
                    col: {"type": get_es_type(col, val)}
                    for col, val in zip(columns, sample_row)
                }
            else:
                properties = {col: {"type": "text"} for col in columns}

            # Create the new index
            es.indices.create(
                index=index_name,
                body={
                    "settings": {
                        "number_of_shards": es_shards,
                        "number_of_replicas": es_replicas
                    },
                    "mappings": {"properties": properties}
                }
            )
            print(f"[INFO] overwrite mode: index {index_name} created "
                  f"(shards: {es_shards}, replicas: {es_replicas})")
        elif mode == 'append':
            # Create the index if it does not exist
            if not es.indices.exists(index=index_name):
                if rows_rdd:
                    sample_row = rows_rdd.take(1)[0]
                    properties = {
                        col: {"type": get_es_type(col, val)}
                        for col, val in zip(columns, sample_row)
                    }
                else:
                    properties = {col: {"type": "text"} for col in columns}
                es.indices.create(
                    index=index_name,
                    body={"mappings": {"properties": properties}}
                )
                print(f"[INFO] append mode: index {index_name} created "
                      f"(shards: {es_shards}, replicas: {es_replicas})")
            else:
                print(f"[INFO] append mode: index {index_name} exists, appending data")

        # 5. Distributed write logic (batched per partition)
        def write_partition_to_es(partition):
            es_part = Elasticsearch(**es_kwargs)  # a new client per partition
            errors = []
            local_success = 0
            local_failed = 0
            docs = []
            for i, row_tuple in enumerate(partition):
                # Generate a globally unique document ID (UUID)
                unique_id = str(uuid.uuid4())
                doc = dict(zip(columns, row_tuple))

                # Type conversion (dates, binary, special float values)
                for col, val in doc.items():
                    if isinstance(val, datetime):
                        doc[col] = val.strftime("%Y-%m-%d")
                    elif isinstance(val, bytes):
                        doc[col] = base64.b64encode(val).decode('utf-8')
                    elif isinstance(val, float) and val in (float('inf'), float('-inf'), float('nan')):
                        doc[col] = None

                # Build the bulk action
                docs.append({
                    "_op_type": "create",
                    "_index": index_name,
                    "_id": unique_id,
                    "_source": doc
                })

                # Bulk write (flush every 500 docs)
                if len(docs) >= 500:
                    success, failed = helpers.bulk(
                        es_part, docs, chunk_size=500,
                        raise_on_error=False, refresh=False
                    )
                    local_success += success
                    local_failed += len(failed)
                    errors.extend(failed)
                    docs = []  # reset the buffer

            # Flush the remaining docs
            if docs:
                success, failed = helpers.bulk(
                    es_part, docs, chunk_size=1000,
                    raise_on_error=False, refresh=False
                )
                local_success += success
                local_failed += len(failed)
                errors.extend(failed)

            # Update the global accumulators
            success_count.add(local_success)
            failed_count.add(local_failed)

            # Per-partition stats
            print(f"[INFO] partition write: {local_success} succeeded, {local_failed} failed")
            if errors:
                print(f"[ERRORS] first 10 failures:")
                for error in errors[:10]:
                    print(f"  {error}")
            es_part.close()  # close the client

        # Run the partition writes
        rows_rdd.foreachPartition(write_partition_to_es)

        # 6. Global statistics and validation
        total_success = success_count.value
        total_failed = failed_count.value
        total_count = total_success + total_failed
        failure_rate = total_failed / total_success if total_count > 0 else 0

        # Refresh so the documents are searchable
        es.indices.refresh(index=index_name)

        # Verify the actual number written
        count_result = es.count(index=index_name)
        print(f"[INFO] totals: {total_success} succeeded, {total_failed} failed")
        print(f"[INFO] index {index_name} actual document count: {count_result['count']}")

        # Failure-rate check
        if failure_rate > failure_threshold:
            print(f"[ERROR] failure rate {failure_rate:.2%} exceeds threshold "
                  f"{failure_threshold:.2%}, aborting")
            spark.stop()
            sys.exit(1)

    except ConnectionError:
        print("[ERROR] could not connect to ES, check the address/credentials")
        sys.exit(1)
    except RequestError as e:
        print(f"[ERROR] ES request error: {str(e)}")
        sys.exit(1)
    except Exception as e:
        print(f"[ERROR] unexpected error: {str(e)}")
        sys.exit(1)

# ====================== Main ======================
if __name__ == "__main__":
    # User-specific configuration (adjust to your environment)
    SPARK_SQL_CONFIG = {
        "sql_query": "SELECT * FROM ${input1};"  # ${input1} must be a valid table name
    }
    print("Spark SQL config:", SPARK_SQL_CONFIG)

    ES_CONFIG = {
        "es_host": "http://tias8es.jcfwpt.cmbc.com.cn:30004",
        "es_user": "admin",
        "es_pass": "Cmbc1#tias",
        "index_name": "gffcm_pfs_indicator",
        "mode": "append",  # or "overwrite"
        "es_shards": 5,
        "es_replicas": 1,
        "failure_threshold": 0.1  # example: allow a 10% failure rate
    }

    # Extract and write
    columns, rows_rdd = get_spark_sql_data(**SPARK_SQL_CONFIG)
    if columns and rows_rdd:
        es_insert_data(columns=columns, rows_rdd=rows_rdd, **ES_CONFIG)
    else:
        print("[ERROR] no valid Spark data retrieved, aborting")
        spark.stop()
        sys.exit(1)
```

Given this code, how can the "division by zero" error be avoided?
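Assuming the ZeroDivisionError comes from the global-statistics step, which contains the only division in the script: the expression guards on total_count but divides by total_success, so it blows up whenever every document in a run fails (total_success == 0 while total_count > 0), and even when it does not fail it computes a failed-to-succeeded ratio rather than a failure rate. A minimal sketch of the corrected calculation, keeping the variable names from the question:

```python
# Divide by total_count and guard on the same value; return 0.0 when nothing
# was attempted at all. This is a sketch of the fix, not the original code.
total_success = success_count.value
total_failed = failed_count.value
total_count = total_success + total_failed

failure_rate = (total_failed / total_count) if total_count > 0 else 0.0
```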