Part 1: Overall Workflow
Data template produced by the queries: each row is a Simulation (simulation run) and Time (time step); the columns Var1-Var8 hold the variable values.
| Simulation | Time Index | Var1 | Var2 | Var3 | Var4 | Var5 | Var6 | Var7 | Var8 |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | ||||||||
| 1 | 1 | ||||||||
| 1 | 2 | ||||||||
| 1 | 3 | ||||||||
| … | … | … | … | … | … | … | … | … | … |
| 1 | 99 | ||||||||
| 2 | 0 | ||||||||
| … | … | … | … | … | … | … | … | … | … |
CSV data template: each row is a (Simulation, Var) combination, the columns are the time steps 0-99, and the values are the variable's value at that time step.
| Simulation | Var | 0 | 1 | 2 | … | 99 |
|---|---|---|---|---|---|---|
| 1 | Var1 | … | ||||
| 1 | Var2 | … | ||||
| 1 | Var3 | … | ||||
| 1 | Var4 | … | ||||
| 1 | Var5 | … | ||||
| 1 | Var6 | … | ||||
| 1 | Var7 | … | ||||
| 1 | Var8 | … | ||||
| 2 | Var1 | … | ||||
| 2 | Var2 | … | ||||
| 2 | Var3 | … | ||||
| 2 | Var4 | … | ||||
| 2 | Var5 | … | ||||
| … | … | … | … | … | … | … |
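Before diving into the implementation, here is a minimal, self-contained sketch of the long-to-wide reshape that turns the query layout into the CSV layout (toy data; the column names assume the two templates above):
import pandas as pd

# Toy data in the query layout: one row per (Simulation, Time), Var1/Var2 as columns.
query_df = pd.DataFrame({
    "Simulation": [1, 1, 2, 2],
    "Time": [0, 1, 0, 1],
    "Var1": [0.80, 0.81, 0.75, 0.76],
    "Var2": [1.10, 1.12, 1.05, 1.07],
})
# Long format: one row per (Simulation, Time, Var).
long_df = query_df.melt(id_vars=["Simulation", "Time"], var_name="Var", value_name="Value")
# CSV layout: (Simulation, Var) rows, one column per time step.
csv_df = (long_df
          .pivot_table(index=["Simulation", "Var"], columns="Time",
                       values="Value", aggfunc="first")
          .reset_index())
print(csv_df)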
Part 2: Implementing the Key Steps
Step 1: Copy the table and queries from the source Access file into the target files
Drive Access from Python to copy 1 table and 7 queries into 10 target .mdb files. Recommended libraries: pyodbc (to work with the database structure and data), pywin32 (Access COM automation, used in the alternative approach below), and shutil (to copy database files, optional).
Preparation
- Install the dependencies: pyodbc (Access via ODBC), pandas (data handling), pywin32 (Access COM interface):
pip install pywin32 pandas pyodbc
- Check the Access driver: on Windows the Microsoft Access Database Engine must be installed, and its 32/64-bit build must match the Python interpreter, as shown in the check below.
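To verify that the Access ODBC driver is installed and matches your Python bitness, you can list the installed drivers (pyodbc.drivers() is a standard pyodbc call):
import pyodbc

# Print all installed ODBC drivers; look for "Microsoft Access Driver (*.mdb, *.accdb)".
# If it is missing, install the Microsoft Access Database Engine that matches your
# Python interpreter (32-bit vs 64-bit).
for driver in pyodbc.drivers():
    print(driver)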
import os
import shutil
import pyodbc
from tqdm import tqdm  # progress bar (optional)

def copy_access_objects(source_db, target_dir, table_names, query_names,
                        num_targets=10, blank_template=None):
    """
    Copy tables (and, with limitations, queries) from a source Access file into several target files.
    :param source_db: path to the source Access file (.mdb)
    :param target_dir: directory in which the target files are stored
    :param table_names: list of table names to copy (e.g. ["Table1"])
    :param query_names: list of query names to copy (e.g. ["Query1", "Query2", ...])
    :param num_targets: number of target files (default 10)
    :param blank_template: path to an empty .mdb used to create missing targets
                           (the ODBC driver cannot create a brand-new .mdb by itself)
    """
    # Make sure the target directory exists
    os.makedirs(target_dir, exist_ok=True)
    # Connect to the source database
    driver = "{Microsoft Access Driver (*.mdb, *.accdb)}"
    source_conn = pyodbc.connect(f"DRIVER={driver};DBQ={source_db};")
    source_cursor = source_conn.cursor()
    # Loop over the 10 target files
    for i in tqdm(range(1, num_targets + 1), desc="copy progress"):
        target_db = os.path.join(target_dir, f"Target_{i}.mdb")
        # Create the target database file if needed. pyodbc/ODBC cannot create a new
        # .mdb, so an empty template file (assumed to exist) is copied instead.
        if not os.path.exists(target_db):
            if blank_template is None:
                print(f"{target_db} does not exist and no blank template was given - skipped")
                continue
            shutil.copyfile(blank_template, target_db)
        target_conn = pyodbc.connect(f"DRIVER={driver};DBQ={target_db};")
        target_cursor = target_conn.cursor()
        try:
            # Copy table structure + data
            for table in table_names:
                # Fetch the structure only (no rows)
                source_cursor.execute(f"SELECT * FROM [{table}] WHERE 1=0")
                columns = source_cursor.description
                col_defs = []
                for col in columns:
                    # Simplified: pyodbc's type_code (a Python type) has to be mapped to
                    # an Access SQL type (TEXT, INTEGER, DOUBLE, DATETIME, ...); adjust as needed.
                    col_type = "DOUBLE" if col[1] is float else "TEXT(255)"
                    col_defs.append(f"[{col[0]}] {col_type}")
                # Create the table in the target database
                target_cursor.execute(f"CREATE TABLE [{table}] ({', '.join(col_defs)})")
                # Copy the data (if the table is not empty)
                data = source_cursor.execute(f"SELECT * FROM [{table}]").fetchall()
                if data:
                    placeholders = ", ".join(["?"] * len(columns))
                    target_cursor.executemany(
                        f"INSERT INTO [{table}] VALUES ({placeholders})", data)
                target_conn.commit()
            # Copy queries - caveat: Access has no INFORMATION_SCHEMA and the ODBC driver
            # does not expose the SQL text of saved queries, so they cannot be read back here.
            # If you already have each query's SQL text, a plain SELECT query can be recreated
            # in the target with, e.g.
            #   target_cursor.execute(f"CREATE VIEW [{query}] AS {query_sql}")
            # For a faithful copy of all 7 queries, use the COM-based approach further below.
            for query in query_names:
                print(f"Query '{query}' not copied via ODBC; see the COM-based method below.")
        except Exception as e:
            print(f"Copy into {target_db} failed: {e}")
            continue
        finally:
            # Close the target connection
            target_conn.close()
    source_conn.close()

# Example call (adjust the parameters to your environment)
source_mdb = r"C:\SourceDB.mdb"
target_dir = r"C:\TargetDBs"
tables_to_copy = ["YourTable"]                          # replace with the real table name
queries_to_copy = ["Query1", "Query2", "Query3", ...]   # replace with the real 7 query names
# pass blank_template=<path to an empty .mdb> if the target files do not exist yet
copy_access_objects(source_mdb, target_dir, tables_to_copy, queries_to_copy, num_targets=10)
Notes
- Field type mapping: the type_code returned by pyodbc does not correspond directly to Access column types (TEXT, DATE, ...), so a manual mapping is needed (see https://pyodbc.readthedocs.io/en/latest/usage.html#data-types).
- Query dependencies: if a query depends on other tables or queries, make sure all dependent objects have already been copied into the target database.
- Large-table performance: executemany can be slow for large tables; read and insert in chunks instead (fetchmany(1000)), as in the sketch below.
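A minimal sketch of the chunked copy mentioned above; copy_table_in_chunks and its parameters are illustrative names, assuming the open source/target cursors from copy_access_objects:
def copy_table_in_chunks(source_cursor, target_cursor, target_conn,
                         table, n_columns, chunk_size=1000):
    """Copy one table in chunks of `chunk_size` rows to keep memory use bounded."""
    placeholders = ", ".join(["?"] * n_columns)
    insert_sql = f"INSERT INTO [{table}] VALUES ({placeholders})"
    source_cursor.execute(f"SELECT * FROM [{table}]")
    while True:
        rows = source_cursor.fetchmany(chunk_size)   # next chunk of rows
        if not rows:
            break
        target_cursor.executemany(insert_sql, rows)
        target_conn.commit()  # commit per chunk so a failure loses at most one chunk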
Possible improvements:
Database connection
import pyodbc
import pandas as pd

def connect_access(db_path):
    """Open a connection to an Access database."""
    driver = r"{Microsoft Access Driver (*.mdb, *.accdb)}"
    conn_str = f"DRIVER={driver};DBQ={db_path};"
    try:
        conn = pyodbc.connect(conn_str)
        return conn
    except pyodbc.Error as e:
        raise Exception(f"Failed to connect to the database: {e}")

def read_table_data(conn, table_name):
    """Read an entire table into a DataFrame (suitable for small data volumes)."""
    query = f"SELECT * FROM {table_name}"
    return pd.read_sql(query, conn)
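A short usage sketch of the two helpers above (the path and table name are placeholders):
# Connect, read one table and close the connection again.
conn = connect_access(r"C:\SourceDB.mdb")     # placeholder path
try:
    df = read_table_data(conn, "YourTable")   # placeholder table name
    print(df.head())
finally:
    conn.close()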
Configuration parameters
Manage the paths and object lists in a config.yaml file (easier to maintain):
source_db: "path/to/source.mdb"            # source Access file
target_dir: "path/to/target_dir/"          # directory for the target files
target_files: ["target1.mdb", ..., "target10.mdb"]  # the 10 target file names
tables_to_copy: ["SourceTable"]            # the 1 table to copy
queries_to_copy: ["Query1", "Query2", ..., "Query7"]  # the 7 queries to copy
output_csv_dir: "path/to/csv_output/"      # CSV output directory
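Loading and sanity-checking this configuration is a few lines of PyYAML; the key names follow the config.yaml sketch above (this helper is an illustration, not part of the original code):
import yaml

# Load config.yaml and fail early if a required key is missing.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

required_keys = ["source_db", "target_dir", "target_files",
                 "tables_to_copy", "queries_to_copy", "output_csv_dir"]
missing = [k for k in required_keys if k not in config]
if missing:
    raise KeyError(f"config.yaml is missing keys: {missing}")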
Use win32com to drive the Access COM interface and copy the tables and queries into the target file one by one.
import os
import win32com.client as win32

# DoCmd.CopyObject object-type constants: acTable = 0, acQuery = 1
AC_TABLE = 0
AC_QUERY = 1

def copy_objects_to_target(source_db_path, target_db_path, tables, queries):
    """Copy tables and queries from the source database into one target database."""
    access = None
    try:
        # Start Access in the background
        access = win32.Dispatch('Access.Application')
        access.Visible = False
        # If the target file does not exist yet, create it first, then close it again
        if not os.path.exists(target_db_path):
            access.NewCurrentDatabase(target_db_path)
            access.CloseCurrentDatabase()
        # Open the source database as the current database (not exclusive)
        access.OpenCurrentDatabase(source_db_path, False)
        source_db = access.CurrentDb()
        # Collect the object names that actually exist in the source
        existing_tables = {source_db.TableDefs(i).Name for i in range(source_db.TableDefs.Count)}
        existing_queries = {source_db.QueryDefs(i).Name for i in range(source_db.QueryDefs.Count)}
        # Copy tables: DoCmd.CopyObject(destination, new_name, object_type, source_name)
        for table in tables:
            if table in existing_tables:
                access.DoCmd.CopyObject(target_db_path, table, AC_TABLE, table)
                print(f"Table {table} copied to {target_db_path}")
        # Copy queries
        for query in queries:
            if query in existing_queries:
                access.DoCmd.CopyObject(target_db_path, query, AC_QUERY, query)
                print(f"Query {query} copied to {target_db_path}")
        return True
    except Exception as e:
        print(f"Copy failed: {str(e)}")
        return False
    finally:
        # Close the database and quit Access
        if access is not None:
            try:
                access.CloseCurrentDatabase()
            except Exception:
                pass
            access.Quit()
            del access
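A usage sketch for the function above (paths and object names are placeholders):
# Copy one table and seven queries into a single target file.
ok = copy_objects_to_target(
    source_db_path=r"C:\SourceDB.mdb",              # placeholder path
    target_db_path=r"C:\TargetDBs\Target_1.mdb",    # placeholder path
    tables=["YourTable"],
    queries=[f"Query{i}" for i in range(1, 8)],
)
print("copy succeeded" if ok else "copy failed")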
Complete example: an enterprise-grade object-copy tool
import os
import win32com.client as win32
from win32com.client import constants

class AccessObjectCopier:
    """Enterprise-grade Access object-copy tool."""

    def __init__(self, visible=False):
        # EnsureDispatch generates the type library so that win32com.client.constants
        # (acTable, acQuery, ...) is populated.
        self.access = win32.gencache.EnsureDispatch("Access.Application")
        self.access.Visible = visible
        self.source_db = None
        self.source_path = None

    def connect_to_source(self, db_path):
        """Connect to the source database."""
        try:
            self.access.OpenCurrentDatabase(db_path)
            self.source_db = self.access.CurrentDb()
            self.source_path = db_path
            return True
        except Exception as e:
            print(f"Failed to connect to the source database: {e}")
            return False

    def ensure_target_exists(self, target_path):
        """Make sure the target database exists."""
        if not os.path.exists(target_path):
            try:
                # Create the new database ...
                self.access.NewCurrentDatabase(target_path)
                self.access.CloseCurrentDatabase()
                # ... then reopen the source database and refresh the reference
                self.access.OpenCurrentDatabase(self.source_path)
                self.source_db = self.access.CurrentDb()
                return True
            except Exception as e:
                print(f"Failed to create the target database: {e}")
                return False
        return True

    def copy_objects(self, target_path, objects, overwrite=False):
        """
        Copy objects to the target database.
        Args:
            target_path: path of the target database
            objects: dict of objects to copy ({"tables": [...], "queries": [...]})
            overwrite: whether to overwrite objects that already exist
        """
        if not self.ensure_target_exists(target_path):
            return False
        results = {"success": [], "failed": []}
        # Copy tables
        for table_name in objects.get("tables", []):
            if self.copy_table(table_name, target_path, overwrite):
                results["success"].append(f"table: {table_name}")
            else:
                results["failed"].append(f"table: {table_name}")
        # Copy queries
        for query_name in objects.get("queries", []):
            if self.copy_query(query_name, target_path, overwrite):
                results["success"].append(f"query: {query_name}")
            else:
                results["failed"].append(f"query: {query_name}")
        return results
    def copy_table(self, table_name, target_path, overwrite=False):
        """Copy a table."""
        try:
            # If overwriting is allowed and the object already exists, delete it first
            if overwrite and self.object_exists_in_target(target_path, table_name, "table"):
                self.delete_object_in_target(target_path, table_name, constants.acTable)
            self.access.DoCmd.CopyObject(target_path, table_name, constants.acTable, table_name)
            return True
        except Exception as e:
            print(f"Failed to copy table '{table_name}': {e}")
            return False

    def copy_query(self, query_name, target_path, overwrite=False):
        """Copy a query."""
        try:
            # If overwriting is allowed and the object already exists, delete it first
            if overwrite and self.object_exists_in_target(target_path, query_name, "query"):
                self.delete_object_in_target(target_path, query_name, constants.acQuery)
            self.access.DoCmd.CopyObject(target_path, query_name, constants.acQuery, query_name)
            return True
        except Exception as e:
            print(f"Failed to copy query '{query_name}': {e}")
            return False
    def object_exists_in_target(self, target_path, object_name, object_type):
        """Check whether an object already exists in the target database."""
        # Remember which database is currently open
        current_db = self.access.CurrentDb().Name
        try:
            # Temporarily switch to the target database
            self.access.CloseCurrentDatabase()
            self.access.OpenCurrentDatabase(target_path)
            target_db = self.access.CurrentDb()
            # Look for the object by name
            exists = False
            if object_type == "table":
                for i in range(target_db.TableDefs.Count):
                    if target_db.TableDefs(i).Name == object_name:
                        exists = True
                        break
            elif object_type == "query":
                for i in range(target_db.QueryDefs.Count):
                    if target_db.QueryDefs(i).Name == object_name:
                        exists = True
                        break
            # Switch back to the source database and refresh the reference
            self.access.CloseCurrentDatabase()
            self.access.OpenCurrentDatabase(current_db)
            self.source_db = self.access.CurrentDb()
            return exists
        except Exception as e:
            print(f"Failed to check whether the target object exists: {e}")
            # Try to restore the connection to the source database
            try:
                self.access.OpenCurrentDatabase(current_db)
            except Exception:
                pass
            return False

    def delete_object_in_target(self, target_path, object_name, object_type):
        """Delete an object in the target database."""
        # Remember which database is currently open
        current_db = self.access.CurrentDb().Name
        try:
            # Switch to the target database
            self.access.CloseCurrentDatabase()
            self.access.OpenCurrentDatabase(target_path)
            # Delete the object (object_type is an acObjectType constant)
            self.access.DoCmd.DeleteObject(object_type, object_name)
            # Switch back to the source database and refresh the reference
            self.access.CloseCurrentDatabase()
            self.access.OpenCurrentDatabase(current_db)
            self.source_db = self.access.CurrentDb()
            return True
        except Exception as e:
            print(f"Failed to delete the target object: {e}")
            # Try to restore the connection to the source database
            try:
                self.access.OpenCurrentDatabase(current_db)
            except Exception:
                pass
            return False
    def close(self):
        """Quit Access and release the COM object."""
        try:
            self.access.Quit()
        except Exception:
            pass

# Usage example
copier = AccessObjectCopier(visible=False)
if copier.connect_to_source("C:\\Source.accdb"):
    objects_to_copy = {
        "tables": ["Customers", "Orders"],
        "queries": ["SalesReport", "CustomerList"]
    }
    results = copier.copy_objects("C:\\Backup.accdb", objects_to_copy, overwrite=True)
    print("Copy results:", results)
copier.close()
Step 2: Run the data-conversion function with multiple processes
Use multiprocessing.Pool to process the 10 target files in parallel; each process opens its own connection to one target Access file, reads the Query1 and Query2 data, and converts it to CSV.
Conversion logic: reshape the long-format data into wide format, using pandas.pivot_table or melt + pivot (see the small reshape example in Part 1).
Per-target-file processing function
import os
import pandas as pd
import pyodbc
from multiprocessing import Pool

def process_target_file(target_db_path):
    """Process one target Access file: read Query1 and Query2 and write a CSV."""
    conn = None
    try:
        # Connect to the target database
        driver = "{Microsoft Access Driver (*.mdb, *.accdb)}"
        conn = pyodbc.connect(f"DRIVER={driver};DBQ={target_db_path};")
        # Read Query1 and Query2 (Query1 is assumed to hold the raw data of the first template)
        query1_df = pd.read_sql("SELECT * FROM Query1", conn)
        query2_df = pd.read_sql("SELECT * FROM Query2", conn)  # handle Query2 separately if it serves another purpose (e.g. configuration)
        # Data cleaning: if the column names differ from the template
        # (Simulation, Time, Var1-Var8), rename them here, e.g.
        # query1_df = query1_df.rename(columns={"Sim": "Simulation", ...})
        # Extract the time step. The Time column may contain redundant information
        # (e.g. "0 0.80046"); here the first token is assumed to be the time step.
        query1_df[["TimeStep", "TimeRest"]] = query1_df["Time"].astype(str).str.split(" ", n=1, expand=True)
        query1_df["TimeStep"] = query1_df["TimeStep"].astype(int)  # integer time step (0-99)
        # Keep only valid time steps (0-99)
        query1_df = query1_df[query1_df["TimeStep"].between(0, 99)]
        # Reshape into the CSV template:
        # rows: Simulation + Var; columns: TimeStep (0-99); values: variable value
        var_cols = [f"Var{i}" for i in range(1, 9)]
        long_df = query1_df.melt(id_vars=["Simulation", "TimeStep"],
                                 value_vars=var_cols,
                                 var_name="Var", value_name="Value")
        pivot_df = long_df.pivot_table(
            index=["Simulation", "Var"],   # row labels
            columns="TimeStep",            # column labels (time steps)
            values="Value",                # variable values
            aggfunc="first"                # take the first value if duplicates exist
        ).reset_index()
        # Make sure the columns are Simulation, Var, 0 ... 99 (missing steps become NaN)
        pivot_df.columns.name = None
        pivot_df.columns = [str(c) for c in pivot_df.columns]
        time_columns = [str(ts) for ts in range(100)]
        pivot_df = pivot_df.reindex(columns=["Simulation", "Var"] + time_columns)
        # Write the CSV next to the target file (adjust the path if needed)
        output_csv = os.path.splitext(target_db_path)[0] + "_output.csv"
        pivot_df.to_csv(output_csv, index=False)
        return f"Processed: {target_db_path} -> {output_csv}"
    except Exception as e:
        return f"Failed: {target_db_path}, error: {e}"
    finally:
        if conn is not None:
            conn.close()

# Example call: process the 10 target files with multiple processes
if __name__ == "__main__":
    target_dir = r"C:\TargetDBs"
    target_files = [os.path.join(target_dir, f"Target_{i}.mdb") for i in range(1, 11)]
    # Use a process pool (number of processes = CPU cores, capped at 10)
    with Pool(processes=min(10, os.cpu_count())) as pool:
        results = pool.map(process_target_file, target_files)
    # Print the results
    for res in results:
        print(res)
Notes
- Time-step handling: the Time column of the first template may contain non-integer or redundant information (e.g. "0 0.80046"); extract the effective time step according to your actual data (here the first token is taken as the time point).
- Variable alignment: make sure the variables in Query1 (Var1-Var8) exactly match the row labels of the CSV template, to avoid wrong column names.
- Missing values: some time steps in the CSV template may be empty; keep them as empty values (NaN) or fill a default (such as "-") when converting, as in the snippet below.
- Multiprocess safety: each process opens its own database connection, which avoids file-lock conflicts (Access does not support concurrent writes from multiple processes, but here we only read).
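For the missing-value point above, pandas lets you choose how NaN is written to the CSV via the na_rep argument of to_csv (a small illustration with toy data, not part of the original function):
import pandas as pd

df = pd.DataFrame({"Simulation": [1], "Var": ["Var1"], "0": [0.8], "1": [None]})
df.to_csv("with_empty_cells.csv", index=False)               # NaN -> empty cell
df.to_csv("with_placeholder.csv", index=False, na_rep="-")   # NaN -> "-"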
An alternative solution for Step 2:
import os
import pandas as pd
import win32com.client as win32

def recordset_to_dataframe(rs):
    """Helper: turn a DAO Recordset into a pandas DataFrame."""
    columns = [field.Name for field in rs.Fields]
    if rs.EOF:
        return pd.DataFrame(columns=columns)
    # GetRows returns the data field-major (one tuple per field), so transpose it to rows
    data = rs.GetRows(1000000)  # fetch up to 1,000,000 rows, i.e. everything here
    return pd.DataFrame(list(zip(*data)), columns=columns)

def transform_query_to_csv(target_db_path, output_dir):
    """Read Query1 and Query2 from the target database and write them to CSV."""
    access = None
    try:
        # Connect to the target database (Access runs in the background)
        access = win32.gencache.EnsureDispatch('Access.Application')
        access.Visible = False
        access.OpenCurrentDatabase(target_db_path, False)
        db = access.CurrentDb()
        # Read Query1 and Query2 into DataFrames
        df1 = recordset_to_dataframe(db.OpenRecordset("SELECT * FROM Query1"))
        df2 = recordset_to_dataframe(db.OpenRecordset("SELECT * FROM Query2"))
        # Reshape (example: melt Var1-Var8 into Var/Value pairs and pivot back to one
        # row per (Simulation, Time) with columns Var0-Var99; use the pivot in the main
        # solution above if you need the (Simulation, Var) x time-step CSV layout)
        def reshape_df(df):
            # Assumed source columns: Simulation, Time, Var1-Var8 (adjust as needed)
            id_vars = ['Simulation', 'Time']
            value_vars = [f'Var{i}' for i in range(1, 9)]
            melted = df.melt(id_vars=id_vars, value_vars=value_vars,
                             var_name='Var', value_name='Value')
            # Pivot back to wide format, one column per variable
            pivoted = melted.pivot_table(
                index=['Simulation', 'Time'],
                columns='Var',
                values='Value',
                aggfunc='first'
            ).reset_index()
            # Fixed column order Simulation, Time, Var0...Var99; columns that do not
            # exist in the data (here Var0 and Var9-Var99) are filled with NaN
            cols = ['Simulation', 'Time'] + [f'Var{i}' for i in range(0, 100)]
            return pivoted.reindex(columns=cols)
        # Reshape and save as CSV
        df1_reshaped = reshape_df(df1)
        df2_reshaped = reshape_df(df2)
        # Output paths (example: Target_1.mdb_Query1.csv)
        csv1_path = os.path.join(output_dir, f"{os.path.basename(target_db_path)}_Query1.csv")
        csv2_path = os.path.join(output_dir, f"{os.path.basename(target_db_path)}_Query2.csv")
        df1_reshaped.to_csv(csv1_path, index=False)
        df2_reshaped.to_csv(csv2_path, index=False)
        print(f"Conversion finished: {csv1_path}, {csv2_path}")
        return True
    except Exception as e:
        print(f"Conversion failed: {str(e)}")
        return False
    finally:
        # Close the database and quit Access
        if access is not None:
            try:
                access.CloseCurrentDatabase()
            except Exception:
                pass
            access.Quit()
            del access
import multiprocessing
import os
import yaml

def process_single_target(target_file, config):
    """Full processing pipeline for one target file (copy + convert)."""
    source_db = config['source_db']
    target_dir = config['target_dir']
    output_csv_dir = config['output_csv_dir']
    # Full path of the target database
    target_db_path = os.path.join(target_dir, target_file)
    # Step 1: copy the table and queries into the target file
    tables = config['tables_to_copy']
    queries = config['queries_to_copy']
    copy_success = copy_objects_to_target(source_db, target_db_path, tables, queries)
    if not copy_success:
        return f"{target_file}: copy failed, conversion skipped"
    # Step 2: convert the data and write the CSV files
    transform_success = transform_query_to_csv(target_db_path, output_csv_dir)
    return f"{target_file}: done" if transform_success else f"{target_file}: conversion failed"

def main():
    # Load the configuration
    with open("config.yaml", "r") as f:
        config = yaml.safe_load(f)
    # Create the output directories if they do not exist
    os.makedirs(config['target_dir'], exist_ok=True)
    os.makedirs(config['output_csv_dir'], exist_ok=True)
    # Create a process pool (10 processes, one per target file)
    with multiprocessing.Pool(processes=10) as pool:
        # Build the task arguments (one per target file)
        tasks = [(file, config) for file in config['target_files']]
        # Run in parallel (starmap passes multiple arguments)
        results = pool.starmap(process_single_target, tasks)
    # Print a summary of the results
    print("\n===== Results =====")
    for res in results:
        print(res)

if __name__ == "__main__":
    main()
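One practical caveat when combining multiprocessing with COM automation: each worker starts its own Access.Application instance, so ten parallel processes can be resource-heavy. In addition, if a worker raises errors such as "CoInitialize has not been called", COM can be initialized explicitly at the start of the worker; pythoncom ships with pywin32, and the wrapper below is a hedged sketch, not part of the original code:
import pythoncom

def process_single_target_com_safe(target_file, config):
    """Wrapper that initializes/uninitializes COM around the real worker function."""
    pythoncom.CoInitialize()   # initialize COM for this worker process/thread
    try:
        return process_single_target(target_file, config)
    finally:
        pythoncom.CoUninitialize()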
