# 开始生成CMM客户信息数据
#
# 本文介绍了一种使用Python多线程技术批量生成CMM系统所需的各种数据表的方法,包括客户管理表、机构信息表、机构映射表及员工信息表。通过随机生成各类数据,如客户号、机构号、员工号等,实现对CMM系统的数据填充,适用于系统测试或数据初始化场景。
#
# 摘要生成于 C知道,由 DeepSeek-R1 满血版支持,前往体验 >

import gzip
import os
import random
import re
import time
from concurrent.futures.thread import ThreadPoolExecutor
from datetime import datetime, timedelta

from faker import Faker

faker = Faker('zh_CN')  # shared Faker instance configured with the zh_CN locale
org_name_dict = {}  # maps organisation number -> organisation name (filled by get_org_name)
org_name_temp_dict = {}  # organisation names already issued; get_org_name uses it to skip duplicates
provice_code_dict = {}  # province/city code dictionary (read and written by get_pro_code)


# provice_code = {
#     "北京市": "01",
#     "上海市": "02",
#     "天津市": "03",
#     "重庆市": "04",
#     "黑龙江省": "05",
#     "吉林省": "06",
#     "辽宁省": "07",
#     "内蒙古自治区": "08",
#     "河北省": "09",
#     "新疆维吾尔自治区": "10",
#     "甘肃省": "11",
#     "青海省": "12",
#     "陕西省": "13",
#     "宁夏回族自治区": "14",
#     "河南省": "15",
#     "山东省": "16",
#     "山西省": "17",
#     "安徽省": "18",
#     "湖北省": "19",
#     "湖南省": "20",
#     "江苏省": "21",
#     "四川省": "22",
#     "贵州省": "23",
#     "云南省": "24",
#     "广西壮族自治区": "25",
#     "西藏自治区": "26",
#     "浙江省": "27",
#     "江西省": "28",
#     "广东省": "29",
#     "福建省": "30",
#     "台湾省": "31",
#     "海南省": "32",
#     "香港特别行政区": "33",
#     "澳门特别行政区": "34"
# }


def dir_exists(dir_path):
    """
    Ensure that a directory exists, creating it (and any missing parents).

    :param dir_path: path of the directory that must exist
    :return: ``dir_path`` unchanged, so the call can be used inline
    :raises FileExistsError: if ``dir_path`` exists but is not a directory
    """
    # exist_ok=True removes the check-then-create race of a separate
    # os.path.exists() test: a directory created by someone else between the
    # check and the makedirs call no longer makes this function fail.
    os.makedirs(dir_path, exist_ok=True)
    return dir_path


def gz_file(file_path):
    """
    Compress a data file into a sibling ``.gz`` archive.

    The archive is written next to the source file, with the source's last
    extension replaced by ``.gz`` (``/data/x.txt`` -> ``/data/x.gz``).  The
    source file itself is left in place.

    :param file_path: path of the file to compress
    :return: None
    """
    # splitext only strips the last extension of the final path component, so
    # a dot in a directory name, several dots in the file name, or no dot at
    # all no longer produce a wrong target path or an IndexError.
    gz_path = os.path.splitext(file_path)[0] + ".gz"
    # Copy raw bytes: no decode/encode round trip is needed, and both handles
    # are closed by the context manager (no explicit close() calls required).
    with open(file_path, "rb") as f_r, gzip.open(gz_path, "wb") as f_w:
        for line in f_r:
            f_w.write(line)


def get_cust_no(i):
    """
    Build a customer number.

    The number is the fixed prefix ``16101`` followed by ``i`` rendered as
    text and left-padded with zeros, so the result is 16 characters long
    whenever ``str(i)`` has at most 11 characters.

    :param i: sequence number of the customer
    :return: customer number string
    """
    prefix = '16101'
    total_width = 16
    serial = str(i)
    # rjust pads purely on the left and leaves longer serials untouched.
    return prefix + serial.rjust(total_width - len(prefix), "0")


def get_mng_org_no(i, Area_code):
    """
    Build a managing-organisation number.

    :param i: sequence number; rendered as text and zero-filled to 18 characters
    :param Area_code: prefix string placed in front of the padded number
        (the script's entry point passes "44")
    :return: ``Area_code`` followed by the padded number (20 characters for a
        two-character prefix)
    """
    padded_serial = str(i).zfill(18)
    return Area_code + padded_serial


def get_term_no(Area_code):
    """
    Build a marketing-team number.

    Two distinct random digits are drawn, joined, and zero-filled on the left
    to four characters; ``Area_code`` is placed in front.

    :param Area_code: prefix string (the script's entry point passes "44")
    :return: ``Area_code`` + "00" + two distinct random digits
    """
    digits = list(range(10))
    picked = random.sample(digits, 2)
    suffix = "".join(map(str, picked)).zfill(4)
    return Area_code + suffix


def get_term_org(Area_code="44"):
    """
    Build a marketing-team organisation number.

    The result is ``Area_code`` followed by a random integer from 101 to 200
    (inclusive), zero-filled to 8 characters.

    :param Area_code: prefix of the number.  Defaults to "44", the value that
        used to be hard-coded here, so existing zero-argument callers get the
        same kind of result as before.
    :return: organisation number string (10 characters for a 2-character prefix)
    """
    # randint draws directly from the inclusive range; no need to materialise
    # a 100-element list on every call just to sample one value from it.
    team_serial = random.randint(101, 200)
    return Area_code + str(team_serial).zfill(8)


def get_id_card(i, Area_code):
    """
    Build the account-manager number (an ID-card style number).

    :param i: sequence number; rendered as text and zero-filled to 12 characters
    :param Area_code: value formatted into the front of the fixed "0332" block
    :return: ``<Area_code>0332`` followed by the padded sequence number
    """
    serial = str(i).zfill(12)
    return "{area_code}0332".format(area_code=Area_code) + serial


def get_asign_time():
    """
    Generate a random assignment time.

    Hour (0-23), minute (0-59) and second (0-59) are drawn in that order.

    :return: time string in ``HH:MM:SS`` form, each field two digits wide
    """
    upper_bounds = (23, 59, 59)  # hour, minute, second
    fields = [str(random.randint(0, upper)).zfill(2) for upper in upper_bounds]
    return ":".join(fields)


def get_id_card2(mng_id_card, i, count):
    """
    Build the assigner's employee number (ID-card style), guaranteed to differ
    from the account manager's number.

    A sequence number is drawn from ``count[0]..count[1]`` (inclusive),
    excluding ``i``, and written over the trailing sequence digits of
    ``mng_id_card`` so the result keeps the manager number's length and prefix.

    :param mng_id_card: the account manager's number; expected to end with
        ``i`` zero-padded to at least as many digits as ``count[1]`` has
    :param i: sequence number of the account manager (the value to avoid)
    :param count: ``(first, last)`` inclusive range of sequence numbers
    :return: a number with the same prefix and length as ``mng_id_card`` but a
        different sequence number
    :raises ValueError: if the range holds no sequence number other than ``i``
        (or is empty)
    """
    low, high = count[0], count[1]
    if low <= i <= high:
        if low == high:
            # The only candidate is i itself; rejection sampling would never
            # terminate, so fail loudly instead.
            raise ValueError(
                "no sequence number other than {0} in range {1}..{2}".format(i, low, high))
        # Draw from a range one element shorter and step over i: every value
        # except i is equally likely and no retry loop is needed.
        serial = random.randint(low, high - 1)
        if serial >= i:
            serial += 1
    else:
        serial = random.randint(low, high)
    # Replace exactly as many trailing digits as the widest sequence number
    # needs, zero-filled, so the overall length is preserved.
    width = max(len(str(high)), len(str(i)))
    return mng_id_card[:-width] + str(serial).zfill(width)


def get_pro_code(org_name):
    """
    Return the two-digit province/city code for an organisation name.

    The leading province (or municipality / autonomous region / special
    administrative region) and, when present, the city that follows it are
    extracted from ``org_name``.  That region text is the key into
    ``provice_code_dict``: the first time a region is seen a random code
    ("01".."99") is stored for it, and every later call for the same region
    returns that same code.  Codes are not guaranteed to be unique across
    regions.

    :param org_name: organisation name starting with its province/city
    :return: two-character code string
    """
    PATTERN = r'([\u4e00-\u9fa5]{2,5}?(?:省|自治区|行政区|市))([\u4e00-\u9fa5]{1,5}?(?:市)){0,1}'
    matched = re.search(PATTERN, org_name)
    if matched is None:
        # No recognisable province/city prefix: key on the whole name rather
        # than failing on a None match.
        region = org_name
    else:
        region = "".join(part for part in matched.groups() if part is not None)
    code = provice_code_dict.get(region)
    if not code:
        fresh_code = str(random.randint(1, 99)).zfill(2)
        # Store under the same key that is used for the lookup (the region),
        # otherwise the cache never hits and one region gets many codes.
        # setdefault keeps whichever code was registered first for the region.
        code = provice_code_dict.setdefault(region, fresh_code)
    return code


def get_city():
    """
    Generate a simulated city name.

    Values are drawn from ``faker.city()`` until one contains the character
    "市" preceded by at least one other character.

    :return: the first generated value that matches
    """
    city_re = re.compile(r'(.+?)市')
    candidate = faker.city()
    while city_re.search(candidate) is None:
        candidate = faker.city()
    return candidate


def get_country():
    """
    Generate a simulated county name.

    Values are drawn from ``faker.city()`` until one contains the character
    "县" preceded by at least one other character.

    :return: the first generated value that matches
    """
    county_re = re.compile(r'(.+?)县')
    candidate = faker.city()
    while county_re.search(candidate) is None:
        candidate = faker.city()
    return candidate


def get_org_name(org_no):
    """
    Generate a unique organisation (branch) name and register it.

    A province and a street are generated first.  When the province text
    contains "市" or "行政区" only a county is added, otherwise a city and a
    county are added.  The province / optional city / optional district-or-
    county prefix is then re-extracted with a regular expression and the final
    name is that prefix + street + "支行".  Names already recorded in
    ``org_name_temp_dict`` are rejected and generation starts over.

    Side effects: marks the name in ``org_name_temp_dict`` and stores it in
    ``org_name_dict`` under ``org_no``.

    :param org_no: organisation number the new name is registered under
    :return: the generated organisation name
    """
    region_re = re.compile(
        r'([\u4e00-\u9fa5]{2,5}?(?:省|自治区|行政区|市))([\u4e00-\u9fa5]{1,7}?(?:市)){0,1}([\u4e00-\u9fa5]{1,7}?(?:区|县)){0,1}')
    while True:
        province = faker.province()
        street = faker.street_name()
        if "市" in province or "行政区" in province:
            # No separate city level for these province values.
            levels = [province, get_country()]
        else:
            levels = [province, get_city(), get_country()]
        full_name = "".join(levels) + street + "支行"
        matched = region_re.search(full_name)
        region = "".join(piece for piece in matched.groups() if piece is not None)
        candidate = region + street + "支行"
        if org_name_temp_dict.get(candidate) == 1:
            continue  # name already handed out, generate another one
        org_name_temp_dict[candidate] = 1
        org_name_dict[org_no] = candidate
        return candidate


def get_parnt_org_name(org_name):
    """
    Derive the parent organisation name from an organisation name.

    The leading province-level part and, when present, the city that follows
    it are kept; "支行" is appended.

    :param org_name: organisation name starting with its province/city
    :return: province (+ city) + "支行"
    """
    matched = re.search(
        r'([\u4e00-\u9fa5]{2,5}?(?:省|自治区|行政区|市))([\u4e00-\u9fa5]{1,5}?(?:市)){0,1}',
        org_name)
    region = "".join(piece for piece in matched.groups() if piece is not None)
    return region + "支行"


def get_org_no_name(i, Area_code):
    """
    Return an organisation number together with its registered name.

    The number is built with ``get_mng_org_no`` and the name is looked up in
    ``org_name_dict``.

    :param i: sequence number of the organisation
    :param Area_code: prefix passed on to ``get_mng_org_no``
    :return: ``(org_no, org_name)`` tuple
    :raises KeyError: if no name has been registered for the number
    """
    org_no = get_mng_org_no(i, Area_code)
    org_name = org_name_dict.get(org_no)
    if org_name is None:
        # Retrying cannot help: the number is recomputed identically every
        # time, so looping here would spin forever when the entry is missing.
        raise KeyError("no organisation name registered for {0}".format(org_no))
    return org_no, org_name


def get_boe_org_no():
    """
    Generate a BoEing organisation number.

    :return: "190" followed by three distinct random digits
    """
    digits = list(range(10))
    tail = "".join(map(str, random.sample(digits, 3)))
    return "190" + tail


def get_boe_org_name():
    """
    Generate a simulated BoEing organisation name.

    A street name is generated first, then one of four bank names is picked
    at random and both are inserted into a fixed branch-name template.

    :return: the formatted organisation name
    """
    street_name = faker.street_name()
    bank = random.choice(['人民', '建设', '农业', '邮政'])
    template = "中国{bank}银行股份有限公司{street}支行"
    return template.format(bank=bank, street=street_name)


def get_offc_pho():
    """
    Generate a simulated office phone number.

    :return: "886" followed by five distinct random digits
    """
    digits = list(range(10))
    tail = "".join(map(str, random.sample(digits, 5)))
    return "886" + tail


def get_prfn():
    """
    Pick a random job title.

    :return: one of the five job-title strings defined below
    """
    job_titles = ("行长", "副行长", "科员", "部门经理", "部门副经理")
    picked = random.choice(job_titles)
    return picked


def get_prfn_lvl(prfn):
    """
    Look up the rank that belongs to a job title.

    :param prfn: job title, as returned by ``get_prfn``
    :return: the rank string, or ``None`` for an unknown title
    """
    titles = ("部门经理", "部门副经理", "行长", "副行长", "科员")
    ranks = ("科级", "副科级", "处级", "副处级", "科员")
    rank_by_title = dict(zip(titles, ranks))
    return rank_by_title.get(prfn)


def get_post(prfn):
    """
    Look up the post (position) that belongs to a job title.

    :param prfn: job title, as returned by ``get_prfn``
    :return: the post string, or ``None`` for an unknown title
    """
    post_by_title = dict([
        ("行长", "一级支行行长"),
        ("副行长", "一级支行副行长"),
        ("科员", "大堂经理"),
        ("部门经理", "部门经理"),
        ("部门副经理", "部门副经理"),
    ])
    return post_by_title.get(prfn)


def write_cmm_org_map_boe(args):
    """
    Write rows of the CMM <-> BoEing organisation mapping table.

    One row is written per sequence number in the inclusive range; each row
    holds: organisation number, organisation name, BoEing organisation number,
    BoEing organisation name and province/city code.

    :param args: sequence of ``(file, (first, last), Date_Id, seq, Area_code)``;
        ``file`` is the open text file to write to, ``seq`` the field
        separator.  ``Date_Id`` is accepted but not used here.
    :return: None
    """
    out_file = args[0]
    bounds = args[1]
    Date_Id = args[2]
    seq = args[3]
    Area_code = args[4]
    separator = "{seq}".format(seq=seq)
    first, last = bounds[0], bounds[1]
    for serial in range(first, last + 1):
        org_no, org_name = get_org_no_name(serial, Area_code)
        row = [org_no, org_name]
        row.append(get_boe_org_no())  # BoEing organisation number
        row.append(get_boe_org_name())  # BoEing organisation name
        row.append(get_pro_code(org_name))  # province/city code
        out_file.write(separator.join(row) + "\n")


def write_cmm_staf_info(args):
    """
    Write rows of the CMM employee information table.

    One row is written per sequence number in the inclusive range.  Fields, in
    order: ID-card number, employee number (same value), name, registration
    organisation number, status, office phone, mobile number, e-mail, job
    title, rank, post, and an empty spare field.

    :param args: sequence of ``(file, (first, last), Date_Id, seq, Area_code)``;
        ``file`` is the open text file to write to, ``seq`` the field
        separator.  ``Date_Id`` is accepted but not used here.
    :return: None
    """
    out_file = args[0]
    bounds = args[1]
    Date_Id = args[2]
    seq = args[3]
    Area_code = args[4]
    separator = "{seq}".format(seq=seq)
    first, last = bounds[0], bounds[1]
    for serial in range(first, last + 1):
        id_card_no = get_id_card(serial, Area_code)
        row = [id_card_no, id_card_no]  # the employee number reuses the ID-card number
        row.append(faker.name())  # employee name
        row.append(get_term_org())  # registration organisation number
        row.append(str(random.randint(0, 1)))  # employee status
        row.append(get_offc_pho())  # office phone
        row.append(str(faker.phone_number()))  # mobile number
        row.append(faker.email())  # e-mail
        job_title = get_prfn()
        row.append(job_title)
        row.append(get_prfn_lvl(job_title))  # rank
        row.append(get_post(job_title))  # post
        row.append("")  # spare field
        out_file.write(separator.join(row) + "\n")


def write_cmm_cust_mn(args):
    """
    Write rows of the CMM customer management table.

    One row is written per sequence number in the inclusive range.  Fields, in
    order: customer number, customer name, managing organisation number,
    marketing-team number, marketing-team organisation, account-manager number,
    manager management type, team management type, assignment date, assignment
    time, assigner employee number, assigner organisation number and assigner
    role number.

    :param args: sequence of ``(file, (first, last), Date_Id, seq, Area_code)``;
        ``file`` is the open text file to write to, ``seq`` the field
        separator.  ``Date_Id`` is accepted but not used here.
    :return: None
    """
    out_file = args[0]
    count = args[1]
    Date_Id = args[2]
    seq = args[3]
    Area_code = args[4]
    separator = "{seq}".format(seq=seq)
    role_numbers = [2005, 2003, 3005, 3008, 5001, 5003, 5008, 5001, 5002, 2001, 2008, 3001, 3002, 3003]
    for serial in range(count[0], count[1] + 1):
        row = [get_cust_no(serial)]  # customer number
        row.append(faker.company())  # customer name
        row.append(get_mng_org_no(serial, Area_code))  # managing organisation number
        row.append(get_term_no(Area_code))  # marketing-team number
        row.append(get_term_org())  # marketing-team organisation
        mng_id_card = get_id_card(serial, Area_code)
        row.append(mng_id_card)  # account-manager number
        row.append(str(random.randint(1, 2)).zfill(2))  # manager management type
        row.append(str(random.randint(1, 2)).zfill(2))  # team management type
        row.append(faker.date(pattern='%Y%m%d', end_datetime=datetime.now()))  # assignment date
        row.append(get_asign_time())  # assignment time
        row.append(get_id_card2(mng_id_card, serial, count))  # assigner employee number
        row.append("4415" + str(random.randint(0, 1000)).zfill(6))  # assigner organisation number
        row.append(str(random.sample(role_numbers, 1)[0]))  # assigner role number
        out_file.write(separator.join(row) + "\n")


def write_cmm_org(args):
    """
    Write rows of the CMM organisation information table.

    One row is written per sequence number in the inclusive range.  Generating
    a row also registers the organisation name in ``org_name_dict`` (through
    ``get_org_name``).

    :param args: sequence of ``(file, (first, last), Date_Id, seq, Area_code)``;
        ``file`` is the open text file to write to, ``seq`` the field
        separator.  ``Date_Id`` is accepted but not used here.
    :return: None
    """
    f_w_cmm_org, count, Date_Id, seq, Area_code = args[0], args[1], args[2], args[3], args[4]
    print(count)
    # Honour the caller's separator like the other three table writers do,
    # instead of a hard-coded "|!" (which is also the default value of seq).
    separator = "{seq}".format(seq=seq)
    for i in range(count[0], count[1] + 1):
        org_no = get_mng_org_no(i, Area_code)  # organisation number
        org_name = get_org_name(org_no)  # organisation name
        pro_cod = get_pro_code(org_name)  # province/city code
        org_typ = str(random.randint(1, 2))  # organisation type
        org_sts = str(random.randint(0, 2))  # organisation status
        org_lvl = str(random.randint(1, 6))  # organisation level
        # NOTE(review): the parent number is built from the same i, so it
        # equals org_no — confirm this is intended.
        parnt_org_no = get_mng_org_no(i, Area_code)  # parent organisation number
        parnt_org_nam = get_parnt_org_name(org_name)  # parent organisation name
        msk_cod = ""  # organisation mask
        dis_ord = str(random.randint(1, 99))  # display order
        stop_rsn = ""  # reason for deactivation
        # NOTE(review): org_sts appears twice in the row; kept as-is because
        # the target table layout is not visible here — confirm against the
        # schema before removing.
        line = separator.join(
            [org_no, org_name, pro_cod, org_typ, org_sts, org_sts, org_lvl,
             parnt_org_no, parnt_org_nam, msk_cod, dis_ord, stop_rsn]) + "\n"
        f_w_cmm_org.write(line)


def get_thread_count(count, size=2000):
    """
    Split ``count`` rows into consecutive chunks and report the pool size.

    Chunks are inclusive ``(first, last)`` ranges starting at 1; every chunk
    holds ``size`` rows except possibly the last one, which holds the rest.
    A ``count`` smaller than ``size`` yields a single short chunk.

    :param count: total number of rows to generate
    :param size: rows per chunk (default 2000)
    :return: ``(thread_count, count_list)`` where ``count_list`` is the list of
        chunks and ``thread_count`` is the number of chunks, but never less
        than 1 so it is always a valid thread-pool size
    """
    count_list = [
        (first, min(first + size - 1, count))
        for first in range(1, count + 1, size)
    ]
    thread_count = max(len(count_list), 1)
    print("开启线程数{0}个".format(thread_count))
    return thread_count, count_list


def from_txt_to_gz_file(file_list):
    """
    Compress every file in ``file_list`` to a ``.gz`` archive, one worker
    thread per file.

    :param file_list: paths of the files to compress; may be empty
    :return: None
    :raises Exception: whatever ``gz_file`` raised for one of the files
    """
    # NOTE(review): this message talks about phone / personal-info data, which
    # this script does not produce; text kept unchanged in case logs are parsed.
    print("开始压缩个人移动电话模拟数据和个人基本信息数据")
    if not file_list:
        # A thread pool cannot be created with zero workers.
        return
    with ThreadPoolExecutor(len(file_list)) as pool:
        # Pass the callable and its argument separately; submit(gz_file(path))
        # would compress in the calling thread and hand None to the pool.
        futures = [pool.submit(gz_file, path) for path in file_list]
    for future in futures:
        # result() re-raises a worker's exception so failures are not lost.
        future.result()


def create_cmm(Area_code, Date_Id, count, seq='|!', base_dir='/home/appuser/out_data/simula_data'):
    """
    Generate the four CMM data files with worker threads, then gzip them.

    Files produced (inside ``<base_dir>/<Date_Id>/``):
    1. CMM customer management table
    2. CMM organisation information table
    3. CMM <-> BoEing organisation mapping table
    4. CMM employee information table

    Rows are appended to the files if they already exist.

    :param Area_code: area prefix used in generated numbers and in the
        customer-management file name
    :param Date_Id: date used in the directory and file names
    :param count: number of rows to generate per table
    :param seq: field separator handed to the table writers
    :param base_dir: root output directory; defaults to the path that used to
        be hard-coded here
    :return: None
    :raises Exception: the first exception raised by any worker
    """
    file_names = [
        # CMM customer management table
        "{Area_code}-CMM-OUT_CMM_A_CUST_MN_HOST-2G-{Date_Id}.txt".format(Area_code=Area_code,
                                                                         Date_Id=Date_Id),
        # CMM organisation information table
        "00-CMM-CMM_A_ORG-2G-{Date_Id}.txt".format(Date_Id=Date_Id),
        # CMM <-> BoEing organisation mapping table
        "00-CMM-CMM_A_ORG_MAP_BOE-2G-{Date_Id}.txt".format(Date_Id=Date_Id),
        # CMM employee information table
        "00-CMM-CMM_A_STAF_INFO-2G-{Date_Id}.txt".format(Date_Id=Date_Id),
    ]
    dir_path = dir_exists(os.path.join(base_dir, '{date}'.format(date=str(Date_Id))))
    file_list = [os.path.join(dir_path, file_name) for file_name in file_names]
    cust_mn_path, org_path, org_map_boe_path, staf_info_path = file_list

    thread_count, count_list = get_thread_count(count)
    # The with-statement closes all four files even when a worker fails.
    with open(cust_mn_path, "a+", encoding="utf-8") as f_w_cust_mn, \
            open(org_path, "a+", encoding="utf-8") as f_w_cmm_org, \
            open(org_map_boe_path, "a+", encoding="utf-8") as f_w_cmm_map_boe, \
            open(staf_info_path, "a+", encoding="utf-8") as f_w_cmm_staf:
        # Phase 1 must finish before phase 2 starts: the mapping table looks
        # up the organisation names that write_cmm_org registers.
        _run_cmm_writers(thread_count, [
            (writer, (out_file, chunk, Date_Id, seq, Area_code))
            for chunk in count_list
            for writer, out_file in ((write_cmm_cust_mn, f_w_cust_mn),
                                     (write_cmm_org, f_w_cmm_org))
        ])
        _run_cmm_writers(thread_count, [
            (writer, (out_file, chunk, Date_Id, seq, Area_code))
            for chunk in count_list
            for writer, out_file in ((write_cmm_org_map_boe, f_w_cmm_map_boe),
                                     (write_cmm_staf_info, f_w_cmm_staf))
        ])
    # Files are closed (and therefore flushed) before they are compressed.
    from_txt_to_gz_file(file_list)


def _run_cmm_writers(thread_count, jobs):
    """Run ``(writer, args)`` jobs on a thread pool, wait for all of them, and
    re-raise the first worker exception instead of dropping it silently."""
    with ThreadPoolExecutor(thread_count) as pool:
        futures = [pool.submit(writer, writer_args) for writer, writer_args in jobs]
    for future in futures:
        future.result()


if __name__ == '__main__':
    start = time.time()
    # Fixed run parameters.
    Area_code = "44"  # province code
    seq = '|!'  # field separator
    count = 10000  # rows generated for each table
    Date_Id = 20191223  # alternative: datetime.now().strftime('%Y%m%d')
    # Generate the CMM data files, then report the elapsed wall-clock time.
    print("开始生成CMM客户信息数据")
    create_cmm(str(Area_code), Date_Id, count, seq)
    elapsed = time.time() - start
    print("模拟生成{count}条数据运行时间:{time_}".format(count=count, time_=elapsed))
# 评论
# 添加红包
#
# 请填写红包祝福语或标题
#
# 红包个数最小为10个
#
# 红包金额最低5元
#
# 当前余额3.43前往充值 >
# 需支付:10.00
# 成就一亿技术人!
# 领取后你会自动成为博主和红包主的粉丝 规则
# hope_wisdom
# 发出的红包
# 实付
# 使用余额支付
# 点击重新获取
# 扫码支付
# 钱包余额 0
#
# 抵扣说明:
#
# 1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
# 2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。
#
# 余额充值