模拟生成个人电话信息数据和个人基本信息数据

使用Python模拟生成个人电话和个人基本信息数据
这篇博客介绍了如何利用Python的Faker库和其他库来模拟生成个人电话信息数据和个人基本信息数据，包括PID、序列号、国际长途电话区号、手机号码、姓名、性别、分类鉴别标识等，并展示了多线程生成数据的实现方式。
from faker import Faker
import os, random, gzip
import re, time
from pypinyin import pinyin, Style
from datetime import datetime, timedelta
from concurrent.futures.thread import ThreadPoolExecutor

# Module-wide Faker instance created with the 'zh_CN' locale; the writer
# functions below use it for phone numbers and names.
faker = Faker('zh_CN')


def dir_exists(dir_path):
    """
    Make sure a directory exists, creating it (and any parents) if needed.

    :param dir_path: path of the directory to check
    :return: the same ``dir_path`` that was passed in
    """
    if os.path.exists(dir_path):
        # Nothing to create; hand the path straight back.
        return dir_path
    os.makedirs(dir_path)
    return dir_path


def three_num():
    """
    Build a three-character string made of distinct decimal digits.

    :return: three different digits (0-9), drawn at random and concatenated
    """
    digits = list(range(10))
    picked = random.sample(digits, 3)
    return "".join(str(digit) for digit in picked)


def get_idd_code():
    """
    Pick a random international dialling prefix.

    The result never starts with 0 and is either two or three digits long.

    :return: the prefix as a string
    """
    leading_digits = list(range(1, 10))
    any_digits = list(range(10))
    if random.randint(0, 1) != 0:
        # Three-digit prefix: one non-zero lead followed by two distinct digits.
        lead = random.choice(leading_digits)
        tail = random.sample(any_digits, 2)
        return "{0}{1}{2}".format(lead, tail[0], tail[1])
    # Two-digit prefix: two distinct non-zero digits.
    pair = random.sample(leading_digits, 2)
    return "{0}{1}".format(pair[0], pair[1])


def gz_phone_file(file_path):
    """
    Compress a UTF-8 text data file into a ``.gz`` file next to it.

    The archive path is ``file_path`` with its final extension replaced by
    ``.gz`` (or with ``.gz`` appended when there is no extension). Only the
    last extension is stripped, so dots elsewhere in the path (for example
    ``../data/a.b.txt``) do not truncate the output name.

    :param file_path: path of the UTF-8 text file to compress
    :return: path of the gzip file that was written
    """
    gz_path = os.path.splitext(file_path)[0] + ".gz"
    # Open the source first so a missing input does not leave an empty archive.
    with open(file_path, "r", encoding="utf-8") as f_r, \
            gzip.open(gz_path, "wb") as f_w:
        for line in f_r:
            f_w.write(line.encode("utf-8"))
    return gz_path


def get_homecif():
    """
    Build a two-digit code whose first digit is never 0 (registered CIF-S code).

    :return: the two digits concatenated as a string
    """
    non_zero_digits = list(range(1, 10))
    all_digits = list(range(10))
    (tens,) = random.sample(non_zero_digits, 1)
    (units,) = random.sample(all_digits, 1)
    return "{0}{1}".format(tens, units)


def get_descriminator():
    """
    Build the classification discriminator flag string.

    Starts from 64 zeros and sets between 1 and 10 randomly chosen positions
    to 1; positions may repeat, so fewer distinct flags can end up set.

    :return: a 64-character string of '0' and '1'
    """
    flags = ["0"] * 64
    for _ in range(random.randint(1, 10)):
        flags[random.randint(0, 63)] = "1"
    return "".join(flags)


def get_LastTime(Date_id):
    """
    Build the last-access and last-maintenance timestamps.

    A random offset of 10-60 "months" (30 days each) is drawn. The access time
    is midnight of ``Date_id`` plus (offset - 5) months; the maintenance time
    is ``Date_id`` at the current wall-clock time plus the full offset.

    :param Date_id: date as ``YYYYMMDD`` (int or str)
    :return: ``(LastAccessTime, LastMntTime)``, both as ``YYYYMMDDHHMMSS`` strings
    """
    day_text = str(Date_id)
    months = random.randint(10, 60)
    clock_text = datetime.now().strftime('%H%M%S')
    stamp_format = '%Y%m%d%H%M%S'

    access_base = datetime.strptime(day_text, '%Y%m%d')
    maintain_base = datetime.strptime(day_text + clock_text, stamp_format)

    last_access = access_base + timedelta(days=(months - 5) * 30)
    last_maintain = maintain_base + timedelta(days=months * 30)
    return last_access.strftime(stamp_format), last_maintain.strftime(stamp_format)


def get_pid(i):
    """
    Build a PID: the fixed prefix '16101' followed by ``i`` left-padded with
    zeros so that the whole value is 16 characters wide.

    If ``str(i)`` is already longer than the remaining width, no padding is
    added and the result is longer than 16 characters.

    :param i: record number
    :return: the PID string
    """
    prefix = '16101'
    suffix = str(i)
    return prefix + suffix.rjust(16 - len(prefix), '0')


def write_pre_phone(args):
    """
    Write simulated personal mobile-phone records to an open text file.

    One line is written per record, with the fields PID, serial number,
    international dialling prefix and mobile number joined by the separator.

    :param args: sequence ``(f_w_phone, count, Date_Id, seq)`` where
        ``f_w_phone`` is a writable text file object, ``count`` is an
        inclusive ``(first, last)`` record-number range, ``Date_Id`` is not
        used by this function, and ``seq`` is the field separator
    :return: None
    """
    f_w_phone, count, Date_Id, seq = args[0], args[1], args[2], args[3]
    # Honour the caller's separator instead of a hard-coded "|!" so this file
    # uses the same delimiter as the one produced by write_pre_base.
    separator = "{seq}".format(seq=seq)
    for i in range(count[0], count[1] + 1):
        Pid = get_pid(i)  # PID
        SerialNo = random.randint(0, 1000000)  # serial number
        IddPrefix = get_idd_code()  # international dialling prefix
        MobileTel = faker.phone_number()  # mobile phone number
        line_phone = separator.join(
            [str(Pid), str(SerialNo), str(IddPrefix), str(MobileTel)]) + "\n"
        f_w_phone.write(line_phone)


def write_pre_base(args):
    """
    Write simulated personal basic-information records to an open text file.

    One line is written per record with these fields joined by the separator:
    PID, registered CIF-S code, Chinese name, upper-cased pinyin name, gender
    code (1 or 2), discriminator flags, creation date, last access time, last
    maintenance time, and an empty record-MAC field.

    :param args: sequence ``(f_w_base, count, Date_Id, seq)`` where
        ``f_w_base`` is a writable text file object, ``count`` is an inclusive
        ``(first, last)`` record-number range, ``Date_Id`` is the creation
        date, and ``seq`` is the field separator
    :return: None
    """
    f_w_base = args[0]
    count = args[1]
    Date_Id = args[2]
    seq = args[3]

    joiner = "{seq}".format(seq=seq)
    for number in range(count[0], count[1] + 1):
        pid = get_pid(number)
        home_cif = get_homecif()
        cn_name = faker.name()
        syllables = pinyin(cn_name, style=Style.NORMAL)
        py_name = "".join(syllable[0] for syllable in syllables).upper()
        gender_code = random.randint(1, 2)
        discriminator = get_descriminator()
        last_access, last_maintain = get_LastTime(Date_Id)
        fields = (
            pid,
            home_cif,
            cn_name,
            py_name,
            gender_code,
            discriminator,
            Date_Id,  # creation date
            last_access,
            last_maintain,
            "",  # record MAC, always empty
        )
        f_w_base.write(joiner.join(str(field) for field in fields) + "\n")

def get_thread_count(count):
    """
    Split ``count`` records into per-task ranges of at most 2000 records.

    Record numbers start at 1. A total smaller than one full chunk still
    produces a single range, so small runs are not silently dropped.

    :param count: total number of records to generate
    :return: ``(thread_count, count_list)`` where ``count_list`` holds
        inclusive ``(first, last)`` record-number ranges and ``thread_count``
        is the number of ranges, but never less than 1 so that it is always a
        valid pool size
    """
    size = 2000  # records generated by each task
    total = int(count)
    count_list = [
        (first, min(first + size - 1, total))
        for first in range(1, total + 1, size)
    ]
    thread_count = max(1, len(count_list))
    print("开启线程数{0}个".format(thread_count))
    return thread_count, count_list


def create_pre_phone_base(Area_code, Date_Id, count, seq='|!',
                          out_root='/home/appuser/out_data/simula_data'):
    """
    Generate the simulated mobile-phone file and basic-information file
    using a thread pool.

    Both files are created under ``<out_root>/<Date_Id>/`` and opened in
    append mode, so running again for the same date adds to the existing
    files. Each record range from ``get_thread_count`` is submitted twice:
    once to ``write_pre_phone`` and once to ``write_pre_base``.

    :param Area_code: province code used as the file-name prefix
    :param Date_Id: date (``YYYYMMDD``) used in the directory and file names
    :param count: total number of records to generate
    :param seq: field separator passed to the writer functions
    :param out_root: root directory for the generated files
    :return: None
    :raises Exception: the first exception raised by any writer task is
        re-raised here instead of being silently discarded
    """
    # Personal mobile-phone table.
    phone_file = "{Area_code}-ABIS-D_P2MOBILE-3G-{Date_Id}.txt".format(Area_code=Area_code, Date_Id=Date_Id)
    # Personal basic-information table.
    base_file = "{Area_code}-ABIS-D_PBAS-3G-{Date_Id}.txt".format(Area_code=Area_code, Date_Id=Date_Id)
    dir_path = os.path.join(out_root, '{date}'.format(date=str(Date_Id)))
    dir_path = dir_exists(dir_path)
    phone_file_path = os.path.join(dir_path, phone_file)
    base_file_path = os.path.join(dir_path, base_file)

    thread_count, count_list = get_thread_count(count)
    with open(phone_file_path, "a+", encoding="utf-8") as f_w_phone, \
            open(base_file_path, "a+", encoding="utf-8") as f_w_base:
        futures = []
        # Leaving the executor block waits for every task, so the files are
        # only closed after all writers have finished.
        with ThreadPoolExecutor(thread_count) as pool:
            for chunk in count_list:
                futures.append(
                    pool.submit(write_pre_phone, args=(f_w_phone, chunk, Date_Id, seq)))
                futures.append(
                    pool.submit(write_pre_base, args=(f_w_base, chunk, Date_Id, seq)))
        # Surface worker failures; an unchecked future would hide them.
        for future in futures:
            future.result()
    # NOTE: gzip compression of the two files (gz_phone_file) is not run here.


if __name__ == '__main__':
    # Fixed run parameters; Date_Id could instead be taken from today's date.
    Date_Id = 20191213
    count = 100000
    seq = '|!'  # field separator
    Area_code = 44  # province code

    start = time.time()
    # Generate the simulated mobile-phone and basic-information data.
    print("开始生成个人移动电话模拟数据和个人基本信息数据")
    create_pre_phone_base(Area_code=Area_code, Date_Id=Date_Id, count=count, seq=seq)
    end = time.time()
    elapsed = end - start
    print("模拟生成{count}条数据运行时间:{time_}".format(count=count, time_=elapsed))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值