CS 564 Homework #2 ER Modeling & Schema Design

本文介绍了如何使用Python的json和pickle库处理数据,包括日期时间格式转换、金额和字符串格式化,以及从JSON文件中解析数据并存储到不同表格中,用于数据库设计

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

在这里插入图片描述
在这里插入图片描述

处理数据python文件

import json
import pickle
from re import sub

# Dictionary of months used for date transformation
MONTHS = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06', \
          'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}


def transformMonth(mon):
    if mon in MONTHS:
        return MONTHS[mon]
    else:
        return mon


"""
Transforms a timestamp from Mon-DD-YY HH:MM:SS to YYYY-MM-DD HH:MM:SS
"""


def transformDttm(dttm):
    dttm = dttm.strip().split(' ')
    dt = dttm[0].split('-')
    date = '20' + dt[2] + '-'
    date += transformMonth(dt[0]) + '-' + dt[1]
    return date + ' ' + dttm[1]


def transformDollar(money):
    if money == None or len(money) == 0:
        return 'NULL'
    return sub(r'[^\d.]', '', money)


def transformStr(string):
    content = "\""
    if not string:
        return 'NULL'

    for s in string:
        if s == '\"':
            s += '\"'
        else:
            content += s
    content += "\""
    return content


def parseJson(filename, ID):
    # 定义文件路径
    path = './ebay_data/'
    cate_table = []
    item_table = []
    item_bid_table = []
    bid_table = []
    seller_table = []
    bidder_table = []
    BidID = ID[0]
    CateID = ID[1]

    # 打开文件,r是读取,encoding是指定编码格式
    with open(path + filename, 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    for da in data['Items']:
        ItemID = da['ItemID']
        Name = transformStr(da['Name'])
        Category = da['Category']
        Currently = transformDollar(da['Currently'])
        First_Bid = transformDollar(da['First_Bid'])
        Number_of_Bids = da['Number_of_Bids']
        Bids = da['Bids']
        Location = transformStr(da['Location'])
        Country = transformStr(da['Country'])
        Started = transformDttm(da['Started'])
        Ends = transformDttm(da['Ends'])
        Seller = da['Seller']
        UserID = transformStr(Seller['UserID'])
        Rating = Seller['Rating']
        Description = transformStr(da['Description'])
        if 'Buy_Price' in da.keys():
            Buy_Price = transformDollar(da['Buy_Price'])
        else:
            Buy_Price = "0"

        # User( < u > UserID < / u >, isSeller, isBidder, Rating, Location)
        seller_table.append(
            UserID + '|' + Rating + '|' + Location + '|' + Country)

        # item Item(<u>ItemID</u>, Name, UserID, Description)
        item_table.append(ItemID + '|' + Name + '|' + UserID + '|' + Description)

        # cate
        for cate in Category:
            CateID += 1
            cate_table.append(str(CateID) + '|' + ItemID + '|' + cate)

        # ItemBid (<u>ItemID</u>, Currently, First_Bid, Number_of_Bids, Started, Ends, Buy_Price)
        item_bid_table.append(ItemID + '|' + Currently + '|' + First_Bid + '|'
                              + Number_of_Bids + '|' + Started + '|' + Ends + '|' + Buy_Price)

        # Bid(<u>BidID</u>, ItemID, UserID, Time, Amount)
        if Bids:
            for bid in Bids:
                bid = bid['Bid']
                bidder = bid['Bidder']
                Time = transformDttm(bid['Time'])
                Amount = transformDollar(bid['Amount'])
                UserID_1 = transformStr(bidder['UserID'])
                Rating_1 = bidder['Rating']
                if 'Location' in bidder.keys():
                    Location_1 = transformStr(bidder['Location'])
                else:
                    Location_1 = 'NULL'
                if 'Country' in bidder.keys():
                    Country_1 = transformStr(bidder['Country'])
                else:
                    Country_1 = 'NULL'

                BidID += 1
                bid_table.append(str(BidID) + '|' + ItemID + '|' + UserID_1
                                 + '|' + Time + '|' + Amount)
                # User(<u>UserID</u>, isSeller, isBidder, Rating, Location)
                bidder_table.append(
                    UserID_1 + '|' + Rating_1 + '|' + Location_1 + '|' + Country_1)
    fp.close()
    cate_str = '\n'.join(cate_table)
    save_dat(cate_str, "cate")

    item_str = '\n'.join(item_table)
    save_dat(item_str, "item")

    item_bid_str = '\n'.join(item_bid_table)
    save_dat(item_bid_str, "item_bid")

    bid_table_str = '\n'.join(bid_table)
    save_dat(bid_table_str, "bid")

    seller_table_str = '\n'.join(seller_table)
    save_dat(seller_table_str, "seller")

    bidder_table_str = '\n'.join(bidder_table)
    save_dat(bidder_table_str, "bidder")
    ID = [BidID, CateID]
    return ID


def save_dat(data, file):
    f = open(file + ".dat", "ab")
    data = "\n" + data + "\n"
    pickle.dump(data, f)
    f.close()


def main():
    ID = [10000, 10000]
    for i in range(40):
        filename = "items-" + str(i) + ".json"
        ID = parseJson(filename, ID)


if __name__ == '__main__':
    main()

数据库设计
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值