处理数据python文件
import json
import pickle
from re import sub
# Dictionary of months used for date transformation
MONTHS = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06', \
'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
def transformMonth(mon):
if mon in MONTHS:
return MONTHS[mon]
else:
return mon
"""
Transforms a timestamp from Mon-DD-YY HH:MM:SS to YYYY-MM-DD HH:MM:SS
"""
def transformDttm(dttm):
dttm = dttm.strip().split(' ')
dt = dttm[0].split('-')
date = '20' + dt[2] + '-'
date += transformMonth(dt[0]) + '-' + dt[1]
return date + ' ' + dttm[1]
def transformDollar(money):
if money == None or len(money) == 0:
return 'NULL'
return sub(r'[^\d.]', '', money)
def transformStr(string):
content = "\""
if not string:
return 'NULL'
for s in string:
if s == '\"':
s += '\"'
else:
content += s
content += "\""
return content
def parseJson(filename, ID):
# 定义文件路径
path = './ebay_data/'
cate_table = []
item_table = []
item_bid_table = []
bid_table = []
seller_table = []
bidder_table = []
BidID = ID[0]
CateID = ID[1]
# 打开文件,r是读取,encoding是指定编码格式
with open(path + filename, 'r', encoding='utf-8') as fp:
data = json.load(fp)
for da in data['Items']:
ItemID = da['ItemID']
Name = transformStr(da['Name'])
Category = da['Category']
Currently = transformDollar(da['Currently'])
First_Bid = transformDollar(da['First_Bid'])
Number_of_Bids = da['Number_of_Bids']
Bids = da['Bids']
Location = transformStr(da['Location'])
Country = transformStr(da['Country'])
Started = transformDttm(da['Started'])
Ends = transformDttm(da['Ends'])
Seller = da['Seller']
UserID = transformStr(Seller['UserID'])
Rating = Seller['Rating']
Description = transformStr(da['Description'])
if 'Buy_Price' in da.keys():
Buy_Price = transformDollar(da['Buy_Price'])
else:
Buy_Price = "0"
# User( < u > UserID < / u >, isSeller, isBidder, Rating, Location)
seller_table.append(
UserID + '|' + Rating + '|' + Location + '|' + Country)
# item Item(<u>ItemID</u>, Name, UserID, Description)
item_table.append(ItemID + '|' + Name + '|' + UserID + '|' + Description)
# cate
for cate in Category:
CateID += 1
cate_table.append(str(CateID) + '|' + ItemID + '|' + cate)
# ItemBid (<u>ItemID</u>, Currently, First_Bid, Number_of_Bids, Started, Ends, Buy_Price)
item_bid_table.append(ItemID + '|' + Currently + '|' + First_Bid + '|'
+ Number_of_Bids + '|' + Started + '|' + Ends + '|' + Buy_Price)
# Bid(<u>BidID</u>, ItemID, UserID, Time, Amount)
if Bids:
for bid in Bids:
bid = bid['Bid']
bidder = bid['Bidder']
Time = transformDttm(bid['Time'])
Amount = transformDollar(bid['Amount'])
UserID_1 = transformStr(bidder['UserID'])
Rating_1 = bidder['Rating']
if 'Location' in bidder.keys():
Location_1 = transformStr(bidder['Location'])
else:
Location_1 = 'NULL'
if 'Country' in bidder.keys():
Country_1 = transformStr(bidder['Country'])
else:
Country_1 = 'NULL'
BidID += 1
bid_table.append(str(BidID) + '|' + ItemID + '|' + UserID_1
+ '|' + Time + '|' + Amount)
# User(<u>UserID</u>, isSeller, isBidder, Rating, Location)
bidder_table.append(
UserID_1 + '|' + Rating_1 + '|' + Location_1 + '|' + Country_1)
fp.close()
cate_str = '\n'.join(cate_table)
save_dat(cate_str, "cate")
item_str = '\n'.join(item_table)
save_dat(item_str, "item")
item_bid_str = '\n'.join(item_bid_table)
save_dat(item_bid_str, "item_bid")
bid_table_str = '\n'.join(bid_table)
save_dat(bid_table_str, "bid")
seller_table_str = '\n'.join(seller_table)
save_dat(seller_table_str, "seller")
bidder_table_str = '\n'.join(bidder_table)
save_dat(bidder_table_str, "bidder")
ID = [BidID, CateID]
return ID
def save_dat(data, file):
f = open(file + ".dat", "ab")
data = "\n" + data + "\n"
pickle.dump(data, f)
f.close()
def main():
ID = [10000, 10000]
for i in range(40):
filename = "items-" + str(i) + ".json"
ID = parseJson(filename, ID)
if __name__ == '__main__':
main()
数据库设计