Converting XLS to CSV and Organizing JSON with Python

This post walks through parsing peak-load data from an Excel file and writing the results to a CSV file: Python's xlrd library reads the spreadsheet, a loop over each region's column finds the maximum load value and its corresponding time, and the results are written out using the pipe character (|) as the delimiter.

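Before the full script, here is a minimal sketch of the xlrd calls it relies on (the filename "data.xls" is illustrative):

import xlrd

# Open the first worksheet of an .xls file (illustrative filename)
workbook = xlrd.open_workbook("data.xls")
sheet = workbook.sheet_by_index(0)

header = sheet.cell_value(0, 1)             # a single cell: row 0, column 1
column = sheet.col_values(1, start_rowx=1)  # all of column 1, skipping the header row

# Excel stores dates as floats; convert one using the workbook's date mode
date_tuple = xlrd.xldate_as_tuple(sheet.cell_value(1, 0), workbook.datemode)
print date_tuple  # (year, month, day, hour, minute, second)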

# -*- coding: utf-8 -*-
'''
Find the time and value of max load for each of the regions
COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using the pipe character | as the delimiter.

An example output can be seen in the "example.csv" file.
'''

import xlrd
import os
import csv
from zipfile import ZipFile

datafile = "2013_ERCOT_Hourly_Load_Data.xls"
outfile = "2013_Max_Loads.csv"


def open_zip(datafile):
    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
        myzip.extractall()


def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    data = {}
    # Columns 1-8 hold one station each; column 0 holds the timestamps
    for n in range(1, 9):
        station = sheet.cell_value(0, n)
        cv = sheet.col_values(n, start_rowx=1, end_rowx=None)

        maxval = max(cv)
        # +1 compensates for the header row skipped by start_rowx=1
        maxpos = cv.index(maxval) + 1
        maxtime = sheet.cell_value(maxpos, 0)
        # convert the Excel serial date using the workbook's date mode
        realtime = xlrd.xldate_as_tuple(maxtime, workbook.datemode)
        data[station] = {"maxval": maxval,
                         "maxtime": realtime}

    print data
    return data

def save_file(data, filename):
    # binary mode keeps the csv module from writing blank lines on Windows (Python 2)
    with open(filename, "wb") as f:
        w = csv.writer(f, delimiter='|')
        w.writerow(["Station", "Year", "Month", "Day", "Hour", "Max Load"])
        for s in data:
            year, month, day, hour, _, _ = data[s]["maxtime"]
            w.writerow([s, year, month, day, hour, data[s]["maxval"]])

    
def test():
    open_zip(datafile)
    data = parse_file(datafile)
    save_file(data, outfile)

    number_of_rows = 0
    stations = []

    ans = {'FAR_WEST': {'Max Load': '2281.2722140000024',
                        'Year': '2013',
                        'Month': '6',
                        'Day': '26',
                        'Hour': '17'}}
    correct_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH',
                        'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    fields = ['Year', 'Month', 'Day', 'Hour', 'Max Load']

    with open(outfile) as of:
        csvfile = csv.DictReader(of, delimiter="|")
        for line in csvfile:
            station = line['Station']
            if station == 'FAR_WEST':
                for field in fields:
                    # Check if 'Max Load' is within .1 of answer
                    if field == 'Max Load':
                        max_answer = round(float(ans[station][field]), 1)
                        max_line = round(float(line[field]), 1)
                        assert max_answer == max_line

                    # Otherwise check for equality
                    else:
                        assert ans[station][field] == line[field]

            number_of_rows += 1
            stations.append(station)

        # Output should be 8 lines not including header
        assert number_of_rows == 8

        # Check Station Names
        assert set(stations) == set(correct_stations)

        
if __name__ == "__main__":
    test()
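Once the script runs, 2013_Max_Loads.csv is pipe-delimited with one row per station. Using the reference values asserted in test(), the FAR_WEST row has this shape (the exact float formatting may differ slightly):

Station|Year|Month|Day|Hour|Max Load
FAR_WEST|2013|6|26|17|2281.2722140000024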
###########

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This exercise shows some important concepts that you should be aware of:
- using codecs module to write unicode files
- using authentication with web APIs
- using offset when accessing web APIs

To run this code locally you have to register at the NYTimes developer site 
and get your own API key. You will be able to complete this exercise in our UI
without doing so, as we have provided a sample result. (See the file 
'popular-viewed-1.json' from the tabs above.)

Your task is to modify the article_overview() function to process the saved
file that represents the most popular articles (by view count) from the last
day, and return a tuple of variables containing the following data:
- titles: list of dictionaries, where the keys are the "section" values and
  the values are the "title" values for each of the retrieved articles.
- urls: list of URLs for all 'media' entries with "format": "Standard Thumbnail"

All your changes should be in the article_overview() function. See the test() 
function for examples of the elements of the output lists.
The rest of the functions are provided for your convenience, in case you want to
access the API yourself.
"""
import json
import codecs
import requests

URL_MAIN = "http://api.nytimes.com/svc/"
URL_POPULAR = URL_MAIN + "mostpopular/v2/"
API_KEY = { "popular": "",
            "article": ""}


def get_from_file(kind, period):
    filename = "popular-{0}-{1}.json".format(kind, period)
    with open(filename, "r") as f:
        return json.loads(f.read())


def article_overview(kind, period):
    data = get_from_file(kind, period)
    titles = []
    urls = []

    for article in data:
        # map each article's section name to its title
        section = article["section"]
        title = article["title"]
        titles.append({section: title})
        # collect thumbnail URLs from the nested media metadata
        if "media" in article:
            for m in article["media"]:
                for mm in m["media-metadata"]:
                    if mm["format"] == "Standard Thumbnail":
                        urls.append(mm["url"])
    return (titles, urls)


def query_site(url, target, offset):
    # This sets up the query with the API key and offset.
    # Web services often use an offset parameter to return data in small chunks;
    # the NYTimes API returns 20 articles per request, so to get the next 20
    # you have to provide the offset parameter.
    if API_KEY["popular"] == "" or API_KEY["article"] == "":
        print "You need to register for a NYTimes Developer account to run this program."
        print "See the Instructor notes for information."
        return False
        return False
    params = {"api-key": API_KEY[target], "offset": offset}
    r = requests.get(url, params=params)

    if r.status_code == requests.codes.ok:
        return r.json()
    else:
        r.raise_for_status()


def get_popular(url, kind, days, section="all-sections", offset=0):
    # This function constructs the query according to the requirements of the site
    # and returns the data, or prints an error message if called incorrectly
    if days not in [1, 7, 30]:
        print "Time period can be 1, 7, or 30 days only"
        return False
    if kind not in ["viewed", "shared", "emailed"]:
        print "kind can only be one of viewed/shared/emailed"
        return False

    url += "most{0}/{1}/{2}.json".format(kind, section, days)
    data = query_site(url, "popular", offset)

    return data


def save_file(kind, period):
    # Process all results by calling the API repeatedly with increasing offset
    # values, combine the data, and write everything to a single file.
    # The first request is made only to learn the total number of results.
    data = get_popular(URL_POPULAR, kind, period)
    num_results = data["num_results"]
    full_data = []
    with codecs.open("popular-{0}-{1}.json".format(kind, period), encoding='utf-8', mode='w') as v:
        for offset in range(0, num_results, 20):
            data = get_popular(URL_POPULAR, kind, period, offset=offset)
            full_data += data["results"]
        
        v.write(json.dumps(full_data, indent=2))


def test():
    titles, urls = article_overview("viewed", 1)
    assert len(titles) == 20
    assert len(urls) == 30
    assert titles[2] == {'Opinion': 'Professors, We Need You!'}
    assert urls[20] == 'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg'


if __name__ == "__main__":
    test()
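For reference, article_overview() expects each article in the saved JSON file to have roughly the following shape. The field names come from the parsing code above; the sample values are taken from the assertions in test() and are combined here purely for illustration (in the real data this title and this thumbnail belong to different articles):

[
  {
    "section": "Opinion",
    "title": "Professors, We Need You!",
    "media": [
      {
        "media-metadata": [
          {
            "format": "Standard Thumbnail",
            "url": "http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg"
          }
        ]
      }
    ]
  }
]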
