from io import StringIO

import numpy as np
import pandas as pd
# Build a 5x2 frame of random normal values (columns 'A' and 'B') and
# serialise it to a JSON string using to_json()'s defaults.
dfj = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
dfj.to_json()
# Sample output (the numbers differ on every run because the data is random):
# '{"A":{"0":-1.2945235903,"1":0.2766617129,"2":-0.0139597524,"3":-0.0061535699,"4":0.8957173022},"B":{"0":0.4137381054,"1":-0.472034511,"2":-0.3625429925,"3":-0.923060654,"4":0.8052440254}}'
# A small labelled frame (rows x/y/z, columns A/B/C) and a named series,
# used below to compare the JSON layouts produced by each `orient` value.
dfjo = pd.DataFrame(
    {'A': range(1, 4), 'B': range(4, 7), 'C': range(7, 10)},
    index=['x', 'y', 'z'],
    columns=['A', 'B', 'C'],
)
sjo = pd.Series({'x': 15, 'y': 16, 'z': 17}, name='D')

# Column-oriented: {column -> {index -> value}}.
dfjo.to_json(orient="columns")

# Index-oriented: {index -> {column -> value}} for the frame,
# {index -> value} for the series.
dfjo.to_json(orient="index")
sjo.to_json(orient="index")

# Record-oriented: a JSON array with the index labels dropped.
dfjo.to_json(orient="records")
sjo.to_json(orient="records")

# Split-oriented: name, index and data serialised as separate members.
sjo.to_json(orient="split")
# NOTE(review): `dfd` was never defined in these notes, so the calls below
# raised NameError. This small frame with one datetime column is a stand-in
# that makes the date examples runnable; swap in the real data if it differs.
dfd = pd.DataFrame({'A': [0.1, 0.2, 0.3], 'date': pd.Timestamp('2013-01-01')})

# ISO 8601 strings for datetimes (default precision).
dfd.to_json(date_format='iso')
# ISO 8601 strings with microsecond precision.
dfd.to_json(date_format='iso', date_unit='us')
# Epoch timestamps expressed in whole seconds.
dfd.to_json(date_format='epoch', date_unit='s')

# Complex numbers are not representable in JSON; default_handler=str is
# called for such values so they are emitted as strings instead.
pd.DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json(default_handler=str)
read_json()参数
dtype : if True, infer dtypes; if a dict of column to dtype, use those; if False, don’t infer dtypes at all. Default is True. Applies only to the data.
convert_axes : boolean, try to convert the axes to the proper dtypes, default is True
convert_dates : a list of columns to parse for dates; If True, then try to parse date-like columns, default is True.
keep_default_dates : boolean, default True. If parsing dates, then parse the default date-like columns.
numpy : direct decoding to NumPy arrays. default is False; Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering MUST be the same for each term if numpy=True.
precise_float : boolean, default False. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (False) is to use fast but less precise builtin functionality.
date_unit : string, the timestamp unit to detect if converting dates. Default None. By default the timestamp precision will be detected, if this is not desired then pass one of ‘s’, ‘ms’, ‘us’ or ‘ns’ to force timestamp precision to seconds, milliseconds, microseconds or nanoseconds respectively.
lines : read the file as one JSON object per line.
encoding : The encoding to use to decode py3 bytes.
chunksize : when used in combination with lines=True, return a JsonReader which reads in chunksize lines per iteration.
# Read 'test.json' with every column forced to object dtype and show the dtypes.
# NOTE(review): 'test.json' is not created anywhere in the visible notes —
# confirm the file exists before running these two lines.
pd.read_json('test.json', dtype=object).dtypes
# Same file, but requesting explicit dtypes for columns 'A' and 'bools'.
pd.read_json('test.json', dtype={'A': 'float32', 'bools': 'int8'}).dtypes
# NOTE(review): `dfj2` was never defined in these notes. This stand-in frame
# (float, datetime, int and bool columns) makes the example runnable; replace
# it with the real data if it differs.
dfj2 = pd.DataFrame({
    'A': [0.5, -1.5, 2.0],
    'date': pd.Timestamp('2013-01-01'),
    'ints': [0, 1, 2],
    'bools': True,
})

# Serialise with nanosecond epoch timestamps. This must happen before any
# read below: the original notes used `json` one line before assigning it.
# The variable name `json` is kept from the notes.
json = dfj2.to_json(date_unit='ns')

# Passing a literal JSON string to read_json is deprecated (pandas 2.1+),
# so the payload is wrapped in StringIO.
# convert_axes=False: leave the axis labels as they appear in the JSON.
pd.read_json(StringIO(json), convert_axes=False)

# Read the nanosecond payload while declaring the timestamps to be in
# milliseconds. NOTE(review): the unit deliberately does not match the 'ns'
# used when writing — presumably to show the effect of a mismatch; confirm.
dfju = pd.read_json(StringIO(json), date_unit='ms')
pandas 提供了实用函数 json_normalize,可以接收 dict 或 dict 列表,并将半结构化数据规范化为一张平面表。
# json_normalize lives in the top-level pandas namespace since pandas 1.0;
# the old `pandas.io.json` location was deprecated and no longer imports on
# pandas 2.x.
from pandas import json_normalize

# Records with inconsistent nesting: 'name' is a dict with different keys in
# the first two records and a plain string in the third; 'id' is missing
# from the second record.
data = [
    {'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
    {'name': {'given': 'Mose', 'family': 'Regner'}},
    {'id': 2, 'name': 'Faye Raker'},
]

# Flatten into one row per record; nested keys become dotted column names
# (e.g. 'name.first') and absent values are filled with NaN.
json_normalize(data)
# Two state records, each carrying a nested list of county records.
# (The IPython '.....:' continuation prompts from the pasted session were
# removed; they made this literal a syntax error.)
data = [
    {
        'state': 'Florida',
        'shortname': 'FL',
        'info': {'governor': 'Rick Scott'},
        'counties': [
            {'name': 'Dade', 'population': 12345},
            {'name': 'Broward', 'population': 40000},
            {'name': 'Palm Beach', 'population': 60000},
        ],
    },
    {
        'state': 'Ohio',
        'shortname': 'OH',
        'info': {'governor': 'John Kasich'},
        'counties': [
            {'name': 'Summit', 'population': 1234},
            {'name': 'Cuyahoga', 'population': 1337},
        ],
    },
]

# One output row per county (record path 'counties'); the state-level fields
# listed as metadata are repeated on each row. ['info', 'governor'] is a path
# into the nested 'info' dict and yields the column 'info.governor'.
pd.json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])
# A single record nested up to two levels deep ('Lookup' -> 'UserField' -> ...).
# (The IPython '.....:' continuation prompts from the pasted session were
# removed; they made this literal a syntax error.)
data = [
    {
        'CreatedBy': {'Name': 'User001'},
        'Lookup': {
            'TextField': 'Some text',
            'UserField': {'Id': 'ID001', 'Name': 'Name001'},
        },
        'Image': {'a': 'b'},
    },
]

# max_level=1 flattens only the first level of nesting, so 'Lookup.UserField'
# stays a dict instead of being expanded into further dotted columns.
pd.json_normalize(data, max_level=1)
# read_html option examples. Each call rebinds `dfs`, so only the result of
# the last call is kept.
# NOTE(review): `url` is not defined in the visible notes — it must be bound
# before these lines run.
# header=0: use row 0 as the column header.
dfs = pd.read_html(url, header=0)
# index_col=0: use column 0 as the index.
dfs = pd.read_html(url, index_col=0)
# skiprows=0: skip the row at position 0.
dfs = pd.read_html(url, skiprows=0)
# skiprows=range(2): skip the rows at positions 0 and 1.
dfs = pd.read_html(url, skiprows=range(2))