数据分析汇总学习
https://blog.youkuaiyun.com/weixin_39778570/article/details/81157884
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
# Load the US airline flights data.
# The context manager closes the file handle as soon as pandas has finished
# parsing it, rather than leaving it open for the rest of the session.
with open('usa_flights.csv') as f:
    df = pd.read_csv(f)
# Preview the first five rows of the loaded table.
df.head()
Out[6]:
flight_date unique_carrier flight_num origin dest arr_delay \
0 02/01/2015 0:00 AA 1 JFK LAX -19.0
1 03/01/2015 0:00 AA 1 JFK LAX -39.0
2 04/01/2015 0:00 AA 1 JFK LAX -12.0
3 05/01/2015 0:00 AA 1 JFK LAX -8.0
4 06/01/2015 0:00 AA 1 JFK LAX 25.0
cancelled distance carrier_delay weather_delay late_aircraft_delay \
0 0 2475 NaN NaN NaN
1 0 2475 NaN NaN NaN
2 0 2475 NaN NaN NaN
3 0 2475 NaN NaN NaN
4 0 2475 0.0 0.0 0.0
nas_delay security_delay actual_elapsed_time
0 NaN NaN 381.0
1 NaN NaN 358.0
2 NaN NaN 385.0
3 NaN NaN 389.0
4 25.0 0.0 424.0
# Order the flights by arrival delay, largest delay first, and show the top five rows.
df.sort_values(by=['arr_delay'], ascending=[False]).head(n=5)
Out[7]:
flight_date unique_carr