pandas 第1课

How do I read a tabular data file into pandas?


In [2]:
import pandas as pd


In [6]:
# Load the orders file; the bit.ly short link requires a VPN/proxy to get past the firewall.
orders = pd.read_table(filepath_or_buffer='http://bit.ly/chiporders')


In [8]:
# Preview the first five rows instead of rendering the whole frame.
orders.head(n=5)


Out[8]:
   order_id  quantity                              item_name                               choice_description  item_price
0         1         1           Chips and Fresh Tomato Salsa                                              NaN       $2.39
1         1         1                                   Izze                                     [Clementine]       $3.39
2         1         1                       Nantucket Nectar                                          [Apple]       $3.39
3         1         1  Chips and Tomatillo-Green Chili Salsa                                              NaN       $2.39
4         2         2                           Chicken Bowl  [Tomatillo-Red Chili Salsa (Hot), [Black Beans…      $16.98


In [11]:
# With default settings the pipe-delimited lines are not split,
# so every row ends up in a single column (see the output below).
pd.read_table(filepath_or_buffer='http://bit.ly/movieusers')


Out[11]:
1|24|M|technician|85711
02|53|F|other|94043
13|23|M|writer|32067
24|24|M|technician|43537
35|33|F|other|15213
46|42|M|executive|98101
57|57|M|administrator|91344
68|36|M|administrator|05201
79|29|M|student|01002
810|53|M|lawyer|90703
911|39|F|other|30329
1012|28|F|other|06405
1113|47|M|educator|29206
1214|45|M|scientist|55106
1315|49|F|educator|97301
1416|21|M|entertainment|10309
1517|30|M|programmer|06355
1618|35|F|other|37212
1719|40|M|librarian|02138
1820|42|F|homemaker|95660
1921|26|M|writer|30068
2022|25|M|writer|40206
2123|30|F|artist|48197
2224|21|F|artist|94533
2325|39|M|engineer|55107
2426|49|M|engineer|21044
2527|40|F|librarian|30030
2628|32|M|writer|55369
2729|41|M|programmer|94043
2830|7|M|student|55436
2931|24|M|artist|10003
912914|44|F|other|08105
913915|50|M|entertainment|60614
914916|27|M|engineer|N2L5N
915917|22|F|student|20006
916918|40|M|scientist|70116
917919|25|M|other|14216
918920|30|F|artist|90008
919921|20|F|student|98801
920922|29|F|administrator|21114
921923|21|M|student|E2E3R
922924|29|M|other|11753
923925|18|F|salesman|49036
924926|49|M|entertainment|01701
925927|23|M|programmer|55428
926928|21|M|student|55408
927929|44|M|scientist|53711
928930|28|F|scientist|07310
929931|60|M|educator|33556
930932|58|M|educator|06437
931933|28|M|student|48105
932934|61|M|engineer|22902
933935|42|M|doctor|66221
934936|24|M|other|32789
935937|48|M|educator|98072
936938|38|F|technician|55038
937939|26|F|student|33319
938940|32|M|administrator|02215
939941|20|M|student|97229
940942|48|F|librarian|78209
941943|22|M|student|77841

942 rows × 1 columns


In [15]:
# Split each line on the '|' character.
pd.read_table(
    'http://bit.ly/movieusers',
    sep='|',
)


Out[15]:
124Mtechnician85711
0253Fother94043
1323Mwriter32067
2424Mtechnician43537
3533Fother15213
4642Mexecutive98101
5757Madministrator91344
6836Madministrator05201
7929Mstudent01002
81053Mlawyer90703
91139Fother30329
101228Fother06405
111347Meducator29206
121445Mscientist55106
131549Feducator97301
141621Mentertainment10309
151730Mprogrammer06355
161835Fother37212
171940Mlibrarian02138
182042Fhomemaker95660
192126Mwriter30068
202225Mwriter40206
212330Fartist48197
222421Fartist94533
232539Mengineer55107
242649Mengineer21044
252740Flibrarian30030
262832Mwriter55369
272941Mprogrammer94043
28307Mstudent55436
293124Martist10003
91291444Fother08105
91391550Mentertainment60614
91491627MengineerN2L5N
91591722Fstudent20006
91691840Mscientist70116
91791925Mother14216
91892030Fartist90008
91992120Fstudent98801
92092229Fadministrator21114
92192321MstudentE2E3R
92292429Mother11753
92392518Fsalesman49036
92492649Mentertainment01701
92592723Mprogrammer55428
92692821Mstudent55408
92792944Mscientist53711
92893028Fscientist07310
92993160Meducator33556
93093258Meducator06437
93193328Mstudent48105
93293461Mengineer22902
93393542Mdoctor66221
93493624Mother32789
93593748Meducator98072
93693838Ftechnician55038
93793926Fstudent33319
93894032Madministrator02215
93994120Mstudent97229
94094248Flibrarian78209
94194322Mstudent77841

942 rows × 5 columns


In [18]:
# Tell pandas that the first line is data, not a header row.
pd.read_table(
    'http://bit.ly/movieusers',
    sep='|',
    header=None,
)


Out[18]:
01234
0124Mtechnician85711
1253Fother94043
2323Mwriter32067
3424Mtechnician43537
4533Fother15213
5642Mexecutive98101
6757Madministrator91344
7836Madministrator05201
8929Mstudent01002
91053Mlawyer90703
101139Fother30329
111228Fother06405
121347Meducator29206
131445Mscientist55106
141549Feducator97301
151621Mentertainment10309
161730Mprogrammer06355
171835Fother37212
181940Mlibrarian02138
192042Fhomemaker95660
202126Mwriter30068
212225Mwriter40206
222330Fartist48197
232421Fartist94533
242539Mengineer55107
252649Mengineer21044
262740Flibrarian30030
272832Mwriter55369
282941Mprogrammer94043
29307Mstudent55436
91391444Fother08105
91491550Mentertainment60614
91591627MengineerN2L5N
91691722Fstudent20006
91791840Mscientist70116
91891925Mother14216
91992030Fartist90008
92092120Fstudent98801
92192229Fadministrator21114
92292321MstudentE2E3R
92392429Mother11753
92492518Fsalesman49036
92592649Mentertainment01701
92692723Mprogrammer55428
92792821Mstudent55408
92892944Mscientist53711
92993028Fscientist07310
93093160Meducator33556
93193258Meducator06437
93293328Mstudent48105
93393461Mengineer22902
93493542Mdoctor66221
93593624Mother32789
93693748Meducator98072
93793838Ftechnician55038
93893926Fstudent33319
93994032Madministrator02215
94094120Mstudent97229
94194248Flibrarian78209
94294322Mstudent77841

943 rows × 5 columns


In [22]:
# Give the DataFrame column names, since the file has no header row.
# (occupation = job, zip_code = postal code)
user_cols = [
    'user_id',
    'age',
    'gender',
    'occupation',
    'zip_code',
]
pd.read_table(
    'http://bit.ly/movieusers',
    sep='|',
    header=None,      # the first line is data, not a header
    names=user_cols,  # column labels to use instead
)


Out[22]:
user_idagegenderoccupationzip_code
0124Mtechnician85711
1253Fother94043
2323Mwriter32067
3424Mtechnician43537
4533Fother15213
5642Mexecutive98101
6757Madministrator91344
7836Madministrator05201
8929Mstudent01002
91053Mlawyer90703
101139Fother30329
111228Fother06405
121347Meducator29206
131445Mscientist55106
141549Feducator97301
151621Mentertainment10309
161730Mprogrammer06355
171835Fother37212
181940Mlibrarian02138
192042Fhomemaker95660
202126Mwriter30068
212225Mwriter40206
222330Fartist48197
232421Fartist94533
242539Mengineer55107
252649Mengineer21044
262740Flibrarian30030
272832Mwriter55369
282941Mprogrammer94043
29307Mstudent55436
91391444Fother08105
91491550Mentertainment60614
91591627MengineerN2L5N
91691722Fstudent20006
91791840Mscientist70116
91891925Mother14216
91992030Fartist90008
92092120Fstudent98801
92192229Fadministrator21114
92292321MstudentE2E3R
92392429Mother11753
92492518Fsalesman49036
92592649Mentertainment01701
92692723Mprogrammer55428
92792821Mstudent55408
92892944Mscientist53711
92993028Fscientist07310
93093160Meducator33556
93193258Meducator06437
93293328Mstudent48105
93393461Mengineer22902
93493542Mdoctor66221
93593624Mother32789
93693748Meducator98072
93793838Ftechnician55038
93893926Fstudent33319
93994032Madministrator02215
94094120Mstudent97229
94194248Flibrarian78209
94294322Mstudent77841

943 rows × 5 columns


In [23]:
# Same load as before, but keep the result in `users` for later cells.
# (occupation = job, zip_code = postal code)
user_cols = [
    'user_id',
    'age',
    'gender',
    'occupation',
    'zip_code',
]
users = pd.read_table(
    'http://bit.ly/movieusers',
    sep='|',
    header=None,      # the first line is data, not a header
    names=user_cols,  # column labels to use instead
)


In [25]:
# Preview the first five rows of the labelled user table.
users.head(n=5)


Out[25]:
   user_id  age gender  occupation zip_code
0        1   24      M  technician    85711
1        2   53      F       other    94043
2        3   23      M      writer    32067
3        4   24      M  technician    43537
4        5   33      F       other    15213


In [ ]:
 






评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值