import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Load the taxi GPS CSV into a DataFrame and display it.
GPS_CSV_PATH = 'taxiGps20200618.csv'
df = pd.read_csv(GPS_CSV_PATH)
df
| RUNNING_STATUS | GPS_SPEED | DRIVING_DIRECTION | GPS_DATE | LONGITUDE | LATITUDE | CARNO |
|---|
| 0 | 9 | 0.0 | 0 | 2020-06-18 11:17:07 | 118.153315 | 24.484221 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 1 | 1 | 0.0 | 0 | 2020-06-18 11:17:23 | 118.154195 | 24.483916 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 2 | 8 | 0.0 | 0 | 2020-06-18 16:23:53 | 118.155210 | 24.453045 | 2263295e0de66305ebb2e5c40c841214 |
|---|
| 3 | 1 | 0.0 | 218 | 2020-06-18 16:53:51 | 118.105983 | 24.588700 | 6d815f5a600bd79e8065e55b089b0cac |
|---|
| 4 | 1 | 0.0 | 218 | 2020-06-18 16:53:27 | 118.105983 | 24.588700 | 6d815f5a600bd79e8065e55b089b0cac |
|---|
| 5 | 1 | 0.0 | 0 | 2020-06-18 11:17:54 | 118.153708 | 24.484171 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 6 | 1 | 0.0 | 0 | 2020-06-18 12:36:00 | 118.153506 | 24.484160 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 7 | 1 | 0.0 | 0 | 2020-06-18 11:18:24 | 118.153575 | 24.484153 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 8 | 1 | 0.0 | 0 | 2020-06-18 12:37:03 | 118.153506 | 24.484160 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 9 | 1 | 0.0 | 0 | 2020-06-18 11:18:55 | 118.153575 | 24.484153 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 10 | 9 | 0.0 | 0 | 2020-06-18 11:20:02 | 118.153575 | 24.484153 | 49550f5b7501cb7c204cf7f7831748dd |
|---|
| 11 | 8 | 0.0 | 0 | 2020-06-18 16:23:53 | 118.155210 | 24.453045 | 2263295e0de66305ebb2e5c40c841214 |
|---|
| 12 | 1 | 0.0 | 0 | 2020-06-18 16:21:35 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 13 | 1 | 0.0 | 0 | 2020-06-18 16:25:33 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 14 | 1 | 0.0 | 0 | 2020-06-18 16:27:01 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 15 | 1 | 0.0 | 0 | 2020-06-18 10:19:00 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 16 | 1 | 0.0 | 0 | 2020-06-18 16:24:33 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 17 | 1 | 0.0 | 0 | 2020-06-18 16:22:52 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 18 | 1 | 0.0 | 0 | 2020-06-18 16:23:57 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 19 | 1 | 0.0 | 0 | 2020-06-18 16:26:33 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 20 | 1 | 0.0 | 0 | 2020-06-18 10:19:30 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 21 | 1 | 0.0 | 0 | 2020-06-18 16:26:03 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22 | 1 | 0.0 | 0 | 2020-06-18 16:21:29 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 23 | 1 | 0.0 | 0 | 2020-06-18 16:22:05 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 24 | 1 | 0.0 | 0 | 2020-06-18 16:24:03 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 25 | 1 | 0.0 | 0 | 2020-06-18 10:17:55 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 26 | 1 | 0.0 | 0 | 2020-06-18 16:22:35 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 27 | 1 | 0.0 | 0 | 2020-06-18 10:18:30 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 28 | 1 | 0.0 | 0 | 2020-06-18 16:25:03 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 29 | 1 | 0.0 | 219 | 2020-06-18 20:11:22 | 118.027708 | 24.576800 | 9d7c1e9850aa9e80d9b6df2c12f3be0c |
|---|
| ... | ... | ... | ... | ... | ... | ... | ... |
|---|
| 22259444 | 1 | 0.0 | 0 | 2020-06-18 10:10:50 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259445 | 1 | 0.0 | 0 | 2020-06-18 10:12:21 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259446 | 1 | 0.0 | 0 | 2020-06-18 10:13:55 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259447 | 1 | 0.0 | 0 | 2020-06-18 10:13:37 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259448 | 1 | 0.0 | 0 | 2020-06-18 10:16:55 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259449 | 1 | 0.0 | 0 | 2020-06-18 10:05:16 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259450 | 1 | 0.0 | 0 | 2020-06-18 10:10:20 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259451 | 1 | 0.0 | 0 | 2020-06-18 10:08:20 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259452 | 1 | 0.0 | 0 | 2020-06-18 10:09:50 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259453 | 1 | 0.0 | 0 | 2020-06-18 10:11:20 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259454 | 1 | 0.0 | 0 | 2020-06-18 10:14:55 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259455 | 1 | 0.0 | 0 | 2020-06-18 10:16:25 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259456 | 1 | 0.0 | 0 | 2020-06-18 10:05:52 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259457 | 1 | 0.0 | 0 | 2020-06-18 10:05:22 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259458 | 1 | 0.0 | 0 | 2020-06-18 10:07:50 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259459 | 1 | 0.0 | 0 | 2020-06-18 10:06:35 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259460 | 1 | 0.0 | 0 | 2020-06-18 10:12:51 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259461 | 1 | 0.0 | 0 | 2020-06-18 10:11:50 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259462 | 1 | 0.0 | 0 | 2020-06-18 10:15:55 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259463 | 1 | 0.0 | 0 | 2020-06-18 10:06:16 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259464 | 1 | 0.0 | 0 | 2020-06-18 10:06:22 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259465 | 1 | 0.0 | 0 | 2020-06-18 10:13:25 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259466 | 1 | 0.0 | 0 | 2020-06-18 10:08:15 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259467 | 1 | 0.0 | 0 | 2020-06-18 10:07:20 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259468 | 1 | 0.0 | 0 | 2020-06-18 10:07:15 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259469 | 1 | 0.0 | 0 | 2020-06-18 10:08:50 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259470 | 1 | 0.0 | 0 | 2020-06-18 10:14:25 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259471 | 1 | 0.0 | 0 | 2020-06-18 10:17:25 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259472 | 1 | 0.0 | 0 | 2020-06-18 10:15:25 | 0.000000 | 0.000000 | 27887e2a10652e5869310b4f1bf340f4 |
|---|
| 22259473 | 8 | 0.0 | 0 | 2020-06-18 08:50:17 | 118.155196 | 24.452791 | d4ff8b3b82659a913adc37eab5c73c3d |
|---|
22259474 rows × 7 columns
# Count how many rows carry each DRIVING_DIRECTION value.
df.DRIVING_DIRECTION.value_counts()
0 2599647
254 152899
252 145160
74 139602
72 131728
270 120226
256 114278
250 110893
272 108768
258 105968
90 104624
264 103306
92 103157
84 102944
76 102777
268 101334
266 101128
262 99976
260 99433
274 99382
276 98322
244 97674
94 96767
70 95441
248 94830
294 92827
170 91701
82 91467
322 91081
16 90672
...
41 25740
145 25727
217 25521
131 25430
49 25388
45 25367
59 25264
119 25235
143 25202
137 25133
53 25085
33 25006
311 24903
61 24871
309 24813
139 24778
113 24728
47 24217
31 24181
29 24040
307 23991
111 23937
127 23729
35 23671
103 23610
43 23377
107 22666
109 22369
37 21588
360 13294
Name: DRIVING_DIRECTION, Length: 361, dtype: int64
# Summary statistics of the frame, rounded to two decimals.
df.describe().round(decimals=2)
| RUNNING_STATUS | GPS_SPEED | DRIVING_DIRECTION | LONGITUDE | LATITUDE |
|---|
| count | 22259474.00 | 22259474.00 | 22259474.00 | 22259474.00 | 22259474.00 |
|---|
| mean | 1.59 | 16.52 | 162.57 | 117.91 | 24.47 |
|---|
| std | 1.52 | 21.21 | 112.96 | 5.01 | 1.05 |
|---|
| min | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
|---|
| 25% | 1.00 | 0.00 | 63.00 | 118.10 | 24.48 |
|---|
| 50% | 1.00 | 7.10 | 166.00 | 118.12 | 24.50 |
|---|
| 75% | 2.00 | 28.90 | 260.00 | 118.15 | 24.52 |
|---|
| max | 41.00 | 1866.00 | 360.00 | 128.60 | 36.83 |
|---|
# Select the rows whose LATITUDE is at least 90.
latitude_at_least_90 = df['LATITUDE'] >= 90
df.loc[latitude_at_least_90]
| RUNNING_STATUS | GPS_SPEED | DRIVING_DIRECTION | GPS_DATE | LONGITUDE | LATITUDE | CARNO |
|---|
# Distinct LATITUDE values that are 20 or below.
df.loc[df['LATITUDE'] <= 20, 'LATITUDE'].unique()
array([ 0. , 18.735893, 16.720076, 18.477973, 19.545486, 18.189498,
17.827593, 19.173525, 18.357385, 19.698856, 18.326236, 18.322025,
18.33303 , 18.344588, 18.351545, 18.38139 , 18.381683, 18.314838,
18.319148, 18.317458, 18.301231, 18.304743, 18.30703 , 19.540326,
18.310495, 18.312541, 18.338078, 18.337708, 18.34453 ])
打卡任务
统计巡游车GPS数据在20190603中包含多少辆出租车🚖?
统计网约车GPS数据在20190603中包含多少辆网约车🚗?
统计巡游车订单数据在20190603中上车经纬度的最大最小值?
统计网约车订单数据集在20190603中下车经纬度最常见的位置?
假设经度+纬度,各保留三位小数,组合得到具体位置
小提示:可以将经纬度拼接到一起进行统计
# Show the column labels of the GPS frame.
df.columns
Index(['RUNNING_STATUS', 'GPS_SPEED', 'DRIVING_DIRECTION', 'GPS_DATE',
'LONGITUDE', 'LATITUDE', 'CARNO'],
dtype='object')
# Number of distinct CARNO values; dropna=False keeps NaN (if any) counted
# as one value, exactly as len(unique()) would.
df['CARNO'].nunique(dropna=False)
6647
# Summary statistics of the frame, rounded to three decimals.
df.describe().round(decimals=3)
| RUNNING_STATUS | GPS_SPEED | DRIVING_DIRECTION | LONGITUDE | LATITUDE |
|---|
| count | 2.225947e+07 | 2.225947e+07 | 2.225947e+07 | 2.225947e+07 | 2.225947e+07 |
|---|
| mean | 1.592000e+00 | 1.652300e+01 | 1.625740e+02 | 1.179080e+02 | 2.446800e+01 |
|---|
| std | 1.524000e+00 | 2.120900e+01 | 1.129550e+02 | 5.007000e+00 | 1.045000e+00 |
|---|
| min | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
|---|
| 25% | 1.000000e+00 | 0.000000e+00 | 6.300000e+01 | 1.180970e+02 | 2.447900e+01 |
|---|
| 50% | 1.000000e+00 | 7.100000e+00 | 1.660000e+02 | 1.181230e+02 | 2.449600e+01 |
|---|
| 75% | 2.000000e+00 | 2.890000e+01 | 2.600000e+02 | 1.181490e+02 | 2.452300e+01 |
|---|
| max | 4.100000e+01 | 1.866000e+03 | 3.600000e+02 | 1.286000e+02 | 3.682800e+01 |
|---|
# Kernel density estimate over the occurrence counts of each LATITUDE value.
# NOTE(review): this plots how often each latitude repeats, not the latitude
# values themselves - confirm that this is the intended distribution.
latitude_counts = df['LATITUDE'].value_counts()
sns.kdeplot(latitude_counts)
<matplotlib.axes._subplots.AxesSubplot at 0x1c389fade10>
![png](output_11_1.png)
0.000000 40050
24.533688 5519
24.539665 5079
24.516658 4829
24.690826 4671
24.467685 4442
24.525995 4216
24.474758 3924
24.477215 3823
24.501808 3626
24.510193 3610
24.492511 3574
24.668578 3563
24.480398 3532
24.690756 3509
24.471203 3461
24.480805 3425
24.481183 3297
24.481280 3287
24.512276 3265
24.508940 3246
24.490398 3236
24.519206 3196
24.511568 3194
24.499750 3115
24.512401 3087
24.483943 3075
24.512973 3062
24.690510 3046
24.499226 3029
...
24.896530 1
25.194982 1
25.067691 1
24.521939 1
25.122210 1
25.843908 1
24.568814 1
25.018415 1
24.761613 1
24.774230 1
24.534484 1
25.050253 1
24.790970 1
24.712845 1
24.737576 1
24.408328 1
24.603914 1
26.004841 1
24.871766 1
24.754435 1
24.362590 1
24.706326 1
24.779881 1
24.653016 1
24.739900 1
23.698791 1
24.565734 1
24.425187 1
24.708650 1
24.745930 1
Name: LATITUDE, Length: 292120, dtype: int64
# Rank GPS positions by frequency: round both coordinates to three decimals,
# join them as "LATITUDE:LONGITUDE", and count each combined key.
coords = df[['LONGITUDE', 'LATITUDE']].round(3)

# Drop the (0, 0) placeholder fixes while the values are still numeric.
# Once the columns are strings, `!= 0` is True for every row and the filter
# silently keeps everything (including "0.0:0.0").
has_fix = (coords['LONGITUDE'] != 0) | (coords['LATITUDE'] != 0)
location = coords.loc[has_fix].astype('str')

# assign() returns a new frame, so no column is written onto a filtered slice.
location = location.assign(loc=location['LATITUDE'] + ':' + location['LONGITUDE'])
num_loc = location['loc'].value_counts()

# Split the keys straight from the index. Going through
# reset_index()['index'] breaks on pandas versions that name the column
# after the series instead of 'index'.
# Result: column 0 is latitude, column 1 is longitude, most frequent first.
pd.Series(num_loc.index).str.split(':', expand=True)
| 0 | 1 |
|---|
| 0 | 24.638 | 118.071 |
|---|
| 1 | 24.691 | 118.141 |
|---|
| 2 | 24.547 | 118.147 |
|---|
| 3 | 24.539 | 118.129 |
|---|
| 4 | 24.538 | 118.129 |
|---|
| 5 | 24.489 | 118.155 |
|---|
| 6 | 24.547 | 118.146 |
|---|
| 7 | 24.639 | 118.071 |
|---|
| 8 | 24.482 | 118.116 |
|---|
| 9 | 24.485 | 118.117 |
|---|
| 10 | 24.481 | 118.116 |
|---|
| 11 | 24.49 | 118.149 |
|---|
| 12 | 24.599 | 118.109 |
|---|
| 13 | 24.479 | 118.115 |
|---|
| 14 | 0.0 | 0.0 |
|---|
| 15 | 24.546 | 118.147 |
|---|
| 16 | 24.474 | 118.178 |
|---|
| 17 | 24.475 | 118.114 |
|---|
| 18 | 24.477 | 118.107 |
|---|
| 19 | 24.512 | 118.137 |
|---|
| 20 | 24.547 | 118.144 |
|---|
| 21 | 24.537 | 118.127 |
|---|
| 22 | 24.548 | 118.146 |
|---|
| 23 | 24.48 | 118.116 |
|---|
| 24 | 24.538 | 118.128 |
|---|
| 25 | 24.472 | 118.174 |
|---|
| 26 | 24.486 | 118.131 |
|---|
| 27 | 24.638 | 118.072 |
|---|
| 28 | 24.472 | 118.11 |
|---|
| 29 | 24.502 | 118.128 |
|---|
| ... | ... | ... |
|---|
| 65754 | 24.631 | 117.724 |
|---|
| 65755 | 23.614 | 117.367 |
|---|
| 65756 | 24.966 | 118.381 |
|---|
| 65757 | 24.457 | 118.101 |
|---|
| 65758 | 24.522 | 118.023 |
|---|
| 65759 | 24.661 | 118.115 |
|---|
| 65760 | 24.525 | 118.082 |
|---|
| 65761 | 24.183 | 117.584 |
|---|
| 65762 | 24.454 | 118.06 |
|---|
| 65763 | 24.674 | 118.053 |
|---|
| 65764 | 24.506 | 118.24 |
|---|
| 65765 | 24.518 | 117.437 |
|---|
| 65766 | 26.683 | 118.16 |
|---|
| 65767 | 27.336 | 117.481 |
|---|
| 65768 | 24.564 | 117.593 |
|---|
| 65769 | 24.442 | 118.177 |
|---|
| 65770 | 24.581 | 118.11 |
|---|
| 65771 | 25.848 | 119.518 |
|---|
| 65772 | 24.801 | 118.128 |
|---|
| 65773 | 24.25 | 117.659 |
|---|
| 65774 | 24.706 | 117.786 |
|---|
| 65775 | 24.716 | 118.07 |
|---|
| 65776 | 24.781 | 118.45 |
|---|
| 65777 | 24.748 | 118.42 |
|---|
| 65778 | 24.958 | 118.076 |
|---|
| 65779 | 23.75 | 116.633 |
|---|
| 65780 | 24.839 | 118.427 |
|---|
| 65781 | 24.694 | 118.535 |
|---|
| 65782 | 24.354 | 118.123 |
|---|
| 65783 | 24.551 | 117.896 |
|---|
65784 rows × 2 columns