最近发现Python DataTable有查询csv, Excel表数据真的很好用。强烈推荐给大家。有一些甚至比Pandas都要好用一些。这篇文章记录一些查询数据的方法。其中一些小的细节,请大家自己观察结果的返回。
读取csv数据:
tips = dt.fread("D:\\pythonProject\\datacenterqa\\datatables_example\\tips.csv")
1."SELECT total_bill, tip,sex, total_bill*2 AS total_bill_doubled FROM tips where sex = 'Female';"
dt12 = tips[f["sex"] == "Female", ("total_bill", "tip", "sex")][:, f[:].extend({"total_bill_doubled": f.total_bill * 2})]
print(dt12)
结果:
| total_bill tip sex total_bill_doubled
| float64 float64 str32 float64
-- + ---------- ------- ------ ------------------
0 | 16.99 1.01 Female 33.98
1 | 24.59 3.61 Female 49.18
2 | 35.26 5 Female 70.52
3 | 14.83 3.02 Female 29.66
4 | 10.33 1.67 Female 20.66
5 | 16.97 3.5 Female 33.94
6 | 20.29 2.75 Female 40.58
7 | 15.77 2.23 Female 31.54
8 | 19.65 3 Female 39.3
9 | 15.06 3 Female 30.12
10 | 20.69 2.45 Female 41.38
11 | 16.93 3.07 Female 33.86
12 | 10.29 2.6 Female 20.58
13 | 34.81 5.2 Female 69.62
14 | 26.41 1.5 Female 52.82
… | … … … …
82 | 10.09 2 Female 20.18
83 | 22.12 2.88 Female 44.24
84 | 35.83 4.67 Female 71.66
85 | 27.18 2 Female 54.36
86 | 18.78 3 Female 37.56
[87 rows x 4 columns]
2. "SELECT total_bill, tip,sex, total_bill*2 AS total_bill_doubled FROM tips where sex = 'Female' and tip >=5;"
dt13 = tips[(f["sex"] == "Female") & (f["tip"] > 5), ("total_bill", "tip", "sex")][:,f[:].extend({"total_bill_doubled": f.total_bill * 2})]
print(dt13)
结果:
| total_bill tip sex total_bill_doubled
| float64 float64 str32 float64
-- + ---------- ------- ------ ------------------
0 | 34.81 5.2 Female 69.62
1 | 34.83 5.17 Female 69.66
2 | 29.85 5.14 Female 59.7
3 | 28.17 6.5 Female 56.34
[4 rows x 4 columns]
3. "SELECT day, sum(total_bill) FROM tips group by day"
dt14 = tips[:,{"sum_total_bill":dt.sum(f["total_bill"])}, by("day")]
print(dt14)
结果:
| day sum_total_bill
| str32 float64
-- + ----- --------------
0 | Fri 325.88
1 | Sat 1778.4
2 | Sun 1627.16
3 | Thur 1096.33
[4 rows x 2 columns]
4. "SELECT day, sum(total_bill) FROM tips where sex='Female' group by day "
dt15 = tips[f["sex"] == "Female",:][:, {"sum_total_bill": dt.sum(f["total_bill"])}, by("day")]
print(dt15)
结果:
| day sum_total_bill
| str32 float64
-- + ----- --------------
0 | Fri 127.31
1 | Sat 551.05
2 | Sun 357.7
3 | Thur 534.89
[4 rows x 2 columns]
5. "SELECT total_bill, tip,sex, size, total_bill*2 AS total_bill_doubled,size*2 AS size_doubled FROM tips where sex = 'Female';"
dt16 = tips[f["sex"] == "Female", ("total_bill", "tip", "sex","size")][:,f[:].extend({"total_bill_doubled": f.total_bill * 2,"size_doubled": f.size * 2})]
print(dt16)
结果:
| total_bill tip sex size total_bill_doubled size_doubled
| float64 float64 str32 int32 float64 int32
-- + ---------- ------- ------ ----- ------------------ ------------
0 | 16.99 1.01 Female 2 33.98 4
1 | 24.59 3.61 Female 4 49.18 8
2 | 35.26 5 Female 4 70.52 8
3 | 14.83 3.02 Female 2 29.66 4
4 | 10.33 1.67 Female 3 20.66 6
5 | 16.97 3.5 Female 3 33.94 6
6 | 20.29 2.75 Female 2 40.58 4
7 | 15.77 2.23 Female 2 31.54 4
8 | 19.65 3 Female 2 39.3 4
9 | 15.06 3 Female 2 30.12 4
10 | 20.69 2.45 Female 4 41.38 8
11 | 16.93 3.07 Female 3 33.86 6
12 | 10.29 2.6 Female 2 20.58 4
13 | 34.81 5.2 Female 4 69.62 8
14 | 26.41 1.5 Female 2 52.82 4
… | … … … … … …
82 | 10.09 2 Female 2 20.18 4
83 | 22.12 2.88 Female 2 44.24 4
84 | 35.83 4.67 Female 3 71.66 6
85 | 27.18 2 Female 2 54.36 4
86 | 18.78 3 Female 2 37.56 4
[87 rows x 6 columns]
6."SELECT size,COUNT() AS N FROM tips GROUP BY size; -- 次数"
dt17 = tips[:,dt.count(), by("size")]
print(dt17)
结果:
| size count
| int32 int64
-- + ----- -----
0 | 1 4
1 | 2 156
2 | 3 38
3 | 4 37
4 | 5 5
5 | 6 4
[6 rows x 2 columns]