import pandas as pd
import numpy as np
# Section banner; the Chinese text "dataframe创建" means "DataFrame creation".
print("*"*25+"dataframe创建"+"*"*25)
*************************dataframe创建*************************
# A 3x4 frame holding 0..11; no labels are given, so pandas assigns defaults.
pd.DataFrame(np.arange(12).reshape(3, 4))
# The same values with explicit row labels a-c and column labels D-G.
pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=["a", "b", "c"],
    columns=["D", "E", "F", "G"],
)
# Dict of equal-length lists: every key becomes one column.
d1 = dict(name=["n1", "n2"], age=[20, 30], tel=[111, 222])
t1 = pd.DataFrame(data=d1)
type(t1)
pandas.core.frame.DataFrame
# Display t1: the dict keys appear as column headers and the rows are labelled 0 and 1.
t1
|   | name | age | tel |
|---|---|---|---|
| 0 | n1 | 20 | 111 |
| 1 | n2 | 30 | 222 |
# List of per-row dicts: the keys are combined into the column set, and the
# record that has no "age" key ends up with NaN in that column.
d2 = [
    dict(name="n1", age=20, tel=111),
    dict(name="n2", tel=222),
]
t2 = pd.DataFrame(data=d2)
type(t2)
pandas.core.frame.DataFrame
# Display t2: the row built from the record without "age" shows NaN, and the
# remaining age value is rendered as a float (20.0).
t2
|   | name | age | tel |
|---|---|---|---|
| 0 | n1 | 20.0 | 111 |
| 1 | n2 | NaN | 222 |
# Section banner; the Chinese text "dataframe操作" means "DataFrame operations".
print("*"*25+"dataframe操作"+"*"*25)
*************************dataframe操作*************************
# Row labels of t2; no index was supplied, so this is a RangeIndex.
t2.index
RangeIndex(start=0, stop=2, step=1)
# Column labels of t2.
t2.columns
Index(['name', 'age', 'tel'], dtype='object')
# The frame's data as a 2-D NumPy array; with str, float and int columns
# mixed together the array falls back to dtype=object.
t2.values
array([['n1', 20.0, 111],
['n2', nan, 222]], dtype=object)
# (number of rows, number of columns)
t2.shape
(2, 3)
# dtype of each column, returned as a Series keyed by column name.
t2.dtypes
name object
age float64
tel int64
dtype: object
# Number of dimensions of the frame.
t2.ndim
2
# First row only.
t2.head(1)
# Last row only.
t2.tail(1)
# Prints a summary: index range, per-column non-null counts and dtypes,
# and memory usage.
t2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 2 non-null object
1 age 1 non-null float64
2 tel 2 non-null int64
dtypes: float64(1), int64(1), object(1)
memory usage: 176.0+ bytes
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns age and tel; the object column "name" does not appear in the result.
t2.describe()
|   | age | tel |
|---|---|---|
| count | 1.0 | 2.000000 |
| mean | 20.0 | 166.500000 |
| std | NaN | 78.488853 |
| min | 20.0 | 111.000000 |
| 25% | 20.0 | 138.750000 |
| 50% | 20.0 | 166.500000 |
| 75% | 20.0 | 194.250000 |
| max | 20.0 | 222.000000 |
# Section banner; the Chinese text "dataframe使用" means "DataFrame usage".
print("*"*25+"dataframe使用"+"*"*25)
*************************dataframe使用*************************
# Load the dog-name table from the CSV next to this script, then show it
# ordered by Count_AnimalName with the largest counts first.
csv_path = "./dogNames2.csv"
df = pd.read_csv(csv_path)
df.sort_values("Count_AnimalName", ascending=False)
|   | Row_Labels | Count_AnimalName |
|---|---|---|
| 1156 | BELLA | 1195 |
| 9140 | MAX | 1153 |
| 2660 | CHARLIE | 856 |
| 3251 | COCO | 852 |
| 12368 | ROCKY | 823 |
| ... | ... | ... |
| 6884 | J-LO | 1 |
| 6888 | JOANN | 1 |
| 6890 | JOAO | 1 |
| 6891 | JOAQUIN | 1 |
| 16219 | 39743 | 1 |

16220 rows × 2 columns
# Section banner; the Chinese text "dataframe索引" means "DataFrame indexing".
print("*"*25+"dataframe索引"+"*"*25)
*************************dataframe索引*************************
# Keep the descending-by-count ordering under a name so later lookups can reuse it.
df_sorted = df.sort_values("Count_AnimalName", ascending=False)
# The five rows with the highest counts.
df_sorted.head(5)
|   | Row_Labels | Count_AnimalName |
|---|---|---|
| 1156 | BELLA | 1195 |
| 9140 | MAX | 1153 |
| 2660 | CHARLIE | 856 |
| 3251 | COCO | 852 |
| 12368 | ROCKY | 823 |
# Slice the first five rows, then pick the Row_Labels column; the result is
# a Series that keeps the original row labels.
df_sorted[:5]["Row_Labels"]
1156 BELLA
9140 MAX
2660 CHARLIE
3251 COCO
12368 ROCKY
Name: Row_Labels, dtype: object
# A row slice of length one still yields a DataFrame (one row, all columns).
df_sorted[:1]
|   | Row_Labels | Count_AnimalName |
|---|---|---|
| 1156 | BELLA | 1195 |
# 3x4 frame holding 0..11 with row labels a-c and column labels W-Z.
t3 = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=["a", "b", "c"],
    columns=["W", "X", "Y", "Z"],
)
t3
# .loc with (row label, column label) picks out a single cell.
t3.loc["a", "Z"]
3
# A single cell read through .loc comes back as a NumPy scalar, not a Python int.
type(t3.loc["a","Z"])
numpy.int64
# One row label -> that row as a Series indexed by the column labels.
t3.loc["a"]
W 0
X 1
Y 2
Z 3
Name: a, dtype: int64
# All rows, one column label -> that column as a Series indexed by the row labels.
t3.loc[:,"Z"]
a 3
b 7
c 11
Name: Z, dtype: int64
# A list of row labels selects those rows.
t3.loc[["a","c"]]
# All rows, restricted to the listed column labels.
t3.loc[:,["X","Z"]]
# .iloc is position-based: position 1 is the second row (label "b").
t3.iloc[1]
W 4
X 5
Y 6
Z 7
Name: b, dtype: int64
# All rows, column at position 2 (the column labelled "Y").
t3.iloc[:,2]
a 2
b 6
c 10
Name: Y, dtype: int64
# Rows at positions 0 and 2, columns at positions 2 and 1 (in that order).
t3.iloc[[0,2],[2,1]]
# Rows from position 1 onward, first two columns.
t3.iloc[1:,:2]
# Assigning a scalar through .iloc overwrites every selected cell of t3 in place.
t3.iloc[1:,:2]=11
t3
|   | W | X | Y | Z |
|---|---|---|---|---|
| a | 0 | 1 | 2 | 3 |
| b | 11 | 11 | 6 | 7 |
| c | 11 | 11 | 10 | 11 |
---|
字符串方法:
