import numpy as np
import pandas as pd
# Demo frame with ten rows: a running integer, standard-normal noise,
# a random label drawn from A/B/C, and a random 0/1 flag.
# Keyword arguments are evaluated left to right, so the random draws
# happen in the same order as the columns are listed.
df = pd.DataFrame(dict(
    col_a=np.arange(10),
    col_b=np.random.randn(10),
    col_c=np.random.choice(['A', 'B', 'C'], 10),
    col_d=np.random.choice([0, 1], 10),
))
# Preview the first five rows of the frame.
df.head(5)
| col_a | col_b | col_c | col_d |
---|
0 | 0 | -1.030159 | B | 0 |
---|
1 | 1 | 1.447636 | B | 1 |
---|
2 | 2 | 1.572227 | C | 0 |
---|
3 | 3 | 0.031504 | A | 1 |
---|
4 | 4 | 0.071705 | C | 0 |
---|
# shape is a (rows, columns) tuple; index into it to get either count alone.
print(df.shape, df.shape[0], df.shape[1])
(10, 4) 10 4
# Show the column labels.
df.columns
Index(['col_a', 'col_b', 'col_c', 'col_d'], dtype='object')
# Positional row slice: rows at positions 0 through 4.
df.iloc[:5]
| col_a | col_b | col_c | col_d |
---|
0 | 0 | -1.030159 | B | 0 |
---|
1 | 1 | 1.447636 | B | 1 |
---|
2 | 2 | 1.572227 | C | 0 |
---|
3 | 3 | 0.031504 | A | 1 |
---|
4 | 4 | 0.071705 | C | 0 |
---|
# Indexing with a list of labels selects those columns and returns a DataFrame.
df[['col_a', 'col_b']]
| col_a | col_b |
---|
0 | 0 | -1.030159 |
---|
1 | 1 | 1.447636 |
---|
2 | 2 | 1.572227 |
---|
3 | 3 | 0.031504 |
---|
4 | 4 | 0.071705 |
---|
5 | 5 | -0.284231 |
---|
6 | 6 | 0.403412 |
---|
7 | 7 | 1.271582 |
---|
8 | 8 | 0.693771 |
---|
9 | 9 | 1.510458 |
---|
# Positional slice on both axes: first five rows, first two columns.
df.iloc[:5, :2]
| col_a | col_b |
---|
0 | 0 | -1.030159 |
---|
1 | 1 | 1.447636 |
---|
2 | 2 | 1.572227 |
---|
3 | 3 | 0.031504 |
---|
4 | 4 | 0.071705 |
---|
# Scalar lookup by integer position: row 0, column 1 (col_b).
df.iat[0, 1]
-1.0301593908948492
# Boolean-mask row filter. Each comparison is parenthesised because `&`
# binds more tightly than `>` and `<`.
df[(df['col_a'] > 3) & (df['col_b'] < 0)]
| col_a | col_b | col_c | col_d |
---|
5 | 5 | -0.284231 | A | 0 |
---|
# Keep only the rows whose col_c value is one of the listed labels.
df[df['col_c'].isin(['A', 'B'])]
| col_a | col_b | col_c | col_d |
---|
0 | 0 | -1.030159 | B | 0 |
---|
1 | 1 | 1.447636 | B | 1 |
---|
3 | 3 | 0.031504 | A | 1 |
---|
5 | 5 | -0.284231 | A | 0 |
---|
7 | 7 | 1.271582 | A | 0 |
---|
8 | 8 | 0.693771 | B | 1 |
---|
9 | 9 | 1.510458 | A | 1 |
---|
# Add a derived column: the element-wise sum of col_a and col_b.
# Assigning to a new label mutates df in place.
df['col_e'] = df['col_a'] + df['col_b']
df
| col_a | col_b | col_c | col_d | col_e |
---|
0 | 0 | -1.030159 | B | 0 | -1.030159 |
---|
1 | 1 | 1.447636 | B | 1 | 2.447636 |
---|
2 | 2 | 1.572227 | C | 0 | 3.572227 |
---|
3 | 3 | 0.031504 | A | 1 | 3.031504 |
---|
4 | 4 | 0.071705 | C | 0 | 4.071705 |
---|
5 | 5 | -0.284231 | A | 0 | 4.715769 |
---|
6 | 6 | 0.403412 | C | 1 | 6.403412 |
---|
7 | 7 | 1.271582 | A | 0 | 8.271582 |
---|
8 | 8 | 0.693771 | B | 1 | 8.693771 |
---|
9 | 9 | 1.510458 | A | 1 | 10.510458 |
---|
# drop() returns a new frame here, so rebind df to actually remove col_e.
df = df.drop(columns='col_e')
df
| col_a | col_b | col_c | col_d |
---|
0 | 0 | -1.030159 | B | 0 |
---|
1 | 1 | 1.447636 | B | 1 |
---|
2 | 2 | 1.572227 | C | 0 |
---|
3 | 3 | 0.031504 | A | 1 |
---|
4 | 4 | 0.071705 | C | 0 |
---|
5 | 5 | -0.284231 | A | 0 |
---|
6 | 6 | 0.403412 | C | 1 |
---|
7 | 7 | 1.271582 | A | 0 |
---|
8 | 8 | 0.693771 | B | 1 |
---|
9 | 9 | 1.510458 | A | 1 |
---|
# Drop the first column by looking up its label. The result is only
# displayed, not assigned, so df itself keeps all of its columns.
df.drop(columns=df.columns[0])
| col_b | col_c | col_d |
---|
0 | -1.030159 | B | 0 |
---|
1 | 1.447636 | B | 1 |
---|
2 | 1.572227 | C | 0 |
---|
3 | 0.031504 | A | 1 |
---|
4 | 0.071705 | C | 0 |
---|
5 | -0.284231 | A | 0 |
---|
6 | 0.403412 | C | 1 |
---|
7 | 1.271582 | A | 0 |
---|
8 | 0.693771 | B | 1 |
---|
9 | 1.510458 | A | 1 |
---|
# Transpose: column labels become the index and row labels become columns.
df.T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
---|
col_a | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
---|
col_b | -1.03016 | 1.44764 | 1.57223 | 0.0315043 | 0.0717051 | -0.284231 | 0.403412 | 1.27158 | 0.693771 | 1.51046 |
---|
col_c | B | B | C | A | C | A | C | A | B | A |
---|
col_d | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 |
---|
# Convert every value in col_a to its string form; df is not modified.
df['col_a'].astype(str)
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
Name: col_a, dtype: object
# Build a categorical array from col_c; with no explicit categories given,
# they are taken from the distinct values present.
pd.Categorical(df['col_c'])
[B, B, C, A, C, A, C, A, B, A]
Categories (3, object): [A, B, C]
# axis=1 sums across the selected columns, giving one total per row.
df[['col_a', 'col_b']].sum(axis=1)
0 -1.030159
1 2.447636
2 3.572227
3 3.031504
4 4.071705
5 4.715769
6 6.403412
7 8.271582
8 8.693771
9 10.510458
dtype: float64
# axis=0 averages down the rows, giving one mean per column.
df[['col_a', 'col_b']].mean(axis=0)
col_a 4.50000
col_b 0.56879
dtype: float64
# apply() with its default axis calls the lambda once per column, so the
# result is each column's mean plus 10.
df[['col_a', 'col_b']].apply(lambda x: x.mean() + 10)
col_a 14.50000
col_b 10.56879
dtype: float64
# Two mirrored integer columns: col_x counts up 0..9, col_y counts down 9..0.
df2 = pd.DataFrame(dict(col_x=np.arange(10), col_y=np.arange(9, -1, -1)))
df2
| col_x | col_y |
---|
0 | 0 | 9 |
---|
1 | 1 | 8 |
---|
2 | 2 | 7 |
---|
3 | 3 | 6 |
---|
4 | 4 | 5 |
---|
5 | 5 | 4 |
---|
6 | 6 | 3 |
---|
7 | 7 | 2 |
---|
8 | 8 | 1 |
---|
9 | 9 | 0 |
---|
# axis=1 places the frames side by side, matching rows by index label.
pd.concat([df, df2], axis=1)
| col_a | col_b | col_c | col_d | col_x | col_y |
---|
0 | 0 | -1.030159 | B | 0 | 0 | 9 |
---|
1 | 1 | 1.447636 | B | 1 | 1 | 8 |
---|
2 | 2 | 1.572227 | C | 0 | 2 | 7 |
---|
3 | 3 | 0.031504 | A | 1 | 3 | 6 |
---|
4 | 4 | 0.071705 | C | 0 | 4 | 5 |
---|
5 | 5 | -0.284231 | A | 0 | 5 | 4 |
---|
6 | 6 | 0.403412 | C | 1 | 6 | 3 |
---|
7 | 7 | 1.271582 | A | 0 | 7 | 2 |
---|
8 | 8 | 0.693771 | B | 1 | 8 | 1 |
---|
9 | 9 | 1.510458 | A | 1 | 9 | 0 |
---|
# Two extra rows with the same four columns as df, written record by record
# so they can be appended to it below.
df3 = pd.DataFrame(
    [
        (-1, 0, 'B', 1),
        (-2, 1, 'C', 0),
    ],
    columns=['col_a', 'col_b', 'col_c', 'col_d'],
)
df3
| col_a | col_b | col_c | col_d |
---|
0 | -1 | 0 | B | 1 |
---|
1 | -2 | 1 | C | 0 |
---|
# axis=0 stacks df3's rows under df's; ignore_index=True discards both
# original indexes and renumbers the result from 0.
pd.concat([df, df3], axis=0, ignore_index=True)
| col_a | col_b | col_c | col_d |
---|
0 | 0 | -1.030159 | B | 0 |
---|
1 | 1 | 1.447636 | B | 1 |
---|
2 | 2 | 1.572227 | C | 0 |
---|
3 | 3 | 0.031504 | A | 1 |
---|
4 | 4 | 0.071705 | C | 0 |
---|
5 | 5 | -0.284231 | A | 0 |
---|
6 | 6 | 0.403412 | C | 1 |
---|
7 | 7 | 1.271582 | A | 0 |
---|
8 | 8 | 0.693771 | B | 1 |
---|
9 | 9 | 1.510458 | A | 1 |
---|
10 | -1 | 0.000000 | B | 1 |
---|
11 | -2 | 1.000000 | C | 0 |
---|