import pandas as pd
import numpy as np
# Sample data: programming-language names paired with a numeric score.
# Each column contains exactly one missing value (np.nan).
languages = ["Python", "C", "Java", "GO", np.nan, "SQL", "PHP", "Python"]
scores = [1, 2, np.nan, 4, 5, 6, 7, 10]
data = {"grammer": languages, "score": scores}
df = pd.DataFrame(data)
df
|   | grammer | score |
|---|---|---|
| 0 | Python | 1.0 |
| 1 | C | 2.0 |
| 2 | Java | NaN |
| 3 | GO | 4.0 |
| 4 | NaN | 5.0 |
| 5 | SQL | 6.0 |
| 6 | PHP | 7.0 |
| 7 | Python | 10.0 |
# Method 1: exact match. Comparing a missing value with == yields False,
# so the row whose language is NaN is simply left out.
df[df['grammer'] == 'Python']
# Method 2: substring match. str.contains returns NaN for missing values by
# default; na=False turns those into False directly, so the result is a
# proper boolean mask and no separate in-place fillna is needed.
results = df['grammer'].str.contains("Python", na=False)
df[results]
|   | grammer | score |
|---|---|---|
| 0 | Python | 1.0 |
| 7 | Python | 10.0 |
# Show the column labels of df.
df.columns
Index(['grammer', 'score'], dtype='object')
# Rename the 'score' column to 'popularity', modifying df itself.
column_renames = {'score': 'popularity'}
df.rename(column_renames, axis='columns', inplace=True)
df
|   | grammer | popularity |
|---|---|---|
| 0 | Python | 1.0 |
| 1 | C | 2.0 |
| 2 | Java | NaN |
| 3 | GO | 4.0 |
| 4 | NaN | 5.0 |
| 5 | SQL | 6.0 |
| 6 | PHP | 7.0 |
| 7 | Python | 10.0 |
# Count how many times each language appears in the 'grammer' column.
df['grammer'].value_counts()
Python 2
SQL 1
Java 1
PHP 1
GO 1
C 1
Name: grammer, dtype: int64
# Fill the missing popularity values by interpolating between neighbouring
# rows. interpolate() already returns the complete column with existing
# values untouched, so wrapping it in fillna() is unnecessary.
df['popularity'] = df['popularity'].interpolate()
df
|   | grammer | popularity |
|---|---|---|
| 0 | Python | 1.0 |
| 1 | C | 2.0 |
| 2 | Java | 3.0 |
| 3 | GO | 4.0 |
| 4 | NaN | 5.0 |
| 5 | SQL | 6.0 |
| 6 | PHP | 7.0 |
| 7 | Python | 10.0 |
# Keep only the rows whose popularity is strictly greater than 3.
df.loc[df['popularity'].gt(3)]
|   | grammer | popularity |
|---|---|---|
| 3 | GO | 4.0 |
| 4 | NaN | 5.0 |
| 5 | SQL | 6.0 |
| 6 | PHP | 7.0 |
| 7 | Python | 10.0 |
# De-duplicate on the 'grammer' column, keeping the first row of each value.
# Returns a new frame; df itself is not modified.
df.drop_duplicates(subset='grammer', keep='first')
|   | grammer | popularity |
|---|---|---|
| 0 | Python | 1.0 |
| 1 | C | 2.0 |
| 2 | Java | 3.0 |
| 3 | GO | 4.0 |
| 4 | NaN | 5.0 |
| 5 | SQL | 6.0 |
| 6 | PHP | 7.0 |
# Average of the 'popularity' column.
df['popularity'].mean()
4.75
# Convert the 'grammer' column into a plain Python list.
df['grammer'].tolist()
['Python', 'C', 'Java', 'GO', nan, 'SQL', 'PHP', 'Python']
# Save the frame to the Excel file 'test.xlsx'.
df.to_excel('test.xlsx')
# Dimensions of the frame as a (rows, columns) tuple.
df.shape
(8, 2)
# Select rows whose popularity lies strictly between 3 and 7.
above_lower = df['popularity'].gt(3)
below_upper = df['popularity'].lt(7)
df[above_lower & below_upper]
|   | grammer | popularity |
|---|---|---|
| 3 | GO | 4.0 |
| 4 | NaN | 5.0 |
| 5 | SQL | 6.0 |
# Move 'popularity' to the first column position: pop() removes the column
# from df in place and hands back its values, which are then re-inserted at
# position 0.
temp = df.pop('popularity')
df.insert(loc=0, column='popularity', value=temp)
df
|   | popularity | grammer |
|---|---|---|
| 0 | 1.0 | Python |
| 1 | 2.0 | C |
| 2 | 3.0 | Java |
| 3 | 4.0 | GO |
| 4 | 5.0 | NaN |
| 5 | 6.0 | SQL |
| 6 | 7.0 | PHP |
| 7 | 10.0 | Python |
# Show every row whose popularity equals the column maximum.
top_popularity = df['popularity'].max()
df.loc[df['popularity'].eq(top_popularity)]
|   | popularity | grammer |
|---|---|---|
| 7 | 10.0 | Python |
# Show the last five rows of the frame.
df.tail(n=5)
|   | popularity | grammer |
|---|---|---|
| 3 | 4.0 | GO |
| 4 | 5.0 | NaN |
| 5 | 6.0 | SQL |
| 6 | 7.0 | PHP |
| 7 | 10.0 | Python |
# Remove the last row in place. drop() works on index *labels*, so look up
# the label of the final row instead of assuming it equals len(df) - 1,
# which only holds for an untouched 0..n-1 index.
df.drop(df.index[-1], inplace=True)
df
|   | popularity | grammer |
|---|---|---|
| 0 | 1.0 | Python |
| 1 | 2.0 | C |
| 2 | 3.0 | Java |
| 3 | 4.0 | GO |
| 4 | 5.0 | NaN |
| 5 | 6.0 | SQL |
| 6 | 7.0 | PHP |
# Add one new record to the end of the frame. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, so wrap the record in a
# one-row frame and concatenate; ignore_index=True renumbers the rows 0..n-1.
row = {'grammer': 'Perl', 'popularity': 6.6}
df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
df
|   | popularity | grammer |
|---|---|---|
| 0 | 1.0 | Python |
| 1 | 2.0 | C |
| 2 | 3.0 | Java |
| 3 | 4.0 | GO |
| 4 | 5.0 | NaN |
| 5 | 6.0 | SQL |
| 6 | 7.0 | PHP |
| 7 | 6.6 | Perl |
# Sort the rows by popularity in ascending order, modifying df in place.
# The original index labels travel with their rows.
df.sort_values(by='popularity', ascending=True, inplace=True)
df
|   | popularity | grammer |
|---|---|---|
| 0 | 1.0 | Python |
| 1 | 2.0 | C |
| 2 | 3.0 | Java |
| 3 | 4.0 | GO |
| 4 | 5.0 | NaN |
| 5 | 6.0 | SQL |
| 7 | 6.6 | Perl |
| 6 | 7.0 | PHP |
# Record which rows have no language *before* filling them, rather than
# filling with a sentinel such as 'R' and searching for it afterwards
# (that would also overwrite a genuine "R" entry).
missing = df['grammer'].isna()
df['grammer'] = df['grammer'].fillna('未知')
# Length of every language name. The column is made object-typed up front
# so it can hold both integer lengths and the placeholder string without
# relying on an implicit dtype change during assignment.
df['len_str'] = df['grammer'].map(len).astype(object)
# Rows that had no language get the placeholder instead of a length.
df.loc[missing, 'len_str'] = '未知'
df
|   | popularity | grammer | len_str |
|---|---|---|---|
| 0 | 1.0 | Python | 6 |
| 1 | 2.0 | C | 1 |
| 2 | 3.0 | Java | 4 |
| 3 | 4.0 | GO | 2 |
| 4 | 5.0 | 未知 | 未知 |
| 5 | 6.0 | SQL | 3 |
| 7 | 6.6 | Perl | 4 |
| 6 | 7.0 | PHP | 3 |