复现了阿里天池 Tianchi Data Hero Cup「短租数据集分析」论坛中一位大佬的代码,做了探索性数据分析和简单的建模尝试。原始数据集没有找到,因此从 Inside Airbnb 网站下载了 2021 年的北京短租房数据,数据集下载链接:http://insideairbnb.com/get-the-data.html
代码细节上的问题还很多,比如汉字显示问题、图片坐标轴比例不协调等;模型也只是无脑搭建的。先这样吧,明天再调整。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler,StandardScaler,Normalizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.ensemble import AdaBoostClassifier,AdaBoostRegressor
from sklearn.naive_bayes import GaussianNB,BernoulliNB
from sklearn.svm import SVC,SVR
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis#LDA降维
from sklearn import metrics
import warnings
from keras.models import Sequential
from xgboost import XGBRegressor
from keras.layers.core import Dense,Activation
import tensorflow
# Install a catch-all "ignore" filter so no warning of any category is shown
# for the rest of the session (keeps the notebook output uncluttered).
warnings.simplefilter("ignore")

# Read the listings CSV from the working directory; the first CSV column is
# used as the DataFrame's row index.
df = pd.read_csv(filepath_or_buffer="listings.csv", index_col=0)

# Preview the first rows. As a bare expression this is only rendered when run
# interactively (e.g. as the last statement of a notebook cell).
df.head()