目录
- test.csv
- train.csv
- titanic.py
数据集
https://www.kaggle.com/c/titanic/data
titanic.py
import tensorflow as tf
import pandas as pd
# Input CSV files; the directory listing at the top of this file places them
# next to titanic.py.
TRAIN_PATH = "train.csv"
TEST_PATH = "test.csv"

# Training hyperparameters.
# NOTE(review): the code that consumes these is outside this view; their
# meaning is inferred from the names only — confirm against the training loop.
LEARNING_RATE = 0.01
EPOCH_NUM = 15
BATCH_SIZE = 100

# Presumably the directory TensorBoard summaries are written to — confirm
# against the summary-writer call.
LOGS_PATH = "tensorboard_logs"
'''
PassengerId => 乘客ID
Pclass => 乘客等级(1/2/3等舱位)
Name => 乘客姓名
Sex => 性别
Age => 年龄
SibSp => 同船的兄弟姐妹/配偶个数
Parch => 父母与小孩个数
Ticket => 船票信息
Fare => 票价
Cabin => 客舱
Embarked => 登船港口
'''
def preprocess_data(path, is_test=False):
data = pd.read_csv(path, index_col='PassengerId')
data.drop(['Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
if is_test:
data = data.replace([None], [0])
else:
data = data[pd.notnull(data['Age'])]
data = data[pd.notnull(data['Embarked'])]
data.replace(["female", "male"], [0, 1], inplace=True)
data.replace(["Q", "C", "S"], [0