使用 pandas 读取特征数据，并处理数据中的双引号。
使用 xgboost 训练一版模型。
依赖版本：xgboost==1.6.2
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
import logging
import csv
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss
# Version label for the model produced by this script.
# NOTE(review): where this tag is consumed (e.g. output file names) is not
# visible in this excerpt -- confirm against the rest of the file.
model_version = "v101"
# Base directory of the xgboost tooling. The "..." segment looks like a
# redacted/placeholder path component -- TODO confirm and replace with the
# real location before running.
root_path = "/home/.../model/xgboost_tool"
class DataProcess(object):
def __init__(self, train_path, test_path):
self.train_data = pd.read_csv(train_path, header=None, sep="\t", quoting=csv.QUOTE_MINIMAL, escapechar='\\')
self.train_data.replace('""', '', inplace=</