import pandas as pd
import numpy as np
import os
import warnings
from sklearn.metrics import f1_score,accuracy_score
from sklearn.model_selection import StratifiedKFold, KFold
from tqdm import tqdm
import lightgbm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
# Load the dataset; the first CSV column becomes the row index.
data = pd.read_csv('water_data.csv', index_col=0)

# Force the measurement columns to a numeric dtype. Values that cannot be
# parsed become NaN (errors='coerce') instead of raising.
# pd.to_numeric is called once per column (vectorised) rather than once per
# cell through Series.apply, which is slower.
data['TN'] = pd.to_numeric(data['TN'], errors='coerce')
data['TEMP'] = pd.to_numeric(data['TEMP'], errors='coerce')
data['COND'] = pd.to_numeric(data['COND'], errors='coerce')
data['TURB'] = pd.to_numeric(data['TURB'], errors='coerce')

# Shift every class label down by one.
# NOTE(review): presumably the raw labels are 1-based and this makes them
# 0-based for the 'multiclass' objective configured further down -- confirm.
# ('lable' is the column name as spelled in the CSV; do not "fix" it here.)
data['lable'] = data['lable'] - 1
# Split the frame at row 3600: the leading slice is used for training, the
# remainder is held out.
data_train, data_test = data[:3600], data[3600:]

# Features are the first nine columns (selected by position); the target is
# the 'lable' column.
X_train, y_train = data_train.iloc[:, :9], data_train['lable']
X_validation, y_validation = data_test.iloc[:, :9], data_test['lable']

# Secondary names for the same objects: train_x / test alias the feature
# frames, while train_y is the training target as a one-column DataFrame.
train_x = X_train
test = X_validation
train_y = y_train.to_frame()

# Training features and target side by side in a single frame.
train = pd.concat((train_x, train_y), axis='columns')
params = {
'num_leaves': 40,
'min_data_in_leaf': 30,
'objective': 'multiclass',
'num_class'