# Data analysis: Alibaba fund inflow/outflow analysis (task 2 - time-series rules)
import pandas as pd
import sklearn as skr
import numpy as np
import datetime
import matplotlib. pyplot as plt
import seaborn as sns
from dateutil. relativedelta import relativedelta
def load_data(path: str = 'user_balance_table.csv') -> pd.DataFrame:
    """Read the balance CSV and attach the derived calendar columns.

    Args:
        path: Location of the CSV file handed to ``pd.read_csv``.

    Returns:
        The frame produced by ``add_timestamp`` on the loaded data, with
        its index reset to a fresh 0..n-1 range.
    """
    raw_frame = pd.read_csv(path)
    stamped_frame = add_timestamp(raw_frame)
    return stamped_frame.reset_index(drop=True)
def add_timestamp(data: pd.DataFrame, time_index: str = 'report_date') -> pd.DataFrame:
    """Return a copy of ``data`` extended with calendar columns.

    The column named by ``time_index`` is parsed with the ``%Y%m%d`` format
    into a new ``date`` column, from which ``day``, ``month``, ``year``,
    ``week`` (ISO week number) and ``weekday`` (Monday == 0) are derived.
    The input frame is not modified.

    Args:
        data: Frame containing the ``time_index`` column.
        time_index: Name of the column holding ``YYYYMMDD`` dates.

    Returns:
        A new frame with the six extra columns appended and the index
        reset to 0..n-1.
    """
    data_balance = data.copy()
    data_balance['date'] = pd.to_datetime(data_balance[time_index], format="%Y%m%d")
    stamps = data_balance['date'].dt
    data_balance['day'] = stamps.day
    data_balance['month'] = stamps.month
    data_balance['year'] = stamps.year
    # ``Series.dt.week`` was deprecated in pandas 1.1 and removed in 2.0;
    # ``isocalendar().week`` is its replacement. That accessor returns a
    # nullable UInt32 column, so cast it to the dtype of the sibling 'day'
    # column to keep 'week' consistent with the other calendar fields.
    iso_week = stamps.isocalendar().week
    data_balance['week'] = iso_week.astype(data_balance['day'].dtype)
    data_balance['weekday'] = stamps.weekday
    return data_balance.reset_index(drop=True)
def get_total_balance ( data: pd. DataFrame, date: str = '2014-03-31' ) - > pd. DataFrame:
df_tmp = data. copy( )
df_tmp = df_tmp. groupby( [ 'date' ] ) [ 'total_purchase_amt' , 'total_redeem_amt' ] . sum ( )
df_tmp. reset_index( inplace= True )
return df_tmp[ ( df_tmp[ 'date' ] >= date) ] . reset_index( drop=