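# NOTE: web-page header captured together with the notebook export; kept as
# comments so that the file parses as Python.
# 欢迎光临散文网 会员登陆 & 注册
#
# 人工智能学习文件qoiqwetoiu
#
# 2023-06-30 15:05 作者:我需要重新集结部队吗  | 我要投稿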

#!/usr/bin/env python

# coding: utf-8


# #  导入数据分析和建模所需要的库


# In[1]:



import pandas as pd

from pandas import Series,DataFrame

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sn

import random

import time

from datetime import datetime

import cufflinks as cf

cf.go_offline()

import plotly

print(plotly.__version__) # requires version >= 1.9.0

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

import plotly.express as px

import plotly.graph_objects as go


# for the ML pipeline

from sklearn.model_selection import train_test_split

from sklearn.impute  import SimpleImputer

from sklearn.preprocessing import OrdinalEncoder

from sklearn.feature_selection import SelectFromModel

import sklearn.datasets as datasets

from sklearn import preprocessing

le = preprocessing.LabelEncoder()

#导入机器学习算法模型

from sklearn.neighbors import KNeighborsRegressor

from sklearn.linear_model import LinearRegression

from sklearn.linear_model import Ridge

from sklearn.linear_model import HuberRegressor

from sklearn.tree import DecisionTreeRegressor

from sklearn.svm import SVR


from sklearn.linear_model import Lasso

from sklearn.neural_network import MLPRegressor

from sklearn.tree import ExtraTreeRegressor

from xgboost import XGBRegressor

from sklearn.ensemble import RandomForestRegressor

from sklearn.ensemble import AdaBoostRegressor

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.ensemble import BaggingRegressor

import lightgbm as lgb

import xgboost as xgb

from xgboost import plot_importance, plot_tree

from statsmodels.tools.eval_measures import mse,rmse

from sklearn.model_selection import GridSearchCV

from xgboost import XGBRegressor

import warnings

import pickle


#切割训练数据和样本数据

from sklearn.model_selection import train_test_split


#用于模型评分

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from sklearn.metrics import explained_variance_score, median_absolute_error


random.seed(100)


get_ipython().run_line_magic('matplotlib', 'inline')


# from statsmodels.tsa.arima.model import ARIMA

# import statsmodels.api as sm

# from statsmodels.tsa.statespace.sarimax import SARIMAX


# import keras

# import tensorflow as tf

# from keras.preprocessing.sequence import TimeseriesGenerator

# from keras.models import Sequential

# from keras.layers import LSTM, Dense



# # 导入并处理数据


# In[2]:



# Let pandas print up to 100 rows / 100 columns before truncating the display.
pd.set_option('display.max_rows',100)  
pd.set_option('display.max_columns',100) 

#Import the data and parse dates.
df_test = pd.read_csv('data/液化气_日度价格预测数据20211026.csv')
# NOTE(review): an explicit `format` is passed together with
# `infer_datetime_format=True`; the latter is deprecated in recent pandas
# releases — confirm which of the two is actually meant to drive the parsing.
df_test['Date']=pd.to_datetime(df_test['Date'],  format='%d/%m/%Y',infer_datetime_format=True)

#df_test.tail(50)



# In[4]:



#df_test.tail(50)



# # 缺失值处理


# In[3]:



# 特征工程函数

# Add first-difference columns.
def create_diff_features(df, columns=None, ignore=None, ignore_first=True):
    """Append a ``<col>_d`` first-difference column for each selected column.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame to extend.
        columns: Column labels to difference. ``None`` or an empty sequence
            means every column of ``df``.
        ignore: Column labels that must not be differenced. The passed
            sequence is never modified.
        ignore_first: When true, the first column of ``df`` is skipped as
            well.

    Returns:
        ``df`` with the additional ``*_d`` columns.
    """
    if columns is None or len(columns) == 0:
        columns = df.columns

    # Work on a private copy: appending to the argument (or to a shared
    # default list) would leak the skipped column into later calls.
    skipped = [] if ignore is None else list(ignore)
    if ignore_first:
        skipped.append(df.columns[0])

    for col in columns:
        if col in skipped:
            continue
        try:
            df[col + "_d"] = df[col].diff()
        except Exception:
            # Deliberate best effort: columns that cannot be differenced
            # (e.g. text columns) are left out instead of aborting the run.
            continue
    return df


# Difference between each column and the target column.
def create_target_minus(df, target="", columns=None):
    """Append a ``<col>_m`` column holding ``df[col] - df[target]``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame to extend.
        target: Label of the target column. An empty string selects the first
            column of ``df``.
        columns: Column labels to process. ``None`` or an empty sequence means
            every column of ``df``. The target column itself is always skipped.

    Returns:
        ``df`` with the additional ``*_m`` columns.
    """
    if target == "":
        target = df.columns[0]
    if columns is None or len(columns) == 0:
        columns = df.columns

    for col in columns:
        if col == target:
            continue
        try:
            df[col + "_m"] = df[col] - df[target]
        except Exception:
            # Deliberate best effort: columns that cannot be subtracted
            # (e.g. text columns) are left out instead of aborting the run.
            continue
    return df


# Ratio of each column to the target column.
def create_target_divide(df, target="", columns=None):
    """Append a ``<col>_t`` column holding ``df[col] / df[target]``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame to extend.
        target: Label of the target (divisor) column. An empty string selects
            the first column of ``df``.
        columns: Column labels to process. ``None`` or an empty sequence means
            every column of ``df``. The target column itself is always skipped.

    Returns:
        ``df`` with the additional ``*_t`` columns.
    """
    if target == "":
        target = df.columns[0]
    # Fall back to all columns only when no selection was given. The previous
    # condition was inverted, so the default call produced nothing and an
    # explicit selection was silently replaced by every column.
    if columns is None or len(columns) == 0:
        columns = df.columns

    for col in columns:
        if col == target:
            continue
        try:
            df[col + "_t"] = df[col] / df[target]
        except Exception:
            # Deliberate best effort: columns that cannot be divided
            # (e.g. text columns) are left out instead of aborting the run.
            continue
    return df


# Add calendar feature columns.
def create_date_features(series):
    """Derive calendar features from the ``Date`` column and index by date.

    The passed frame is extended in place (``quarter``, ``Month``,
    ``dayofyear``, ``dayofmonth``, ``dayofweek`` are added and its index is set
    to the parsed dates); the returned frame is the same data without the
    ``Date`` column.

    Args:
        series: DataFrame with a ``Date`` column that ``pd.to_datetime`` can
            parse.

    Returns:
        A DataFrame indexed by date, with the calendar columns added and the
        ``Date`` column dropped.
    """
    df_products = series
    dates = pd.to_datetime(df_products["Date"])
    df_products["Date"] = dates

    df_products["quarter"] = dates.dt.quarter.astype("uint8")
    df_products["Month"] = dates.dt.month.astype("uint8")
    # Day of year reaches 366, which does not fit in uint8 (max 255) and used
    # to wrap around silently, so a 16-bit type is used for this column.
    df_products["dayofyear"] = dates.dt.dayofyear.astype("uint16")
    df_products["dayofmonth"] = dates.dt.day.astype("uint8")
    df_products["dayofweek"] = dates.dt.dayofweek.astype("uint8")

    df_products.index = df_products.Date

    # Lag features that were tried earlier and are currently disabled:
    #df_products["T-1"] = np.append(df_products.Price.values[0],df_products.Price.values[:-1])
    #df_products["T-7"] = np.append([df_products.Price.values[0] for _ in range(7)],df_products.Price.values[:-7])
    # Marked "Cheat" by the author: these would expose the target itself.
    #df_products["P-1"] = df_products.Price.values
    #df_products["P-2"] = df_products.Price.diff().values

    return df_products.drop(["Date"], axis=1)



# In[4]:



# Optionally restrict the data to the date being forecast.
# NOTE(review): the original note mentions forecasting 2021-09-06 while the
# disabled filter below cuts at 2021-09-16 — confirm the intended date before
# re-enabling it.
#df_test=df_test[(df_test['Date']<='2021-09-16')]

# Count the missing values of every feature, largest count first.
MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)
#print(MisVal_Check.head(39))

# Turn the per-feature missing counts into a two-column table.
df_MisVal_Check = pd.DataFrame(MisVal_Check,)#
df_MisVal_Check_1=df_MisVal_Check.reset_index()
df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] 
# After this division 'Missing_Number' holds the missing *fraction* of rows,
# not a count.
df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)

# Keep only the features whose missing-value fraction is below the threshold.
threshold = 0.5
#Dropping columns with missing value rate higher than threshold
df_test_1 = df_test[df_test.columns[df_test.isnull().mean() < threshold]]
#df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)

# Fill the remaining gaps: first with the previous observation (forward fill),
# then with the next observation (backward fill) for gaps at the start.
df_test_1=df_test_1.fillna(df_test.ffill())
df_test_1=df_test_1.fillna(df_test_1.bfill())
#df_test_1['year']=df_test_1['Date'].dt.year.astype("str")

# Feature engineering works on a copy so df_test_1 stays untouched.
df_test_2 = df_test_1.copy()
cols = df_test_1.columns.values
# Self-differencing (disabled).
#df_test_2=create_diff_features(df_test_2, columns=cols, ignore=["Price","Date"])
# Difference to the target value (disabled).
#df_test_2=create_target_minus(df_test_2, target="Price", columns=cols)
# Calendar features; this also moves 'Date' from a column to the index.
df_test_2=create_date_features(df_test_2)

# Drop the 'year' and '星期几' columns when present (missing ones are ignored).
df_test_3=df_test_2.drop(['year','星期几'],axis=1, errors='ignore')
#df_test_3=df_test_2.drop(['year','星期几','节假日','调休上班','突发事件'],axis=1, errors='ignore')

#---------------------------------------------------
# Encode the text columns '节假日', '调休上班' and '突发事件' as integer labels.
# The shared encoder `le` is re-fitted for every column, so after this cell it
# only remembers the classes of the last column encoded.
le.fit(df_test_3['节假日'])
df_test_3['节假日']=le.transform(df_test_3['节假日'])

#------------------------------------------------------
le.fit(df_test_3['调休上班'])
df_test_3['调休上班']=le.transform(df_test_3['调休上班'])
#------------------------------------------------------
#------------------------------------------------------
le.fit(df_test_3['突发事件'])
df_test_3['突发事件']=le.transform(df_test_3['突发事件'])

#------------------------------------------------------



# In[251]:



#cols



# In[83]:



#df_test_3



# In[190]:



#add_d_columns(df_test_3)



# In[ ]:






# # 建模&预测


# In[6]:



# Helper that draws a ranked feature-importance bar chart for a fitted model.
def plot_feature_importance(importance,names,model_type):
    """Show a horizontal bar chart of feature importances, largest first.

    Args:
        importance: Importance value of each feature.
        names: Feature names, in the same order as ``importance``.
        model_type: Model label that is prepended to the chart title.
    """
    ranked = pd.DataFrame(
        {
            'feature_names': np.array(names),
            'feature_importance': np.array(importance),
        }
    )
    ranked = ranked.sort_values(by=['feature_importance'], ascending=False)

    heading = model_type + " " + 'FEATURE IMPORTANCE'

    plt.figure(figsize=(10, 8))
    sn.barplot(x=ranked['feature_importance'], y=ranked['feature_names'])
    plt.title(heading)
    plt.xlabel('FEATURE IMPORTANCE')
    plt.ylabel('FEATURE NAMES')
    plt.show()

    

# Switch the sans-serif font to SimHei and disable the Unicode minus glyph —
# presumably so the Chinese axis labels and negative tick labels render
# correctly; requires the SimHei font to be installed.
plt.rcParams['font.sans-serif'] = [u'SimHei']
plt.rcParams['axes.unicode_minus'] = False


# 效果展示函数


# Show how training went.
def visualize_loss(history, title):
    """Plot training and validation loss per epoch.

    Args:
        history: Object whose ``history`` mapping holds the ``"loss"`` and
            ``"val_loss"`` sequences.
        title: Title of the figure.
    """
    train_curve = history.history["loss"]
    valid_curve = history.history["val_loss"]
    epoch_axis = range(len(train_curve))

    plt.figure()
    curves = (
        (train_curve, "b", "Training loss"),
        (valid_curve, "r", "Validation loss"),
    )
    for values, fmt, caption in curves:
        plt.plot(epoch_axis, values, fmt, label=caption)

    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

    

# Draw the "hedgehog" forecast chart: labels[0] is the actual column, the
# remaining labels are prediction columns.
def plot_prediction(df_results, labels, time_label="Date", draw_spike=True, draw_point=True, draw_line=False):
    """Plot actual values together with one or more prediction columns.

    Args:
        df_results: DataFrame holding the actual and predicted columns.
        labels: Column labels; the first one is the actual series, every
            following one is a prediction series.
        time_label: Column used for the x axis. When the frame has no such
            column its index is used instead.
        draw_spike: Draw, for each row, a segment from the actual value to the
            next row's prediction.
        draw_point: Draw actual and predicted values as scatter points.
        draw_line: Draw each prediction series as a continuous line.
    """
    plt.rcParams["figure.figsize"] = (10,6)
    #colors = ['crimson', 'coral', 'orange', 'red', 'lightcoral', 'gold']
    colors = ['red','gold','blue','purple','orange','green','violet']

    try:
        time_steps = df_results[time_label]
    except KeyError:
        time_steps = df_results.index
    # Normalise to an Index so every [] access below is positional, whether the
    # time axis came from a column (Series) or from the frame's index.
    time_steps = pd.Index(time_steps)

    actual = df_results[labels[0]]
    prediction_labels = labels[1:]
    n_rows = df_results.shape[0]

    # Actual values.
    plt.plot(time_steps, actual, color="grey", alpha=0.5, label=labels[0])

    # Prediction spikes: from the actual value at step j to the value predicted
    # for step j+1. Frames with fewer than two rows simply get no spikes.
    if draw_spike:
        true_val = actual.values
        for i, val in enumerate(prediction_labels):
            pred = df_results[val].values
            # Cycle through the palette so more than len(colors) prediction
            # columns do not raise an IndexError.
            color = colors[i % len(colors)]
            for j in range(n_rows - 1):
                plt.plot(
                    time_steps[j:j+2],
                    [true_val[j], pred[j+1]],
                    color=color,
                    alpha=0.5,
                    # Only the first segment carries the legend entry.
                    label=val if j == 0 else None,
                )

    # Scatter points for actual and predicted values.
    if draw_point:
        plt.scatter(time_steps, actual, color="grey", alpha=0.5, s=5)
        for i, val in enumerate(prediction_labels):
            plt.scatter(time_steps, df_results[val], color=colors[i % len(colors)], alpha=0.5, s=5)

    # Continuous prediction lines.
    if draw_line:
        for i, val in enumerate(prediction_labels):
            plt.plot(time_steps, df_results[val], color=colors[i % len(colors)], alpha=0.5)

    plt.legend()
    # xlim needs two scalars; the previous code passed the one-element slice
    # time_steps[-1:] as the upper bound.
    if len(time_steps) > 0:
        plt.xlim([time_steps[0], time_steps[-1]])
    plt.xlabel("时间轴")
    plt.show()



# In[7]:



# Work on a copy of the engineered frame; column 0 is used as the target and
# all remaining columns as features.
df_product=df_test_3.iloc[:,:].copy()
#df_product.iloc[:,0] = df_product.iloc[:,0].diff()

# Training rows: everything except the first row and the last 14 rows.
X_train=df_product.iloc[1:-14,1:]
y_train=df_product.iloc[1:-14,0]
    
# Test rows: the last 15 rows.
# NOTE(review): the row at position -15 is part of both the training slice
# (1:-14) and this test slice (-15:), so one test row has been seen during
# fitting — confirm whether that overlap is intended.
X_test=df_product.iloc[-15:,1:]
y_test=df_product.iloc[-15:,0]



# In[86]:



#X_train



# In[8]:



# Silence every warning from here on (process-wide filter).
warnings.filterwarnings("ignore")

# Grid search for best parameters
# NOTE(review): both `random_state` and `seed` are passed, and `nthread` is
# used for the thread count — these look like older xgboost aliases; confirm
# they are accepted (and not conflicting) in the installed xgboost version.
estimator = XGBRegressor(random_state=0,
 #objective='reg:linear',
nthread=4,
seed=42
)

# Round 4 get best parameters 
# 2 * 3 * 3 * 2 * 2 * 2 = 144 parameter combinations are tried.
parameters =  {
              'max_depth': [10],
              'learning_rate': [ 0.4],
              'min_split_loss':[0,1],
              'n_estimators': [15],
              'min_child_weight': [6,7,8],
              'max_delta_step': [0],
              'subsample': [0.6,0.7,0.8],
              'colsample_bytree': [1.0],
              'reg_alpha':[0,0.2],
              'reg_lambda': [0,0.05],
              # NOTE(review): scale_pos_weight is a class-imbalance setting;
              # it presumably has no effect for a regressor — confirm.
              'scale_pos_weight': [0.05,0.1,]

}

    #################    

# No `scoring` is given, so the search ranks candidates with the estimator's
# own default score; folds come from the default 3-fold split.
# NOTE(review): for time-ordered data a time-aware splitter may be more
# appropriate — confirm.
grid_search = GridSearchCV(
              estimator=estimator,
              param_grid=parameters,
    #scoring = 'roc_auc',
    #n_jobs = 10,
    cv = 3,
    verbose=True
    )
    
warnings.filterwarnings("ignore")
    
grid_search.fit(X_train, y_train)

    


###############################################################################


# Gradient boosting regressor with a fixed random state.
# NOTE(review): newer scikit-learn releases renamed the 'mse' criterion to
# 'squared_error' and no longer accept 'mse' — confirm against the installed
# version before running.
estimator_GBR = GradientBoostingRegressor(criterion='mse',random_state=0,)

# 3 * 2 * 3 * 4 * 3 = 216 parameter combinations are tried.
parameters_GBR = {
             'n_estimators':[99,100,101], 
            'learning_rate': [0.1,0.2], 
            'max_depth':[6,7,8], 
            'min_samples_leaf':[1,2,3,4], 
            # Integer values: number of features considered per split, so the
            # training frame must have at least 7 feature columns.
            'max_features':[5,6,7]
            


}


# 3-fold search ranked by the estimator's default score (no `scoring` given).
grid_search_GBR = GridSearchCV(
        estimator=estimator_GBR,
        param_grid=parameters_GBR,
        #scoring = 'roc_auc',
        #n_jobs = 10,
        cv = 3,
        verbose=True
    )

grid_search_GBR.fit(X_train, y_train)

    


###############################################################################

# Random forest regressor with a fixed random state.
# NOTE(review): newer scikit-learn releases renamed the 'mse' criterion to
# 'squared_error' and no longer accept 'mse' — confirm against the installed
# version before running.
estimator_RF = RandomForestRegressor(criterion='mse',random_state=0)

# 2 * 2 = 4 parameter combinations are tried (max_depth x n_estimators).
parameters_RF = {
            'bootstrap': [False],
             'max_depth': [10,None],
             'max_features': ['sqrt'],
             'min_samples_leaf': [1],
             'min_samples_split': [2],
             'n_estimators':[94,100] #range(5,110,1)#[9,10,11],

}


# 15-fold search ranked by the estimator's default score (no `scoring` given).
grid_search_RF = GridSearchCV(
        estimator=estimator_RF,
        param_grid=parameters_RF,
        #scoring = 'roc_auc',
        #n_jobs = 10,
        cv = 15,
        verbose=True
    )

grid_search_RF.fit(X_train, y_train)


###############################################################################

# Bagging regressor with a fixed random state (default base estimator).
estimator_BaggingR = BaggingRegressor(random_state=0,)

# Shape of the training data; neither value is used again in this cell.
n_samples = X_train.shape[0]
n_features = X_train.shape[1]

# 3 * 2 = 6 parameter combinations are tried (n_estimators x max_features).
parameters_BaggingR = {
           
          'n_estimators': [ 101,102,103],
          'max_samples': [1.0,],
          'max_features': [0.4,1.0],
          'bootstrap': [ False],
          'bootstrap_features': [True]

}


# 22-fold search ranked by the estimator's default score (no `scoring` given).
grid_search_BaggingR = GridSearchCV(
        estimator=estimator_BaggingR,
        param_grid=parameters_BaggingR,
        #scoring = 'roc_auc',
       # n_jobs = 10,
        cv = 22,
        verbose=True
    )

grid_search_BaggingR.fit(X_train, y_train)



# In[9]:



# Score the training rows with each tuned search and attach the predictions
# to a copy of the training features.
future_days_test = X_train.assign(
    **{
        'XGB_PREDICTIONS': grid_search.predict(X_train),
        'GBR_PREDICTIONS': grid_search_GBR.predict(X_train),
        'RF_PREDICTIONS': grid_search_RF.predict(X_train),
        'BaggingR_PREDICTIONS': grid_search_BaggingR.predict(X_train),
    }
)

# Fixed-weight blend of the four models (weights 0.25 / 0.35 / 0.25 / 0.15).
future_days_test['Stacking_Model_PREDICTIONS'] = (
    future_days_test['RF_PREDICTIONS'] * 0.25
    + future_days_test['GBR_PREDICTIONS'] * 0.35
    + future_days_test['BaggingR_PREDICTIONS'] * 0.25
    + future_days_test['XGB_PREDICTIONS'] * 0.15
)



# In[10]:



# For every tuned model: print the best cross-validated score, the winning
# value of each searched parameter and (where the estimator exposes
# importances) a feature-importance chart.
print("XGB")
print("Best score: %.3f" % grid_search.best_score_)
print("Best parameters set:")
best_parameters = grid_search.best_estimator_.get_params()
# Only the parameters that were part of the search grid are listed.
for param_name in parameters.keys():
    print("\t%s: %r" % (param_name, best_parameters[param_name]))
plot_feature_importance(grid_search.best_estimator_.feature_importances_, X_train.columns,'XGB')

print("GBR")
print("Best score: %.3f" % grid_search_GBR.best_score_)
print("Best parameters set:")
best_parameters = grid_search_GBR.best_estimator_.get_params()
for param_name in parameters_GBR.keys():
    print("\t%s: %r" % (param_name, best_parameters[param_name]))
plot_feature_importance(grid_search_GBR.best_estimator_.feature_importances_, X_train.columns,'GBR')

print("RF")
print("Best score: %.3f" % grid_search_RF.best_score_)
print("Best parameters set:")
best_parameters = grid_search_RF.best_estimator_.get_params()
for param_name in parameters_RF.keys():
    print("\t%s: %r" % (param_name, best_parameters[param_name]))
plot_feature_importance(grid_search_RF.best_estimator_.feature_importances_, X_train.columns,'RF')

print("BAGG")
print("Best score: %.3f" % grid_search_BaggingR.best_score_)
print("Best parameters set:")
best_parameters = grid_search_BaggingR.best_estimator_.get_params()
# Unlike the three reports above, this one lists the parameters alphabetically.
for param_name in sorted(parameters_BaggingR.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))
# The importance chart is disabled for the bagging model.
#plot_feature_importance(grid_search_BaggingR.best_estimator_.feature_importances_, X_train.columns,'BAGG')



# In[12]:



# ---- Part 1: fit quality on the training rows ----
future_days_test=X_train.copy() #createFeatures(future_days)
                      
future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_train)
future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_train)    
future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_train) 
future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_train)
# Despite the column name this is a fixed-weight average of the four models
# (weights 0.25 / 0.35 / 0.25 / 0.15), not a trained stacking model.
future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15
#future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['XGB_PREDICTIONS']


# Attach the training target under the name "Price".
future_days_test["Price"] = y_train

# Only the last 100 training rows are plotted and scored.
future_days_test = future_days_test[-100:]
plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])

# RMSE of every model and of the blend against the actual values.
#print("Current-Method error: ", np.sqrt(mean_squared_error(df_validation.实际起拍价,df_validation.预测起拍价)))
print("XGB_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))
print("GBR_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))
print("RF_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))
print("BaggingR_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))
print("融合模型误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))
# Baseline: RMSE between each value and the value of the previous row.
print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))


# ---- Part 2: the same evaluation on the held-out test rows ----
future_days_test=X_test.copy() #createFeatures(future_days)

future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_test)
future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_test)    
future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_test) 
future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_test)
future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15
#future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['XGB_PREDICTIONS']

future_days_test["Price"] = y_test
# Historical reference data, indexed by date so it aligns with the test rows.
# NOTE(review): this path has no 'data/' prefix, unlike the main data file —
# confirm the file location.
df_h_h = pd.read_csv('液化气历史预测数据_竞拍最高价_20210917.csv')
df_h_h['Date']=pd.to_datetime(df_h_h['Date'],  format='%d/%m/%Y',infer_datetime_format=True)
df_h_h=df_h_h.set_index('Date')
# The "highest" column is attached but not used by the plot or scores below.
future_days_test["highest"] = df_h_h.竞拍最高价

plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])

#print("Current-Method error: ", np.sqrt(mean_squared_error(df_validation.实际起拍价,df_validation.预测起拍价)))
print("XGB_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))
print("GBR_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))
print("RF_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))
print("BaggingR_PREDICTIONS 误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))
print("融合模型误差: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))
print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))



# In[ ]:






# In[66]:



# This cell repeats the train/test evaluation of the preceding evaluation
# cell; only the wording of the printed labels differs.

# ---- Part 1: fit quality on the training rows ----
future_days_test=X_train.copy() #createFeatures(future_days)
                      
future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_train)
future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_train)    
future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_train) 
future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_train)
# Fixed-weight average of the four models (weights 0.25 / 0.35 / 0.25 / 0.15).
future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15
#future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['XGB_PREDICTIONS']


# Attach the training target under the name "Price".
future_days_test["Price"] = y_train

# Only the last 100 training rows are plotted and scored.
future_days_test = future_days_test[-100:]
plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])

# RMSE of every model and of the blend against the actual values.
#print("Current-Method error: ", np.sqrt(mean_squared_error(df_validation.实际起拍价,df_validation.预测起拍价)))
print("XGB_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))
print("GBR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))
print("RF_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))
print("BaggingR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))
print("Stacking_Model_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))
# Baseline: RMSE between each value and the value of the previous row.
print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))


# ---- Part 2: the same evaluation on the held-out test rows ----
future_days_test=X_test.copy() #createFeatures(future_days)

future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_test)
future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_test)    
future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_test) 
future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_test)
future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15
#future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['XGB_PREDICTIONS']

future_days_test["Price"] = y_test
# Historical reference data, indexed by date so it aligns with the test rows.
df_h_h = pd.read_csv('液化气历史预测数据_竞拍最高价_20210917.csv')
df_h_h['Date']=pd.to_datetime(df_h_h['Date'],  format='%d/%m/%Y',infer_datetime_format=True)
df_h_h=df_h_h.set_index('Date')
# The "highest" column is attached but not used by the plot or scores below.
future_days_test["highest"] = df_h_h.竞拍最高价

plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])

#print("Current-Method error: ", np.sqrt(mean_squared_error(df_validation.实际起拍价,df_validation.预测起拍价)))
print("XGB_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))
print("GBR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))
print("RF_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))
print("BaggingR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))
print("Stacking_Model_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))
print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))



# In[23]:



# Validation window: the last 10 rows (column 0 is the target).
# NOTE(review): these rows are a subset of the last 15 rows used as the test
# slice — confirm that a separate validation window is intended.
X_val=df_product.iloc[-10:,1:]
y_val=df_product.iloc[-10:,0]  

future_days_test=X_val.copy() #createFeatures(future_days)

future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_val)
future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_val)    
future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_val) 
future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_val)
# Fixed-weight average of the four models (weights 0.25 / 0.35 / 0.25 / 0.15).
future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15
#future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['XGB_PREDICTIONS']


future_days_test["Price"] = y_val
# Historical predictions, indexed by date so they align with the rows above.
df_h_h = pd.read_csv('液化气历史预测数据_竞拍最高价_20210917.csv')
df_h_h['Date']=pd.to_datetime(df_h_h['Date'],  format='%d/%m/%Y',infer_datetime_format=True)
df_h_h=df_h_h.set_index('Date')
#future_days_test["highest"] = df_h_h.竞拍最高价
# "prev_pred" is the day-over-day difference of the historical prediction.
# NOTE(review): a difference is plotted and scored against "Price" below —
# confirm that "Price" is on the same (differenced) scale.
future_days_test["prev_pred"] = df_h_h.历史预测起拍价.diff()

plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS","prev_pred"])

# RMSE of every model and of the blend against the actual values.
#print("Current-Method error: ", np.sqrt(mean_squared_error(df_validation.实际起拍价,df_validation.预测起拍价)))
print("XGB_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))
print("GBR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))
print("RF_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))
print("BaggingR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))
print("Stacking_Model_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))
#print("LSTM error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))
# Baseline: RMSE between each value and the value of the previous row.
print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))

# NOTE(review): with 10 rows the slice [-10:1] selects only the first row, so
# this "current error" is computed from a single value; the stop index looks
# like a typo (perhaps -1 was meant) — confirm the intended window.
print("current error: ", np.sqrt(mean_squared_error(future_days_test.Price[-10:1],future_days_test.prev_pred[-10:1])))



# In[20]:



# Notebook display: the last ten values of the "prev_pred" column.
future_days_test.prev_pred[-10:]

# In[24]:

# Notebook display: the whole evaluation frame.
future_days_test



# In[ ]:






# In[ ]:






# In[56]:



#复盘算法,观察预测稳定性


def run_last_n_days(n):

    l = df_test_3.shape[0]

    for i in range(1,n+1):

        print("last n days: ",i)


        df_product=df_test_3.iloc[:,:].copy()

        #df_product.iloc[:,0] = df_product.iloc[:,0].diff()


        X_train=df_product.iloc[1:-n,1:]

        y_train=df_product.iloc[1:-n,0]


        X_test=df_product.iloc[-14-n:l-n+1,1:]

        y_test=df_product.iloc[-14-n:l-n+1,0]


        X_val=df_product.iloc[-30:,1:]

        y_val=df_product.iloc[-30:,0]



        estimator = XGBRegressor(random_state=0,

         #objective='reg:linear',

        nthread=4,

        seed=42

        )


        # Round 4 get best parameters 

        parameters =  {

                      'max_depth': [10],

                      'learning_rate': [ 0.4],

                      'min_split_loss':[0,1],

                      'n_estimators': [15],

                      'min_child_weight': [6,7,8],

                      'max_delta_step': [0],

                      'subsample': [0.6,0.7,0.8],

                      'colsample_bytree': [1.0],

                      'reg_alpha':[0,0.2],

                      'reg_lambda': [0,0.05],

                      'scale_pos_weight': [0.05,0.1,]


        }


            #################    


        grid_search = GridSearchCV(

                      estimator=estimator,

                      param_grid=parameters,

            #scoring = 'roc_auc',

            #n_jobs = 10,

            cv = 3,

            verbose=0

            )


        warnings.filterwarnings("ignore")


        grid_search.fit(X_train, y_train)



        ###############################################################################


        estimator_GBR = GradientBoostingRegressor(criterion='mse',random_state=0,)


        parameters_GBR = {

                     'n_estimators':range (99, 102,1), 

                    'learning_rate': [0.1,0.2], 

                    'max_depth':[6,7,8,], 

                    'min_samples_leaf':[1,2,3,4], 

                    'max_features':[5,6,7]

        }


        grid_search_GBR = GridSearchCV(

                estimator=estimator_GBR,

                param_grid=parameters_GBR,

                #scoring = 'roc_auc',

                #n_jobs = 10,

                cv = 3,

                verbose=0

            )


        grid_search_GBR.fit(X_train, y_train)



        ###############################################################################

        estimator_RF = RandomForestRegressor(criterion='mse',random_state=0)


        parameters_RF = {

                    'bootstrap': [False],

                     'max_depth': [10,None],

                     'max_features': ['sqrt'],

                     'min_samples_leaf': [1],

                     'min_samples_split': [2],

                     'n_estimators':[94,100] #range(5,110,1)#[9,10,11],

        }



        grid_search_RF = GridSearchCV(

                estimator=estimator_RF,

                param_grid=parameters_RF,

                #scoring = 'roc_auc',

                #n_jobs = 10,

                cv = 15,

                verbose=0

            )


        grid_search_RF.fit(X_train, y_train)


        ###############################################################################

        estimator_BaggingR = BaggingRegressor(random_state=0,)


        n_samples = X_train.shape[0]

        n_features = X_train.shape[1]


        parameters_BaggingR = {


                  'n_estimators': [ 101,102,103],

                  'max_samples': [1.0,],

                  'max_features': [0.4,1.0],

                  'bootstrap': [ False],

                  'bootstrap_features': [True]


        }



        grid_search_BaggingR = GridSearchCV(

                estimator=estimator_BaggingR,

                param_grid=parameters_BaggingR,

                #scoring = 'roc_auc',

               # n_jobs = 10,

                cv = 22,

                verbose=0

            )

        grid_search_BaggingR.fit(X_train, y_train)


        

        

        print("params used:")

        print("XGB")

        print("Best score: %.3f" % grid_search.best_score_)

        print("Best parameters set:")

        best_parameters = grid_search.best_estimator_.get_params()

        for param_name in parameters.keys():

            print("\t%s: %r" % (param_name, best_parameters[param_name]))

        #plot_feature_importance(grid_search.best_estimator_.feature_importances_, X_train.columns,'XGB')


        print("GBR")

        print("Best score: %.3f" % grid_search_GBR.best_score_)

        print("Best parameters set:")

        best_parameters = grid_search_GBR.best_estimator_.get_params()

        for param_name in parameters_GBR.keys():

            print("\t%s: %r" % (param_name, best_parameters[param_name]))

        #plot_feature_importance(grid_search_GBR.best_estimator_.feature_importances_, X_train.columns,'GBR')


        print("RF")

        print("Best score: %.3f" % grid_search_RF.best_score_)

        print("Best parameters set:")

        best_parameters = grid_search_RF.best_estimator_.get_params()

        for param_name in parameters_RF.keys():

            print("\t%s: %r" % (param_name, best_parameters[param_name]))

        #plot_feature_importance(grid_search_RF.best_estimator_.feature_importances_, X_train.columns,'RF')


        print("BAGG")

        print("Best score: %.3f" % grid_search_BaggingR.best_score_)

        print("Best parameters set:")

        best_parameters = grid_search_BaggingR.best_estimator_.get_params()

        for param_name in sorted(parameters_BaggingR.keys()):

            print("\t%s: %r" % (param_name, best_parameters[param_name]))


        



        print("show traing err")

        future_days_test=X_train.copy() #createFeatures(future_days)


        future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_train)

        future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_train)    

        future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_train) 

        future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_train)

        future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15


        future_days_test["Price"] = y_train


        future_days_test = future_days_test[-100:]

        plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])


        print("XGB_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))

        print("GBR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))

        print("RF_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))

        print("BaggingR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))

        print("Stacking_Model_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))

        print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))

        print("Trival error2: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Price*0)))




        print("show testing err")

        future_days_test=X_test.copy() #createFeatures(future_days)


        future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_test)

        future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_test)    

        future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_test) 

        future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_test)

        future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15

        #future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['XGB_PREDICTIONS']


        future_days_test["Price"] = y_test

        df_h_h = pd.read_csv('液化气历史预测数据_竞拍最高价_20210917.csv')

        df_h_h['Date']=pd.to_datetime(df_h_h['Date'],  format='%d/%m/%Y',infer_datetime_format=True)

        df_h_h=df_h_h.set_index('Date')

        future_days_test["highest"] = df_h_h.竞拍最高价


        plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])


        print("XGB_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))

        print("GBR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))

        print("RF_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))

        print("BaggingR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))

        print("Stacking_Model_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))

        print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))

        print("Trival error2: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Price*0)))




        print("show validation err")

        future_days_test=X_val.copy() #createFeatures(future_days)


        future_days_test['XGB_PREDICTIONS'] = grid_search.predict(X_val)

        future_days_test['GBR_PREDICTIONS'] = grid_search_GBR.predict(X_val)    

        future_days_test['RF_PREDICTIONS'] = grid_search_RF.predict(X_val) 

        future_days_test['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(X_val)

        future_days_test['Stacking_Model_PREDICTIONS']=future_days_test['RF_PREDICTIONS'] *0.25 + future_days_test['GBR_PREDICTIONS']*0.35 + future_days_test['BaggingR_PREDICTIONS']*0.25 + future_days_test['XGB_PREDICTIONS']*0.15


        future_days_test["Price"] = y_val


        #future_days_test = future_days_test[-100:]

        plot_prediction(future_days_test, ["Price","Stacking_Model_PREDICTIONS"])


        print("XGB_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.XGB_PREDICTIONS)))

        print("GBR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.GBR_PREDICTIONS)))

        print("RF_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.RF_PREDICTIONS)))

        print("BaggingR_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.BaggingR_PREDICTIONS)))

        print("Stacking_Model_PREDICTIONS error: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Stacking_Model_PREDICTIONS)))

        print("Trival error: ", np.sqrt(mean_squared_error(future_days_test.Price[1:],future_days_test.Price[:-1])))

        print("Trival error2: ", np.sqrt(mean_squared_error(future_days_test.Price,future_days_test.Price*0)))


        print("last 14 days:")

        print("XGB_PREDICTIONS: ", future_days_test.XGB_PREDICTIONS.values[-14:])

        print("GBR_PREDICTIONS: ", future_days_test.GBR_PREDICTIONS.values[-14:])

        print("RF_PREDICTIONS: ", future_days_test.RF_PREDICTIONS.values[-14:])

        print("BaggingR_PREDICTIONS: ", future_days_test.BaggingR_PREDICTIONS.values[-14:])

        print("Stacking_Model_PREDICTIONS: ", future_days_test.Stacking_Model_PREDICTIONS.values[-14:])

        

        print("")

        print("")

        print("")

        print("")



# In[57]:



# Run the training/evaluation routine defined above with an argument of 14.
# The visible tail of run_last_n_days prints the RMSE of every model on the
# training, testing and validation frames and the last 14 validation
# predictions.
# NOTE(review): the argument is presumably the number of trailing days held
# out for validation -- confirm against the function definition.
run_last_n_days(14)



# In[ ]:






# In[ ]:






# In[ ]:






# In[ ]:






# In[ ]:






# In[ ]:






# In[ ]:






# In[ ]:






# In[15]:



# Load the historical forecast / highest-bid table and index it by date.
df_h_h = pd.read_csv('液化气历史预测数据_竞拍最高价_20210917.csv')

# The explicit ``format`` fully determines the parsing. The former
# ``infer_datetime_format=True`` was ignored whenever ``format`` is given and
# is deprecated in pandas 2.x (it only triggers a warning), so it is dropped.
df_h_h['Date'] = pd.to_datetime(df_h_h['Date'], format='%d/%m/%Y')

df_h_h = df_h_h.set_index('Date')



# In[18]:



# Attach the highest-bid column of the historical table to the evaluation
# frame; pandas aligns the assigned values on the index labels.
future_days_test["highest"] = df_h_h["竞拍最高价"]



# In[ ]:



# Notebook display: a bare expression at the end of a cell is rendered by
# Jupyter. Run as a plain script this line has no effect.
df_h_h



# In[80]:



def _evaluate_stacking(features, target, last_n=None):
    """Predict with every tuned model, blend the results, plot and score them.

    Parameters
    ----------
    features : DataFrame
        Feature frame handed unchanged to each fitted grid search
        (``X_train`` / ``X_test``).
    target : Series
        True prices; stored in the ``Price`` column of the returned frame.
    last_n : int, optional
        When given, only the last ``last_n`` rows are kept, plotted and scored.

    Returns
    -------
    DataFrame
        Copy of *features* extended with one prediction column per model, the
        blended ``Stacking_Model_PREDICTIONS`` column and ``Price``.
    """
    frame = features.copy()

    frame['XGB_PREDICTIONS'] = grid_search.predict(features)
    frame['GBR_PREDICTIONS'] = grid_search_GBR.predict(features)
    frame['RF_PREDICTIONS'] = grid_search_RF.predict(features)
    frame['BaggingR_PREDICTIONS'] = grid_search_BaggingR.predict(features)

    # Fixed-weight blend of the four models. The expression is parenthesised
    # because the previous backslash continuation was followed by a blank
    # line, which terminates the logical line and is a SyntaxError.
    frame['Stacking_Model_PREDICTIONS'] = (
        frame['RF_PREDICTIONS'] * 0.25
        + frame['GBR_PREDICTIONS'] * 0.35
        + frame['BaggingR_PREDICTIONS'] * 0.25
        + frame['XGB_PREDICTIONS'] * 0.15
    )

    frame["Price"] = target

    if last_n is not None:
        frame = frame[-last_n:]

    plot_prediction(frame, ["Price", "Stacking_Model_PREDICTIONS"])

    # RMSE of the blended forecast. The label used to read "LSTM error"
    # although the scored column is the blended model; it now matches the
    # label printed by run_last_n_days for the same metric.
    print("Stacking_Model_PREDICTIONS error: ",
          np.sqrt(mean_squared_error(frame.Price, frame.Stacking_Model_PREDICTIONS)))

    # Baseline: each price compared with the price one row earlier.
    print("Trival error: ",
          np.sqrt(mean_squared_error(frame.Price[1:], frame.Price[:-1])))

    return frame


# Training split: only the most recent 100 rows are plotted and scored.
future_days_test = _evaluate_stacking(X_train, y_train, last_n=100)

# Test split: the full frame stays bound to ``future_days_test`` afterwards,
# as before, so later cells keep reading the test-set predictions.
future_days_test = _evaluate_stacking(X_test, y_test)



# In[59]:



#np.sqrt(mean_squared_error(future_days_test.Stacking_Model_PREDICTIONS[:-1], y_test[:-1]))



# In[ ]:






# In[ ]:






# In[41]:



#grid_search.predict(X_train)



# # 导入历史预测数据和最高价


# In[13]:



# Import the historical forecast / highest-bid table and parse its dates.
df_h_h = pd.read_csv('液化气历史预测数据_竞拍最高价_20210917.csv')

# The explicit ``format`` fully determines the parsing. The former
# ``infer_datetime_format=True`` was ignored whenever ``format`` is given and
# is deprecated in pandas 2.x (it only triggers a warning), so it is dropped.
df_h_h['Date'] = pd.to_datetime(df_h_h['Date'], format='%d/%m/%Y')

df_h_h = df_h_h.set_index('Date')

# Last 14 rows of the first and of the second data column.
A = df_h_h.iloc[-14:, 0]

B = df_h_h.iloc[-14:, 1]



# In[20]:



# Notebook display of the historical table. Run as a plain script this bare
# expression has no effect.
df_h_h



# # 日度价格预测结果展示


# In[14]:



# Show the daily price forecast of every model next to the reference series.

import plotly.offline as pyo

import plotly.graph_objs as go

import numpy as np


def _labelled_trace(series, name):
    """Return a Scatter trace of *series* plotted over its own index.

    Each point is drawn as line + marker with its value printed above it.
    """
    return go.Scatter(
        x=series.index,
        y=series,
        text=series,
        textposition='top center',
        mode='lines+markers+text',
        name=name,
    )


# No random call is visible in this cell; the seed is kept because it resets
# NumPy's global RNG for whatever runs afterwards.
np.random.seed(56)

# Latest evaluation frame, rounded to 2 decimals so the point labels stay
# short. Rounding the whole frame here makes per-column rounding unnecessary.
final_results_C = round(future_days_test, 2)

# Blank out the newest value of the first column so it is neither drawn nor
# labelled.
# NOTE(review): presumably the last row is the day being forecast, whose real
# price is not known yet -- confirm.
df_product.iloc[-1, 0] = np.nan

# One trace per model prediction column: (column name, legend label).
model_traces = [
    _labelled_trace(final_results_C[column], label)
    for column, label in (
        ('Stacking_Model_PREDICTIONS', '融合模型预测值'),
        ('BaggingR_PREDICTIONS', 'Bagging模型预测值'),
        ('RF_PREDICTIONS', 'RF模型预测值'),
        ('XGB_PREDICTIONS', 'XGB模型预测值'),
        ('GBR_PREDICTIONS', 'GBR模型预测值'),
    )
]

# Reference series, each restricted to its last 14 rows.
reference_traces = [
    _labelled_trace(df_product.iloc[-14:, 0], '价格真实值'),
    _labelled_trace(df_h_h.iloc[-14:, 0], '历史预测起拍价'),
    _labelled_trace(df_h_h.iloc[-14:, 1], '竞拍最高价'),
]

data = model_traces + reference_traces  # assign traces to data

# Individual names retained so existing references to traceN keep working.
trace0, trace1, trace2, trace3, trace4, trace5, trace6, trace7 = data

layout = go.Layout(
    title='日度价格预测值',
    hovermode='closest',
)

fig = go.Figure(data=data, layout=layout)

fig.update_traces(textposition='top center')

fig.show()

# Additionally export the figure to an HTML file through plotly.offline.
pyo.plot(fig, filename='叠加模型_最优模型日度价格预测值.html')



# In[ ]:






# In[ ]:






# In[21]:



# Scratch cell comparing the blended forecast with the tail of ``df_test``.
# NOTE(review): run as a plain script only the assignment below has an effect;
# the bare expressions are notebook display leftovers.

# Prices of ``df_test`` from slice position 1368 onwards, as a NumPy array.
# NOTE(review): 1368 is a hard-coded offset, presumably the row where the
# evaluated split starts -- confirm.
df_test.Price[1368:].values




# Residuals: blended prediction minus the price slice above.
# NOTE(review): ``preds`` is read here but only assigned further down in this
# cell; executed top to bottom this raises NameError unless an earlier cell
# already defined it -- confirm the intended order.
preds.Stacking_Model_PREDICTIONS-df_test.Price[1368:].values




# Copy of the latest evaluation frame, so later edits to ``preds`` do not
# touch ``future_days_test``.
preds = future_days_test.copy()


preds


人工智能学习文件qoiqwetoiu的评论 (共 条)

分享到微博请遵守国家法律