我建议看一下Yeo-Johnson变换:它与Box-Cox变换类似,但可以处理负值,在scikit-learn库中有很好的实现,并且很容易进行逆变换。
我将它与fbprophet库(用于预测)配合使用:
from sklearn.preprocessing import PowerTransformer
from fbprophet import Prophet
from fbprophet.plot import plot_cross_validation_metric
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
import numpy as np
import pandas as pd
def inverse_transform(df, pt_instance, features):
    """Map the given columns of ``df`` back through an inverse transform.

    Each column named in ``features`` is handed to
    ``pt_instance.inverse_transform`` as an ``(n, 1)`` array and the result
    is written back to the same column.

    Args:
        df: DataFrame holding the columns to convert. It is modified in
            place.
        pt_instance: Object exposing ``inverse_transform`` that accepts a
            2-D single-column array (the surrounding script passes a fitted
            ``PowerTransformer``).
        features: Iterable of column names in ``df`` to convert.

    Returns:
        The same ``df`` object that was passed in.
    """
    for feature in features:
        # The transformer is called with 2-D input: n rows, one column.
        column = np.array(df[feature]).reshape(-1, 1)
        restored = pt_instance.inverse_transform(column)
        # Flatten so a plain 1-D column is stored, not an (n, 1) array.
        df[feature] = np.asarray(restored).ravel()
    return df
# Fit a Yeo-Johnson power transform on the target column and train on the
# transformed copy; train_df itself keeps the original 'y' values.
pt = PowerTransformer(method='yeo-johnson')
train_df_transformed = train_df.copy()
train_df_transformed['y'] = pt.fit_transform(train_df[['y']].to_numpy())

model = Prophet(**hyperparams)
model.fit(train_df_transformed)

# Cross-validate the model that was fitted on the transformed target.
df_cv = cross_validation(
    model,
    initial='14 days',
    period='3 days',
    horizon='1 day',
    parallel="processes",
)

# Bring the forecast and its interval bounds back through the inverse
# transform before computing any error metrics.
df_cv = inverse_transform(df_cv, pt, ['yhat', 'yhat_lower', 'yhat_upper'])

# Replace the cross-validation 'y' column with the untransformed values from
# train_df, matched on the 'ds' column.
df_cv = df_cv.drop(columns=['y']).merge(train_df, on='ds')

df_p = performance_metrics(df_cv, metrics=['mae', 'mape'], rolling_window=1)
fig1 = plot_cross_validation_metric(df_cv, metric='mape')
fig2 = plot_cross_validation_metric(df_cv, metric='mae')