我目前正在使用PyTorch构建LSTM网络,用于预测时间序列数据。在参考Roman的博客文章后,我为单变量时间序列数据实现了一个简单的LSTM,具体内容请参见以下类定义。然而,自从几天前我试图添加更多输入数据特征以来,如一天中的小时数、一周中的日期和年份等等,我就陷入困境。
class Model(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Model, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
self.linear = nn.Linear(self.hidden_size, self.output_size)
def forward(self, input, future=0, y=None):
outputs = []
# reset the state of LSTM
# the state is kept till the end of the sequence
h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
h_t, c_t = self.lstm(input_t, (h_t, c_t))
output = self.linear(h_t)
outputs += [output]
for i in range(future):
if y is not None and random.random() > 0.5:
output = y[:, [i]] # teacher forcing
h_t, c_t = self.lstm(output, (h_t, c_t))
output = self.linear(h_t)
outputs += [output]
outputs = torch.stack(outputs, 1).squeeze(2)
return outputs
class Optimization:
"A helper class to train, test and diagnose the LSTM"
def __init__(self, model, loss_fn, optimizer, scheduler):
self.model = model
self.loss_fn = loss_fn
self.optimizer = optimizer
self.scheduler = scheduler
self.train_losses = []
self.val_losses = []
self.futures = []
@staticmethod
def generate_batch_data(x, y, batch_size):
for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
x_batch = x[i : i + batch_size]
y_batch = y[i : i + batch_size]
yield x_batch, y_batch, batch
def train(
self,
x_train,
y_train,
x_val=None,
y_val=None,
batch_size=100,
n_epochs=20,
dropout=0.2,
do_teacher_forcing=None,
):
seq_len = x_train.shape[1]
for epoch in range(n_epochs):
start_time = time.time()
self.futures = []
train_loss = 0
for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
self.optimizer.zero_grad()
loss = self.loss_fn(y_pred, y_batch)
loss.backward()
self.optimizer.step()
train_loss += loss.item()
self.scheduler.step()
train_loss /= batch
self.train_losses.append(train_loss)
self._validation(x_val, y_val, batch_size)
elapsed = time.time() - start_time
print(
"Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
% (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
)
def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
if do_teacher_forcing:
future = random.randint(1, int(seq_len) / 2)
limit = x_batch.size(1) - future
y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
else:
future = 0
y_pred = self.model(x_batch)
self.futures.append(future)
return y_pred
def _validation(self, x_val, y_val, batch_size):
if x_val is None or y_val is None:
return
with torch.no_grad():
val_loss = 0
batch = 1
for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
y_pred = self.model(x_batch)
loss = self.loss_fn(y_pred, y_batch)
val_loss += loss.item()
val_loss /= batch
self.val_losses.append(val_loss)
def evaluate(self, x_test, y_test, batch_size, future=1):
with torch.no_grad():
test_loss = 0
actual, predicted = [], []
for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
y_pred = self.model(x_batch, future=future)
y_pred = (
y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
)
loss = self.loss_fn(y_pred, y_batch)
test_loss += loss.item()
actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
test_loss /= batch
return actual, predicted, test_loss
def plot_losses(self):
plt.plot(self.train_losses, label="Training loss")
plt.plot(self.val_losses, label="Validation loss")
plt.legend()
plt.title("Losses")
在将数据提供给我的LSTM网络之前,我使用一些辅助函数来拆分和格式化数据。
def to_dataframe(actual, predicted):
return pd.DataFrame({"value": actual, "prediction": predicted})
def inverse_transform(scaler, df, columns):
for col in columns:
df[col] = scaler.inverse_transform(df[col])
return df
def split_sequences(sequences, n_steps):
X, y = list(), list()
for i in range(len(sequences)):
# find the end of this pattern
end_ix = i + n_steps
# check if we are beyond the dataset
if end_ix > len(sequences):
break
# gather input and output parts of the pattern
seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
X.append(seq_x)
y.append(seq_y)
return array(X), array(y)
def train_val_test_split_new(df, test_ratio=0.2, seq_len = 100):
y = df['value']
X = df.drop(columns = ['value'])
tarin_ratio = 1 - test_ratio
val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)
return X_train, y_train, X_val, y_val, X_test, y_test
我使用以下数据框来训练我的模型。
# df_train
value weekday monthday hour
timestamp
2014-07-01 00:00:00 10844 1 1 0
2014-07-01 00:30:00 8127 1 1 0
2014-07-01 01:00:00 6210 1 1 1
2014-07-01 01:30:00 4656 1 1 1
2014-07-01 02:00:00 3820 1 1 2
... ... ... ... ...
2015-01-31 21:30:00 24670 5 31 21
2015-01-31 22:00:00 25721 5 31 22
2015-01-31 22:30:00 27309 5 31 22
2015-01-31 23:00:00 26591 5 31 23
2015-01-31 23:30:00 26288 5 31 23
10320 rows × 4 columns
# x_train
weekday monthday hour
timestamp
2014-08-26 16:30:00 1 26 16
2014-08-18 16:30:00 0 18 16
2014-10-22 20:00:00 2 22 20
2014-12-10 08:00:00 2 10 8
2014-07-27 22:00:00 6 27 22
... ... ... ...
2014-08-24 05:30:00 6 24 5
2014-11-24 12:00:00 0 24 12
2014-12-18 06:00:00 3 18 6
2014-07-27 17:00:00 6 27 17
2014-12-05 21:00:00 4 5 21
6192 rows × 3 columns
# y_train
timestamp
2014-08-26 16:30:00 14083
2014-08-18 16:30:00 14465
2014-10-22 20:00:00 25195
2014-12-10 08:00:00 21348
2014-07-27 22:00:00 16356
...
2014-08-24 05:30:00 2948
2014-11-24 12:00:00 16292
2014-12-18 06:00:00 7029
2014-07-27 17:00:00 18883
2014-12-05 21:00:00 26284
Name: value, Length: 6192, dtype: int64
在将时间序列数据进行转换和分割后,X和y的训练数据集如下所示:
X_data shape is (6093, 100, 3)
y_data shape is (6093,)
tensor([[[-1.0097, 1.1510, 0.6508],
[-1.5126, 0.2492, 0.6508],
[-0.5069, 0.7001, 1.2238],
...,
[ 1.5044, -1.4417, -1.6413],
[ 1.0016, -0.0890, 0.7941],
[ 1.5044, -0.9908, -0.2087]],
[[-1.5126, 0.2492, 0.6508],
[-0.5069, 0.7001, 1.2238],
[-0.5069, -0.6526, -0.4952],
...,
[ 1.0016, -0.0890, 0.7941],
[ 1.5044, -0.9908, -0.2087],
[ 0.4988, 0.5874, 0.5076]],
[[-0.5069, 0.7001, 1.2238],
[-0.5069, -0.6526, -0.4952],
[ 1.5044, 1.2637, 1.5104],
...,
[ 1.5044, -0.9908, -0.2087],
[ 0.4988, 0.5874, 0.5076],
[ 0.4988, 0.5874, -0.6385]],
...,
[[ 1.0016, 0.9255, -1.2115],
[-1.0097, -0.9908, 1.0806],
[-0.0041, 0.8128, 0.3643],
...,
[ 1.5044, 0.9255, -0.9250],
[-1.5126, 0.9255, 0.0778],
[-0.0041, 0.2492, -0.7818]],
[[-1.0097, -0.9908, 1.0806],
[-0.0041, 0.8128, 0.3643],
[-0.5069, 1.3765, -0.0655],
...,
[-1.5126, 0.9255, 0.0778],
[-0.0041, 0.2492, -0.7818],
[ 1.5044, 1.2637, 0.7941]],
[[-0.0041, 0.8128, 0.3643],
[-0.5069, 1.3765, -0.0655],
[-0.0041, -1.6672, -0.4952],
...,
[-0.0041, 0.2492, -0.7818],
[ 1.5044, 1.2637, 0.7941],
[ 0.4988, -1.2163, 1.3671]]])
tensor([ 0.4424, 0.1169, 0.0148, ..., -1.1653, 0.5394, 1.6037])
最后,为了检查所有这些训练、验证和测试数据集的维度是否正确,我打印出它们的形状。
train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])
当我尝试按照以下方式构建模型时,出现了运行时错误指向不一致的输入尺寸。
model_params = {'train_ratio': 0.8,
'validation_ratio': 0.2,
'sequence_length': 100,
'teacher_forcing': False,
'dropout_rate': 0.2,
'batch_size': 100,
'num_of_epochs': 5,
'hidden_size': 24,
'n_features': 3,
'learning_rate': 1e-3
}
train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']
model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)
start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val,
batch_size=batch_size,
n_epochs=n_epochs,
dropout=dropout_rate,
do_teacher_forcing=teacher_forcing)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
6
7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val,
9 batch_size=batch_size,
10 n_epochs=n_epochs,
<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
68 train_loss = 0
69 for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
71 self.optimizer.zero_grad()
72 loss = self.loss_fn(y_pred, y_batch)
<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
93 else:
94 future = 0
---> 95 y_pred = self.model(x_batch)
96 self.futures.append(future)
97 return y_pred
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-189-c18d20430910> in forward(self, input, future, y)
17
18 for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19 h_t, c_t = self.lstm(input_t, (h_t, c_t))
20 output = self.linear(h_t)
21 outputs += [output]
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
963
964 def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965 self.check_forward_input(input)
966 if hx is None:
967 zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
789 def check_forward_input(self, input: Tensor) -> None:
790 if input.size(1) != self.input_size:
--> 791 raise RuntimeError(
792 "input has inconsistent input_size: got {}, expected {}".format(
793 input.size(1), self.input_size))
RuntimeError: input has inconsistent input_size: got 1, expected 3
我怀疑我的当前LSTM模型类不支持具有多个特征的数据,并且我最近一直在尝试不同的方法,但目前还没有成功。欢迎分享您的想法或指引我正确的方向来解决这个问题。
像 @stackoverflowuser2010 建议的那样,在抛出错误之前,我打印出了输入到前向步骤的张量 input_t、h_t 和 c_t 的形状。
input_t
torch.Size([100, 1, 3])
h_t
torch.Size([100, 24])
c_t
torch.Size([100, 24])
19 h_t, c_t = self.lstm(input_t, (h_t, c_t))
是错误的位置。你是否打印出形状并查看它们是否与所需的匹配? - stackoverflowuser2010