使用PyTorch为时间序列预测提供多个输入的LSTM馈送

Question

使用PyTorch为时间序列预测提供多个输入的LSTM馈送

machine-learningdeep-learningtime-seriespytorchlstm

5

我目前正在使用PyTorch构建LSTM网络，用于预测时间序列数据。在参考Roman的博客文章后，我为单变量时间序列数据实现了一个简单的LSTM，具体内容请参见以下类定义。然而，自从几天前我试图添加更多输入数据特征以来，如一天中的小时数、一周中的日期和年份等等，我就陷入困境。

class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, future=0, y=None):
        outputs = []

        # reset the state of LSTM
        # the state is kept till the end of the sequence
        h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        for i in range(future):
            if y is not None and random.random() > 0.5:
                output = y[:, [i]]  # teacher forcing
            h_t, c_t = self.lstm(output, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs


class Optimization:
    "A helper class to train, test and diagnose the LSTM"

    def __init__(self, model, loss_fn, optimizer, scheduler):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_losses = []
        self.val_losses = []
        self.futures = []

    @staticmethod
    def generate_batch_data(x, y, batch_size):
        for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
            x_batch = x[i : i + batch_size]
            y_batch = y[i : i + batch_size]
            yield x_batch, y_batch, batch

    def train(
        self,
        x_train,
        y_train,
        x_val=None,
        y_val=None,
        batch_size=100,
        n_epochs=20,
        dropout=0.2,
        do_teacher_forcing=None,
    ):
        seq_len = x_train.shape[1]
        for epoch in range(n_epochs):
            start_time = time.time()
            self.futures = []

            train_loss = 0
            for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
                y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
                self.optimizer.zero_grad()
                loss = self.loss_fn(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            self.scheduler.step()
            train_loss /= batch
            self.train_losses.append(train_loss)

            self._validation(x_val, y_val, batch_size)

            elapsed = time.time() - start_time
            print(
                "Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
                % (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
            )

    def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
        if do_teacher_forcing:
            future = random.randint(1, int(seq_len) / 2)
            limit = x_batch.size(1) - future
            y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
        else:
            future = 0
            y_pred = self.model(x_batch)
        self.futures.append(future)
        return y_pred

    def _validation(self, x_val, y_val, batch_size):
        if x_val is None or y_val is None:
            return
        with torch.no_grad():
            val_loss = 0
            batch = 1
            for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
                y_pred = self.model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                val_loss += loss.item()
            val_loss /= batch
            self.val_losses.append(val_loss)

    def evaluate(self, x_test, y_test, batch_size, future=1):
        with torch.no_grad():
            test_loss = 0
            actual, predicted = [], []
            for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
                y_pred = self.model(x_batch, future=future)
                y_pred = (
                    y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
                )
                loss = self.loss_fn(y_pred, y_batch)
                test_loss += loss.item()
                actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
                predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
            test_loss /= batch
            return actual, predicted, test_loss

    def plot_losses(self):
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")

在将数据提供给我的LSTM网络之前，我使用一些辅助函数来拆分和格式化数据。

def to_dataframe(actual, predicted):
    return pd.DataFrame({"value": actual, "prediction": predicted})

def inverse_transform(scaler, df, columns):
    for col in columns:
        df[col] = scaler.inverse_transform(df[col])
    return df

def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


def train_val_test_split_new(df, test_ratio=0.2, seq_len = 100):
    y = df['value']
    X = df.drop(columns = ['value'])
    tarin_ratio = 1 - test_ratio
    val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)

    return X_train, y_train, X_val, y_val, X_test, y_test

我使用以下数据框来训练我的模型。

# df_train 
value   weekday monthday    hour
timestamp               
2014-07-01 00:00:00 10844   1   1   0
2014-07-01 00:30:00 8127    1   1   0
2014-07-01 01:00:00 6210    1   1   1
2014-07-01 01:30:00 4656    1   1   1
2014-07-01 02:00:00 3820    1   1   2
... ... ... ... ...
2015-01-31 21:30:00 24670   5   31  21
2015-01-31 22:00:00 25721   5   31  22
2015-01-31 22:30:00 27309   5   31  22
2015-01-31 23:00:00 26591   5   31  23
2015-01-31 23:30:00 26288   5   31  23
10320 rows × 4 columns

# x_train 
weekday monthday    hour
timestamp           
2014-08-26 16:30:00 1   26  16
2014-08-18 16:30:00 0   18  16
2014-10-22 20:00:00 2   22  20
2014-12-10 08:00:00 2   10  8
2014-07-27 22:00:00 6   27  22
... ... ... ...
2014-08-24 05:30:00 6   24  5
2014-11-24 12:00:00 0   24  12
2014-12-18 06:00:00 3   18  6
2014-07-27 17:00:00 6   27  17
2014-12-05 21:00:00 4   5   21
6192 rows × 3 columns

# y_train 
timestamp
2014-08-26 16:30:00    14083
2014-08-18 16:30:00    14465
2014-10-22 20:00:00    25195
2014-12-10 08:00:00    21348
2014-07-27 22:00:00    16356
                       ...  
2014-08-24 05:30:00     2948
2014-11-24 12:00:00    16292
2014-12-18 06:00:00     7029
2014-07-27 17:00:00    18883
2014-12-05 21:00:00    26284
Name: value, Length: 6192, dtype: int64

在将时间序列数据进行转换和分割后，X和y的训练数据集如下所示:

X_data shape is (6093, 100, 3)
y_data shape is (6093,)
tensor([[[-1.0097,  1.1510,  0.6508],
         [-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         ...,
         [ 1.5044, -1.4417, -1.6413],
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087]],

        [[-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         ...,
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076]],

        [[-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         [ 1.5044,  1.2637,  1.5104],
         ...,
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076],
         [ 0.4988,  0.5874, -0.6385]],

        ...,

        [[ 1.0016,  0.9255, -1.2115],
         [-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         ...,
         [ 1.5044,  0.9255, -0.9250],
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818]],

        [[-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         ...,
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941]],

        [[-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         [-0.0041, -1.6672, -0.4952],
         ...,
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941],
         [ 0.4988, -1.2163,  1.3671]]])
tensor([ 0.4424,  0.1169,  0.0148,  ..., -1.1653,  0.5394,  1.6037])

最后，为了检查所有这些训练、验证和测试数据集的维度是否正确，我打印出它们的形状。

train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])

当我尝试按照以下方式构建模型时，出现了运行时错误指向不一致的输入尺寸。

model_params = {'train_ratio': 0.8, 
                'validation_ratio': 0.2,
                'sequence_length': 100,
                'teacher_forcing': False,
                'dropout_rate': 0.2,
                'batch_size': 100,
                'num_of_epochs': 5,
                'hidden_size': 24,
                'n_features': 3,
                'learning_rate': 1e-3
               }

train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']


model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)

start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val, 
                     batch_size=batch_size, 
                     n_epochs=n_epochs,
                     dropout=dropout_rate, 
                     do_teacher_forcing=teacher_forcing)

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
      6 
      7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val, 
      9                      batch_size=batch_size,
     10                      n_epochs=n_epochs,

<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
     68             train_loss = 0
     69             for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70                 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
     71                 self.optimizer.zero_grad()
     72                 loss = self.loss_fn(y_pred, y_batch)

<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
     93         else:
     94             future = 0
---> 95             y_pred = self.model(x_batch)
     96         self.futures.append(future)
     97         return y_pred

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-189-c18d20430910> in forward(self, input, future, y)
     17 
     18         for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19             h_t, c_t = self.lstm(input_t, (h_t, c_t))
     20             output = self.linear(h_t)
     21             outputs += [output]

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    963 
    964     def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965         self.check_forward_input(input)
    966         if hx is None:
    967             zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
    789     def check_forward_input(self, input: Tensor) -> None:
    790         if input.size(1) != self.input_size:
--> 791             raise RuntimeError(
    792                 "input has inconsistent input_size: got {}, expected {}".format(
    793                     input.size(1), self.input_size))

RuntimeError: input has inconsistent input_size: got 1, expected 3

我怀疑我的当前LSTM模型类不支持具有多个特征的数据，并且我最近一直在尝试不同的方法，但目前还没有成功。欢迎分享您的想法或指引我正确的方向来解决这个问题。

像 @stackoverflowuser2010 建议的那样，在抛出错误之前，我打印出了输入到前向步骤的张量 input_t、h_t 和 c_t 的形状。

input_t
torch.Size([100, 1, 3])
h_t
torch.Size([100, 24])
c_t
torch.Size([100, 24])

- bkaankuguoglu

1

这行代码 19 h_t, c_t = self.lstm(input_t, (h_t, c_t)) 是错误的位置。你是否打印出形状并查看它们是否与所需的匹配？ - stackoverflowuser2010

谢谢您指出！我已经在上面的问题中添加了input_t、h_t和c_t的形状。 - bkaankuguoglu

1个回答

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- bkaankuguoglu · Accepted Answer

在经历了几周的摸索后，我解决了这个问题。对我来说，这是一个富有成果的旅程，因此我想分享一下我所发现的。如果你想查看完整的代码步骤，请查看我在Medium上的帖子。

就像在Pandas中一样，当我坚持使用PyTorch时，事情往往会更快更顺利。两个库都依赖于NumPy，我相信可以使用NumPy数组和函数明确地执行几乎所有的表格和矩阵操作。然而，这样做会消除所有良好的抽象和性能改进，将每个步骤变成计算机科学练习。这很有趣，直到它不是。

与其手动调整所有的训练和验证集以将其传递给模型，PyTorch的TensorDataset和DataLoaders类极大地帮助了我。按比例缩放特征和目标集以进行训练和验证，然后我们得到NumPy数组。我们可以将这些数组转换为张量，并使用这些张量创建我们的TensorDataset或自定义数据集，具体取决于您的要求。最后，DataLoaders允许我们迭代这些数据集，比其他方法更方便，因为它们已经提供了内置的批处理、洗牌和丢弃最后一批选项。

train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)

val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)

train = TensorDataset(train_features, train_targets)
train_loader = DataLoader(train, batch_size=64, shuffle=False, drop_last=True)

val = TensorDataset(val_features, val_targets)
val_loader = DataLoader(val, batch_size=64, shuffle=False, drop_last=True)

将数据转换为可迭代的数据集后，可以使用它们进行小批量训练。我们无需明确定义批次或与矩阵操作搏斗，只需通过DataLoaders轻松迭代即可。

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-2)

train_losses = []
val_losses = []
train_step = make_train_step(model, criterion, optimizer)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)    
    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, n_features]).to(device)
            y_val = y_val.to(device)        
            model.eval()
            yhat = model(x_val)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)
    
    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")

另一个PyTorch提供的很酷的功能是view()函数，它允许更快速和内存高效地重塑张量。由于我之前使用batch_first = True定义了我的LSTM模型，因此特征集的批处理张量必须具有(batch size, time steps, number of features)的形状。代码中的这一行x_batch = x_batch.view([batch_size, -1, n_features]).to(device)就是这样做的。

我希望这个答案能帮助那些遇到类似问题的人或者至少给出一个思路方向。我在原始帖子中分享的代码进行了很多修改，但为了简单起见，我不会在这里全部放出。欢迎查看我在其他SO帖子这里的其余部分。