使用AWS SDK在Node.js中将整个目录树上传到S3

42

我目前是这样将单个对象上传到S3的:

// Parameters for a single PutObject request. `bucket`, `s3Path`, `body` and
// `s3FilePermissions` are expected to be defined by the surrounding code.
var options = {
        Bucket: bucket,
        Key: s3Path,
        Body: body,
        ACL: s3FilePermissions
};

S3.putObject(options,
function (err, data) {
    // Surface failures instead of silently dropping them.
    if (err) {
        console.error(err);
        return;
    }
    //console.log(data);
});

但是,当我需要上传一个较大的资源文件夹时,我会改用AWS CLI工具。我想知道是否有一种原生方法可以使用AWS SDK完成相同的操作(将整个文件夹上传到S3)。

8个回答

36

我急忙编写的老式递归方式。只使用核心的Node.js模块和标准的AWS SDK。

var AWS = require('aws-sdk');
var path = require("path");
var fs = require('fs');

/**
 * Recursively uploads every regular file below a local directory to S3.
 *
 * Each object key is the file's path relative to `s3Path`, written with
 * forward slashes. Uploads are started as the tree is walked and complete
 * asynchronously; the outcome of each one is logged to the console.
 *
 * @param {string} s3Path - Local directory to upload (despite the name, this
 *   is a path on disk, not a location in S3).
 * @param {string} bucketName - Name of the destination bucket.
 */
const uploadDir = function(s3Path, bucketName) {

    let s3 = new AWS.S3();

    // Synchronous depth-first walk that invokes `callback(filePath, stat)`
    // for every regular file; anything that is neither a file nor a
    // directory is skipped.
    function walkSync(currentDirPath, callback) {
        fs.readdirSync(currentDirPath).forEach(function (name) {
            var filePath = path.join(currentDirPath, name);
            var stat = fs.statSync(filePath);
            if (stat.isFile()) {
                callback(filePath, stat);
            } else if (stat.isDirectory()) {
                walkSync(filePath, callback);
            }
        });
    }

    walkSync(s3Path, function(filePath, stat) {
        // path.relative() stays correct when s3Path has a trailing separator
        // or is not normalised, where a fixed-length substring() would cut the
        // wrong number of characters. S3 keys always use '/', so the OS
        // separator is converted as well.
        let bucketPath = path.relative(s3Path, filePath).split(path.sep).join('/');
        let params = {Bucket: bucketName, Key: bucketPath, Body: fs.readFileSync(filePath) };
        s3.putObject(params, function(err, data) {
            if (err) {
                console.log(err)
            } else {
                console.log('Successfully uploaded '+ bucketPath +' to ' + bucketName);
            }
        });

    });
};

// Example invocation: both arguments are placeholders for a real local
// directory and a real bucket name.
uploadDir("path to your folder", "your bucket name");

特别感谢Ali在这篇文章中帮助获取文件名。


6
老派永远是最好的。 - albanx
1
你如何能够管理回调? - Arsal Imam

18

async/await + TypeScript

如果您需要一种使用现代JavaScript语法且与TypeScript兼容的解决方案,我提供了以下代码。递归的getFiles是从这个答案中借鉴的(多年过去了,递归仍然让我头痛,哈哈)。

import { promises as fs, createReadStream } from 'fs';
import * as path from 'path';
import { S3 } from 'aws-sdk';

/**
 * Recursively uploads every file below a local directory to an S3 bucket.
 *
 * Object keys are the file paths relative to `s3Path`, always written with
 * forward slashes so the same key layout is produced on every platform.
 * All uploads are started at once and awaited together.
 *
 * @param s3Path Local directory to upload (a path on disk, not an S3 path).
 * @param bucketName Name of the destination bucket.
 * @returns A promise resolving to the `putObject` results, one per file.
 */
async function uploadDir(s3Path: string, bucketName: string) {
  const s3 = new S3();

  // Recursive getFiles from
  // https://dev59.com/eG025IYBdhLWcg3wvIko#45130990
  // Resolves to a flat list of absolute paths for every non-directory entry.
  async function getFiles(dir: string): Promise<string[]> {
    const dirents = await fs.readdir(dir, { withFileTypes: true });
    const files = await Promise.all(
      dirents.map((dirent) => {
        const res = path.resolve(dir, dirent.name);
        return dirent.isDirectory() ? getFiles(res) : res;
      })
    );
    // Each entry is either one path or the list from a sub-directory;
    // concat flattens exactly one level, which is all that is needed here.
    return ([] as string[]).concat(...files);
  }

  const files = await getFiles(s3Path);
  const uploads = files.map((filePath) =>
    s3
      .putObject({
        // path.relative() uses the OS separator ('\\' on Windows); S3 keys
        // use '/' as the conventional delimiter, so normalise it.
        Key: path
          .relative(s3Path, filePath)
          .split(path.sep)
          .join(path.posix.sep),
        Bucket: bucketName,
        Body: createReadStream(filePath),
      })
      .promise()
  );
  return Promise.all(uploads);
}

// Example invocation: './my-path' is resolved to an absolute path before
// being uploaded to the bucket named 'bucketname'.
await uploadDir(path.resolve('./my-path'), 'bucketname');

很好的想法。在点赞之前,我注意到所有文件都上传到根目录下,并且S3中的每个文件都包含路径(作为字符串)。这是有意为之吗? - OhadR
对于Windows,可以添加以下内容:`const posixKey = key.split(path.sep).join(path.posix.sep);` - OhadR
1
@OhadR Amazon S3是一个扁平的存储系统,实际上并不使用文件夹。更多信息请参见此回复:https://dev59.com/1VUK5IYBdhLWcg3w0Sx1#51218935 - spartanz51

9

这是@Jim解决方案的经过清理、调试和工作的版本

    /**
     * Uploads every regular file below ./test-results to the bucket named by
     * process.env.SOURCE_BUCKET, under `logs/<config.log>/test-results/`.
     *
     * @returns {Promise<void[]>} Settles once every upload has finished and
     *   rejects with the first upload failure. Callers that ignore the return
     *   value still trigger the uploads, as before.
     */
    function uploadArtifactsToS3() {
  const artifactFolder = `logs/${config.log}/test-results`;
  const testResultsPath = './test-results';

  // Synchronous depth-first walk calling `callback(filePath, stat)` for each
  // regular file.
  const walkSync = (currentDirPath, callback) => {
    fs.readdirSync(currentDirPath).forEach((name) => {
      const filePath = path.join(currentDirPath, name);
      const stat = fs.statSync(filePath);
      if (stat.isFile()) {
        callback(filePath, stat);
      } else if (stat.isDirectory()) {
        walkSync(filePath, callback);
      }
    });
  };

  // Uploads one file; the key is its path relative to the results folder.
  const uploadFile = async (filePath) => {
    // path.relative() does not depend on how path.join() happened to
    // normalise the "./" prefix, and '/' is enforced for the S3 key.
    const bucketPath = path
      .relative(testResultsPath, filePath)
      .split(path.sep)
      .join('/');
    const params = {
      Bucket: process.env.SOURCE_BUCKET,
      Key: `${artifactFolder}/${bucketPath}`,
      Body: fs.readFileSync(filePath)
    };
    try {
      await s3.putObject(params).promise();
      console.log(`Successfully uploaded ${bucketPath} to s3 bucket`);
    } catch (error) {
      console.error(`error in uploading ${bucketPath} to s3 bucket`);
      // Keep the underlying SDK error reachable for debugging.
      throw new Error(`error in uploading ${bucketPath} to s3 bucket`, { cause: error });
    }
  };

  // Collect the upload promises so the caller can wait for completion and so
  // that failures are not left as unhandled rejections.
  const uploads = [];
  walkSync(testResultsPath, (filePath) => {
    uploads.push(uploadFile(filePath));
  });
  return Promise.all(uploads);
}

小心 - 当有大量文件夹时,此代码会出错 - 事件处理错误:`Error: EMFILE: too many open files, scandir '/tmp/diaas-uiGPwkBx/standard-launcher'` - Martin Bartlett

5

我前几天正在思考这个问题,想到了以下内容:

...    
var async = require('async'),
    fs = require('fs'),
    path = require("path");

// Uploads the regular files that sit directly inside `directoryName`
// (non-recursive). `bucket`, `s3Path`, `s3FilePermissions` and `S3` are
// expected to be defined by the surrounding code; `s3Path` is used as the
// key prefix under which each file is stored.
var directoryName = './test',
    directoryPath = path.resolve(directoryName);

// readdirSync() also lists sub-directories, and readFileSync() on one of
// those throws EISDIR, so keep regular files only.
var files = fs.readdirSync(directoryPath).filter(function (f) {
    return fs.statSync(path.join(directoryPath, f)).isFile();
});
async.map(files, function (f, cb) {
    var filePath = path.join(directoryPath, f);

    var options = {
        Bucket: bucket,
        // One key per file; reusing the bare prefix for every upload would
        // make each file overwrite the previous one.
        Key: path.posix.join(s3Path, f),
        Body: fs.readFileSync(filePath),
        ACL: s3FilePermissions
    };

    S3.putObject(options, cb);

}, function (err, results) {
    if (err) console.error(err);
    console.log(results);
});

4
fs.readFileSync(filePath) 这行代码会返回 Error: EISDIR: illegal operation on a directory, read,类似于这里的问题:https://dev59.com/PV8e5IYBdhLWcg3wS4-5。 - Barlas Apaydin
1
@BarlasApaydin 这是因为readFileSync得到的是一个目录而不是要读取的文件。 - Omar Dulaimi

5

下面是一个包含上传方法Promise的版本。这个版本允许您在全部上传完成后执行某个操作。Promise.all().then...

const path = require('path');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

// Placeholders: the local directory to upload and the destination bucket.
const directoryToUpload = 'directory-name-here';
const bucketName = 'name-of-s3-bucket-here';

// Accumulator for every regular file discovered by getFilePaths().
const filePaths = [];

/**
 * Walks `dir` depth-first and appends the path of each regular file to the
 * module-level `filePaths` array. Entries that are neither files nor
 * directories are ignored.
 *
 * @param {string} dir - Directory to scan.
 */
const getFilePaths = (dir) => {
  for (const entryName of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entryName);
    const entryStat = fs.statSync(entryPath);

    if (entryStat.isDirectory()) {
      getFilePaths(entryPath);
      continue;
    }
    if (entryStat.isFile()) {
      filePaths.push(entryPath);
    }
  }
};
// Fill `filePaths` with every file found under the upload root.
getFilePaths(directoryToUpload);

// upload to S3
/**
 * Uploads a single file to the bucket named by `bucketName`.
 *
 * The object key is the file's path relative to `dir`, with forward slashes.
 *
 * @param {string} dir - Root directory being uploaded.
 * @param {string} filePath - Path of the file to upload (inside `dir`).
 * @returns {Promise<string>} Resolves with `filePath` once the upload
 *   succeeds; rejects with the read or S3 error otherwise.
 */
// The second parameter is named `filePath` rather than `path` so the `path`
// module stays reachable inside the function.
const uploadToS3 = (dir, filePath) => {
  return new Promise((resolve, reject) => {
    // Splitting on `${dir}/` breaks when `dir` has a trailing slash, when the
    // directory name repeats inside the path, or on Windows separators.
    const key = path.relative(dir, filePath).split(path.sep).join('/');
    const params = {
      Bucket: bucketName,
      Key: key,
      Body: fs.readFileSync(filePath),
    };
    s3.putObject(params, (err) => {
      if (err) {
        reject(err);
      } else {
        console.log(`uploaded ${params.Key} to ${params.Bucket}`);
        resolve(filePath);
      }
    });
  });
};

// Start one upload per collected file, then report once all have settled
// successfully or the first one has failed.
const uploadPromises = [];
for (const filePath of filePaths) {
  uploadPromises.push(uploadToS3(directoryToUpload, filePath));
}

Promise.all(uploadPromises)
  .then((uploadedPaths) => {
    console.log('uploads complete');
    console.log(uploadedPaths);
  })
  .catch((error) => {
    console.error(error);
  });

1
哈,我几年后回到这篇帖子上看看是否有人更新了干净的异步版本的答案。@ofhouse的答案看起来很合理,但这个更容易理解。感谢您抽出时间发表这篇文章! - Jim Chertkov

3
您可以尝试使用node-s3-client

更新:在npm上此处可用。

“将目录同步到S3”部分的文档

更新:添加客户端初始化代码。

// Create the upload client. NOTE(review): `s3` here is presumably the
// node-s3-client module (e.g. `require('s3')`), not an AWS.S3 instance -
// confirm against the surrounding code.
var client = s3.createClient({
    maxAsyncS3: 20,     // this is the default
    s3RetryCount: 3,    // this is the default
    s3RetryDelay: 1000, // this is the default
    multipartUploadThreshold: 20971520, // this is the default (20 MB)
    multipartUploadSize: 15728640, // this is the default (15 MB)
    // Placeholder credentials - replace with real values.
    s3Options: {
      accessKeyId: "YOUR ACCESS KEY",
      secretAccessKey: "YOUR SECRET ACCESS KEY"
    }
  });

// Describes the sync: the local source directory and the remote
// bucket/prefix destination.
var params = {
  localDir: "some/local/dir",
  deleteRemoved: true, // default false, whether to remove s3 objects
                       // that have no corresponding local file.

  s3Params: {
    Bucket: "s3 bucket name",
    Prefix: "some/remote/dir/",
    // other options supported by putObject, except Body and ContentLength.
    // See: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#putObject-property
  },
};
// Start the upload; the returned object reports its state through events.
var uploader = client.uploadDir(params);
// Log the stack trace if the sync fails.
uploader.on('error', function(err) {
  console.error("unable to sync:", err.stack);
});
// Log the progress counters exposed on the uploader each time it reports progress.
uploader.on('progress', function() {
  console.log("progress", uploader.progressAmount, uploader.progressTotal);
});
// Log completion.
uploader.on('end', function() {
  console.log("done uploading");
});

3
无法正常工作,进度条卡住不动,没有错误提示,也没有进展。 - Hitesh Joshi
你确定客户端已经正确初始化了吗?我已经编辑了答案,包括客户端初始化。 - unboundev
1
是的。否则会显示错误。因此,没有错误,客户端已正确初始化,但同步不会发生。 - Hitesh Joshi
5
这个库目前似乎不支持上传大于1MB的文件夹中的文件。在支持之前,它并不是一个非常适合用于通用目录同步到S3的解决方案。除了这个无法接受的问题,它是一个相当不错的库。 - dsw88
2
当使用此库时,请注意该库已经多年没有更新了!存在许多未修复的错误,我在这个库上浪费了5个小时。有人重新发布了一个修复版本:https://www.npmjs.com/package/@auth0/s3 - Black-Hole
现在,上述软件包也已经多年没有更新了。 - ParkerD

1
这对我来说有效(您需要添加walkSync包):
    /**
     * Sequential async counterpart of Array.prototype.forEach: the callback
     * for one element is awaited before the next element is visited.
     *
     * @param {Array} array - Items to iterate over.
     * @param {Function} callback - Invoked as `callback(item, index, array)`;
     *   may return a promise.
     * @returns {Promise<void>} Settles after the last callback has finished.
     */
    async function asyncForEach(array, callback) {
      let position = 0;
      while (position < array.length) {
        await callback(array[position], position, array);
        position += 1;
      }
    }

    /**
     * Uploads every file listed by walkSync() under a local directory to a
     * bucket, one at a time, using the listed relative path as the key.
     * Every object is stored with ContentType "text/html".
     *
     * @param {string} s3Path - Local directory to upload.
     * @param {string} endpoint - Passed as the `Bucket` of each upload.
     * @returns {Promise<void>} Settles after all uploads have completed.
     */
    const syncS3Directory = async (s3Path, endpoint) => {
      // Reads one file from disk and uploads it under its relative path.
      const uploadOne = async (file) => {
        const Body = fs.readFileSync(Path.join(s3Path, file));
        const uploadResult = await s3
          .upload({
            Bucket: endpoint,
            Key: file,
            Body,
            ContentType: "text/html",
          })
          .promise();
        if (!uploadResult) {
          Logger.error("Error synchronizing the bucket");
        }
      };

      await asyncForEach(walkSync(s3Path, { directories: false }), uploadOne);

      console.log("S3 bucket synchronized!");
    };

1
const AWS = require("aws-sdk");
const fs = require("fs");
const path = require("path");
const async = require("async");
const readdir = require("recursive-readdir");

// AWS credentials (placeholders).
// NOTE(review): real keys should not be committed to source control -
// consider letting the SDK read them from the environment or shared config.
const ID = "<accessKeyId>";
const SECRET = "<secretAccessKey>";

// rootFolder is the parent of this script's directory; uploadFolder is
// resolved against it by uploadToS3().
const rootFolder = path.resolve(__dirname, "../");
const uploadFolder = "./sources";

// The name of the bucket that you have created
const BUCKET_NAME = "<Bucket_Name>";

// S3 client configured with the explicit credentials above.
const s3 = new AWS.S3({
  accessKeyId: ID,
  secretAccessKey: SECRET
});

/**
 * Lists the files below `dirPath` via recursive-readdir.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<string[]>|Array} Whatever `readdir(dirPath)` returns when
 *   the directory exists, or an empty array (synchronously) when it does not.
 */
function getFiles(dirPath) {
  if (!fs.existsSync(dirPath)) {
    return [];
  }
  return readdir(dirPath);
}

/**
 * Uploads every file found under `uploadPath` (resolved against
 * `rootFolder`) to `BUCKET_NAME`, running at most 10 uploads at a time.
 *
 * Object keys are the file paths relative to `rootFolder`, written with
 * forward slashes.
 *
 * @param {string} uploadPath - Directory to upload, relative to `rootFolder`.
 * @returns {Promise<{result: boolean}>} Resolves with `{ result: true }` once
 *   all uploads succeed; rejects with an Error on the first failure.
 */
async function uploadToS3(uploadPath) {
  const filesToUpload = await getFiles(path.resolve(rootFolder, uploadPath));

  console.log(filesToUpload);
  return new Promise((resolve, reject) => {
    async.eachOfLimit(
      filesToUpload,
      10,
      async.asyncify(async file => {
        // path.relative() copes with any OS separator, unlike replacing a
        // hard-coded `${rootFolder}/` prefix; S3 keys always use '/'.
        const Key = path.relative(rootFolder, file).split(path.sep).join("/");
        console.log(`uploading: [${Key}]`);
        // The SDK's managed upload already exposes a promise, so no manual
        // callback-to-promise wrapper is needed.
        await s3
          .upload({
            Key,
            Bucket: BUCKET_NAME,
            Body: fs.readFileSync(file)
          })
          .promise();
        return { result: true };
      }),
      err => {
        if (err) {
          // Pass real Error objects through untouched so their type, code and
          // stack survive; only wrap non-Error values.
          return reject(err instanceof Error ? err : new Error(err));
        }
        resolve({ result: true });
      }
    );
  });
}

// Entry point: run the upload, then exit with status 0 on success or log the
// error message and exit with status 1 on failure.
(async () => {
  try {
    await uploadToS3(uploadFolder);
    console.log("upload complete!");
    process.exit(0);
  } catch (err) {
    console.error(err.message);
    process.exit(1);
  }
})();

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接