Nodejs脚本无法读取GDOC文件扩展名。

Question

Nodejs脚本无法读取GDOC文件扩展名。

3

我正在使用Google Drive for Developers Drive API (V3) Nodejs快速入门。

特别是我关注以下函数。我已将pageSize定制为1进行测试，并调用了我的function read(file.name);

    /**
 * Lists the name and ID of the file(s) returned by the Drive API and hands
 * each file name to read().
 *
 * pageSize is 1, so at most one file is requested per call.
 *
 * NOTE: file.name is the name stored in Drive. As the output quoted in this
 * thread shows, for a Google Doc it carries no ".gdoc" extension, so it does
 * not necessarily match a file name in a locally synced folder.
 *
 * @param {google.auth.OAuth2} auth An authorized OAuth2 client.
 */
function listFiles(auth) {
  const drive = google.drive({version: 'v3', auth});
  drive.files.list({
    pageSize: 1,   // only find the last modified file in dev folder
    fields: 'nextPageToken, files(id, name)',
  }, (err, res) => {
    if (err) return console.log('The API returned an error: ' + err);
    const files = res.data.files;
    if (!files.length) {
      console.log('No files found.');
      return;
    }
    console.log('Files:');
    // forEach rather than map: the callback is run only for its side effects.
    files.forEach((file) => {
      console.log(`${file.name} (${file.id})`);
      read(file.name);   // my function here
    });
  });
}

// custom code - stream a local file as UTF-8 text and print what arrives
/**
 * Opens fileName as a UTF-8 read stream and logs every chunk it delivers.
 * If the stream reports an error, the error's message is logged instead.
 *
 * @param {string} fileName Path handed to fs.createReadStream.
 */
function read(fileName) {
  const reportFailure = (failure) => {
    console.log(`error: ${failure.message}`);
  };
  const printChunk = (text) => {
    console.log(text);
  };

  fs.createReadStream(fileName, 'utf8')
    .on('error', reportFailure)
    .on('data', printChunk);
}

这段代码从已同步的Google Drive文件夹中读取文件。我在开发中使用本地文件夹。我发现pageSize: 1 参数会返回此本地文件夹中最后修改的文件。因此我的流程如下：

编辑 .js 代码文件
对测试文件进行微小编辑 (先是 txt 文件，然后是 gdoc 文件)，以确保它是最后被修改的
运行代码

我正在测试一个文本文件和一个 GDOC 文件。它们的文件名分别为 atest.txt 和 31832_226114__0001-00028.gdoc。输出结果如下：

    PS C:\Users\david\Google Drive\Technical-local\gDriveDev> node . gdocToTextDownload.js
Files:
atest.txt (1bm1E4s4ET6HVTrJUj4TmNGaxqJJRcnCC)
atest.txt this is a test file!!


PS C:\Users\david\Google Drive\Technical-local\gDriveDev> node . gdocToTextDownload.js
Files:
31832_226114__0001-00028 (1oi_hE0TTfsKG9lr8Wl7ahGNvMvXJoFj70LssGNFFjOg)
error: ENOENT: no such file or directory, open 'C:\Users\david\Google Drive\Technical-local\gDriveDev\31832_226114__0001-00028'

我的问题是：为什么脚本可以读取文本文件，却无法读取gdoc文件？

此时我必须在函数调用中将gdoc文件扩展名硬编码到文件名中，以生成所需的输出，就像文本文件示例一样。

read('31832_226114__0001-00028.gdoc');

这显然不是我想做的事情。

我打算编写一个脚本，用于下载大量由.jpg文件创建的gdocs文档。

------------------------- 以下是已完成的代码 ------------------------

/**
 * Lists up to pageSize files that sit in one Drive folder (selected by the
 * `q` query) and starts a download for each of them via downloadFile().
 *
 * Only the first page of results is processed: nextPageToken is requested in
 * `fields` but is not followed.
 *
 * @param {google.auth.OAuth2} auth An authorized OAuth2 client.
 */
function listFiles(auth) {
  const drive = google.drive({version: 'v3', auth});

  drive.files.list({
    corpora: 'user',
    pageSize: 100,
    // files in a parent folder that have not been trashed
    // get ID from Drive > Folder by looking at the URL after /folders/
    q: `'11Sejh6XG-2WzycpcC-MaEmDQJc78LCFg' in parents and trashed=false`,
    fields: 'nextPageToken, files(id, name)',
  }, (err, res) => {
    if (err) return console.log('The API returned an error: ' + err);
    const files = res.data.files;
    if (!files.length) {
      console.log('No files found.');
      return;
    }

    // Iterate the files directly; every binding is block-scoped, so nothing
    // leaks into the global scope.
    for (const {id, name} of files) {
      downloadFile(drive, id, name);
    }
  });
}

/**
 * Exports one Google Workspace document as plain text and writes it to disk.
 *
 * The destination file is opened only after the export request has succeeded,
 * so a failed request does not leave an empty file behind. Failures of the
 * request, the download stream or the write stream are logged, not thrown.
 *
 * @param {object} drive Drive v3 client (as returned by google.drive()).
 * @param {string} fileId ID of the document to export.
 * @param {string} fileName Name used for the local file and progress output.
 * @param {string} [destDir='/test/test1'] Destination folder. Make sure the
 *     path exists and is writable. Use UNIX filepath notation.
 * @returns {Promise<void>} Settles (never rejects) once the file has been
 *     written or the failure has been logged.
 */
function downloadFile(drive, fileId, fileName, destDir = '/test/test1') {
  // A path separator inside the name would point into a different folder,
  // so replace it to keep the output inside destDir.
  const safeName = String(fileName).replace(/[\\/]/g, '_');
  const filePath = `${destDir}/${safeName}`;
  let progress = 0;

  return drive.files.export(
    { fileId, mimeType: 'text/plain' },
    { responseType: 'stream' }
  ).then(res => new Promise((resolve, reject) => {
    const dest = fs.createWriteStream(filePath);
    dest
      .on('finish', resolve)   // all data handed to the write stream is flushed
      .on('error', reject);

    res.data
      .on('end', () => {
        console.log('  Done downloading');
      })
      .on('error', err => {
        // pipe() does not close the destination when the source fails.
        dest.destroy();
        reject(err);
      })
      .on('data', d => {
        progress += d.length;
        if (process.stdout.isTTY) {
          process.stdout.clearLine(0);
          process.stdout.cursorTo(0);
          process.stdout.write(`Downloading ${fileName} ${progress} bytes`);
        }
      })
      .pipe(dest);
  })).catch(err => {
    console.error('Error downloading file.', err);
  });
}

- Dave

1

在文件响应中，检查 fullFileExtension 和 fileExtension 是否存在？请参见此处：https://developers.google.com/drive/api/v3/reference/files#resource 您可以手动添加它，例如，检测扩展名是否缺失（最后一个 . 匹配），将文件名与 .gdoc 连接起来。 - traynor

谢谢@traynor，好观点。但我认为，也许我在尝试获取成功下载方面走错了方向。我在https://developers.google.com/drive/api/v3/manage-downloads示例中遇到了麻烦。也就是说，对于代码而言，它对我不起作用。在做出更改后，我能得到的最好结果是403错误。通过这篇文章，我认为我被“岔路”所困扰，走了“绕路”的路线。我想我只是在尝试理解Google环境。 - Dave

2个回答

0

我认为答案是，上面的nodejs脚本运行在Windows上，因此必须遵守通过DOS/NT开发Windows所继承的本机操作系统/文件系统。另一方面，gdoc扩展名是由Google Drive同步桌面客户端创建的引用。这里有一个重要的区别。gdoc扩展名引用了存储在Google Drive上的文件（引用位于硬盘驱动器上的同步文件夹中，文件位于云上的Google Drive上），因此它不是通常意义上的扩展名。通常意义上的扩展名是指本地应用程序使用的有效访问/读取/写入文件类型。因此，我的测试函数function read(fileName)将无法像.txt扩展名那样读取.gdoc。

因此，从本地应用程序访问Google Drive上的文件的正确方法是使用文件的ID。文件名只是一种方便的方式，用于标记ID，以便用户可以有意义地比较下载副本与Google Drive上的原始文件。

（参考原问题）使用下面已经完成的代码，我已经添加了这两个函数到谷歌的Nodejs快速入门中，替换了function listFiles(auth)并添加了function downloadFile(drive, fileId, fileName)

整个脚本文件已被用于下载多个文件（一次超过50个）到我的硬盘。这是OCR设置中有用的代码片段，它将历史选民名册的.JPG图像转换为可读文本的gscript。这些gdocs很混乱（仍然包含原始图像和各种格式的彩色字体），通过上述脚本以文本文件形式下载可以清理它们。当然，图像从文本文件中删除，字体标准化为只有大/小写文本。因此，它不仅是一个下载器，还是一个过滤器。

我希望这对某人有所帮助。

- Dave

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Ruben Restrepo · Accepted Answer

我的问题是：为什么脚本可以读取文本文件但无法读取gdoc文件？

这是因为您正在尝试下载Google Workspace文档，而只有具有二进制内容的文件才能使用drive.files.get方法进行下载。对于Google Workspace文档，您需要使用drive.files.export（如此处所述）。

从您的代码中，我看到您只列出了文件；您还需要确定要下载的文件类型。您可以使用mimeType字段来判断应当使用export方法还是get方法，例如，Google Doc的mime类型是application/vnd.google-apps.document，而docx文件（二进制）则为application/vnd.openxmlformats-officedocument.wordprocessingml.document。

请参考以下工作示例：

从Google Drive下载文件	`在Fusebit中运行`

const fs = require("fs");

/**
 * Downloads the binary content of one Drive file into /tmp.
 *
 * The returned promise resolves only after the write stream has emitted
 * "finish", so the file is completely written when the caller continues.
 * It rejects when either the download stream or the write stream fails.
 *
 * @param {object} drive Drive v3 client.
 * @param {string} fileId ID of the file to download.
 * @param {string} name File name to use below /tmp.
 * @returns {Promise<string>} Path of the written file.
 */
const getFile = async (drive, fileId, name) => {
    const res = await drive.files.get({ fileId, alt: "media" }, { responseType: "stream" });

    return new Promise((resolve, reject) => {
        const filePath = `/tmp/${name}`;
        console.log(`writing to ${filePath}`);
        const dest = fs.createWriteStream(filePath);
        let progress = 0;

        dest
            .on("finish", () => {
                // "finish" fires after all piped data has been flushed; the
                // source's "end" only means the last chunk has been read.
                console.log(" Done downloading file.");
                resolve(filePath);
            })
            .on("error", (err) => {
                console.error(" Error writing file.");
                // Stop the download; nothing can consume it any more.
                res.data.destroy();
                reject(err);
            });

        res.data
            .on("error", (err) => {
                console.error(" Error downloading file.");
                // pipe() does not close the destination when the source fails.
                dest.destroy();
                reject(err);
            })
            .on("data", (d) => {
                progress += d.length;
                console.log(` Downloaded ${progress} bytes`);
            })
            .pipe(dest);
    });
};

const fileKind = "drive#file";
let filesCounter = 0;
const drive = googleClient.drive({ version: "v3" });
// A single list() call: only the files in this one response are considered.
const files = await drive.files.list();

// Only files with binary content can be downloaded with files.get. Use files.export for Docs Editors files.
// Read more at https://developers.google.com/drive/api/v3/reference/files/get
// In this example, every docx file in the listing is downloaded to a temp folder.
const onlyFiles = files.data.files.filter(
    (file) =>
        file.kind === fileKind &&
        file.mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
);
const numberOfFilesToDownload = onlyFiles.length;
console.log(` About to download ${numberOfFilesToDownload} files`);
// onlyFiles is a plain array, so a regular for...of is enough; the await on
// getFile() keeps the downloads sequential.
for (const file of onlyFiles) {
    filesCounter++;
    console.log(` Downloading file ${file.name}, ${filesCounter} of ${numberOfFilesToDownload}`);
    await getFile(drive, file.id, file.name);
}