使用Node.js将文件系统中的目录结构转换为JSON

73

我有这样的文件结构:

root
|_ fruits
|___ apple
|______images
|________ apple001.jpg
|________ apple002.jpg
|_ animals
|___ cat
|______images
|________ cat001.jpg
|________ cat002.jpg

我想要使用Javascript和Node.js监听这个根目录以及所有子目录,并创建一个JSON来反映此目录结构,每个节点包含类型、名称、路径和子节点:

// Example of the desired output: one node per directory entry, each carrying
// its type, name and root-relative path; folder nodes also list their children.
// Every node's `name` is the last segment of its `path`.
const data = [
  {
    type: "folder",
    name: "animals",
    path: "/animals",
    children: [
      {
        type: "folder",
        name: "cat",
        path: "/animals/cat",
        children: [
          {
            type: "folder",
            name: "images",
            path: "/animals/cat/images",
            children: [
              {
                type: "file",
                name: "cat001.jpg",
                path: "/animals/cat/images/cat001.jpg"
              }, {
                type: "file",
                name: "cat002.jpg",
                path: "/animals/cat/images/cat002.jpg"
              }
            ]
          }
        ]
      }
    ]
  }
];

这是一个Coffeescript JSON:

# Same example tree written as a CoffeeScript literal: every node carries its
# type, name and root-relative path; folder nodes also list their children.
# Every node's name is the last segment of its path.
data = 
[
  type: "folder"
  name: "animals"
  path: "/animals"
  children  :
    [
      type: "folder"
      name: "cat"
      path: "/animals/cat"
      children:
        [
          type: "folder"
          name: "images"
          path: "/animals/cat/images"
          children: 
            [
              type: "file"
              name: "cat001.jpg"
              path: "/animals/cat/images/cat001.jpg"
            , 
              type: "file"
              name: "cat002.jpg"
              path: "/animals/cat/images/cat002.jpg"
            ]
        ]
    ]
]

如何在Django视图中获取这个JSON数据格式?(Python)


3
这是在获取d3.js分层数据时的常见要求。我想使用d3.js标记此问题,但Stack Overflow最多只允许5个标记。 - Sridhar Sarnobat
1
我希望这些答案中有一个可以从stdin读取路径,这样你就可以将路径列表转换为json对象,就像这样:find | paths2json。这将通过管道发挥Unix组合的全部威力。 - Sridhar Sarnobat
8个回答

75

这是一份草图。错误处理留给读者来练习。

var fs = require('fs'),
    path = require('path')

/**
 * Synchronously describes `filename` and everything below it as a tree of
 * plain objects.
 *
 * @param {string} filename - Path of the file or directory to describe.
 * @returns {{path: string, name: string, type: string, children?: Array}}
 *   A node with the given path, its basename and a type of "folder" or
 *   "file"; folder nodes also carry a `children` array of nested nodes.
 * @throws {Error} Any error raised by `fs.lstatSync` or `fs.readdirSync`
 *   (for example when the path does not exist) propagates to the caller.
 */
function dirTree(filename) {
    // lstat does not follow symbolic links, so a link is never descended into.
    const stats = fs.lstatSync(filename);
    const info = {
        path: filename,
        name: path.basename(filename)
    };

    if (stats.isDirectory()) {
        info.type = "folder";
        info.children = fs.readdirSync(filename).map(function(child) {
            // path.join keeps child paths clean even when `filename` ends
            // with a separator (plain concatenation produced "dir//child").
            return dirTree(path.join(filename, child));
        });
    } else {
        // Everything that is not a directory is reported as a file; this
        // includes symbolic links and other special entries.
        info.type = "file";
    }

    return info;
}

// Command-line entry point: runs only when this file is executed directly,
// not when it is loaded by another module. `module.parent` is deprecated, so
// the check compares against `require.main` instead.
if (require.main === module) {
    // node dirTree.js ~/foo/bar
    var util = require('util');
    // A depth of `null` prints every level instead of collapsing nested
    // nodes to [Object].
    console.log(util.inspect(dirTree(process.argv[2]), false, null));
}

3
这在第一层很好用,但是子节点看起来像这样:children: [Object] ... 你看到这里有任何问题吗? - hagope
3
好的,创建的对象没问题,但是默认情况下,console.log仅打印有限深度的对象。我编辑了代码以打印完整的树形结构。 - Miikka
6
谢谢你提供的这个函数。我认为最好使用path.join代替a + '/' + b。请改为return dirTree(path.join(filename, child)); - tuchk4
如何对输出进行排序,以便目录首先按字母顺序排列,然后是文件(同样按字母顺序)? - Peter Butcher
@peterButcher 如果它们以树形结构打印出来,你会如何对它们进行排序?但是你可以使用lodash来构造返回的对象。它只是一个普通的对象,所以像任何其他东西一样对其进行排序即可 :) - Justin
显示剩余2条评论

39

有一个适用于它的NPM模块

https://www.npmjs.com/package/directory-tree

创建一个代表目录树的对象。

来自:

photos
├── summer
│   └── june
│       └── windsurf.jpg
└── winter
    └── january
        ├── ski.png
        └── snowboard.jpg

转换为:

{
  "path": "",
  "name": "photos",
  "type": "directory",
  "children": [
    {
      "path": "summer",
      "name": "summer",
      "type": "directory",
      "children": [
        {
          "path": "summer/june",
          "name": "june",
          "type": "directory",
          "children": [
            {
              "path": "summer/june/windsurf.jpg",
              "name": "windsurf.jpg",
              "type": "file"
            }
          ]
        }
      ]
    },
    {
      "path": "winter",
      "name": "winter",
      "type": "directory",
      "children": [
        {
          "path": "winter/january",
          "name": "january",
          "type": "directory",
          "children": [
            {
              "path": "winter/january/ski.png",
              "name": "ski.png",
              "type": "file"
            },
            {
              "path": "winter/january/snowboard.jpg",
              "name": "snowboard.jpg",
              "type": "file"
            }
          ]
        }
      ]
    }
  ]
}

用法

var tree = directoryTree('/some/path');

而且您还可以按扩展名进行过滤:

var filteredTree = directoryTree('/some/path', ['.jpg', '.png']);

2
这正是我所需要的。谢谢。它运行得很好。 - WebWanderer

21

被采纳的答案可行,但它是同步的,会严重影响性能,特别是对于大型目录树。
我强烈建议您使用以下异步解决方案,它既更快,又不会阻塞。
基于此处的并行解决方案。

var fs = require('fs');
var path = require('path');

/**
 * Asynchronously lists the contents of `dir` as a nested array of nodes.
 *
 * File nodes look like `{type: 'file', name}`; folder nodes look like
 * `{name, type: 'folder', children}` where `children` is again such an array.
 * The order of nodes follows callback completion order and is not sorted.
 *
 * The misspelled name is kept as-is because it is the public identifier.
 *
 * @param {string} dir - Directory to list.
 * @param {function(Error=, Array=)} done - Called exactly once, either with an
 *   error or with `null` and the array of nodes (empty for an empty directory).
 */
var diretoryTreeToObj = function(dir, done) {
    var results = [];
    var settled = false;

    // Reports the outcome at most once, even when several entries fail or
    // complete after a failure was already reported.
    function finish(err) {
        if (settled)
            return;
        settled = true;
        if (err)
            done(err);
        else
            done(null, results);
    }

    fs.readdir(dir, function(err, list) {
        if (err)
            return finish(err);

        var pending = list.length;

        // An empty directory yields an empty array, the same shape every
        // other outcome has, so parents always get an array as `children`.
        if (!pending)
            return finish();

        list.forEach(function(entry) {
            var file = path.resolve(dir, entry);
            fs.stat(file, function(err, stat) {
                if (stat && stat.isDirectory()) {
                    diretoryTreeToObj(file, function(err, res) {
                        // A failure below must not be recorded as a folder
                        // with undefined children; surface it instead.
                        if (err)
                            return finish(err);
                        results.push({
                            name: path.basename(file),
                            type: 'folder',
                            children: res
                        });
                        if (!--pending)
                            finish();
                    });
                }
                else {
                    // Entries whose stat call failed are still listed, as
                    // files, so one unreadable entry does not hide the rest.
                    results.push({
                        type: 'file',
                        name: path.basename(file)
                    });
                    if (!--pending)
                        finish();
                }
            });
        });
    });
};

示例用法:

// Example: print the tree below a directory as a JSON string.
var dirTree = ('/path/to/dir');

diretoryTreeToObj(dirTree, function(err, res){
    if(err) {
        // There is no result to print when the listing failed.
        console.error(err);
        return;
    }

    console.log(JSON.stringify(res));
});

1
小问题:diretoryTreeToObj 中有一个拼写错误,我认为应该是 directoryTreeToObj,对吗? - mcnutt

3

基于Miika的解决方案,我的CS示例(使用express):

fs = require 'fs' #file system module
path = require 'path' # file path module

# Describes `root` and everything below it as a nested object:
# every node has `path`, `name` and `type`; folders also have `children`.
# A root that does not exist is reported as a message string.
tree = (root) ->
    # drop trailing slashes so basename and child paths stay clean
    root = root.replace /\/+$/, ""
    return 'error: root does not exist' unless fs.existsSync root
    # lstat does not follow links, so links are classified as links
    stats = fs.lstatSync root
    node =
        path: root
        name: path.basename(root)
    node.type = switch
        when stats.isDirectory() then 'folder'
        when stats.isFile() then 'file'
        when stats.isSymbolicLink() then 'link'
        else 'unknown'
    # only folders get children, one nested node per directory entry
    if node.type is 'folder'
        node.children = (tree "#{root}/#{entry}" for entry in fs.readdirSync root)
    node

# Maps any caught exception to the fixed message sent back to the client;
# the exception itself is not inspected.
handle = (e) -> 'uncaught exception...'

# Route handler: responds with the tree of './test/'; if building or sending
# it throws, responds with the generic message from `handle` instead.
exports.index = (req, res) ->
    try
        res.send(tree('./test/'))
    catch err
        res.send(handle(err))

2
这里有一个异步解决方案:
 /**
  * Asynchronously describes `dir` and everything below it.
  *
  * Files become `{name, time, size}` and directories `{name, time, entries}`,
  * where `time` is the entry's mtime and `entries` holds the nested results.
  * An entry that does not exist is described as an empty object.
  *
  * @param {string} dir - Path to start from.
  * @returns {Promise<Object>} Resolves with the description of `dir`; rejects
  *   with the error from lstat or readdir if one of them fails.
  */
 function list(dir) {
   const walk = async entry => {
     // fs.exists is deprecated; an F_OK access check answers the same
     // question and lets a missing entry map to an empty result.
     try {
       await fs.promises.access(entry, fs.constants.F_OK);
     } catch (err) {
       return {};
     }

     const stats = await fs.promises.lstat(entry);
     if (!stats.isDirectory()) {
       return {
         name: path.basename(entry),
         time: stats.mtime,
         size: stats.size
       };
     }

     // All children are walked concurrently; the first failure rejects.
     const files = await fs.promises.readdir(entry);
     const children = await Promise.all(
       files.map(child => walk(path.join(entry, child)))
     );
     return {
       name: path.basename(entry),
       time: stats.mtime,
       entries: children
     };
   };

   return walk(dir);
 }

请注意,当目录不存在时,返回的是空结果而不是抛出错误。 以下是一个示例结果:
{
    "name": "root",
    "time": "2017-05-09T07:46:26.740Z",
    "entries": [
        {
            "name": "book.txt",
            "time": "2017-05-09T07:24:18.673Z",
            "size": 0
        },
        {
            "name": "cheatsheet-a5.pdf",
            "time": "2017-05-09T07:24:18.674Z",
            "size": 262380
        },
        {
            "name": "docs",
            "time": "2017-05-09T07:47:39.507Z",
            "entries": [
                {
                    "name": "README.md",
                    "time": "2017-05-08T10:02:09.651Z",
                    "size": 19229
                }
            ]
        }
    ]
}

这将是:

root
|__ book.txt
|__ cheatsheet-a5.pdf
|__ docs
      |__ README.md

1

您可以使用此项目中的代码,但应根据您的需求进行调整:

https://github.com/NHQ/Node-FileUtils/blob/master/src/file-utils.js#L511-L593

来自:

a
|- b
|  |- c
|  |  |- c1.txt
|  |
|  |- b1.txt
|  |- b2.txt
|
|- d
|  |
|
|- a1.txt
|- a2.txt

转换为:

{
    b: {
        "b1.txt": "a/b/b1.txt",
        "b2.txt": "a/b/b2.txt",
        c: {
            "c1.txt": "a/b/c/c1.txt"
        }
    },
    d: {},
    "a2.txt": "a/a2.txt",
    "a1.txt": "a/a1.txt"
}

用法:

new File ("a").list (function (error, files){
    //files...
});

我从Github上删除了这个项目。这个链接是一个分支。 - Gabriel Llamas
@GabrielLlamas 为什么这个项目从Github上被删除了?在我看来,这是一个非常有用的项目。 - Izhaki

0
在这种情况下,我使用了“walk”库,它获取您的根路径并递归地遍历文件和目录,并发出带有节点所需所有信息的目录/文件事件, 请查看该实现-->
const walk = require('walk');

/**
 * Builds a nested tree of the non-hidden files and directories below a root,
 * driven by the events of the `walk` library.
 */
class FsTree {

    /**
     * Walks `rootPath` and collects its entries into a tree.
     *
     * Directory nodes are `{name, type: 'dir', children, path}` and file nodes
     * `{name, type: 'file', path}`; nodes below the top level also carry
     * `parentPath`. Paths are relative to the root and start with '/'.
     * Entries whose name starts with '.' and everything below a dot-directory
     * inside the root are left out.
     *
     * @param rootPath directory to walk; falls back to `__dirname` when falsy
     * @returns {Promise} resolves with the array of top-level nodes on 'end';
     *   rejects with the value passed to the walker's 'errors' event
     */
    getFileSysTree(rootPath){
        return new Promise((resolve, reject)=>{

            const root = rootPath || __dirname; // if there's no rootPath use exec location
            const tree = [];
            const nodesMap = {};
            const walker  = walk.walk(root, { followLinks: false}); // filter doesn't work well

            // Attaches `node` to the tree; `dirPath` is the first argument of
            // the walker event (presumably the directory containing the entry
            // -- verify against the walk documentation).
            function addNode(node, dirPath){
                const relativePath = dirPath.replace(root,'');

                // Hidden entries are detected below the root only: testing the
                // absolute path would drop every entry whenever the root itself
                // lives under a dot-directory.
                if ( node.name.indexOf('.') === 0 || relativePath.indexOf('/.') >= 0){ // ignore hidden files
                    return;
                }

                node.path = relativePath + '/' + node.name;
                nodesMap[node.path] = node;

                if ( relativePath.length === 0 ){ //is root
                    tree.push(node);
                    return;
                }
                node.parentPath = node.path.substring(0,node.path.lastIndexOf('/'));
                const parent = nodesMap[node.parentPath];
                if ( !parent ){
                    // The containing directory was never registered, so there
                    // is nowhere to attach this node; skipping it avoids a
                    // TypeError inside the event handler.
                    return;
                }
                parent.children.push(node);

            }

            walker.on('directory', (path, stats, next)=>{
                addNode({ name: stats.name, type:'dir',children:[]}, path);
                next();
            });

            walker.on('file', (path,stats,next)=>{
                addNode({name:stats.name, type:'file'},path);
                next();
            });

            walker.on('end',()=>{
                resolve(tree);
            });

            walker.on('errors',  (root, nodeStatsArray, next) => {
                reject(nodeStatsArray);
                next();
            });
        });

    }
}


// Example: print the tree of the directory containing this script.
const fsTreeFetcher = new FsTree();

fsTreeFetcher.getFileSysTree(__dirname).then((result)=>{
    console.log(result);
}).catch((err)=>{
    // getFileSysTree rejects when the walker reports errors; without a
    // handler that would surface as an unhandled promise rejection.
    console.error(err);
});

0

在Sean C.的回答基础上补充一点。

我非常喜欢它,但使用async await可以使代码更易读。

import fs from 'fs';
import {
  lstat,
  readdir,
  access,
} from 'fs/promises';
import path from 'path';

/**
 * Reports whether `file` is visible to the current process.
 *
 * @param {string} file - Path to probe.
 * @returns {Promise<boolean>} `true` when the F_OK access check succeeds,
 *   `false` when it fails for any reason.
 */
async function existsAsync(file) {
  return access(file, fs.constants.F_OK).then(
    () => true,
    () => false,
  );
}

/**
 * Describes `dir` and everything below it.
 *
 * Files become `{name, time, size}` and directories `{name, time, entries}`,
 * where `time` is the entry's mtime. An entry that does not exist is
 * described as an empty object.
 *
 * @param {string} dir - Path to start from.
 * @returns {Promise<Object>} The description of `dir`.
 */
async function listFileTreeRecursive(dir) {
  async function describeEntry(entryPath) {
    const isPresent = await existsAsync(entryPath);
    if (!isPresent) {
      return {};
    }

    const info = await lstat(entryPath);
    const node = {
      name: path.basename(entryPath),
      time: info.mtime,
    };

    if (info.isDirectory()) {
      // Children are described concurrently.
      const childNames = await readdir(entryPath);
      const pendingChildren = childNames.map((childName) =>
        describeEntry(path.join(entryPath, childName)),
      );
      node.entries = await Promise.all(pendingChildren);
      return node;
    }

    node.size = info.size;
    return node;
  }

  return describeEntry(dir);
}

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接