我想创建一个函数,返回某个目录及其所有子目录中的文件总数。只需要一些入门提示即可。
一行代码的写法:
import os
# One-liner: add up the number of files in every directory os.walk visits.
# The raw string keeps the Windows backslashes literal instead of relying on
# "\C" and "\P" being left alone as unrecognised escape sequences.
cpt = sum(len(files) for r, d, files in os.walk(r"G:\CS\PYTHONPROJECTS"))
使用 `os.walk`,它会为您完成递归遍历。示例请参见 http://www.pythonforbeginners.com/code-snippets-source-code/python-os-walk/ 。
# Walk the tree rooted at `folder` (supplied by the surrounding code) and add
# up the number of files os.walk reports for each directory it visits.
total = 0
for root, dirs, files in os.walk(folder):
    total += len(files)
# Just add an `elif` branch to take care of the directories:
def fileCount(folder):
    """Count the files in *folder* and, recursively, in its sub-directories.

    Args:
        folder: Path of the directory to scan.

    Returns:
        The number of entries under ``folder`` for which ``os.path.isfile``
        is true, at any depth.
    """
    count = 0
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        if os.path.isfile(path):
            count += 1
        # os.path has no ``isfolder``; ``isdir`` is the directory test.
        elif os.path.isdir(path):
            count += fileCount(path)
    return count
对我来说有效的是 `os.path.isdir`,而不是 `os.path.isfolder`。 —— Jeppe

这里是三种最流行方法的耗时测试:
import os
from datetime import datetime
# Root directory scanned by each of the timing runs (hard-coded Windows path).
dir_path = "D:\\Photos"
# os.listdir
def recursive_call(dir_path):
    """Count files and sub-folders under *dir_path* using ``os.listdir``.

    Args:
        dir_path: Path of the directory to scan.

    Returns:
        A ``(files, folders)`` tuple with the totals found at any depth
        below ``dir_path``. ``dir_path`` itself is not counted as a folder.
    """
    files = 0
    folders = 0
    for name in os.listdir(dir_path):
        # Build the full path once instead of re-joining it for every check.
        full_path = os.path.join(dir_path, name)
        if os.path.isfile(full_path):
            files += 1
        elif os.path.isdir(full_path):
            folders += 1
            file_count, folder_count = recursive_call(full_path)
            files += file_count
            folders += folder_count
    return files, folders
# Time the os.listdir-based recursive counter and report its totals.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
print("\nFolders: %d, Files: %d" % (folders, files))
# The elapsed time is taken after the first print, so it includes that print.
elapsed = datetime.now() - start_time
print("Time Taken (os.listdir): %s seconds" % elapsed.total_seconds())
# os.walk
# Time a single os.walk pass over the same tree and report its totals.
start_time = datetime.now()
file_array = [len(files) for r, d, files in os.walk(dir_path)]
files = sum(file_array)
# os.walk yields one entry per directory visited, including dir_path itself.
# The recursive counters only count sub-folders, so leave the root out to keep
# the reported folder totals comparable (and never report a negative count
# when os.walk yields nothing).
folders = max(len(file_array) - 1, 0)
print("\nFolders: %d, Files: %d" % (folders, files))
print("Time Taken (os.walk): %s seconds" % (datetime.now() - start_time).total_seconds())
# os.scandir
def recursive_call(dir_path):
    """Count files and sub-folders under *dir_path* using ``os.scandir``.

    Args:
        dir_path: Directory to scan, as a path string or a path-like object
            such as an ``os.DirEntry``.

    Returns:
        A ``(files, folders)`` tuple with the totals found at any depth
        below ``dir_path``. ``dir_path`` itself is not counted as a folder.
    """
    files = 0
    folders = 0
    # The with-block closes the scandir iterator even if an error interrupts
    # the loop part-way through.
    with os.scandir(dir_path) as entries:
        for entry in entries:
            if entry.is_file():
                files += 1
            elif entry.is_dir():
                folders += 1
                # DirEntry is path-like, so it can be handed straight back
                # to os.scandir by the recursive call.
                file_count, folder_count = recursive_call(entry)
                files += file_count
                folders += folder_count
    return files, folders
# Time the os.scandir-based recursive counter and report its totals.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
print("\nFolders: %d, Files: %d" % (folders, files))
# The elapsed time is taken after the first print, so it includes that print.
elapsed = datetime.now() - start_time
print("Time Taken (os.scandir): %s seconds" % elapsed.total_seconds())
结果:
Folders: 53, Files: 29048
Time Taken (os.listdir): 3.074945 seconds
Folders: 53, Files: 29048
Time Taken (os.walk): 0.062022 seconds
Folders: 53, Files: 29048
Time Taken (os.scandir): 0.048984 seconds
`os.walk` 是最优雅的,但递归实现的 `os.scandir` 似乎是最快的。

使用 `Path.cwd().rglob('*')` 或 `Path('some path').rglob('*')`,可以创建一个包含所有文件的生成器。
通过 `list` 或 `*` 来展开生成器,并使用 `len` 获取文件数。
若需要每个目录各自的文件总数,请参阅「如何计算每个子目录中文件的总数」。
from pathlib import Path
# Count every path yielded by rglob('*') under the current working directory
# by materialising the generator with list().
# NOTE(review): no is_file() filter here, so this total presumably includes
# directories as well as files — confirm before relying on it as a file count.
total_dir_files = len(list(Path.cwd().rglob('*')))
# or, equivalently, unpack the generator into a list literal with *
total_dir_files = len([*Path.cwd().rglob('*')])
# or filter for only files using is_file()
file_count = len([f for f in Path.cwd().rglob('*') if f.is_file()])
这是我的版本
def fileCount(folder, allowed_extensions=None):
    """Count the files in *folder* and all of its sub-directories.

    Args:
        folder: Path of the directory to scan with ``os.walk``.
        allowed_extensions: Optional suffix or collection of suffixes
            (for example ``(".jpg", ".mp4")``). When given and non-empty,
            only file names ending with one of them are counted. The match
            is case-sensitive. ``None`` or an empty value counts every file.

    Returns:
        The number of matching files found at any depth under ``folder``.
    """
    if not allowed_extensions:
        return sum(len(files) for _, _, files in os.walk(folder))
    # str.endswith only accepts a str or a tuple of str, so normalise lists,
    # sets and other iterables up front instead of raising TypeError later.
    if not isinstance(allowed_extensions, str):
        allowed_extensions = tuple(allowed_extensions)
    return sum(
        1
        for _, _, files in os.walk(folder)
        for name in files
        if name.endswith(allowed_extensions)
    )
# Example run: count only the .jpg and .mp4 files under the desktop folder.
# The call is made once; repeating it under different variable names would
# only rescan the same directory and print the same number again.
scan_dir = r"C:\Users\sannjayy\Desktop"
allowed_extensions = (".jpg", ".mp4")
print(fileCount(scan_dir, allowed_extensions))