Python: os.walk中的当前目录

9

我需要在os.walk过程中获取当前的目录。当只有一个子目录时它可以正常工作,但是当有多个子目录时会失败。请给予建议...

[代码]

# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION:  This is dangerous!  For example, if top == '/', it
# could affect all your disk files.

import os, glob, arcpy, csv, sys, shutil, datetime
# Script from the question: walk the tree under `top`, collect shapefiles whose
# names contain one of the SearchString fragments, print the resulting list and
# append it to Success_LOG.txt under RootOutput.
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
SearchString=['Temp_Pol', 'Spatial_Ex']

# Counters; successcount and errorcount are never updated in the code shown.
filecount=0
successcount=0
errorcount=0

print "Working in: "+os.getcwd()

# NOTE(review): this name shadows the built-in `list` for the rest of the script.
list =[]
# Append a timestamped header line to the log before the walk starts.
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run @:"+str(datetime.datetime.now())+"\n")
f.close()

#for File in FileList:
# topdown=False makes os.walk yield each directory after its subdirectories.
for root, dirs, files in os.walk(top, topdown=False):
  #for directory in dirs:
    for file in files:
      #currentPath=os.path.join(root,directory)
      # NOTE(review): `file` is a bare name taken from `files`, so abspath()
      # resolves it against the current working directory rather than against
      # `root`; os.path.join(root, file) is what yields the real location.
      currentPath=os.path.abspath(file)
      # NOTE(review): currentPath is built from a file name, yet it is passed
      # to chdir(), which expects a directory.
      os.chdir(currentPath)
      #arcpy.env.workspace = currentPath
      #print os.getcwd()
      # glob patterns are relative, so they search the current working directory.
      lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
      #print lstFCs
      OutPutDir=os.path.abspath(currentPath)
      for fc in lstFCs:
          filecount=filecount+1
          list.append(OutPutDir+"\\"+fc)       

      lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
      #print lstFCs
      for fc in lstFCs:
          # NOTE(review): `directory` is only bound by the commented-out
          # `for directory in dirs` loop above; as written it is undefined.
          # Joining it directly onto RootOutput would also drop any
          # intermediate directories between the root and `directory`.
          OutPutDir=RootOutput+"\\"+directory
          filecount=filecount+1
          list.append(OutPutDir+"\\"+fc)

print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
# The merge call above is commented out, so this message is printed regardless.
print 'Created: '+RootOutput+"\\Full_Extent.shp"
# Append the collected list and the merge target to the same log file.
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()

所以列表应该附加 fc 和完整路径,但实际上只得到了根路径和路径的最后一部分 - 而不是其中间的目录。
谢谢你的建议,
[错误信息]
工作目录:L:\Raster_Data\Topographic_Maps 合并: ['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp', 'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp', 'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp', 'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp', 'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp', 'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp', 'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp', 'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp', 'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp', 'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp', 'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp', 'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] 追踪(最近的调用在最后): 文件"L:\ Raster_Data \ Topographic_Maps \ 
CreateFileList.py",第64行, 在arcpy.Merge_management(list,RootOutput +“\ Full_Extent.shp”)
文件“C:\ Program Files \ ArcGIS \ Desktop10.0 \ arcpy \ arcpy \ management.py”中, 第3124行,合并。错误:无法执行。参数无效。 错误000732:输入数据集:数据集 'L:\ Raster_Data \ Topographic_Maps \ 100K \ 2010_100K \ Map_Sheets_BestResolution \ qld_north \ SE55 \ ecw \ prj_Temp_Polygon_Extent_0.shp; L:\ Raster_Data \ Topographic_Maps \ 100K \ 2010_100K \ Map_Sheets_BestResolution \ qld_north \ SE55 \ ecw \ Temp_Polygon_Extent_0.shp; L:\ Raster_Data \ Topographic_Maps \ ecw \ Spatial_Extent.shp; L:\ Raster_Data \ Topographic_Maps \ SC54 \ Spatial_Extent.shp; L:\ Raster_Data \ Topographic_Maps \ SC55 \ Spatial_Extent.shp; L:\ Raster_Data \ Topographic_Maps \ SD54 \ Spatial_Extent.shp; L:\ Raster_Data \ Topographic_Maps \ SD55 \

2
嗨,GeorgeC。这个问题更多地涉及一般编程,只是间接与GIS相关。我会将它迁移到Stackoverflow上。 - underdark
4个回答

7

感谢大家的帮助,我使用论坛上的意见完成了脚本。以下是脚本内容,供需要的人参考。祝好。

# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION:  This is dangerous!  For example, if top == '/', it
# could affect all your disk files.

import os, arcpy, sys, datetime
# Directory to search and directory that receives the log / merged output.
top = os.getcwd()
RootOutput = top
# File extensions (with or without the leading dot) and file-name fragments
# that a file must match to be collected.
FileTypes = ['shp']
SearchStrings = ['Temp_Pol', 'Spatial_Ex']


def collect_files(start_dir, file_types, search_strings):
    """Return the full paths of all matching files below *start_dir*.

    A file matches when its name ends with one of *file_types* (compared as
    a real extension, i.e. ``'shp'`` means ``'.shp'``) and its name contains
    at least one of *search_strings*.  Each file is reported once, even if
    several search strings match it.  Only the file name is tested, so a
    directory whose name happens to contain a search string does not pull in
    every file beneath it.

    The returned paths are ``os.path.join(root, name)``, where ``root`` is
    the directory currently being visited by ``os.walk``; this keeps every
    intermediate directory in the result.
    """
    # str.endswith accepts a tuple, so all extensions are checked in one call.
    suffixes = tuple('.' + file_type.lstrip('.') for file_type in file_types)
    matches = []
    for root, dirs, files in os.walk(start_dir, topdown=False):
        for name in files:
            if not name.endswith(suffixes):
                continue
            if any(fragment in name for fragment in search_strings):
                matches.append(os.path.join(root, name))
    return matches


def main():
    """Collect the matching files under ``top`` and record them in the log."""
    print("Working in: " + os.getcwd())

    log_path = os.path.join(RootOutput, 'Success_LOG.txt')
    merged_path = os.path.join(RootOutput, 'Full_Extent.shp')

    # Write the run header first so the log shows the run even if the walk fails.
    with open(log_path, 'a') as log:
        log.write("Log of files Succesfully processed. RESULT of process run @:" + str(datetime.datetime.now()) + "\n")

    file_list = collect_files(top, FileTypes, SearchStrings)
    print('Merging: ' + str(file_list))

    # Replace this with whatever function should consume the generated file
    # list, for example:
    # arcpy.Merge_management(file_list, merged_path)

    print('Created: ' + merged_path)
    with open(log_path, 'a') as log:
        log.write(str(file_list) + "\n\n Merged to: " + merged_path)


if __name__ == '__main__':
    main()

2
你应该使用:

os.path.join(root, file) 

而不是直接使用 file,这也是 os.walk 文档示例中建议的做法。
顺便提一下,要小心内置名称:file 是内置函数,list 也是。
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable

谢谢,我尝试过了,但是这只给出了根目录和文件名...例如,如果它是root>dir1>sub1>subsub1>file.shp,那么(root,file)会给出root\file.shp,(directory,file)会给出directory\file.shp,而我想要的是root\dir1\sub1\subsub1\file.shp被写入列表中。感谢您提醒保留字的问题。 - GeorgeC

2

对于某些应用程序,在 os.walk 遍历过程中确实必须切换当前工作目录。在这种情况下,我建议像下面这样切换两次工作目录:先进入目标目录,处理完后再切回顶层目录。我指的是仅靠绝对文件路径无法解决问题的情况。

from os import listdir
from os.path import isfile, join
import os
import re
# Store the location of the topmost directory so it can be restored after
# every job.
top = os.getcwd()

# `top` is an absolute path, so the walk itself is not affected by the
# chdir() calls made inside the loop.
for (dirname, dirs, files) in os.walk(top):
    for filename in files:
        os.chdir(dirname)
        try:
            # add all your operations for the current job in the directory
            pass
        finally:
            # Always go back to the top of the chain, even if the job raised,
            # so a failure never leaves the process in a subdirectory.
            os.chdir(top)

0

看起来你需要一个递归的 glob(通配符匹配)。下面这段代码可能会有用:

class rglob:
    '''A recursive/regex enhanced glob
       adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

       Instances are consumed through the legacy sequence-iteration protocol:
       a ``for`` loop (or list comprehension) calls ``__getitem__`` repeatedly
       until it raises ``IndexError``.  Each call returns the next matching
       path; an instance can therefore be iterated only once.
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        ''' @type    directory: C{str}
            @param   directory: Path to search
            @type    pattern: C{str}
            @param   pattern: Regular expression/wildcard pattern to match files against
            @type    regex: C{boolean}
            @param   regex: Use regular expression matching (if False, use fnmatch)
                            See U{http://docs.python.org/library/re.html}
            @type    regex_flags: C{int}
            @param   regex_flags: Flags to pass to the regular expression compiler.
                                  See U{http://docs.python.org/library/re.html}
            @type    recurse: C{boolean}
            @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]    # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []             # entries of the directory being scanned
        self.index = 0              # position of the next entry in self.files

    def __getitem__(self, index):
        ''' Return the full path of the next entry whose name matches the pattern.

            @type    index: C{int}
            @param   index: Ignored; it is only present because the iteration
                            protocol passes it.  Entries are always produced
                            in traversal order.
            @raise   IndexError: when every directory has been scanned, which
                                 is what terminates the iteration.
        '''
        # Imported once per call instead of once per examined entry.
        import fnmatch
        import re

        while True:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # Current listing is exhausted: move on to the next pending
                # directory.  pop() on an empty stack raises IndexError, and
                # that exception is deliberately allowed to propagate.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it (best effort)
                    # without hiding unrelated errors such as KeyboardInterrupt.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                # Queue real subdirectories (not symlinks) for later scanning.
                if os.path.isdir(fullname) and not os.path.islink(fullname) and self.recurse:
                    self.stack.append(fullname)
                if self.regex:
                    matched = re.search(self.pattern, name, self.regex_flags)
                else:
                    matched = fnmatch.fnmatch(name, self.pattern)
                if matched:
                    return fullname

# Collect every shapefile below `top`, report the list and append it to the
# log.  `top` and `RootOutput` are expected to be defined beforehand, as in
# the question's script.
shplist = [shp for shp in rglob(top, '*.shp')]
merged_path = os.path.join(RootOutput, 'Full_Extent.shp')

print('Merging: ' + str(shplist))
#arcpy.Merge_management(shplist, merged_path)
print('Created: ' + merged_path)

# `with` guarantees the log is closed even if the write fails.
with open(os.path.join(RootOutput, 'Success_LOG.txt'), 'a') as f:
    f.write(str(shplist) + "\n\n Merged to: " + merged_path)

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接