用 Python 得出資料夾項下的各子資料夾

4/17/2021 Python

假設現在我們有一個需求,需要取得 C:\AAA\BBB\CCC 底下的各資料夾路徑(並排除檔案),而這個路徑底下可能有好幾層,如: C:\AAA\BBB\CCC\111\222\333\444
而我們只須取到資料夾下的第三層就好,也就是 C:\AAA\BBB\CCC\111\222\333,該怎麼做呢?

# 取得目標資料夾底下前三層的資料夾

只須 import os 並使用 os.walk 就可遍歷(iterate)資料夾下的所有檔案及資料夾

import os
import re

readPath = r'C:\AAA\BBB\CCC'

# 運用 for in 的這三個參數可分別得到:
# folder Path (資料夾完整路徑), folder name List (資料夾名稱,用 list 型態儲存), file name (檔案名稱,用 list 型態儲存)
for folderPath, folderNameList, fileName in os.walk(readPath):
    
    # 因為我們只要取得資料夾就好,因此這邊只要拿 folder path
    # 但題目有限制只需取得目標資料夾的底下三層就好,因此我們建立一個方法 def removePath 來去除第三層以下的資料夾
    newPath = removePath(folderPath)

# 這裡將方法放在程式下方是為了講解,在執行程式時 def 須放在程式上方
# 運用 re.finditer 取出每個 folder path 的 \ 在字串的第幾位(index)(要用 index.start() 才可得到 index)
# eg. C:\AAA\BBB\CCC 的 \ 就分別在 2, 6, 10
def removePath(folderPath):
    
    # 用 list 將 \ 的 index 儲存起來
    indexList = []
    for index in re.finditer(r'\\', folderPath):
        indexList.append(index.start())
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

# 去除資料夾路徑的前段路徑

在上面那段,我們已可成功取得目標資料夾(C:\AAA\BBB\CCC)項下的前三層路徑資料夾 C:\AAA\BBB\CCC\111\222\333
現在要將這個路徑去掉目標資料夾的路徑,讓它變成 111\222\333

# 從原始路徑:C:\AAA\BBB\CCC\111\222\333\444\555
# 更新為新路徑:111\222\333

# 用 len(indexList) 來判斷該資料夾共有幾層(含目標資料夾 C:\AAA\BBB\CCC )
# 並考慮到每個資料夾所在階層不一樣,而分別截取我們所需要的那段路徑
# 一樣用 indexList 裡面的 \ index 來定位字串再做擷取
if len(indexList) == 5:
     folderPath = folderPath[indexList[4] + 1 :]
     return folderPath
                
elif len(indexList) == 6:
    folderPath = folderPath[indexList[3] + 1 : ]
    return folderPath

elif len(indexList) >= 7:
    folderPath = folderPath[indexList[3]+1 : indexList[6]]
    return folderPath
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

# 將結果寫入 Excel

# 建立 Excel 檔案(檔名為 Folder Name),並新增一個分頁(分頁名稱為 sheetName)
workbook = xlsxwriter.Workbook('Folder Name.xlsx')
worksheet = workbook.add_worksheet('sheetName')

# 建立一個 list 來儲存已寫入 excel 中的資料夾路徑
# 因為去頭去尾後,路徑可能會有重複的
newPathList = []

for folderPath, folderNameList, fileName in os.walk(readPath):
    newPath = removePath(folderPath)

    # 如果 新得出 的資料夾路徑 不是空的(去頭去尾後,可能會有空的) 以及 目前該路徑還沒被寫入(newPathList)
    if newPath and newPath not in newPathList:
        # 就寫入該路徑
        worksheet.write(row, col, newPath)
        # 寫完後跳下一行,避免重複寫在同一儲存格,後者覆蓋前者
        row += 1
        # 寫完後,將該路徑存至 newPathList,用來檢查是否已重複寫入一樣的路徑
        newPathList.append(newPath)

# 最後關閉 Excel
workbook.close()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

# 成果

最後下面這個 Source Code 則是可以藉由讓 User 輸入 1 或 2 ,來讀取二個不同路徑的資料夾(且二個資料夾項下的階層也不同)

# Source Code

import os
import xlsxwriter
import re

# Excel Settings 
# Start from the first cell. Rows and columns are zero indexed.
row = 0
col = 0

# Decide path and sheet name
def getPath(userInput, sheetName):
    """Translate the menu choice into a folder to scan and a worksheet name.

    Args:
        userInput: The menu selection; '1' and '2' are the recognised values.
        sheetName: Accepted for call compatibility; the lookup does not use it.

    Returns:
        A ``(readPath, sheetName)`` tuple for a recognised choice, otherwise
        ``None``.
    """
    # Each entry is (menu choice, root folder to walk, worksheet name).
    # An alternative root for choice '1' was kept commented out in the
    # original: r'R:\00.Shared\Vendor Management_Secretarial\1.Active Vendor'
    options = (
        ('1', r'C:\AAA\BBB\CCC', 'ABC'),
        ('2', r'C:\XXX\YYY\ZZZ', 'XYZ'),
    )

    for choice, rootFolder, targetSheet in options:
        if userInput == choice:
            return rootFolder, targetSheet

    # Unrecognised choice: the caller receives None.
    return None

def removePath(sheetName, folderPath):
    r"""Cut a walked folder path down to the segment written to the sheet.

    The path is sliced at the positions of its backslashes; which slice is
    taken depends on the sheet name and on how many backslashes the path has.

    For sheet 'ABC':
        * exactly 5 backslashes: everything after the 5th one
          (``C:\AAA\BBB\CCC\111\222`` -> ``222``)
        * exactly 6 backslashes: everything after the 4th one
          (``C:\AAA\BBB\CCC\111\222\333`` -> ``111\222\333``)
        * 7 or more: the text between the 4th and the 7th backslash
          (``C:\AAA\BBB\CCC\111\222\333\444`` -> ``111\222\333``)

    For sheet 'XYZ':
        * 4 or 5 backslashes: everything after the 4th one
        * 6 or more: the text between the 4th and the 6th backslash

    Args:
        sheetName: 'ABC' or 'XYZ'; selects which slicing rules apply.
        folderPath: Backslash-separated folder path.

    Returns:
        The trimmed path, or ``None`` when the path has fewer backslashes
        than the rules cover or the sheet name is not recognised.
    """
    # Position of every backslash in the path, in order of appearance.
    slashAt = [hit.start() for hit in re.finditer(r'\\', folderPath)]
    slashCount = len(slashAt)

    if sheetName == 'ABC':
        # NOTE(review): with exactly 5 separators only the last folder is
        # kept, unlike the deeper cases which start after the 4th separator;
        # confirm this is intended.
        if slashCount == 5:
            return folderPath[slashAt[4] + 1:]
        if slashCount == 6:
            return folderPath[slashAt[3] + 1:]
        if slashCount >= 7:
            return folderPath[slashAt[3] + 1:slashAt[6]]
        return None

    if sheetName == 'XYZ':
        if slashCount in (4, 5):
            return folderPath[slashAt[3] + 1:]
        if slashCount >= 6:
            return folderPath[slashAt[3] + 1:slashAt[5]]
        return None

    return None

# Ask which folder tree to export; getPath recognises '1' and '2'.
userInput = input("To run the program for ABC press 1; Inactive XYZ 2 : ")
sheetName = 'Sheet1'


try:
    # getPath yields (readPath, sheetName), or None for an unrecognised choice.
    getPathTuple = getPath(userInput, sheetName)

    if getPathTuple is None:
        # Detect the bad selection explicitly instead of relying on the
        # TypeError raised by subscripting None.
        print('You typed the wrong number')
    else:
        readPath = getPathTuple[0]
        sheetName = getPathTuple[1]

        # Create a workbook and add a worksheet.
        workbook = xlsxwriter.Workbook('Folder Name.xlsx')
        worksheet = workbook.add_worksheet(sheetName)

        # Trimmed paths already written; trimming can make different
        # folders collapse to the same text, so duplicates are skipped.
        newPathList = []

        for folderPath, folderNameList, fileName in os.walk(readPath):
            newPath = removePath(sheetName, folderPath)

            # Skip folders removePath returns nothing for, and repeats.
            if newPath and newPath not in newPathList:
                worksheet.write(row, col, newPath)
                # Advance to the next row so entries do not overwrite
                # each other.
                row += 1
                newPathList.append(newPath)

        workbook.close()

except Exception as e:
    # Any other failure (walking the tree, writing or saving the workbook)
    # is reported as it is, without blaming the user's input.
    print('Exception : ', e)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
Last Updated: 6/30/2023, 2:59:11 PM

歡迎點擊追蹤:

(adsbygoogle = window.adsbygoogle || []).push({});