PDA

View Full Version : [SOLVED:] Looking in subfolders instead of root folder only



Jesseke
11-20-2020, 06:00 AM
I need to count the total number of pages in a big directory with thousands of pdf files and subdirectories.I found following code on the internet but it only search in the root directory that you set in the dialog box. How can I make this code working to look in all subdirectories too?


Sub Test() Dim I As Long
Dim xRg As Range
Dim xStr As String
Dim xFd As FileDialog
Dim xFdItem As Variant
Dim xFileName As String
Dim xFileNum As Long
Dim RegExp As Object
Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
If xFd.Show = -1 Then
xFdItem = xFd.SelectedItems(1) & Application.PathSeparator
'xFileName = Dir(xFdItem & "*_2020_*.pdf", vbDirectory)
xFileName = Dir(xFdItem & "*.pdf", vbDirectory)
Set xRg = Range("A1")
Range("A:B").ClearContents
Range("A1:B1").Font.Bold = True
xRg = "File Name"
xRg.Offset(0, 1) = "Pages"
I = 2
xStr = ""
Do While xFileName <> ""
Cells(I, 1) = xFileName
Set RegExp = CreateObject("VBscript.RegExp")
RegExp.Global = True
RegExp.Pattern = "/Type\s*/Page[^s]"
xFileNum = FreeFile
Open (xFdItem & xFileName) For Binary As #xFileNum
xStr = Space(LOF(xFileNum))
Get #xFileNum, , xStr
Close #xFileNum
Cells(I, 2) = RegExp.Execute(xStr).Count
I = I + 1
xFileName = Dir
Loop
Columns("A:B").AutoFit
End If
End Sub

Bob Phillips
11-20-2020, 06:52 AM
You would need to recurse the subfolders. Not tested, but it would look something like this


Dim xRg As Range
Dim RegExp As Object
Dim nextrow As Long

Sub Test()
Dim xFd As FileDialog
Dim xFdItem As String

Set RegExp = CreateObject("VBscript.RegExp")

Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
If xFd.Show = -1 Then

Set xRg = Range("A1")
With Range("A1:B1")

.EntireColumn.ClearContents

.Font.Bold = True
.Value = Array("File Name", "Pages")
End With
nextrow = 2

xFdItem = xFd.SelectedItems(1) & Application.PathSeparator

SelectFiles xFdItem
Columns("A:B").AutoFit
End If
End Sub

Private Function SelectFiles(Optional Filepath As String)
Static FSO As Object
Dim mSubFolder As Object
Dim mFolder As Object
Dim mFile As Object
Dim mFiles As Object
Dim mPath As Variant
Dim xStr As String
Dim xFileNum As Long

If FSO Is Nothing Then Set FSO = CreateObject("Scripting.FilesystemObject")

mPath = Split(Filepath, Application.PathSeparator)

Set mFolder = FSO.GetFolder(Filepath)
Set mFiles = mFolder.Files
For Each mFile In mFiles

If Right$(mFile.Name, 4) = ".pdf" Then

Cells(nextrow, 1) = mFile.Name
RegExp.Global = True
RegExp.Pattern = "/Type\s*/Page[^s]"
xFileNum = FreeFile
Open (mFolder.Path & mFile.Name) For Binary As #xFileNum
xStr = Space(LOF(xFileNum))
Get #xFileNum, , xStr
Close #xFileNum
Cells(nextrow, 2) = RegExp.Execute(xStr).Count
nextrow = nextrow + 1
End If
Next mFile

For Each mSubFolder In mFolder.subfolders

SelectFiles mSubFolder.Path
Next
End Function

Jesseke
11-20-2020, 07:17 AM
code crashes here: Cells(nextrow, 1) = mFile.Name
Another pitfall: it's a directory with a collection of 20 years of pdf files. Naming of pdf files is name_YYYY_MM_affix.pdf. I want a report of total pages of all pdf files made in 2020 for example

GTO
11-21-2020, 11:52 PM
Greetings,


Another pitfall: it's a directory with a collection of 20 years of pdf files. Naming of pdf files is name_YYYY_MM_affix.pdf. I want a report of total pages of all pdf files made in 2020 for example

Presuming the name_ part is just letters/spaces followed by an underscore, and using Bob's function, maybe just add a second pattern to test against for the filenames sought.



Private Function SelectFiles(Optional Filepath As String)
Const YEAR_SOUGHT As String = "2020"
' For the name_ part, assumes just letters and space(s), alter to suite
Const PART1 As String = "^[a-zA-Z\s]+_"
Static FSO As Object
Dim mSubFolder As Object
Dim mFolder As Object
Dim mFile As Object
Dim mFiles As Object
Dim mPath As Variant
Dim xStr As String
Dim xFileNum As Long


If FSO Is Nothing Then Set FSO = CreateObject("Scripting.FilesystemObject")

'mPath = Split(Filepath, Application.PathSeparator)

Set mFolder = FSO.GetFolder(Filepath)
Set mFiles = mFolder.Files
For Each mFile In mFiles

RegExp.Pattern = PART1 & YEAR_SOUGHT & "\_[0-9]{2}_affix.pdf$"
'name_YYYY_MM_affix.pdf
If RegExp.Test(mFile.Name) Then
Cells(nextrow, 1) = mFile.Name
RegExp.Global = True
RegExp.Pattern = "/Type\s*/Page[^s]"
xFileNum = FreeFile
'Change Open (mFolder.Path & mFile.Name) ... to:
Open (mFolder.Path & "\" & mFile.Name) For Binary As #xFileNum
xStr = Space(LOF(xFileNum))
Get #xFileNum, , xStr
Close #xFileNum
Cells(nextrow, 2) = RegExp.Execute(xStr).Count
nextrow = nextrow + 1
End If
Next mFile

For Each mSubFolder In mFolder.subfolders
SelectFiles mSubFolder.Path
Next

End Function


I could not replicate your crashing at Cells(nextrow, 1) = mFile.Name. I assume you have a worksheet activated and the code in a regular module, is that correct?

Hope that helps,

Mark

macropod
11-22-2020, 02:01 AM
Try:

Sub Demo()
Application.ScreenUpdating = False
Dim StrFolder As String, StrFileList As String, StrFileName As String
Dim i As Long, j As Long, xStr As String, RegExp As Object
' Browse for the starting folder
StrFolder = GetTopFolder
If StrFolder = "" Then Exit Sub
StrFolder = StrFolder & "\*.pdf"
'Get the matching folder & sub-folder contents
StrFileList = CreateObject("wscript.shell").Exec("Cmd /c Dir """ & StrFolder & """ /B/S").StdOut.ReadAll
If UBound(Split(StrFileList, vbCrLf)) = 0 Then Exit Sub
Set RegExp = CreateObject("VBscript.RegExp")
RegExp.Global = True
RegExp.Pattern = "/Type\s*/Page[^s]"
'Output the results
For i = 0 To UBound(Split(StrFileList, vbCrLf)) - 1
StrFileName = Split(StrFileList, vbCrLf)(i)
Range("A" & i + 1).Value = StrFileName
j = FreeFile
Open StrFileName For Binary As j
xStr = Space(LOF(j))
Get j, , xStr
Close j
Range("B" & i + 1).Value = RegExp.Execute(xStr).Count
Next
Set RegExp = Nothing
Application.ScreenUpdating = True
End Sub


Function GetTopFolder() As String
Dim oFolder As Object
GetTopFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetTopFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function

snb
11-22-2020, 03:55 AM
All PDF-files in G:\ and subdirectories that contain 2020 in the filename


Sub M_snb()
c00 = "G:\*2020*.pdf"

c01 = CreateObject("wscript.shell").Exec("cmd /c Dir """ & c00 & """ /b /s").StdOut.ReadAll
If c01 <> "" Then
sn = Split(c01, vbCrLf)
With CreateObject("VBscript.RegExp")
.Global = True
.Pattern = "/Type\s*/Page[^s]"

For j = 0 To UBound(sn) - 1
Open sn(j) For Binary As 1
sn(j) = sn(j) & " | " & .Execute(Input(LOF(1), 1)).Count
Close
Next
End With
End If

Cells(1).Resize(UBound(sn) + 1) = Application.Transpose(sn)
End Sub

Jesseke
11-23-2020, 03:07 AM
Try:


All PDF-files in G:\ and subdirectories that contain 2020 in the filename
Code keeps running without any progress (progress circle is showing). Maybe the large networkdrive is the reason? :(

snb
11-23-2020, 04:09 AM
Please do not quote !
Yes, it might take some time the first time it runs. Do not worry, but only wait for the results you are looking for.
Can you post the adapted code you use ?
When the results have been shown we can make still 1 improvement in the code.

macropod
11-23-2020, 06:03 AM
Code keeps running without any progress (progress circle is showing). Maybe the large networkdrive is the reason? :(
Well, you did say you have:

a big directory with thousands of pdf files and subdirectories.
You can hardly expect instantaneous results, especially if network latency is significant.

Jesseke
11-24-2020, 07:32 AM
Thanks Macropod and snb to telling me to have patience! :bow: