Consulting

Results 1 to 10 of 10

Thread: Looking in subfolders instead of root folder only

  1. #1
    VBAX Newbie
    Joined
    Jun 2020
    Posts
    5
    Location

    Looking in subfolders instead of root folder only

    I need to count the total number of pages in a big directory with thousands of pdf files and subdirectories.I found following code on the internet but it only search in the root directory that you set in the dialog box. How can I make this code working to look in all subdirectories too?

    Sub Test()    Dim I As Long
        Dim xRg As Range
        Dim xStr As String
        Dim xFd As FileDialog
        Dim xFdItem As Variant
        Dim xFileName As String
        Dim xFileNum As Long
        Dim RegExp As Object
        Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
        If xFd.Show = -1 Then
            xFdItem = xFd.SelectedItems(1) & Application.PathSeparator
            'xFileName = Dir(xFdItem & "*_2020_*.pdf", vbDirectory)
            xFileName = Dir(xFdItem & "*.pdf", vbDirectory)
            Set xRg = Range("A1")
            Range("A:B").ClearContents
            Range("A1:B1").Font.Bold = True
            xRg = "File Name"
            xRg.Offset(0, 1) = "Pages"
            I = 2
            xStr = ""
            Do While xFileName <> ""
                Cells(I, 1) = xFileName
                Set RegExp = CreateObject("VBscript.RegExp")
                RegExp.Global = True
                RegExp.Pattern = "/Type\s*/Page[^s]"
                xFileNum = FreeFile
                Open (xFdItem & xFileName) For Binary As #xFileNum
                    xStr = Space(LOF(xFileNum))
                    Get #xFileNum, , xStr
                Close #xFileNum
                Cells(I, 2) = RegExp.Execute(xStr).Count
                I = I + 1
                xFileName = Dir
            Loop
            Columns("A:B").AutoFit
        End If
    End Sub

  2. #2
    Distinguished Lord of VBAX VBAX Grand Master Bob Phillips's Avatar
    Joined
    Apr 2005
    Posts
    25,453
    Location
    You would need to recurse the subfolders. Not tested, but it would look something like this

    Dim xRg As Range
    Dim RegExp As Object
    Dim nextrow As Long
    
    Sub Test()
    Dim xFd As FileDialog
    Dim xFdItem As String
        
        Set RegExp = CreateObject("VBscript.RegExp")
    
        Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
        If xFd.Show = -1 Then
        
            Set xRg = Range("A1")
            With Range("A1:B1")
            
                .EntireColumn.ClearContents
            
                .Font.Bold = True
                .Value = Array("File Name", "Pages")
            End With
            nextrow = 2
        
            xFdItem = xFd.SelectedItems(1) & Application.PathSeparator
            
            SelectFiles xFdItem
            Columns("A:B").AutoFit
        End If
    End Sub
    
    Private Function SelectFiles(Optional Filepath As String)
    Static FSO As Object
    Dim mSubFolder As Object
    Dim mFolder As Object
    Dim mFile As Object
    Dim mFiles As Object
    Dim mPath As Variant
    Dim xStr As String
    Dim xFileNum As Long
    
        If FSO Is Nothing Then Set FSO = CreateObject("Scripting.FilesystemObject")
        
        mPath = Split(Filepath, Application.PathSeparator)
    
        Set mFolder = FSO.GetFolder(Filepath)
        Set mFiles = mFolder.Files
        For Each mFile In mFiles
            
            If Right$(mFile.Name, 4) = ".pdf" Then
            
                Cells(nextrow, 1) = mFile.Name
                RegExp.Global = True
                RegExp.Pattern = "/Type\s*/Page[^s]"
                xFileNum = FreeFile
                Open (mFolder.Path & mFile.Name) For Binary As #xFileNum
                    xStr = Space(LOF(xFileNum))
                    Get #xFileNum, , xStr
                Close #xFileNum
                Cells(nextrow, 2) = RegExp.Execute(xStr).Count
                nextrow = nextrow + 1
            End If
        Next mFile
        
        For Each mSubFolder In mFolder.subfolders
        
            SelectFiles mSubFolder.Path
        Next
    End Function
    ____________________________________________
    Nihil simul inventum est et perfectum

    Abusus non tollit usum

    Last night I dreamed of a small consolation enjoyed only by the blind: Nobody knows the trouble I've not seen!
    James Thurber

  3. #3
    VBAX Newbie
    Joined
    Jun 2020
    Posts
    5
    Location
    code crashes here: Cells(nextrow, 1) = mFile.Name
    Another pitfall: it's a directory with a collection of 20 years of pdf files. Naming of pdf files is name_YYYY_MM_affix.pdf. I want a report of total pages of all pdf files made in 2020 for example
    Last edited by Bob Phillips; 11-24-2020 at 12:40 PM. Reason: Removed redundant quote

  4. #4
    Knowledge Base Approver VBAX Guru GTO's Avatar
    Joined
    Sep 2008
    Posts
    3,368
    Location
    Greetings,

    Quote Originally Posted by Jesseke View Post
    Another pitfall: it's a directory with a collection of 20 years of pdf files. Naming of pdf files is name_YYYY_MM_affix.pdf. I want a report of total pages of all pdf files made in 2020 for example
    Presuming the name_ part is just letters/spaces followed by an underscore, and using Bob's function, maybe just add a second pattern to test against for the filenames sought.

    Private Function SelectFiles(Optional Filepath As String)
    Const YEAR_SOUGHT As String = "2020"
    ' For the name_ part, assumes just letters and space(s), alter to suite
    Const PART1 As String = "^[a-zA-Z\s]+_"
    Static FSO As Object
    Dim mSubFolder As Object
    Dim mFolder As Object
    Dim mFile As Object
    Dim mFiles As Object
    Dim mPath As Variant
    Dim xStr As String
    Dim xFileNum As Long
    
    
        If FSO Is Nothing Then Set FSO = CreateObject("Scripting.FilesystemObject")
        
        'mPath = Split(Filepath, Application.PathSeparator)
    
        Set mFolder = FSO.GetFolder(Filepath)
        Set mFiles = mFolder.Files
        For Each mFile In mFiles
            
            RegExp.Pattern = PART1 & YEAR_SOUGHT & "\_[0-9]{2}_affix.pdf$"
            'name_YYYY_MM_affix.pdf
            If RegExp.Test(mFile.Name) Then
                Cells(nextrow, 1) = mFile.Name
                RegExp.Global = True
                RegExp.Pattern = "/Type\s*/Page[^s]"
                xFileNum = FreeFile
                'Change Open (mFolder.Path & mFile.Name) ... to:
                Open (mFolder.Path & "\" & mFile.Name) For Binary As #xFileNum
                    xStr = Space(LOF(xFileNum))
                    Get #xFileNum, , xStr
                Close #xFileNum
                Cells(nextrow, 2) = RegExp.Execute(xStr).Count
                nextrow = nextrow + 1
            End If
        Next mFile
        
        For Each mSubFolder In mFolder.subfolders
            SelectFiles mSubFolder.Path
        Next
        
    End Function
    I could not replicate your crashing at Cells(nextrow, 1) = mFile.Name. I assume you have a worksheet activated and the code in a regular module, is that correct?

    Hope that helps,

    Mark
    Last edited by GTO; 11-22-2020 at 12:03 AM. Reason: It's been too long and I goofed...

  5. #5
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Try:
    Sub Demo()
    Application.ScreenUpdating = False
    Dim StrFolder As String, StrFileList As String, StrFileName As String
    Dim i As Long, j As Long, xStr As String, RegExp As Object
    ' Browse for the starting folder
    StrFolder = GetTopFolder
    If StrFolder = "" Then Exit Sub
    StrFolder = StrFolder & "\*.pdf"
    'Get the matching folder & sub-folder contents
    StrFileList = CreateObject("wscript.shell").Exec("Cmd /c Dir """ & StrFolder & """ /B/S").StdOut.ReadAll
    If UBound(Split(StrFileList, vbCrLf)) = 0 Then Exit Sub
    Set RegExp = CreateObject("VBscript.RegExp")
    RegExp.Global = True
    RegExp.Pattern = "/Type\s*/Page[^s]"
    'Output the results
    For i = 0 To UBound(Split(StrFileList, vbCrLf)) - 1
      StrFileName = Split(StrFileList, vbCrLf)(i)
      Range("A" & i + 1).Value = StrFileName
      j = FreeFile
      Open StrFileName For Binary As j
      xStr = Space(LOF(j))
      Get j, , xStr
      Close j
      Range("B" & i + 1).Value = RegExp.Execute(xStr).Count
    Next
    Set RegExp = Nothing
    Application.ScreenUpdating = True
    End Sub
    
    
    Function GetTopFolder() As String
    Dim oFolder As Object
    GetTopFolder = ""
    Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
    If (Not oFolder Is Nothing) Then GetTopFolder = oFolder.Items.Item.Path
    Set oFolder = Nothing
    End Function
    Last edited by macropod; 11-22-2020 at 01:42 PM.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  6. #6
    Knowledge Base Approver VBAX Wizard
    Joined
    Apr 2012
    Posts
    5,642
    All PDF-files in G:\ and subdirectories that contain 2020 in the filename

    Sub M_snb()
      c00 = "G:\*2020*.pdf"
    
      c01 = CreateObject("wscript.shell").Exec("cmd /c Dir """ & c00 & """ /b /s").StdOut.ReadAll
      If c01 <> "" Then
        sn = Split(c01, vbCrLf)
        With CreateObject("VBscript.RegExp")
          .Global = True
          .Pattern = "/Type\s*/Page[^s]"
    
          For j = 0 To UBound(sn) - 1
            Open sn(j) For Binary As 1
              sn(j) = sn(j) & " | " & .Execute(Input(LOF(1), 1)).Count
            Close
          Next
        End With
      End If
       
      Cells(1).Resize(UBound(sn) + 1) = Application.Transpose(sn)
    End Sub

  7. #7
    VBAX Newbie
    Joined
    Jun 2020
    Posts
    5
    Location
    Quote Originally Posted by macropod View Post
    Try:
    Quote Originally Posted by snb View Post
    All PDF-files in G:\ and subdirectories that contain 2020 in the filename
    Code keeps running without any progress (progress circle is showing). Maybe the large networkdrive is the reason?
    Last edited by macropod; 11-23-2020 at 06:00 AM. Reason: Deleted unnecessary code quotes & merged otherwise duplicate posts

  8. #8
    Knowledge Base Approver VBAX Wizard
    Joined
    Apr 2012
    Posts
    5,642
    Please do not quote !
    Yes, it might take some time the first time it runs. Do not worry, but only wait for the results you are looking for.
    Can you post the adapted code you use ?
    When the results have been shown we can make still 1 improvement in the code.
    Last edited by snb; 11-23-2020 at 07:41 AM.

  9. #9
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Quote Originally Posted by Jesseke View Post
    Code keeps running without any progress (progress circle is showing). Maybe the large networkdrive is the reason?
    Well, you did say you have:
    Quote Originally Posted by Jesseke View Post
    a big directory with thousands of pdf files and subdirectories.
    You can hardly expect instantaneous results, especially if network latency is significant.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  10. #10
    VBAX Newbie
    Joined
    Jun 2020
    Posts
    5
    Location
    Thanks Macropod and snb to telling me to have patience!

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •