PDA

View Full Version : Solved: (VBA) Recursively list files and output to csv



omgrtm
11-16-2009, 10:06 AM
Hi guys,

I've got a problem with my VBA (you guessed it). Apologies for link format (my first post here), but i think it'd be better if you knew where i was coming from.

I took code from this post:
vbaexpress dot com/kb/getarticle.php?kb_id=405
and added in:
wilmott dot com/messageview.cfm?catid=10&threadid=40372

Basically, first code would output to a worksheet, but as I theoretically could breach the 65k rows limit, I'd like to output to a csv instead (i.e. write every line found out to an external text file). I don't really need the time limit functionality, i didn't get around to deleting that yet.

I'm running Excel 2003 SP2 on Windows XP Pro SP3. So the code that i have modified (combined version of the two above):


Public X()
Public i As Long
Public objShell, objFolder, objFolderItem
Public FSO, oFolder, Fil

Sub MainExtractData()

Dim NewSht As Worksheet
Dim MainFolderName As String
Dim TimeLimit As Long, StartTime As Double

Const ForReading = 1, ForWriting = 2, ForAppending = 8
Dim FSO, f, g

ReDim X(1 To 65536, 1 To 11)

Set objShell = CreateObject("Shell.Application")
TimeLimit = 0
StartTime = Timer

Application.ScreenUpdating = False
MainFolderName = "c:\temp" 'list files in this folder
' Set NewSht = ThisWorkbook.Sheets.Add

' File attribs are as follows:
' X(1, 1) = "Path"
' X(1, 2) = "File Name"
' X(1, 3) = "Last Accessed"
' X(1, 4) = "Last Modified"
' X(1, 5) = "Created"
' X(1, 6) = "Type"
' X(1, 7) = "Size"
' X(1, 8) = "Owner"
'X(1, 9) = "Author"
'X(1, 10) = "Title"
'X(1, 11) = "Comments"

i = 1

Set FSO = CreateObject("scripting.FileSystemObject")
Set oFolder = FSO.GetFolder(MainFolderName)
'error handling to stop the obscure error that occurs at time when retrieving DateLastAccessed
On Error Resume Next

Set f = FSO.OpenTextFile("c:\temp\ls_output.csv", ForWriting, True)

For Each Fil In oFolder.Files
Set objFolder = objShell.Namespace(oFolder.path)
Set objFolderItem = objFolder.ParseName(Fil.Name)
i = i + 1
If i Mod 20 = 0 And TimeLimit <> 0 And Timer > (TimeLimit * 60 + StartTime) Then
GoTo FastExit
End If
If i Mod 50 = 0 Then
Application.StatusBar = "Processing File " & i
DoEvents
End If


d_path = oFolder.path
d_filename = Fil.Name
d_dateaccess = Fil.DateLastAccessed
d_lastmod = Fil.DateLastModified
d_datecreate = Fil.DateCreated
d_type = Fil.Type
d_size = Fil.Size
d_owner = objFolder.GetDetailsOf(objFolderItem, 8)
d_all = d_path & "," & d_filename & "," & d_dateaccess & "," & d_lastmod & "," & d_datecreate & "," & d_type & "," & d_size & "," & d_owner
f.Writeline d_all 'write ob

Next

'Get subdirectories
If TimeLimit = 0 Then
Call RecursiveFolder(oFolder, 0)
Else
If Timer < (TimeLimit * 60 + StartTime) Then Call RecursiveFolder(oFolder, TimeLimit * 60 + StartTime)
f.Write d_all 'write ob
End If
FastExit:
' Range("A:K") = X
' If i < 65535 Then Range(Cells(i + 1, "A"), Cells(65536, "A")).EntireRow.Delete
' Range("A:K").WrapText = False
' Range("A:K").EntireColumn.AutoFit
' Range("1:1").Font.Bold = True
' Rows("2:2").Select
' ActiveWindow.FreezePanes = True
' Range("a1").Activate

Set FSO = Nothing
Set objShell = Nothing
Set oFolder = Nothing
Set objFolder = Nothing
Set objFolderItem = Nothing
Set Fil = Nothing
Application.StatusBar = ""
Application.ScreenUpdating = True
f.Close
End Sub

Sub RecursiveFolder(xFolder, TimeTest As Long)
Dim SubFld

For Each SubFld In xFolder.SubFolders
Set oFolder = FSO.GetFolder(SubFld)
Set objFolder = objShell.Namespace(SubFld.path)
For Each Fil In SubFld.Files
Set objFolder = objShell.Namespace(oFolder.path)
'Problem with objFolder at times
If Not objFolder Is Nothing Then
Set objFolderItem = objFolder.ParseName(Fil.Name)
i = i + 1
If i Mod 20 = 0 And TimeTest <> 0 And Timer > TimeTest Then
Exit Sub
End If
If i Mod 50 = 0 Then
Application.StatusBar = "Processing File " & i
DoEvents
End If
d_path = oFolder.path
d_filename = Fil.Name
d_dateaccess = Fil.DateLastAccessed
d_lastmod = Fil.DateLastModified
d_datecreate = Fil.DateCreated
d_type = Fil.Type
d_size = Fil.Size
d_owner = objFolder.GetDetailsOf(objFolderItem, 8)

d_all = d_path & "," & d_filename & "," & d_dateaccess & "," & d_lastmod & "," & d_datecreate & "," & d_type & "," & d_size & "," & d_owner & ","
f.Writeline d_all 'write ob
Else
Debug.Print Fil.path & " " & Fil.Name
End If

Next
Call RecursiveFolder(SubFld, TimeTest)
Next
End Sub



What i'm actually after is - files in the current directory (c:\temp) to be listed recursively (i.e. i also want files from subfolders), with their attributes (date modified, file owner etc) - output into a comma separated file (to be used elsewhere).

What this code actually does is list the files (and attribs) in the current folder, and closes the file. After inserting some breaks it seems that it jumps from this line
Set oFolder = FSO.GetFolder(SubFld)
(RecursiveFolder sub) back to a previous sub where the call to recursivefolder came from. This is weird as it does not happen in the original piece of code. Feel there's a schoolboy error in there somewhere!


Any help is greatly appreciated.

Cheers
Dan

GTO
11-16-2009, 10:21 PM
Greetings Dan,

You did not declare all your new variables (Option Explicit), so not well tested, but here's what I think is happening...


Public X()
Public i As Long
Public objShell, objFolder, objFolderItem

'FSO is declared Public at the top of the module, so that when the recursive sub (RecursiveFolder) is called,
' FSO doesn't need passed to the function...
Public FSO, oFolder, Fil

Sub MainExtractData()

Dim NewSht As Worksheet
Dim MainFolderName As String
Dim TimeLimit As Long, StartTime As Double

Const ForReading = 1, ForWriting = 2, ForAppending = 8
'...but... FSO is now declared at the procedure's level. So when it is created (CreateObject below),
' it will only be accessable from within this procedure. This holds true for 'f' as well,
' which also causes a problem later...
Dim FSO, f, g

'...Other Statements...

Set FSO = CreateObject("scripting.FileSystemObject")
Set oFolder = FSO.GetFolder(MainFolderName)
'error handling to stop the obscure error that occurs at time when retrieving DateLastAccessed
On Error Resume Next

Set f = FSO.OpenTextFile("c:\temp\ls_output.csv", ForWriting, True)

For Each Fil In oFolder.Files

'...Other Statements...

Next

'Get subdirectories
If TimeLimit = 0 Then
Call RecursiveFolder(oFolder, 0)
Else
If Timer < (TimeLimit * 60 + StartTime) Then Call RecursiveFolder(oFolder, TimeLimit * 60 + StartTime)
f.Write d_all 'write ob
End If

FastExit:

'...Other Statements...

End Sub

Sub RecursiveFolder(xFolder, TimeTest As Long)
Dim SubFld

For Each SubFld In xFolder.SubFolders
' Now - this statement fails, as FSO doesn't exist in this Sub... You do not directly see this,
' as error handling was turned off in the calling procedure.
Set oFolder = FSO.GetFolder(SubFld)
Set objFolder = objShell.Namespace(SubFld.path)
For Each Fil In SubFld.Files
Set objFolder = objShell.Namespace(oFolder.path)
'Problem with objFolder at times
If Not objFolder Is Nothing Then
Set objFolderItem = objFolder.ParseName(Fil.Name)
i = i + 1
If i Mod 20 = 0 And TimeTest <> 0 And Timer > TimeTest Then
Exit Sub
End If
If i Mod 50 = 0 Then
Application.StatusBar = "Processing File " & i
DoEvents
End If
d_path = oFolder.path
d_filename = Fil.Name
d_dateaccess = Fil.DateLastAccessed
d_lastmod = Fil.DateLastModified
d_datecreate = Fil.DateCreated
d_type = Fil.Type
d_size = Fil.Size
d_owner = objFolder.GetDetailsOf(objFolderItem, 8)

d_all = d_path & "," & d_filename & "," & d_dateaccess & "," & d_lastmod & "," & d_datecreate & "," & d_type & "," & d_size & "," & d_owner & ","

' Same here. If we get this far, 'f' is not visible to this sub...
f.Writeline d_all 'write ob
Else
Debug.Print Fil.path & " " & Fil.Name
End If

Next
Call RecursiveFolder(SubFld, TimeTest)
Next
End Sub


Hope that helps,

Mark

omgrtm
11-18-2009, 04:29 AM
Cheers Mark, you were absolutely right - i dimmed it both outside and inside the sub. The final code i used:


Public X()
Public i As Long
Public objShell, objFolder, objFolderItem
Public FSO, oFolder, Fil, f

Sub MainExtractData()

Dim MainFolderName As String
Dim TimeLimit As Long, StartTime As Double

Const ForReading = 1, ForWriting = 2, ForAppending = 8

Set objShell = CreateObject("Shell.Application")

Application.ScreenUpdating = False
MainFolderName = "c:\temp" 'list files in this folder

' File attribs are as follows:
' X(1, 1) = "Path"
' X(1, 2) = "File Name"
' X(1, 3) = "Last Accessed"
' X(1, 4) = "Last Modified"
' X(1, 5) = "Created"
' X(1, 6) = "Type"
' X(1, 7) = "Size"
' X(1, 8) = "Owner" - for getdetsilsof

i = 1

Set FSO = CreateObject("scripting.FileSystemObject")
Set oFolder = FSO.GetFolder(MainFolderName)
'error handling to stop the obscure error that occurs at time when retrieving DateLastAccessed

Set f = FSO.OpenTextFile("c:\temp\output.csv", ForWriting, True)

On Error Resume Next
'error handling to stop the obscure error that occurs at time when retrieving DateLastAccessed
For Each Fil In oFolder.Files
Set objFolder = objShell.Namespace(oFolder.path)
Set objFolderItem = objFolder.ParseName(Fil.Name)
i = i + 1
Application.StatusBar = "Files processed: " & i
DoEvents

d_path = oFolder.path
d_filename = Fil.Name
d_dateaccess = Fil.DateLastAccessed
d_lastmod = Fil.DateLastModified
d_datecreate = Fil.DateCreated
d_type = Fil.Type
d_size = Fil.Size
d_owner = objFolder.GetDetailsOf(objFolderItem, 8)
d_all = Chr(34) & d_path & Chr(34) & "," & Chr(34) & d_filename & Chr(34) & "," & d_dateaccess & "," & d_lastmod & "," & d_datecreate & "," & d_type & "," & d_size & "," & d_owner
f.Writeline d_all 'write ob
Next

'Get subdirectories
Call RecursiveFolder(oFolder)

FastExit:
Set FSO = Nothing
Set objShell = Nothing
Set oFolder = Nothing
Set objFolder = Nothing
Set objFolderItem = Nothing
Set Fil = Nothing
Application.StatusBar = ""
Application.ScreenUpdating = True
f.Close
End Sub

Sub RecursiveFolder(xFolder)
Dim SubFld
For Each SubFld In xFolder.SubFolders
Set oFolder = FSO.GetFolder(SubFld)
Set objFolder = objShell.Namespace(SubFld.path)
For Each Fil In SubFld.Files
Set objFolder = objShell.Namespace(oFolder.path)
'Problem with objFolder at times
If Not objFolder Is Nothing Then
Set objFolderItem = objFolder.ParseName(Fil.Name)
i = i + 1
Application.StatusBar = "Files processed: " & i
DoEvents

d_path = SubFld.path
d_filename = Fil.Name
d_dateaccess = Fil.DateLastAccessed
d_lastmod = Fil.DateLastModified
d_datecreate = Fil.DateCreated
d_type = Fil.Type
d_size = Fil.Size
d_owner = objFolder.GetDetailsOf(objFolderItem, 8)

d_all = Chr(34) & d_path & Chr(34) & "," & Chr(34) & d_filename & Chr(34) & "," & d_dateaccess & "," & d_lastmod & "," & d_datecreate & "," & d_type & "," & d_size & "," & d_owner
f.Writeline d_all 'write ob
Else
Debug.Print Fil.path & " " & Fil.Name
End If

Next
Call RecursiveFolder(SubFld)
Next
End Sub


Will list files recursively in "c:\temp" and output to "c:\temp\output.csv". Good spot Mark!

Cheers
Dan