What I have:
I have a macro in a word document that looks for statements that say "shall" or "will". When it finds it, it writes the statement and the section header to an excel file.
The problem:
These are crappy word files exported from PDF files, so they don't have "real" section headers. As a result, I'm forced to identify headers using fairly complex logic. The end result is that my macro runs slowly--far too slowly. On long proposals (100+ pages) it can take over an hour.
I need help with:
I could use some help identifying bottlenecks in my code. The attached file includes part of a proposal I'm using to gauge performance. I'm getting about 2 minutes right now, but ideally this file should process in about 30 seconds.
The Code:
Here's the code in all its twisted glory.
[VBA]Dim sKWArray() As String
Function FileThere(FileName As String) As Boolean
FileThere = (Dir(FileName) > "")
End Function
'Queries the user for a file name where we'll save our information
Function GetXLSFileName() As String
'open excel document
Dim fd As FileDialog
Set fd = Application.FileDialog(msoFileDialogFilePicker)
Dim vSelFile As Variant
With fd
.AllowMultiSelect = False
.Filters.Add "Excel 2003", "*.xls", 1
.Filters.Add "Excel 2007 & 2010", "*.xlsx", 1
.Title = "Choose your destination spreadsheet file."
'Show the dialog box
If .Show = -1 Then
'Set the file string to the selected file
GetXLSFileName = .SelectedItems(1)
'If the user presses Cancel...
Else
MsgBox "No file selected. Macro will use 'C:\ShallStatements.xls' as default."
GetXLSFileName = "C:\ShallStatements.xls"
End If
End With
End Function
'Interface function that gathers keywords to search for in the document.
Function SetUpKeywords() As Boolean
ReDim sKWArray(1 To 2) As String
sKWArray(1) = "shall"
sKWArray(2) = "must" '<< Section simplified in this example as it was not causing a performance issue
SetUpKeywords = True
End Function
'Finds the first searched keyword in a line and returns the position
Function HasKeywords(line As String) As Integer
HasKeywords = 0
Dim x As Integer
For x = 1 To UBound(sKWArray)
HasKeywords = IIf(Not IsNull(InStr(1, line, sKWArray(x), 1)), InStr(1, line, sKWArray(x), 1), 0)
'Exit the loop if we find something
If HasKeywords <> 0 Then Exit For
Next x
End Function
'See if we have a heading-style number. Returns the heading, or blank if not a heading.
Private Function HeaderOutlineTitle(line As String) As String
'Set up variables
Dim CharCounter As Integer
Dim NumberCounter As Integer
Dim tCh As String
CharCounter = 0
NumberCounter = 0
'For the 4 character headings, this skips all the unncessary logic
If Len(line) >= 4 And IsDigits(Left(line, 4)) Then
HeaderOutlineTitle = Left(line, 4)
GoTo SkipToEnd
End If
'Start looping through the line character by character
For y = 1 To Len(line)
tCh = Mid(line, y, 1)
'This allows the letter "H" to be the first character
If y = 1 And tCh = "H" Then GoTo SkipNumberCheck
'If it's blank or a number or a period, exit
If tCh = " " Or (Not IsDigit(tCh) And tCh <> ".") Then Exit For
'If this is a number, keep track, otherwise reset the counter
NumberCounter = IIf(IsNumeric(tCh), NumberCounter + 1, 0)
'If we've had more than 2 numbers in a row, we're probably not seeing a heading
If NumberCounter > 2 Then
CharCounter = 0
Exit For
End If
SkipNumberCheck: 'We can also skip all the other logic since it's the first character
CharCounter = CharCounter + 1
Next y
'If this was a outline heading, list that information in column 4
'(AND As long as the last character before the space was a number)
If CharCounter <> 0 Then
If IsNumeric(Mid(line, CharCounter, 1)) Then
HeaderOutlineTitle = Left(line, CharCounter)
Else
HeaderOutlineTitle = Left(line, CharCounter - 1)
End If
Else
HeaderOutlineTitle = ""
End If
SkipToEnd:
End Function
Function IsDigit(char As String) As Boolean
'Uses [Asc] function to get an ASCII code of a character.
Dim iAsciiCode As Integer
iAsciiCode = Asc(char)
'The ASCII codes for the digits range from 48 to 57. If an ASCII
'code of the character is within this range, function returns true
IsDigit = iAsciiCode >= 48 And iAsciiCode <= 57
End Function
Function IsDigits(text As String) As Boolean
'If the text is 0 length, it's not a digit-only string
If Len(text) = 0 Then
IsDigits = False
Else
Dim x As Integer
'the default is true. We'll set to false if...
IsDigits = True
For x = 1 To Len(text)
'...we find something that's not a digit
If Not IsDigit(Mid(text, x, 1)) Then
IsDigits = False
Exit For
End If
Next x
End If
End Function
'Main function that extracts requirements from the document
Sub ExtractRequirements()
Dim x As Integer, y As Integer, tCh As Integer
Dim intRowCount As Integer 'The row excel will import data into. Used as counter & start value.
intRowCount = 10 'Data starts on row 10. The first 9 rows are reserved for performance metrics and the headers.
'Create arrays to hold data
Dim sHeaders() As String
Dim sStatements() As String
ReDim sHeaders(1 To 1000) As String 'Supports maximum of 1000 requirements per document.
ReDim sStatements(1 To 1000) As String 'Supports maximum of 1000 requirements per document.
'Record Start Time
sHeaders(1) = "Start Time"
sStatements(1) = DateTime.Now
'Figure out how many lines are in the document
Dim NumOfLines As Integer
NumOfLines = ActiveDocument.ComputeStatistics(wdStatisticLines, False)
'Record time for setup to be completed
sHeaders(2) = "Lines Computed"
sStatements(2) = DateTime.Now
'Set up requirement words (exit sub on failure)
If Not SetUpKeywords Then Exit Sub
'Turn off screen updating to improve performance
If MsgBox("Would you like to turn off screen updating to improve performance? " & _
"Please note that this may cause Word to appear unresponsive for quite " & _
"some time on large files.", vbYesNo, "Improve Performance") = vbYes Then
Application.ScreenUpdating = False
End If
'Record time for information to be collected
sHeaders(3) = "Params Collected"
sStatements(3) = DateTime.Now
'Loop through every line and look for headers
Dim tmpHeaderRslt As String, LineText As String, SentenceText As String, header As String, ShallPos As Integer
header = "None" 'This is the header value used before we've found a legitimate header
For x = 1 To NumOfLines
'Go to the correct line
Selection.GoTo wdGoToLine, wdGoToAbsolute, x
'Expand to select the entire line, but skip the logic if it's blank
Selection.Expand wdLine
LineText = Trim(Selection.text)
If LineText <> "" Then
'see if it's a header, and if so, set the header
tmpHeaderRslt = HeaderOutlineTitle(LineText)
If tmpHeaderRslt <> "" Then header = tmpHeaderRslt
'See if the line has the word "shall" or "must" in it
ShallPos = HasKeywords(LineText)
'If so, write it to the spreadsheet
If ShallPos > 0 Then
'Move to the correct character and expand to the full sentence
Selection.Collapse wdCollapseStart
Selection.Move wdCharacter, ShallPos
Selection.Expand wdSentence
SentenceText = Selection.text
'Write the header to the spreadsheet
sHeaders(intRowCount) = header
sStatements(intRowCount) = SentenceText
'Move the selection forward to the proper line (avoids replication)
Selection.Collapse wdCollapseEnd
If (x + 1) < Selection.Information(wdFirstCharacterLineNumber) Then x = Selection.Information(wdFirstCharacterLineNumber) - 1
'Incremement the row counter
intRowCount = intRowCount + 1
End If
End If
Next x
'Record time for statements to be collected
sHeaders(4) = "Statements Collected"
sStatements(4) = DateTime.Now
'Open excel file and set up for receiving data
Dim xlsFileString As String
xlsFileString = GetXLSFileName
Dim appExcel As Object
Dim objSheet As Object
Set appExcel = CreateObject("Excel.Application")
Set objSheet = appExcel.workbooks.Open(xlsFileString).Sheets("Sheet1")
'Write data to the spreadsheet
sHeaders(5) = "Excel write begin"
sStatements(5) = DateTime.Now
For x = 1 To intRowCount
objSheet.Cells(x, 1) = sHeaders(x)
objSheet.Cells(x, 2) = sStatements(x)
Next x
'Write end time
objSheet.Cells(6, 1) = "Excel write ends"
objSheet.Cells(6, 2) = DateTime.Now
'Clean up and open for viewing.
appExcel.workbooks(1).Close True
appExcel.Quit
Set objSheet = Nothing
Set appExcel = Nothing
'Open the new spreadsheet for viewing
If FileThere("C:\Program Files\Microsoft Office\Office12\excel.exe ") Then
Shell ("C:\Program Files\Microsoft Office\Office12\excel.exe """ & xlsFileString & """")
ElseIf FileThere("C:\Program Files\Microsoft Office\Office14\excel.exe ") Then
Shell ("C:\Program Files\Microsoft Office\Office14\excel.exe """ & xlsFileString & """")
End If
End Sub
[/VBA]