Consulting

Results 1 to 20 of 30

Thread: Complex macro has very poor performance

Threaded View

Previous Post Previous Post   Next Post Next Post
  1. #1
    VBAX Regular
    Joined
    May 2012
    Posts
    30
    Location

    Complex macro has very poor performance

    What I have:
    I have a macro in a word document that looks for statements that say "shall" or "will". When it finds it, it writes the statement and the section header to an excel file.

    The problem:
    These are crappy word files exported from PDF files, so they don't have "real" section headers. As a result, I'm forced to identify headers using fairly complex logic. The end result is that my macro runs slowly--far too slowly. On long proposals (100+ pages) it can take over an hour.

    I need help with:
    I could use some help identifying bottlenecks in my code. The attached file includes part of a proposal I'm using to gauge performance. I'm getting about 2 minutes right now, but ideally this file should process in about 30 seconds.

    The Code:
    Here's the code in all its twisted glory.

    [VBA]Dim sKWArray() As String

    Function FileThere(FileName As String) As Boolean
    FileThere = (Dir(FileName) > "")
    End Function

    'Queries the user for a file name where we'll save our information
    Function GetXLSFileName() As String
    'open excel document
    Dim fd As FileDialog
    Set fd = Application.FileDialog(msoFileDialogFilePicker)
    Dim vSelFile As Variant
    With fd
    .AllowMultiSelect = False
    .Filters.Add "Excel 2003", "*.xls", 1
    .Filters.Add "Excel 2007 & 2010", "*.xlsx", 1
    .Title = "Choose your destination spreadsheet file."

    'Show the dialog box
    If .Show = -1 Then
    'Set the file string to the selected file
    GetXLSFileName = .SelectedItems(1)
    'If the user presses Cancel...
    Else
    MsgBox "No file selected. Macro will use 'C:\ShallStatements.xls' as default."
    GetXLSFileName = "C:\ShallStatements.xls"
    End If
    End With
    End Function

    'Interface function that gathers keywords to search for in the document.
    Function SetUpKeywords() As Boolean
    ReDim sKWArray(1 To 2) As String
    sKWArray(1) = "shall"
    sKWArray(2) = "must" '<< Section simplified in this example as it was not causing a performance issue
    SetUpKeywords = True
    End Function

    'Finds the first searched keyword in a line and returns the position
    Function HasKeywords(line As String) As Integer
    HasKeywords = 0
    Dim x As Integer
    For x = 1 To UBound(sKWArray)
    HasKeywords = IIf(Not IsNull(InStr(1, line, sKWArray(x), 1)), InStr(1, line, sKWArray(x), 1), 0)

    'Exit the loop if we find something
    If HasKeywords <> 0 Then Exit For
    Next x
    End Function

    'See if we have a heading-style number. Returns the heading, or blank if not a heading.
    Private Function HeaderOutlineTitle(line As String) As String
    'Set up variables
    Dim CharCounter As Integer
    Dim NumberCounter As Integer
    Dim tCh As String

    CharCounter = 0
    NumberCounter = 0

    'For the 4 character headings, this skips all the unncessary logic
    If Len(line) >= 4 And IsDigits(Left(line, 4)) Then
    HeaderOutlineTitle = Left(line, 4)
    GoTo SkipToEnd
    End If

    'Start looping through the line character by character
    For y = 1 To Len(line)
    tCh = Mid(line, y, 1)

    'This allows the letter "H" to be the first character
    If y = 1 And tCh = "H" Then GoTo SkipNumberCheck

    'If it's blank or a number or a period, exit
    If tCh = " " Or (Not IsDigit(tCh) And tCh <> ".") Then Exit For

    'If this is a number, keep track, otherwise reset the counter
    NumberCounter = IIf(IsNumeric(tCh), NumberCounter + 1, 0)

    'If we've had more than 2 numbers in a row, we're probably not seeing a heading
    If NumberCounter > 2 Then
    CharCounter = 0
    Exit For
    End If

    SkipNumberCheck: 'We can also skip all the other logic since it's the first character
    CharCounter = CharCounter + 1
    Next y

    'If this was a outline heading, list that information in column 4
    '(AND As long as the last character before the space was a number)
    If CharCounter <> 0 Then
    If IsNumeric(Mid(line, CharCounter, 1)) Then
    HeaderOutlineTitle = Left(line, CharCounter)
    Else
    HeaderOutlineTitle = Left(line, CharCounter - 1)
    End If
    Else
    HeaderOutlineTitle = ""
    End If

    SkipToEnd:
    End Function

    Function IsDigit(char As String) As Boolean
    'Uses [Asc] function to get an ASCII code of a character.
    Dim iAsciiCode As Integer
    iAsciiCode = Asc(char)

    'The ASCII codes for the digits range from 48 to 57. If an ASCII
    'code of the character is within this range, function returns true
    IsDigit = iAsciiCode >= 48 And iAsciiCode <= 57
    End Function

    Function IsDigits(text As String) As Boolean
    'If the text is 0 length, it's not a digit-only string
    If Len(text) = 0 Then
    IsDigits = False
    Else
    Dim x As Integer
    'the default is true. We'll set to false if...
    IsDigits = True
    For x = 1 To Len(text)
    '...we find something that's not a digit
    If Not IsDigit(Mid(text, x, 1)) Then
    IsDigits = False
    Exit For
    End If
    Next x
    End If
    End Function

    'Main function that extracts requirements from the document
    Sub ExtractRequirements()
    Dim x As Integer, y As Integer, tCh As Integer
    Dim intRowCount As Integer 'The row excel will import data into. Used as counter & start value.
    intRowCount = 10 'Data starts on row 10. The first 9 rows are reserved for performance metrics and the headers.

    'Create arrays to hold data
    Dim sHeaders() As String
    Dim sStatements() As String
    ReDim sHeaders(1 To 1000) As String 'Supports maximum of 1000 requirements per document.
    ReDim sStatements(1 To 1000) As String 'Supports maximum of 1000 requirements per document.

    'Record Start Time
    sHeaders(1) = "Start Time"
    sStatements(1) = DateTime.Now

    'Write headers
    sHeaders(intRowCount - 1) = "Header"
    sStatements(intRowCount - 1) = "Requirement"

    'Figure out how many lines are in the document
    Dim NumOfLines As Integer
    NumOfLines = ActiveDocument.ComputeStatistics(wdStatisticLines, False)

    'Record time for setup to be completed
    sHeaders(2) = "Lines Computed"
    sStatements(2) = DateTime.Now

    'Set up requirement words (exit sub on failure)
    If Not SetUpKeywords Then Exit Sub

    'Turn off screen updating to improve performance
    If MsgBox("Would you like to turn off screen updating to improve performance? " & _
    "Please note that this may cause Word to appear unresponsive for quite " & _
    "some time on large files.", vbYesNo, "Improve Performance") = vbYes Then
    Application.ScreenUpdating = False
    End If

    'Record time for information to be collected
    sHeaders(3) = "Params Collected"
    sStatements(3) = DateTime.Now

    'Loop through every line and look for headers
    Dim tmpHeaderRslt As String, LineText As String, SentenceText As String, header As String, ShallPos As Integer
    header = "None" 'This is the header value used before we've found a legitimate header
    For x = 1 To NumOfLines
    'Go to the correct line
    Selection.GoTo wdGoToLine, wdGoToAbsolute, x

    'Expand to select the entire line, but skip the logic if it's blank
    Selection.Expand wdLine
    LineText = Trim(Selection.text)
    If LineText <> "" Then
    'see if it's a header, and if so, set the header
    tmpHeaderRslt = HeaderOutlineTitle(LineText)
    If tmpHeaderRslt <> "" Then header = tmpHeaderRslt

    'See if the line has the word "shall" or "must" in it
    ShallPos = HasKeywords(LineText)

    'If so, write it to the spreadsheet
    If ShallPos > 0 Then
    'Move to the correct character and expand to the full sentence
    Selection.Collapse wdCollapseStart
    Selection.Move wdCharacter, ShallPos
    Selection.Expand wdSentence
    SentenceText = Selection.text

    'Write the header to the spreadsheet
    sHeaders(intRowCount) = header
    sStatements(intRowCount) = SentenceText

    'Move the selection forward to the proper line (avoids replication)
    Selection.Collapse wdCollapseEnd
    If (x + 1) < Selection.Information(wdFirstCharacterLineNumber) Then x = Selection.Information(wdFirstCharacterLineNumber) - 1

    'Incremement the row counter
    intRowCount = intRowCount + 1
    End If
    End If
    Next x

    'Record time for statements to be collected
    sHeaders(4) = "Statements Collected"
    sStatements(4) = DateTime.Now

    'Open excel file and set up for receiving data
    Dim xlsFileString As String
    xlsFileString = GetXLSFileName
    Dim appExcel As Object
    Dim objSheet As Object
    Set appExcel = CreateObject("Excel.Application")
    Set objSheet = appExcel.workbooks.Open(xlsFileString).Sheets("Sheet1")

    'Write data to the spreadsheet
    sHeaders(5) = "Excel write begin"
    sStatements(5) = DateTime.Now
    For x = 1 To intRowCount
    objSheet.Cells(x, 1) = sHeaders(x)
    objSheet.Cells(x, 2) = sStatements(x)
    Next x

    'Write end time
    objSheet.Cells(6, 1) = "Excel write ends"
    objSheet.Cells(6, 2) = DateTime.Now

    'Clean up and open for viewing.
    appExcel.workbooks(1).Close True
    appExcel.Quit
    Set objSheet = Nothing
    Set appExcel = Nothing

    'Open the new spreadsheet for viewing
    If FileThere("C:\Program Files\Microsoft Office\Office12\excel.exe ") Then
    Shell ("C:\Program Files\Microsoft Office\Office12\excel.exe """ & xlsFileString & """")
    ElseIf FileThere("C:\Program Files\Microsoft Office\Office14\excel.exe ") Then
    Shell ("C:\Program Files\Microsoft Office\Office14\excel.exe """ & xlsFileString & """")
    End If
    End Sub
    [/VBA]

    I appreciate any help you can give. Thank you!

    Dan
    Attached Files Attached Files

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •