Consulting

Results 1 to 6 of 6

Thread: Acronym Matcher - exclude terms

  1. #1

    Question Acronym Matcher - exclude terms

    Hi all,

    I am using the code below to find acronyms in Word documents. It uses the Acronyms.xlsx reference file which has two worksheets - Acronyms and Excluded.

    I'd like to update the code below to check if any acronyms in the Word document match those listed in the Excluded worksheet, so it will not add them to the final table. This will save much time having to delete them each time

    I hope that makes sense and I appreciate anybody's help with this.


    Sub AcronymMatcher()
    '
    ' AcronymMatcher Macro
    '
    '
    Dim oDoc_Source As Document
    Dim oDoc_Target As Document
    Dim strListSep As String
    Dim strAcronym As String
    Dim strDef As String
    Dim oTable As Table
    Dim oRange As Range
    Dim n As Long
    Dim m As Long
    m = 0
    Dim strAllFound As String
    Dim Title As String
    Dim Msg As String
    Dim objExcel As Object
    Dim objWbk As Object
    Dim rngSearch As Object
    Dim rngFound As Object
    Dim targetCellValue As String
    
    
    ' message box title
    Title = "Extract Acronyms to New Document"
    
    
    ' Set message box message
    Msg = "This macro finds all Acronyms (consisting of 2 or more " & _
    "uppercase letters, Numbers or '/') and their associated definitions. It " & _
    "then extracts the words to a table at the current location you have selected" & vbCr & vbCr & _
    "Warning - Please make sure you check the table manually after!" & vbCr & vbCr & _
    "Do you want to continue?"
    
    
    ' Display message box
    If MsgBox(Msg, vbYesNo + vbQuestion, Title) <> vbYes Then
        Exit Sub
    End If
    
    
    ' Stop the screen from updating
    Application.ScreenUpdating = False
    
    
    
    
    'Find the list separator from international settings
    'May be a comma or semicolon depending on the country
    strListSep = Application.International(wdListSeparator)
    
    
    'Start a string to be used for storing names of acronyms found
    strAllFound = "#"
    
    
    ' give the active document a variable
    Set oDoc_Source = ActiveDocument
    
    
    'Crete a variable for excel and open the definition workbook
    Set objExcel = CreateObject("Excel.Application")
    Set objWbk = objExcel.Workbooks.Open("D:\Acronyms.xlsx")
    'objExcel.Visible = True
    objWbk.Activate
    
    
    'Create new document to temporarily store the acronyms
    Set oDoc_Target = Documents.Add
    
    
    ' Use the target document
    With oDoc_Target
    
    
        'Make sure document is empty
        .Range = ""
    
    
        'Insert info in header - change date format as you wish
        '.PageSetup.TopMargin = CentimetersToPoints(3)
        '.Sections(1).Headers(wdHeaderFooterPrimary).Range.Text = _
        '    "Acronyms extracted from: " & oDoc_Source.FullName & vbCr & _
        '    "Created by: " & Application.UserName & vbCr & _
        '    "Creation date: " & Format(Date, "MMMM d, yyyy")
    
    
        'Adjust the Normal style and Header style
        With .Styles(wdStyleNormal)
            .Font.Name = "Arial"
            .Font.Size = 10
            .ParagraphFormat.LeftIndent = 0
            .ParagraphFormat.SpaceAfter = 6
        End With
    
    
        With .Styles(wdStyleHeader)
            .Font.Size = 8
            .ParagraphFormat.SpaceAfter = 0
        End With
    
    
        'Insert a table with room for acronym and definition
        Set oTable = .Tables.Add(Range:=.Range, NumRows:=2, NumColumns:=2)
        With oTable
            'Format the table a bit
            'Insert headings
            .Range.Style = wdStyleNormal
            .AllowAutoFit = False
            .Cell(1, 1).Range.Text = "Acronym"
            .Cell(1, 2).Range.Text = "Definition"
    
    
            'Set row as heading row
            .Rows(1).HeadingFormat = True
            .Rows(1).Range.Font.Bold = True
            .PreferredWidthType = wdPreferredWidthPercent
            .Columns(1).PreferredWidth = 20
            .Columns(2).PreferredWidth = 70
    
    
        End With
    End With
    
    
    
    
    
    
    With oDoc_Source
        Set oRange = .Range
    
    
        n = 1 'used to count below
    
    
        ' within the total range of the source document
        With oRange.Find
            'Use wildcard search to find strings consisting of 3 or more uppercase letters
            'Set the search conditions
            'NOTE: If you want to find acronyms with e.g. 2 or more letters,
            'change 3 to 2 in the line below
            .Text = "<[A-Z][A-Z0-9/]{1" & strListSep & "}>"
            .Forward = True
            .Wrap = wdFindStop
            .Format = False
            .MatchCase = True
            .MatchWildcards = True
    
    
            'Perform the search
            Do While .Execute
    
    
            'Continue while found
            strAcronym = oRange
    
    
            'Insert in target doc
            'If strAcronym is already in strAllFound, do not add again
            If InStr(1, strAllFound, "#" & strAcronym & "#") = 0 Then
    
    
                'Add new row in table from second acronym
                If n > 1 Then oTable.Rows.Add
    
    
                    'Was not found before
                    strAllFound = strAllFound & strAcronym & "#"
    
    
                    'Insert in column 1 in oTable
                    'Compensate for heading row
    
    
                    With oTable
                        .Cell(n + 1, 1).Range.Text = strAcronym
    
    
                        ' Find the definition from the Excel document
                        With objWbk.Sheets("Sheet1")
                            ' Find the range of the cells with data in Excel doc
                            Set rngSearch = .Range(.Range("A1"), .Range("A" & .Rows.Count).End(-4162))
    
    
                            ' Search in the found range for the
                            Set rngFound = rngSearch.Find(What:=strAcronym, After:=.Range("A1"), LookAt:=1)
    
    
                            ' if nothing is found count the number of acronyms without definitions
                            If rngFound Is Nothing Then
                                m = m + 1
    
    
                                ' Set the cell variable in the new table as blank
                                targetCellValue = ""
    
    
                            ' If a definition is found enter it into the cell variable
                            Else
                                targetCellValue = .Cells(rngFound.Row, 2).Value
    
    
                            End If
                        End With
    
    
                        ' enter the cell varibale into the definition cell
                        .Cell(n + 1, 2).Range.Text = targetCellValue
                    End With
    
    
    
    
                    ' add one to the loop count
                    n = n + 1
    
    
                End If
            Loop
        End With
    End With
    
    
    
    
    
    
    'Sort the acronyms alphabetically - skip if only 1 found
    If n > 2 Then
    
    
        With Selection
            .Sort ExcludeHeader:=True, FieldNumber:="Column 1", SortFieldType _
                :=wdSortFieldAlphanumeric, SortOrder:=wdSortOrderAscending
    
    
            'Go to start of document
            .HomeKey (wdStory)
    
    
        End With
    End If
    
    
    'Copy the whole table, switch to the source document and past
    'in the table at the original selection location
    Selection.WholeStory
    Selection.Copy
    oDoc_Source.Activate
    Selection.Paste
    
    
    ' update screen
    Application.ScreenUpdating = True
    
    
    'If no acronyms found set message saying so
    If n = 1 Then
        Msg = "No acronyms found."
    
    
    ' set the final messagebox message to show the number of acronyms found and those that did not have definitions
    Else
        Msg = "Finished extracting " & n - 1 & " acronymn(s) to a new document. Unable to find definitions for " & m & " acronyms."
    End If
    
    
    ' Show the finished message box
    'AppActivate Application.Caption
    'MsgBox Msg, vbOKOnly, Title
    
    
    'make the target document active and close it down without saving
    oDoc_Target.Activate
    ActiveDocument.Close SaveChanges:=wdDoNotSaveChanges
    
    
    'Close Excel after
    objWbk.Close Saved = True
    
    
    'Clean up
    Set oRange = Nothing
    Set oDoc_Source = Nothing
    Set oDoc_Target = Nothing
    Set oTable = Nothing
    Set objExcel = Nothing
    Set objWbk = Nothing
    
    
    End Sub
    Last edited by Paul_Hossler; 11-13-2021 at 12:54 PM.

  2. #2
    VBAX Expert Dave's Avatar
    Joined
    Mar 2005
    Posts
    833
    Location
    Hi Iscmal1984 and welcome to this forum. It seems to me that you could make a large string of the excluded acronyms and check it similar to this line of code...
    If InStr(1, strAllFound, "#" & strAcronym & "#") = 0 Then

    something like..
    If InStr(1, strAllExcluded, "#" & strAcronym & "#") = 0 Then

    where the strAllExcluded is a delimited string containing all the excluded acronyms. Maybe place this line of code before the check to see if the acronym has already been found. HTH. Dave
    ps. please use code tags

  3. #3
    VBAX Sage
    Joined
    Apr 2007
    Location
    United States
    Posts
    8,724
    Location
    @Iscmal1984

    Welcome to the forum. Take a minute to read the FAQs at the link in my sig

    1. I added CODE tags to your post. It gets formatted better and sets it off

    2. It might be helpful if you attached a sample for your files to show people what you're working with. If there's a special output format, it's usually a good idea to (manually if necessary) create a sample
    ---------------------------------------------------------------------------------------------------------------------

    Paul


    Remember: Tell us WHAT you want to do, not HOW you think you want to do it

    1. Use [CODE] ....[/CODE ] Tags for readability
    [CODE]PasteYourCodeHere[/CODE ] -- (or paste your code, select it, click [#] button)
    2. Upload an example
    Go Advanced / Attachments - Manage Attachments / Add Files / Select Files / Select the file(s) / Upload Files / Done
    3. Mark the thread as [Solved] when you have an answer
    Thread Tools (on the top right corner, above the first message)
    4. Read the Forum FAQ, especially the part about cross-posting in other forums
    http://www.vbaexpress.com/forum/faq...._new_faq_item3

  4. #4
    If you have a worksheet with two columns starting at A1, you don't need to run Excel to read that sheet. You can read the sheet directly into an array with the following function and compare the values in the array with those from your search. This is much faster than opening and reading from Excel. If there are no header rows in the worksheets change HDR=YES to HDR=NO

    Private Function xlFillArray(strWorkbook As String, _
                                 strRange As String) As Variant
    'Graham Mayor - http://www.gmayor.com - 24/09/2016
    Dim RS As Object
    Dim CN As Object
    Dim iRows As Long
    
    strRange = strRange & "$]"    'Use this to work with a named worksheet
        'strRange = strRange & "]" 'Use this to work with a named range
        Set CN = CreateObject("ADODB.Connection")
    
        'Set HDR=NO for no header row
        CN.Open ConnectionString:="Provider=Microsoft.ACE.OLEDB.12.0;" & _
                                  "Data Source=" & strWorkbook & ";" & _
                                  "Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1"""
    
        Set RS = CreateObject("ADODB.Recordset")
        RS.Open "SELECT * FROM [" & strRange, CN, 2, 1
    
        With RS
            .MoveLast
            iRows = .RecordCount
            .MoveFirst
        End With
        xlFillArray = RS.GetRows(iRows)
        If RS.State = 1 Then RS.Close
        Set RS = Nothing
        If CN.State = 1 Then CN.Close
        Set CN = Nothing
    lbl_Exit:
        Exit Function
    End Function
    You can incorporate that in your code to check and write the values into your table as required e.g.
    Const sWorkbook As String = "D:\Acronyms.xlsx"
    Const sAcronyms As String = "Acronyms"
    Const sExcluded As String = "Excluded"
    Dim Arr() As Variant
    Dim i As Integer
        Arr = xlFillArray(sWorkbook, sAcronyms)
        For i = 0 To UBound(Arr, 2)
            'compare the word with the acronym list in column 1 - Arr(0, 1)
    Debug.Print Arr(0, i)
            'get the associated description from column 2 - Arr(1, i)
    Debug.Print Arr(1, i)
        Next i
        Arr = xlFillArray(sWorkbook, sExcluded)
        For i = 0 To UBound(Arr, 2)
            'compare the word with the excluded list in column 1  - Arr(0, 1)
    Debug.Print Arr(0, i)
            'get the associated description from column 2 only if available and required for the excluded list- Arr(1, 1)
    Debug.Print Arr(1, i)
        Next i
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  5. #5
    Playing around with your macro, I think the following will work for you:
    Option Explicit
    
    Sub AcronymMatcher()
    '
    ' AcronymMatcher Macro
    'Graham Mayor - https://www.gmayor.com - Last updated - 14 Nov 2021
    '
    Dim oDoc_Source As Document
    Dim oDoc_Target As Document
    Dim strListSep As String
    Dim strAcronym As String
    Dim strDef As String
    Dim oTable As Table
    Dim oRow As Row
    Dim oRng As Range
    Dim oCell As Range
    Dim n As Long, i As Long
    Dim m As Long
    
    Dim sTitle As String
    Dim sMsg As String
    Dim ArrAcr() As Variant, ArrExc() As Variant
    Dim colFound As Collection, colNotFound As Collection, colExcluded As Collection
    Dim bFound As Boolean
    
    Const sWorkbook As String = "D:\Acronyms.xlsx"
    Const sAcronyms As String = "Acronyms"
    Const sExcluded As String = "Excluded"
    
        ' message box title
        sTitle = "Extract Acronyms to New Document"
        ' Set message box message
        sMsg = "This macro finds all Acronyms (consisting of 2 or more " & _
               "uppercase letters, Numbers or '/') and their associated definitions. It " & _
               "then extracts the words to a table at the current location you have selected" & vbCr & vbCr & _
               "Warning - Please make sure you check the table manually after!" & vbCr & vbCr & _
               "Do you want to continue?"
        ' Display message box
        If Not MsgBox(sMsg, vbYesNo + vbQuestion, sTitle) = vbYes Then Exit Sub
    
        ArrAcr = xlFillArray(sWorkbook, sAcronyms)
        ArrExc = xlFillArray(sWorkbook, sExcluded)
    
        ' Stop the screen from updating
        Application.ScreenUpdating = False
    
        'Find the list separator from international settings
        'May be a comma or semicolon depending on the country
        strListSep = Application.International(wdListSeparator)
    
        ' give the active document a variable
        Set oDoc_Source = ActiveDocument
    
        'Create new document to temporarily store the acronyms
        Set oDoc_Target = Documents.Add
    
        ' Use the target document
        With oDoc_Target
    
            'Adjust the Normal style and Header style
            With .Styles(wdStyleNormal)
                .Font.Name = "Arial"
                .Font.Size = 10
                .ParagraphFormat.LeftIndent = 0
                .ParagraphFormat.SpaceAfter = 6
            End With
    
    
            With .Styles(wdStyleHeader)
                .Font.Size = 8
                .ParagraphFormat.SpaceAfter = 0
            End With
    
    
            'Insert a table with room for acronym and definition
            Set oTable = .Tables.Add(Range:=.Range, NumRows:=1, NumColumns:=2)
            With oTable
                'Format the table a bit
                'Insert headings
                .Range.Style = wdStyleNormal
                .AllowAutoFit = False
                .Cell(1, 1).Range.Text = "Acronym"
                .Cell(1, 2).Range.Text = "Definition"
    
                'Set row as heading row
                .Rows(1).HeadingFormat = True
                .Rows(1).Range.Font.Bold = True
                .PreferredWidthType = wdPreferredWidthPercent
                .Columns(1).PreferredWidth = 20
                .Columns(2).PreferredWidth = 70
            End With
        End With
    
        Set colFound = New Collection
        Set colNotFound = New Collection
        Set colExcluded = New Collection
    
        With oDoc_Source
            Set oRng = .Range
    
            ' within the total range of the source document
            With oRng.Find
                'Use wildcard search to find strings consisting of 3 or more uppercase letters
                'Set the search conditions
                'NOTE: If you want to find acronyms with e.g. 2 or more letters,
                'change 3 to 2 in the line below
                .Text = "<[A-Z][A-Z0-9/]{1" & strListSep & "}>"
                .Forward = True
                .Wrap = wdFindStop
                .Format = False
                .MatchCase = True
                .MatchWildcards = True
    
    
                'Perform the search
                Do While .Execute
                    'Continue while found
                    On Error Resume Next
                    colFound.Add oRng.Text, oRng.Text
                    oRng.Collapse 0
                Loop
    
                With oTable
                    For n = 1 To colFound.Count
                        For m = 0 To UBound(ArrExc, 2)
                            If colFound(n) = ArrExc(0, m) Then
                                colExcluded.Add colFound(n), colFound(n)
                                GoTo Skip
                                Exit For
                            End If
                        Next m
                        For i = 0 To UBound(ArrAcr, 2)
                            bFound = False
                            If colFound(n) = ArrAcr(0, i) Then
                                Set oRow = .Rows.Add
                                oRow.HeadingFormat = False
                                oRow.Range.Font.Bold = False
    
                                Set oCell = .Cell(oRow.Index, 1).Range
                                oCell.End = oCell.End - 1
                                oCell.Text = ArrAcr(0, i)
    
                                Set oCell = .Cell(oRow.Index, 2).Range
                                oCell.End = oCell.End - 1
                                oCell.Text = ArrAcr(1, i)
                                bFound = True
                                Exit For
                            End If
                            If bFound = False Then colNotFound.Add colFound(n), colFound(n)
                        Next i
    
    Skip:
                    Next n
                    'Sort the acronyms alphabetically - skip if only 1 found
                    If .Rows.Count > 2 Then
                        .Sort _
                                ExcludeHeader:=True, _
                                FieldNumber:="Column 1", _
                                SortFieldType:=wdSortFieldAlphanumeric, _
                                SortOrder:=wdSortOrderAscending
                    End If
                    If .Rows.Count > 1 Then
                        oDoc_Source.Range.InsertParagraphAfter
                        Set oRng = oDoc_Source.Range
                        oRng.Collapse 0
                        oRng.FormattedText = .Range.FormattedText
                        sMsg = "Finished extracting " & .Rows.Count - 1 & " acronym(s)."
                    Else
                        sMsg = "No acronyms found."
                    End If
                End With
            End With
        End With
    
        Set oRng = oTable.Range
        oRng.End = oRng.End + 1
        oRng.Collapse 0
        oRng.Text = vbCr
        oRng.Collapse 0
    
        'MsgBox colNotFound.Count & " not found"
        If colNotFound.Count > 0 Then
            oRng.Text = vbCr & "Not Found: " & vbCr
            oRng.Paragraphs(2).Range.Font.Bold = True
            oRng.Collapse 0
            For i = 1 To colNotFound.Count
                oRng.Text = colNotFound(i) & vbCr
                oRng.Collapse 0
            Next i
        End If
    
        'MsgBox colExcluded.Count & " excluded"
        If colExcluded.Count > 0 Then
            oRng.Text = vbCr & "Excluded: " & vbCr
            oRng.Paragraphs(2).Range.Font.Bold = True
            oRng.Collapse 0
            For i = 1 To colExcluded.Count
                oRng.Text = colExcluded(i) & vbCr
                oRng.Collapse 0
            Next i
        End If
    
        ' update screen
        Application.ScreenUpdating = True
    
        ' Show the finished message box
        MsgBox sMsg, vbInformation
    
        'oDoc_Target.Close SaveChanges:=wdDoNotSaveChanges
    
        'Clean up
        Set oRng = Nothing
        Set oDoc_Source = Nothing
        Set oDoc_Target = Nothing
        Set oTable = Nothing
        Set oCell = Nothing
        Set colFound = Nothing
    End Sub
    
    Private Function xlFillArray(strWorkbook As String, _
                                 strRange As String) As Variant
    'Graham Mayor - http://www.gmayor.com - 24/09/2016
    Dim RS As Object
    Dim CN As Object
    Dim iRows As Long
    
    strRange = strRange & "$]"    'Use this to work with a named worksheet
        'strRange = strRange & "]" 'Use this to work with a named range
        Set CN = CreateObject("ADODB.Connection")
    
        'Set HDR=NO for no header row
        CN.Open ConnectionString:="Provider=Microsoft.ACE.OLEDB.12.0;" & _
                                  "Data Source=" & strWorkbook & ";" & _
                                  "Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1"""
    
        Set RS = CreateObject("ADODB.Recordset")
        RS.Open "SELECT * FROM [" & strRange, CN, 2, 1
    
        With RS
            .MoveLast
            iRows = .RecordCount
            .MoveFirst
        End With
        xlFillArray = RS.GetRows(iRows)
        If RS.State = 1 Then RS.Close
        Set RS = Nothing
        If CN.State = 1 Then CN.Close
        Set CN = Nothing
    lbl_Exit:
        Exit Function
    End Function
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  6. #6
    VBAX Sage
    Joined
    Apr 2007
    Location
    United States
    Posts
    8,724
    Location
    Graham --

    I like that

    Private Function xlFillArray(strWorkbook As String, strRange As String) As Variant
    ---------------------------------------------------------------------------------------------------------------------

    Paul


    Remember: Tell us WHAT you want to do, not HOW you think you want to do it

    1. Use [CODE] ....[/CODE ] Tags for readability
    [CODE]PasteYourCodeHere[/CODE ] -- (or paste your code, select it, click [#] button)
    2. Upload an example
    Go Advanced / Attachments - Manage Attachments / Add Files / Select Files / Select the file(s) / Upload Files / Done
    3. Mark the thread as [Solved] when you have an answer
    Thread Tools (on the top right corner, above the first message)
    4. Read the Forum FAQ, especially the part about cross-posting in other forums
    http://www.vbaexpress.com/forum/faq...._new_faq_item3

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •