PDA

View Full Version : Acronym Matcher - exclude terms



lsmcal1984
11-13-2021, 08:21 AM
Hi all,

I am using the code below to find acronyms in Word documents. It uses the Acronyms.xlsx reference file which has two worksheets - Acronyms and Excluded.

I'd like to update the code below to check if any acronyms in the Word document match those listed in the Excluded worksheet, so it will not add them to the final table. This will save much time having to delete them each time :)

I hope that makes sense and I appreciate anybody's help with this.



Sub AcronymMatcher()
'
' AcronymMatcher Macro
'
'
Dim oDoc_Source As Document
Dim oDoc_Target As Document
Dim strListSep As String
Dim strAcronym As String
Dim strDef As String
Dim oTable As Table
Dim oRange As Range
Dim n As Long
Dim m As Long
m = 0
Dim strAllFound As String
Dim Title As String
Dim Msg As String
Dim objExcel As Object
Dim objWbk As Object
Dim rngSearch As Object
Dim rngFound As Object
Dim targetCellValue As String


' message box title
Title = "Extract Acronyms to New Document"


' Set message box message
Msg = "This macro finds all Acronyms (consisting of 2 or more " & _
"uppercase letters, Numbers or '/') and their associated definitions. It " & _
"then extracts the words to a table at the current location you have selected" & vbCr & vbCr & _
"Warning - Please make sure you check the table manually after!" & vbCr & vbCr & _
"Do you want to continue?"


' Display message box
If MsgBox(Msg, vbYesNo + vbQuestion, Title) <> vbYes Then
Exit Sub
End If


' Stop the screen from updating
Application.ScreenUpdating = False




'Find the list separator from international settings
'May be a comma or semicolon depending on the country
strListSep = Application.International(wdListSeparator)


'Start a string to be used for storing names of acronyms found
strAllFound = "#"


' give the active document a variable
Set oDoc_Source = ActiveDocument


'Crete a variable for excel and open the definition workbook
Set objExcel = CreateObject("Excel.Application")
Set objWbk = objExcel.Workbooks.Open("D:\Acronyms.xlsx")
'objExcel.Visible = True
objWbk.Activate


'Create new document to temporarily store the acronyms
Set oDoc_Target = Documents.Add


' Use the target document
With oDoc_Target


'Make sure document is empty
.Range = ""


'Insert info in header - change date format as you wish
'.PageSetup.TopMargin = CentimetersToPoints(3)
'.Sections(1).Headers(wdHeaderFooterPrimary).Range.Text = _
' "Acronyms extracted from: " & oDoc_Source.FullName & vbCr & _
' "Created by: " & Application.UserName & vbCr & _
' "Creation date: " & Format(Date, "MMMM d, yyyy")


'Adjust the Normal style and Header style
With .Styles(wdStyleNormal)
.Font.Name = "Arial"
.Font.Size = 10
.ParagraphFormat.LeftIndent = 0
.ParagraphFormat.SpaceAfter = 6
End With


With .Styles(wdStyleHeader)
.Font.Size = 8
.ParagraphFormat.SpaceAfter = 0
End With


'Insert a table with room for acronym and definition
Set oTable = .Tables.Add(Range:=.Range, NumRows:=2, NumColumns:=2)
With oTable
'Format the table a bit
'Insert headings
.Range.Style = wdStyleNormal
.AllowAutoFit = False
.Cell(1, 1).Range.Text = "Acronym"
.Cell(1, 2).Range.Text = "Definition"


'Set row as heading row
.Rows(1).HeadingFormat = True
.Rows(1).Range.Font.Bold = True
.PreferredWidthType = wdPreferredWidthPercent
.Columns(1).PreferredWidth = 20
.Columns(2).PreferredWidth = 70


End With
End With






With oDoc_Source
Set oRange = .Range


n = 1 'used to count below


' within the total range of the source document
With oRange.Find
'Use wildcard search to find strings consisting of 3 or more uppercase letters
'Set the search conditions
'NOTE: If you want to find acronyms with e.g. 2 or more letters,
'change 3 to 2 in the line below
.Text = "<[A-Z][A-Z0-9/]{1" & strListSep & "}>"
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWildcards = True


'Perform the search
Do While .Execute


'Continue while found
strAcronym = oRange


'Insert in target doc
'If strAcronym is already in strAllFound, do not add again
If InStr(1, strAllFound, "#" & strAcronym & "#") = 0 Then


'Add new row in table from second acronym
If n > 1 Then oTable.Rows.Add


'Was not found before
strAllFound = strAllFound & strAcronym & "#"


'Insert in column 1 in oTable
'Compensate for heading row


With oTable
.Cell(n + 1, 1).Range.Text = strAcronym


' Find the definition from the Excel document
With objWbk.Sheets("Sheet1")
' Find the range of the cells with data in Excel doc
Set rngSearch = .Range(.Range("A1"), .Range("A" & .Rows.Count).End(-4162))


' Search in the found range for the
Set rngFound = rngSearch.Find(What:=strAcronym, After:=.Range("A1"), LookAt:=1)


' if nothing is found count the number of acronyms without definitions
If rngFound Is Nothing Then
m = m + 1


' Set the cell variable in the new table as blank
targetCellValue = ""


' If a definition is found enter it into the cell variable
Else
targetCellValue = .Cells(rngFound.Row, 2).Value


End If
End With


' enter the cell varibale into the definition cell
.Cell(n + 1, 2).Range.Text = targetCellValue
End With




' add one to the loop count
n = n + 1


End If
Loop
End With
End With






'Sort the acronyms alphabetically - skip if only 1 found
If n > 2 Then


With Selection
.Sort ExcludeHeader:=True, FieldNumber:="Column 1", SortFieldType _
:=wdSortFieldAlphanumeric, SortOrder:=wdSortOrderAscending


'Go to start of document
.HomeKey (wdStory)


End With
End If


'Copy the whole table, switch to the source document and past
'in the table at the original selection location
Selection.WholeStory
Selection.Copy
oDoc_Source.Activate
Selection.Paste


' update screen
Application.ScreenUpdating = True


'If no acronyms found set message saying so
If n = 1 Then
Msg = "No acronyms found."


' set the final messagebox message to show the number of acronyms found and those that did not have definitions
Else
Msg = "Finished extracting " & n - 1 & " acronymn(s) to a new document. Unable to find definitions for " & m & " acronyms."
End If


' Show the finished message box
'AppActivate Application.Caption
'MsgBox Msg, vbOKOnly, Title


'make the target document active and close it down without saving
oDoc_Target.Activate
ActiveDocument.Close SaveChanges:=wdDoNotSaveChanges


'Close Excel after
objWbk.Close Saved = True


'Clean up
Set oRange = Nothing
Set oDoc_Source = Nothing
Set oDoc_Target = Nothing
Set oTable = Nothing
Set objExcel = Nothing
Set objWbk = Nothing


End Sub

Dave
11-13-2021, 09:12 AM
Hi Iscmal1984 and welcome to this forum. It seems to me that you could make a large string of the excluded acronyms and check it similar to this line of code...

If InStr(1, strAllFound, "#" & strAcronym & "#") = 0 Then
something like..

If InStr(1, strAllExcluded, "#" & strAcronym & "#") = 0 Then
where the strAllExcluded is a delimited string containing all the excluded acronyms. Maybe place this line of code before the check to see if the acronym has already been found. HTH. Dave
ps. please use code tags

Paul_Hossler
11-13-2021, 12:57 PM
@Iscmal1984

Welcome to the forum. Take a minute to read the FAQs at the link in my sig

1. I added CODE tags to your post. It gets formatted better and sets it off

2. It might be helpful if you attached a sample for your files to show people what you're working with. If there's a special output format, it's usually a good idea to (manually if necessary) create a sample

gmayor
11-13-2021, 10:15 PM
If you have a worksheet with two columns starting at A1, you don't need to run Excel to read that sheet. You can read the sheet directly into an array with the following function and compare the values in the array with those from your search. This is much faster than opening and reading from Excel. If there are no header rows in the worksheets change HDR=YES to HDR=NO


Private Function xlFillArray(strWorkbook As String, _
strRange As String) As Variant
'Graham Mayor - http://www.gmayor.com - 24/09/2016
Dim RS As Object
Dim CN As Object
Dim iRows As Long

strRange = strRange & "$]" 'Use this to work with a named worksheet
'strRange = strRange & "]" 'Use this to work with a named range
Set CN = CreateObject("ADODB.Connection")

'Set HDR=NO for no header row
CN.Open ConnectionString:="Provider=Microsoft.ACE.OLEDB.12.0;" & _
"Data Source=" & strWorkbook & ";" & _
"Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1"""

Set RS = CreateObject("ADODB.Recordset")
RS.Open "SELECT * FROM [" & strRange, CN, 2, 1

With RS
.MoveLast
iRows = .RecordCount
.MoveFirst
End With
xlFillArray = RS.GetRows(iRows)
If RS.State = 1 Then RS.Close
Set RS = Nothing
If CN.State = 1 Then CN.Close
Set CN = Nothing
lbl_Exit:
Exit Function
End Function

You can incorporate that in your code to check and write the values into your table as required e.g.

Const sWorkbook As String = "D:\Acronyms.xlsx"
Const sAcronyms As String = "Acronyms"
Const sExcluded As String = "Excluded"
Dim Arr() As Variant
Dim i As Integer
Arr = xlFillArray(sWorkbook, sAcronyms)
For i = 0 To UBound(Arr, 2)
'compare the word with the acronym list in column 1 - Arr(0, 1)
Debug.Print Arr(0, i)
'get the associated description from column 2 - Arr(1, i)
Debug.Print Arr(1, i)
Next i
Arr = xlFillArray(sWorkbook, sExcluded)
For i = 0 To UBound(Arr, 2)
'compare the word with the excluded list in column 1 - Arr(0, 1)
Debug.Print Arr(0, i)
'get the associated description from column 2 only if available and required for the excluded list- Arr(1, 1)
Debug.Print Arr(1, i)
Next i

gmayor
11-14-2021, 04:03 AM
Playing around with your macro, I think the following will work for you:

Option Explicit

Sub AcronymMatcher()
'
' AcronymMatcher Macro
'Graham Mayor - https://www.gmayor.com - Last updated - 14 Nov 2021
'
Dim oDoc_Source As Document
Dim oDoc_Target As Document
Dim strListSep As String
Dim strAcronym As String
Dim strDef As String
Dim oTable As Table
Dim oRow As Row
Dim oRng As Range
Dim oCell As Range
Dim n As Long, i As Long
Dim m As Long

Dim sTitle As String
Dim sMsg As String
Dim ArrAcr() As Variant, ArrExc() As Variant
Dim colFound As Collection, colNotFound As Collection, colExcluded As Collection
Dim bFound As Boolean

Const sWorkbook As String = "D:\Acronyms.xlsx"
Const sAcronyms As String = "Acronyms"
Const sExcluded As String = "Excluded"

' message box title
sTitle = "Extract Acronyms to New Document"
' Set message box message
sMsg = "This macro finds all Acronyms (consisting of 2 or more " & _
"uppercase letters, Numbers or '/') and their associated definitions. It " & _
"then extracts the words to a table at the current location you have selected" & vbCr & vbCr & _
"Warning - Please make sure you check the table manually after!" & vbCr & vbCr & _
"Do you want to continue?"
' Display message box
If Not MsgBox(sMsg, vbYesNo + vbQuestion, sTitle) = vbYes Then Exit Sub

ArrAcr = xlFillArray(sWorkbook, sAcronyms)
ArrExc = xlFillArray(sWorkbook, sExcluded)

' Stop the screen from updating
Application.ScreenUpdating = False

'Find the list separator from international settings
'May be a comma or semicolon depending on the country
strListSep = Application.International(wdListSeparator)

' give the active document a variable
Set oDoc_Source = ActiveDocument

'Create new document to temporarily store the acronyms
Set oDoc_Target = Documents.Add

' Use the target document
With oDoc_Target

'Adjust the Normal style and Header style
With .Styles(wdStyleNormal)
.Font.Name = "Arial"
.Font.Size = 10
.ParagraphFormat.LeftIndent = 0
.ParagraphFormat.SpaceAfter = 6
End With


With .Styles(wdStyleHeader)
.Font.Size = 8
.ParagraphFormat.SpaceAfter = 0
End With


'Insert a table with room for acronym and definition
Set oTable = .Tables.Add(Range:=.Range, NumRows:=1, NumColumns:=2)
With oTable
'Format the table a bit
'Insert headings
.Range.Style = wdStyleNormal
.AllowAutoFit = False
.Cell(1, 1).Range.Text = "Acronym"
.Cell(1, 2).Range.Text = "Definition"

'Set row as heading row
.Rows(1).HeadingFormat = True
.Rows(1).Range.Font.Bold = True
.PreferredWidthType = wdPreferredWidthPercent
.Columns(1).PreferredWidth = 20
.Columns(2).PreferredWidth = 70
End With
End With

Set colFound = New Collection
Set colNotFound = New Collection
Set colExcluded = New Collection

With oDoc_Source
Set oRng = .Range

' within the total range of the source document
With oRng.Find
'Use wildcard search to find strings consisting of 3 or more uppercase letters
'Set the search conditions
'NOTE: If you want to find acronyms with e.g. 2 or more letters,
'change 3 to 2 in the line below
.Text = "<[A-Z][A-Z0-9/]{1" & strListSep & "}>"
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWildcards = True


'Perform the search
Do While .Execute
'Continue while found
On Error Resume Next
colFound.Add oRng.Text, oRng.Text
oRng.Collapse 0
Loop

With oTable
For n = 1 To colFound.Count
For m = 0 To UBound(ArrExc, 2)
If colFound(n) = ArrExc(0, m) Then
colExcluded.Add colFound(n), colFound(n)
GoTo Skip
Exit For
End If
Next m
For i = 0 To UBound(ArrAcr, 2)
bFound = False
If colFound(n) = ArrAcr(0, i) Then
Set oRow = .Rows.Add
oRow.HeadingFormat = False
oRow.Range.Font.Bold = False

Set oCell = .Cell(oRow.Index, 1).Range
oCell.End = oCell.End - 1
oCell.Text = ArrAcr(0, i)

Set oCell = .Cell(oRow.Index, 2).Range
oCell.End = oCell.End - 1
oCell.Text = ArrAcr(1, i)
bFound = True
Exit For
End If
If bFound = False Then colNotFound.Add colFound(n), colFound(n)
Next i

Skip:
Next n
'Sort the acronyms alphabetically - skip if only 1 found
If .Rows.Count > 2 Then
.Sort _
ExcludeHeader:=True, _
FieldNumber:="Column 1", _
SortFieldType:=wdSortFieldAlphanumeric, _
SortOrder:=wdSortOrderAscending
End If
If .Rows.Count > 1 Then
oDoc_Source.Range.InsertParagraphAfter
Set oRng = oDoc_Source.Range
oRng.Collapse 0
oRng.FormattedText = .Range.FormattedText
sMsg = "Finished extracting " & .Rows.Count - 1 & " acronym(s)."
Else
sMsg = "No acronyms found."
End If
End With
End With
End With

Set oRng = oTable.Range
oRng.End = oRng.End + 1
oRng.Collapse 0
oRng.Text = vbCr
oRng.Collapse 0

'MsgBox colNotFound.Count & " not found"
If colNotFound.Count > 0 Then
oRng.Text = vbCr & "Not Found: " & vbCr
oRng.Paragraphs(2).Range.Font.Bold = True
oRng.Collapse 0
For i = 1 To colNotFound.Count
oRng.Text = colNotFound(i) & vbCr
oRng.Collapse 0
Next i
End If

'MsgBox colExcluded.Count & " excluded"
If colExcluded.Count > 0 Then
oRng.Text = vbCr & "Excluded: " & vbCr
oRng.Paragraphs(2).Range.Font.Bold = True
oRng.Collapse 0
For i = 1 To colExcluded.Count
oRng.Text = colExcluded(i) & vbCr
oRng.Collapse 0
Next i
End If

' update screen
Application.ScreenUpdating = True

' Show the finished message box
MsgBox sMsg, vbInformation

'oDoc_Target.Close SaveChanges:=wdDoNotSaveChanges

'Clean up
Set oRng = Nothing
Set oDoc_Source = Nothing
Set oDoc_Target = Nothing
Set oTable = Nothing
Set oCell = Nothing
Set colFound = Nothing
End Sub

Private Function xlFillArray(strWorkbook As String, _
strRange As String) As Variant
'Graham Mayor - http://www.gmayor.com - 24/09/2016
Dim RS As Object
Dim CN As Object
Dim iRows As Long

strRange = strRange & "$]" 'Use this to work with a named worksheet
'strRange = strRange & "]" 'Use this to work with a named range
Set CN = CreateObject("ADODB.Connection")

'Set HDR=NO for no header row
CN.Open ConnectionString:="Provider=Microsoft.ACE.OLEDB.12.0;" & _
"Data Source=" & strWorkbook & ";" & _
"Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1"""

Set RS = CreateObject("ADODB.Recordset")
RS.Open "SELECT * FROM [" & strRange, CN, 2, 1

With RS
.MoveLast
iRows = .RecordCount
.MoveFirst
End With
xlFillArray = RS.GetRows(iRows)
If RS.State = 1 Then RS.Close
Set RS = Nothing
If CN.State = 1 Then CN.Close
Set CN = Nothing
lbl_Exit:
Exit Function
End Function

Paul_Hossler
11-14-2021, 05:37 PM
Graham --

I like that


Private Function xlFillArray(strWorkbook As String, strRange As String) As Variant