Consulting

Results 1 to 6 of 6

Thread: List all capitalized word from documents

  1. #1
    VBAX Newbie
    Joined
    Jan 2016
    Posts
    3
    Location

    Exclamation List all capitalized word from documents

    I found this vba-code on net and I've tried to adjust so it only search for words with capitalized first letter. My problem is that I don't even can get it to list all words so it match case from the orginal documents.

    I have tried to
    SingleWord = Trim(LCase(aWord))
    with
    SingleWord = Trim(aWord)
    Is there anyone who can help me with this issue?

    Here is the code;


    Option Explicit
    
    
    
    Function GetFolder(Optional Title As String = "Select a Folder", Optional RootFolder As Variant) As String
     On Error Resume Next
     GetFolder = CreateObject("Shell.Application").BrowseForFolder(0, Title, 0, RootFolder).Items.Item.Path
     End Function
    
    
    Sub LoopAll()
      Dim strPath As String
      Dim strFile As String
      Dim oCurDoc As Document
      Dim oDoc As Document
      Dim oTempDoc As Document
      Dim Ans As String
    
    
      'Word Breaker variables
      Dim TotalWords As Long
      Dim SingleWord As String      'Raw word pulled from doc
      Const MaxWords = 9000         'Maximum unique words allowed
      Dim Words(MaxWords) As String 'Array to hold unique words
      Dim Freq(MaxWords) As Integer 'Frequency counter for Unique Words
      Dim WordNum As Integer        'Number of unique words
      Dim ByFreq As Boolean         'Flag for sorting order
      Dim ttlwds As Long            'Total words in the document
      Dim Found As Boolean          'Temporary flag
      Dim j, k, l, Temp As Integer  'Temporary variables
      Dim tword As String '
      Dim thisDoc As String
      Dim Excludes As String        'Words to be excluded
      Dim aWord As Range
      
      On Error GoTo ErrHandler
    
    
      Set oCurDoc = Documents.Add
      'strPath = "C:\Word\"  ' note the trailing \
      'strPath = "D:\Data\TestFolder\"  ' note the trailing \
       strPath = InputBox$("Enter Path (ex: D:\data\testfolder\ - note trailing \)", "Data Path", "D:\data\testfolder\")
      'strPath = GetFolder(Title:="Find a Folder", RootFolder:=&H11) & ""
    
    
      'strFile = Dir(strPath & "*.doc")  'word docs only
      'strFile = Dir(strPath & "*.*")   'scan all files
       strFile = Dir(strPath & InputBox$("Enter File Extensions to scan", "File types", "*.docx"))
    
    
      ' Set up excluded words ... edit the list below.
      Excludes = "[a][also][an][and][are][as][at][be][by]" & vbCrLf
      Excludes = Excludes & "[for][if][in][is][it][its][of][on][or][so]" & vbCrLf
      Excludes = Excludes & "[then][there][them][the][this][that][to][you]" & vbCrLf
      Excludes = Excludes & InputBox$("The following words are excluded: " & Excludes & _
        ".Enter any other words that you wish to exclude, surrounding each word with [ ].", _
        "Excluded Words", "")
    
    
      ' Find out how to sort
      ByFreq = True
      Ans = InputBox$("Sort by WORD or by FREQ?", "Sort order", "WORD")
      If Ans = "" Then
        Exit Sub
      End If
    
    
      If UCase(Ans) = "WORD" Then
        ByFreq = False
      End If
    
    
      System.Cursor = wdCursorWait
      Application.ScreenUpdating = False
    
    
      Do While Not strFile = ""
        ' Open document
        Set oDoc = Documents.Open(FileName:=strPath & strFile, AddToRecentFiles:=False)
        thisDoc = oDoc.Name
    
    
        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        'This code compiles a sorted table showing the frequency with which
        'any given word appears in a document. Words not required for inclusion in
        'the count can be excluded.
        'Adapted from code by macropod on WOPR.com by Chalmers Davis
        '20030820
    
    
        Selection.HomeKey Unit:=wdStory
        WordNum = 0
        ttlwds = ActiveDocument.Words.Count
        TotalWords = ActiveDocument.Words.Count
        Selection.Range.Case = wdTitleWord
        
        ' Control the repeat
        For Each aWord In ActiveDocument.Words
        
        SingleWord = Trim(LCase(aWord))
         ' SingleWord = Trim(aWord)
              If (Left(SingleWord, 1) = "'") And (Len(SingleWord) > 1) Then
            SingleWord = Mid(SingleWord, 2)     'trash that leading apostrophe
            If Right(SingleWord, 1) = "'" Then    'drop any trailing one, too.
              SingleWord = Left(SingleWord, Len(SingleWord) - 1)
            End If
          Else
            If SingleWord < "a" Or SingleWord > "z" Then SingleWord = ""     'Out of range?
          End If
      
          If InStr(Excludes, "[" & SingleWord & "]") Then SingleWord = "" 'On exclude list?
      
          If Len(SingleWord) > 0 Then
            Found = False
        
            For j = 1 To WordNum
              If Words(j) = SingleWord Then
                Freq(j) = Freq(j) + 1
                Found = True
                Exit For
              End If
            Next j
    
    
            If Not Found Then
              WordNum = WordNum + 1
              Words(WordNum) = SingleWord
              Freq(WordNum) = 1
            End If
        
            If WordNum > MaxWords - 1 Then
              MsgBox "The maximum array size has been exceeded. Increase maxwords.", vbExclamation
              Exit For
            End If
    
    
          End If
          ttlwds = ttlwds - 1
          StatusBar = "Remaining: " & ttlwds & "   Unique: " & WordNum
        Next aWord
    
    
        ' Now sort it into word order
        For j = 1 To WordNum - 1
          k = j
      
          For l = j + 1 To WordNum
            If (Not ByFreq And Words(l) < Words(k)) Or (ByFreq And Freq(l) > Freq(k)) Then k = l
          Next l
    
    
          If k <> j Then
            tword = Words(j)
            Words(j) = Words(k)
            Words(k) = tword
            Temp = Freq(j)
            Freq(j) = Freq(k)
            Freq(k) = Temp
          End If
    
    
          StatusBar = "Sorting: " & WordNum - j
    
    
        Next j
    
    
        ' Close document without saving it
        oDoc.Close SaveChanges:=wdDoNotSaveChanges
    
    
        ' Now write out the results
        Set oTempDoc = Documents.Add ' write results to this document
        Selection.ParagraphFormat.TabStops.ClearAll
    
    
        With Selection
          For j = 1 To WordNum
            .TypeText Text:=Words(j) & vbTab & Trim(Str(Freq(j))) & vbCrLf
          Next j
        End With
    
    
        oTempDoc.Range.Select
        Selection.ConvertToTable
        Selection.Collapse wdCollapseStart
    
    
        With oTempDoc.Tables(1)
          .Rows.Add BeforeRow:=Selection.Rows(1)
    
    
          'Insert filename in first row
         .Cell(1, 1).Range.InsertBefore "Title"
         .Cell(1, 2).Range.InsertBefore thisDoc
         .Rows.Add
    
    
         .Range.ParagraphFormat.Alignment = wdAlignParagraphLeft
    
    
         .Rows.Add
         .Cell(ActiveDocument.Tables(1).Rows.Count, 1).Range.InsertBefore _
                                      "Total words in Document"
         .Cell(ActiveDocument.Tables(1).Rows.Count, 2).Range.InsertBefore TotalWords
         .Rows.Add
         .Cell(ActiveDocument.Tables(1).Rows.Count, 1).Range.InsertBefore _
                                        "No. of different words"
         .Cell(ActiveDocument.Tables(1).Rows.Count, 2).Range.InsertBefore _
                                        Trim(Str(WordNum))
          'Adjust column withs with AutoFit
         .Columns.AutoFit
         .Range.Copy
        End With
      
        'Run Spell Checker
        'For i = ActiveDocument.SpellingErrors.Count To 1 Step -1
        '  SendKeys "%c"
        '  ActiveDocument.SpellingErrors(i).CheckSpelling
        'Next i
        oCurDoc.Activate
        Selection.EndKey Unit:=wdStory
        Selection.Paste
        Selection.InsertBreak Type:=wdPageBreak
        oTempDoc.Close SaveChanges:=wdDoNotSaveChanges
        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    
        strFile = Dir
      Loop
    
    
    ExitHandler:
      ' Cleaning up
      Selection.HomeKey wdStory
      System.Cursor = wdCursorNormal
      Application.ScreenUpdating = True
      Set aWord = Nothing
      Set oCurDoc = Nothing
      Set oTempDoc = Nothing
      Set oDoc = Nothing
      Exit Sub
    
    
    ErrHandler:
      MsgBox Err.Description, vbExclamation
      Resume ExitHandler
    End Sub

  2. #2
    Rather than try and debug someone else's code the following will extract all the unique words beginning with a capital letter to a new document:
    Option Explicit
    
    Sub FindWords()
    Dim oRng As Range
    Dim Coll As New Collection
    Dim Arr() As Variant
    Dim oDoc As Document
    Dim i As Long
        Set oRng = ActiveDocument.Range
        With oRng.Find
            Do While .Execute(FindText:="<[A-Z][a-z]{1,}>", MatchWildcards:=True)
                Coll.Add oRng.Text
                oRng.Collapse 0
            Loop
        End With
        Arr = toArray(Coll)
        QuickSort Arr
        Set oDoc = Documents.Add
        For i = LBound(Arr) To UBound(Arr)
            Select Case i
                Case LBound(Arr)
                    oDoc.Range.InsertAfter Arr(i)
                    If i < UBound(Arr) Then oDoc.Range.InsertAfter vbCr
                Case Else
                    If Not Arr(i) = Arr(i - 1) Then
                        oDoc.Range.InsertAfter Arr(i)
                        If i < UBound(Arr) Then oDoc.Range.InsertAfter vbCr
                    End If
            End Select
        Next i
    lbl_Exit:
        Exit Sub
    End Sub
    Private Function toArray(ByVal Coll As Collection) As Variant
    Dim Arr() As Variant
    Dim i As Long
        ReDim Arr(1 To Coll.Count) As Variant
        For i = 1 To Coll.Count
            Arr(i) = Coll(i)
        Next
        toArray = Arr
    lbl_Exit:
        Exit Function
    End Function
    
    Private Sub QuickSort(vArray As Variant, _
                          Optional lng_Low As Long, _
                          Optional lng_High As Long)
    Dim vPivot As Variant
    Dim vTmp_Swap As Variant
    Dim tmp_Low As Long
    Dim tmp_High As Long
    
        If lng_High = 0 Then
            lng_Low = LBound(vArray)
            lng_High = UBound(vArray)
        End If
    
        tmp_Low = lng_Low
        tmp_High = lng_High
        vPivot = vArray((lng_Low + lng_High) \ 2)
        While (tmp_Low <= tmp_High)
            While (vArray(tmp_Low) < vPivot And tmp_Low < lng_High)
                tmp_Low = tmp_Low + 1
            Wend
            While (vPivot < vArray(tmp_High) And tmp_High > lng_Low)
                tmp_High = tmp_High - 1
            Wend
            If (tmp_Low <= tmp_High) Then
                vTmp_Swap = vArray(tmp_Low)
                vArray(tmp_Low) = vArray(tmp_High)
                vArray(tmp_High) = vTmp_Swap
                tmp_Low = tmp_Low + 1
                tmp_High = tmp_High - 1
            End If
        Wend
        If (lng_Low < tmp_High) Then QuickSort vArray, lng_Low, tmp_High
        If (tmp_Low < lng_High) Then QuickSort vArray, tmp_Low, lng_High
    lbl_Exit:
        Exit Sub
    End Sub
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  3. #3
    VBAX Newbie
    Joined
    Jan 2016
    Posts
    3
    Location
    Hi gmayor!


    You saved my day! VBA is not my strength, so I tried to adjust/debug with my minor competence.


    If you wrote this script today - am I really impressed.


    Thank you very much!

  4. #4
    The two processes are standard processes for converting a collection to an array and sorting an array copied from my collection of functions. The rest is just a simple range search adding what you find that matches to a collection and then writing the unique values from the sorted array to a new document. Glad it did the job for you.
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  5. #5
    VBAX Newbie
    Joined
    Jan 2016
    Posts
    3
    Location
    Is there a way to expand
    <[A-Z][a-z]{1,}>
    to look for one, two and up to three words in same sentence?

    I've tried
    [!. ]<[A-Z][a-z]{1;}>?<[A-Z][a-z]{1;}>?<[A-Z][a-z]{1;}>
    - but this will return words before "." and paragraphs.

    e.g.

    Lorem Ipsum dolor sit amet, consectetur adipiscing elit. Mauris lobortis, Quam At Dignissim mollis, mi urna convallis tellus, ac malesuada lacus metus ut lacus. Nullam Quis ipsum vel diam maximus ultricies et gravida dolor.
    ..should give;
    Lorem Ipsum
    Mauris
    Quam At Dignissim
    Nullam Quis

    or should I use another vba-syntax?

    -r

  6. #6
    The following should do the job

    Sub FindWords()
    Dim oRng As Range, oFound As Range
    Dim Coll As New Collection
    Dim Arr() As Variant
    Dim oDoc As Document
    Dim i As Long
        Set oRng = ActiveDocument.Range
        With oRng.Find
            Do While .Execute(FindText:="<[A-Z][a-z]{1,}>", MatchWildcards:=True)
                If oRng.Words(1).Next.Characters(1).Case = wdUpperCase Then
                    oRng.End = oRng.Words(1).Next.End
                    oRng.MoveEndWhile "abcdefghijklmnopqrstuvwxyz"
                    If oRng.Words(2).Next.Characters(1).Case = wdUpperCase Then
                        oRng.End = oRng.Words(2).Next.End
                        oRng.MoveEndWhile "abcdefghijklmnopqrstuvwxyz"
                        If oRng.Words(3).Next.Characters(1).Case = wdUpperCase Then
                            oRng.End = oRng.Words(3).Next.End
                            oRng.MoveEndWhile "abcdefghijklmnopqrstuvwxyz"
                            If oRng.Words(4).Next.Characters(1).Case = wdUpperCase Then
                                oRng.End = oRng.Words(4).Next.End
                                oRng.MoveEndWhile "abcdefghijklmnopqrstuvwxyz"
                            End If
                        End If
                    End If
                End If
                'MsgBox oRng.Text
                Coll.Add oRng.Text
                oRng.Collapse 0
            Loop
        End With
        Arr = toArray(Coll)
        QuickSort Arr
        Set oDoc = Documents.Add
        For i = LBound(Arr) To UBound(Arr)
            Select Case i
                Case LBound(Arr)
                    oDoc.Range.InsertAfter Arr(i)
                    If i < UBound(Arr) Then oDoc.Range.InsertAfter vbCr
                Case Else
                    If Not Arr(i) = Arr(i - 1) Then
                        oDoc.Range.InsertAfter Arr(i)
                        If i < UBound(Arr) Then oDoc.Range.InsertAfter vbCr
                    End If
            End Select
        Next i
    lbl_Exit:
        Exit Sub
    End Sub
    The rest of the code is the same. If there are more than 5 consecutive words with initial caps in the string you will have to modify the code to accommodate them.
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •