Consulting

Results 1 to 19 of 19

Thread: Extracting "Shall" Statements with Associated Paragraph Number

  1. #1

    Extracting "Shall" Statements with Associated Paragraph Number

    I found a helpful script on this site that was created by lucas. It extracts shall statements from a word doc and places them in an .xlsx spreadsheet. I would also like for the script to pull the associated paragraph number with the "shall" statement and place all of that in the xlsx spreadsheet...paragraph number in first column and shall statement in the next column.

    I am a complete novice, so I have no idea how to make that happen.

    Here is the script by lucas:

    ' Put this code in a module:

    Option Explicit

    Sub FindWordCopySentence()
    Dim appExcel As Object
    Dim objSheet As Object
    Dim aRange As Range
    Dim intRowCount As Integer
    intRowCount = 1
    Set aRange = ActiveDocument.Range
    With aRange.Find
    Do
    .Text = "shall" ' the word I am looking for
    .Execute
    If .Found Then
    aRange.Expand Unit:=wdSentence
    aRange.Copy
    aRange.Collapse wdCollapseEnd
    If objSheet Is Nothing Then
    Set appExcel = CreateObject("Excel.Application")
    'Change the file path to match the location of your test.xlsx
    Set objSheet = appExcel.workbooks.Open("C:\temp\test.xlsx").Sheets("Sheet1")
    intRowCount = 1
    End If
    objSheet.Cells(intRowCount, 1).Select
    objSheet.Paste
    intRowCount = intRowCount + 1
    End If
    Loop While .Found
    End With
    If Not objSheet Is Nothing Then
    appExcel.workbooks(1).Close True
    appExcel.Quit
    Set objSheet = Nothing
    Set appExcel = Nothing
    End If
    Set aRange = Nothing
    End Sub

  2. #2
    If you want a related paragraph count you are going to have to process the paragraphs separately and count them e.g. as follows. Inevitably this will take longer to process.

    Option Explicit
    
    Sub FindWordCopySentence()
    Dim appExcel As Object
    Dim objSheet As Object
    Dim aRange As Range
    Dim intRowCount As Integer
    Dim intParaCount As Integer
    
        If objSheet Is Nothing Then
            Set appExcel = CreateObject("Excel.Application")
            'Change the file path to match the location of your test.xlsx
            Set objSheet = appExcel.workbooks.Open("C:\Test\test.xlsx").Sheets("Sheet1")
            intRowCount = 1
            objSheet.Cells(intRowCount, 1) = "Paragraph Number"
            objSheet.Cells(intRowCount, 2) = "Sentence Text"
            intRowCount = intRowCount + 1
        End If
        
        For intParaCount = 1 To ActiveDocument.Paragraphs.Count
            Set aRange = ActiveDocument.Paragraphs(intParaCount).Range
            With aRange.Find
                Do
                    .Text = "shall"    ' the word I am looking for
                    .Execute
                    If .Found Then
                        aRange.Expand Unit:=wdSentence
                        aRange.Copy
                        aRange.Collapse wdCollapseEnd
                        objSheet.Cells(intRowCount, 1) = intParaCount
                        objSheet.Cells(intRowCount, 2).Select
                        objSheet.Paste
                        intRowCount = intRowCount + 1
                    End If
                Loop While .Found
            End With
        Next intParaCount
        If Not objSheet Is Nothing Then
            objSheet.usedrange.Columns.AutoFit
            appExcel.workbooks(1).Close True
            appExcel.Quit
            Set objSheet = Nothing
            Set appExcel = Nothing
        End If
        Set aRange = Nothing
    End Sub
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  3. #3
    Thank you gmayor!

    I ran the script and I received the following error after it ran for about an hour: Run-time error '6': Overflow

    It stopped right after objSheet.Paste. The row below that intRowCount = intRowCount + 1 is highlighted in yellow for debug.

    As I said, I no nothing about this, so I have no idea how to debug it or what caused the error.

  4. #4
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    Try:

    Sub FindWordCopySentence()
        Dim appExcel As Object
        Dim objSheet As Object
        Dim aRange As Range
        Dim intRowCount As Integer
        Dim intParaCount As Integer
        Dim oRngBounds As Range
        If objSheet Is Nothing Then
            Set appExcel = CreateObject("Excel.Application")
             'Change the file path to match the location of your test.xlsx
            Set objSheet = appExcel.workbooks.Open("C:\Test\test.xlsx").Sheets("Sheet1")
            intRowCount = 1
            objSheet.Cells(intRowCount, 1) = "Paragraph Number"
            objSheet.Cells(intRowCount, 2) = "Sentence Text"
            intRowCount = intRowCount + 1
        End If
         
        For intParaCount = 1 To ActiveDocument.Paragraphs.Count
            Set aRange = ActiveDocument.Paragraphs(intParaCount).Range
            Set oRngBounds = aRange.Duplicate
            With aRange.Find
                Do
                    .Text = "shall" ' the word I am looking for
                    .Execute
                    If .Found And aRange.InRange(oRngBounds) Then
                        aRange.Expand Unit:=wdSentence
                        aRange.Copy
                        Debug.Print aRange
                        aRange.Collapse wdCollapseEnd
                        
                        objSheet.Cells(intRowCount, 1) = intParaCount
                        objSheet.Cells(intRowCount, 2).Select
                        objSheet.Paste
                        intRowCount = intRowCount + 1
                    End If
                Loop While .Found
            End With
        Next intParaCount
        If Not objSheet Is Nothing Then
            objSheet.usedrange.Columns.AutoFit
            appExcel.workbooks(1).Close True
            appExcel.Quit
            Set objSheet = Nothing
            Set appExcel = Nothing
        End If
        Set aRange = Nothing
    End Sub
    Greg

    Visit my website: http://gregmaxey.com

  5. #5
    VBAX Sage
    Joined
    Apr 2007
    Location
    United States
    Posts
    8,724
    Location
    I'd change to Long's if you're getting an overflow just to be sure

    Dim intRowCount As Long 
     Dim intParaCount As Long
    ---------------------------------------------------------------------------------------------------------------------

    Paul


    Remember: Tell us WHAT you want to do, not HOW you think you want to do it

    1. Use [CODE] ....[/CODE ] Tags for readability
    [CODE]PasteYourCodeHere[/CODE ] -- (or paste your code, select it, click [#] button)
    2. Upload an example
    Go Advanced / Attachments - Manage Attachments / Add Files / Select Files / Select the file(s) / Upload Files / Done
    3. Mark the thread as [Solved] when you have an answer
    Thread Tools (on the top right corner, above the first message)
    4. Read the Forum FAQ, especially the part about cross-posting in other forums
    http://www.vbaexpress.com/forum/faq...._new_faq_item3

  6. #6
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    Paul,

    Good point. I don't know how large the OP's document is, but running an hour seemed excessive in any case and I have assumed the issue could be caused by a run-a-way range.
    Greg

    Visit my website: http://gregmaxey.com

  7. #7
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    Hello, I know this is an old post (which, I think should have been flagged as solved because the solution provided works great!). I am wondering how the output could cite the page number in addition to or instead of paragraphs? Any ideas? For long documents, the page number would be more useful.

  8. #8
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    Something like this:

    Sub FindWordCopySentence()
        Dim appExcel As Object
        Dim objSheet As Object
        Dim aRange As Range
        Dim intRowCount As Integer
        Dim intParaCount As Integer
        Dim oRngBounds As Range
        If objSheet Is Nothing Then
        Dim lngPage As Long
            Set appExcel = CreateObject("Excel.Application")
             'Change the file path to match the location of your test.xlsx
            Set objSheet = appExcel.workbooks.Open("D:\test.xlsx").Sheets("Sheet1")
            intRowCount = 1
            objSheet.Cells(intRowCount, 1) = "Paragraph Number"
            objSheet.Cells(intRowCount, 2) = "Sentence Text"
            intRowCount = intRowCount + 1
        End If
         
        For intParaCount = 1 To ActiveDocument.Paragraphs.Count
            Set aRange = ActiveDocument.Paragraphs(intParaCount).Range
            Set oRngBounds = aRange.Duplicate
            With aRange.Find
                Do
                    .Text = "shall" ' the word I am looking for
                    .Execute
                    If .Found And aRange.InRange(oRngBounds) Then
                        lngPage = aRange.Information(wdActiveEndPageNumber)
                        aRange.Expand Unit:=wdSentence
                        aRange.Copy
                        Debug.Print aRange
                        aRange.Collapse wdCollapseEnd
                        
                        objSheet.Cells(intRowCount, 1) = intParaCount & " - Page: " & lngPage
                        objSheet.Cells(intRowCount, 2).Select
                        objSheet.Paste
                        intRowCount = intRowCount + 1
                    End If
                Loop While .Found
            End With
        Next intParaCount
        If Not objSheet Is Nothing Then
            objSheet.usedrange.Columns.AutoFit
            appExcel.workbooks(1).Close True
            appExcel.Quit
            Set objSheet = Nothing
            Set appExcel = Nothing
        End If
        Set aRange = Nothing
    End Sub
    Greg

    Visit my website: http://gregmaxey.com

  9. #9
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    Greg! This works perfectly! Thank you so very much for revisiting this old post for me. I greatly appreciate it. What is especially valuable to me is that I can see what changes you made to the original post, giving me a chance to learn a little. Thank you!
    Since I did not post this originally, proper protocol would be that I should NOT flag this as solved, right? (I am still learning how the forum works.)

  10. #10
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    I'm not a moderator (or if I am then I've forgotten about it). I think that as the code as published worked and then it works as modified for you then marking the thread solved (if you even have that option) wouldn't be a capital offense.
    Greg

    Visit my website: http://gregmaxey.com

  11. #11
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    Thanks, I just tried and it doesn't give me that option, so it must be set up to only allow the person who started the thread (or is a moderator) mark it as solved. Thanks for all your help. This is perfect for my needs!

  12. #12
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    Greg, it's me again. I am hoping I can impose upon your goodwill one more time...
    I took some code from another one of your excellent posts on this form - it won't let me post the full URL for some reason but the subject is "getting-the-paragraph-heading-of-current-location&highlight=find+heading+for+text)." I tried to incorporate it into the above code to try to add a 3rd column to the extracted spreadsheet, this one including the header under which the shall statement has been found.

    Here is my feeble attempt at this (obviously I do not understand the range function). When I run this it locks up Word. I think it's looping endlessly but do not know how to fix it. I think I have caused overlapping ranges or something like that.

    Private Sub cmdExtractShalls_Click()
    Dim appExcel As Object
    Dim objSheet As Object
    Dim aRange As range
    Dim intRowCount As Integer
    Dim intParaCount As Integer
    Dim oRngBounds As range

    Dim oRng As range

    If objSheet Is Nothing Then

    Dim lngPage As Long
    Set appExcel = CreateObject("Excel.Application")
    'Change the file path to match the location of your test.xlsx
    Set objSheet = appExcel.workbooks.Open("C:\test\test.xlsx").Sheets("Sheet1")
    intRowCount = 1
    objSheet.Cells(intRowCount, 1) = "Page Number"
    objSheet.Cells(intRowCount, 2) = "Section Number"
    objSheet.Cells(intRowCount, 3) = "Sentence Text"
    intRowCount = intRowCount + 1
    End If

    For intParaCount = 1 To ActiveDocument.Paragraphs.Count
    Set aRange = ActiveDocument.Paragraphs(intParaCount).range
    Set oRngBounds = aRange.Duplicate
    With aRange.Find
    Do
    .Text = "shall" ' the word I am looking for
    .Execute
    If .Found And aRange.InRange(oRngBounds) Then
    lngPage = aRange.Information(wdActiveEndPageNumber)
    aRange.Expand Unit:=wdSentence
    aRange.Copy
    Debug.Print aRange
    aRange.Collapse wdCollapseEnd

    'objSheet.Cells(intRowCount, 1) = intParaCount & " - Page: " & lngPage
    objSheet.Cells(intRowCount, 1) = lngPage 'I removed the paracount b/c I only need page number
    objSheet.Cells(intRowCount, 3).Select

    objSheet.Paste
    intRowCount = intRowCount + 1
    Set oRng = Selection.range
    While oRng.ListParagraphs.Count = 0
    oRng.MoveStart Unit:=wdParagraph, Count:=-1
    oRng.Select
    Wend
    While oRng.ListParagraphs(1).range.ListFormat.ListType = 2
    oRng.MoveStart Unit:=wdParagraph, Count:=-1
    oRng.Select
    Wend
    objSheet.Cells(intRowCount, 2) = oRng.ListParagraphs(1).range.ListFormat.ListString

    End If
    Loop While .Found
    End With
    Next intParaCount
    If Not objSheet Is Nothing Then
    objSheet.usedrange.Columns.AutoFit
    appExcel.workbooks(1).Close True
    appExcel.Quit
    Set objSheet = Nothing
    Set appExcel = Nothing
    End If
    Set aRange = Nothing
    End Sub

  13. #13
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    What exactly are you trying to do?
    Greg

    Visit my website: http://gregmaxey.com

  14. #14
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    I'm trying to search though a document, looking for each sentence that contains the word "shall." Then have the program create a spreadsheet with 3 columns: 1) The page number that the shall statement was found on, 2) The heading number in which the shall statement resides, and 3) The sentence itself containing the shall statement. The two forum programs mentioned (both of which you authored!) each does part of this. One does Items 1 and 3, and the other does item 2. I was hoping to combine them into a single program.

  15. #15
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    I still don't know what you are trying to do, because listype 2 is a bulleted list, not a numbered heading:

    Private Sub cmdExtractShalls_Click()
    Dim oApp As Object
    Dim oSheet As Object
    Dim oRng As Range, oRngHeader As Range
    Dim lngRowCount As Long
    Dim lngParCount As Long
    Dim oRngBounds As Range
    Dim lngPage As Long
      If oSheet Is Nothing Then
        Set oApp = CreateObject("Excel.Application")
        'Change the file path to match the location of your test.xlsx
        Set oSheet = oApp.workbooks.Open("D:\Test.xlsx").Sheets("Sheet1")
        lngRowCount = 1
        With oSheet
          .Cells(lngRowCount, 1) = "Page Number"
          .Cells(lngRowCount, 2) = "Section Number"
          .Cells(lngRowCount, 3) = "Sentence Text"
        End With
        lngRowCount = lngRowCount + 1
     End If
     For lngParCount = 1 To ActiveDocument.Paragraphs.Count
       Set oRng = ActiveDocument.Paragraphs(lngParCount).Range
       Set oRngBounds = oRng.Duplicate
       With oRng.Find
         Do
           .Text = "shall" ' the word I am looking for
           .Execute
            If .Found And oRng.InRange(oRngBounds) Then
              lngPage = oRng.Information(wdActiveEndPageNumber)
              oRng.Expand Unit:=wdSentence
              oRng.Copy
              oRng.Collapse wdCollapseEnd
              oSheet.Cells(lngRowCount, 1) = lngPage
              oSheet.Cells(lngRowCount, 3).Select
              oSheet.Paste
              Set oRngHeader = oRng.Duplicate
              Do Until oRngHeader.Paragraphs(1).Range.ListFormat.ListType = 3 Or oRngHeader.Paragraphs(1).Range.ListFormat.ListType = 4
                oRngHeader.Move wdParagraph, -1
                If oRngHeader.Paragraphs(1).Range = ActiveDocument.Paragraphs(1).Range Then Exit Do
              Loop
              oSheet.Cells(lngRowCount, 2) = oRngHeader.ListParagraphs(1).Range.ListFormat.ListString
              lngRowCount = lngRowCount + 1
            End If
          Loop While .Found
         End With
      Next lngParCount
      If Not oSheet Is Nothing Then
        oSheet.usedrange.Columns.AutoFit
        oApp.workbooks(1).Close True
        oApp.Quit
        Set oSheet = Nothing
        Set oApp = Nothing
       End If
       Set oRng = Nothing
     End Sub
    Sub Test()
      MsgBox Selection.Paragraphs(1).Range.ListParagraphs(1).Range.ListFormat.ListType
    End Sub
    Greg

    Visit my website: http://gregmaxey.com

  16. #16
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    Greg, good catch. You are right, I don't care about bullet items, only headings. When I run the above code (your latest posting), I get a requested member of the collection does not exist on this line:
    oSheet.Cells(lngRowCount, 2) = oRngHeader.ListParagraphs(1).Range.ListFormat.ListString

  17. #17
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,335
    Location
    Then your document does not have a paragraph that meets this condition:

    oRngHeader.Paragraphs(1).Range.ListFormat.ListType = 3 Or oRngHeader.Paragraphs(1).Range.ListFormat.ListType = 4

    between the word shall and the start of the document.

    It is hard to write code for a requirement you can't see and isn't fully explained.
    Greg

    Visit my website: http://gregmaxey.com

  18. #18
    VBAX Regular
    Joined
    Mar 2018
    Location
    Leesburg
    Posts
    68
    Location
    Greg, my sincere apologies. It would have been smarter of me to instead have asked the question: How can the below code (your code from the 3/13 @ 11:50 post above) be modified to add another column to the Excel spreadsheet which contains the heading number associated with the found shall statement?

    Sub FindWordCopySentence()
    Dim appExcel As Object
    Dim objSheet As Object
    Dim aRange As Range
    Dim intRowCount As Integer
    Dim intParaCount As Integer
    Dim oRngBounds As Range
    If objSheet Is Nothing Then
    Dim lngPage As Long
    Set appExcel = CreateObject("Excel.Application")
    'Change the file path to match the location of your test.xlsx
    Set objSheet = appExcel.workbooks.Open("D:\test.xlsx").Sheets("Sheet1")
    intRowCount = 1
    objSheet.Cells(intRowCount, 1) = "Paragraph Number"
    objSheet.Cells(intRowCount, 2) = "Sentence Text"
    intRowCount = intRowCount + 1
    End If

    For intParaCount = 1 To ActiveDocument.Paragraphs.Count
    Set aRange = ActiveDocument.Paragraphs(intParaCount).Range
    Set oRngBounds = aRange.Duplicate
    With aRange.Find
    Do
    .Text = "shall" ' the word I am looking for
    .Execute
    If .Found And aRange.InRange(oRngBounds) Then
    lngPage = aRange.Information(wdActiveEndPageNumber)
    aRange.Expand Unit:=wdSentence
    aRange.Copy
    Debug.Print aRange
    aRange.Collapse wdCollapseEnd

    objSheet.Cells(intRowCount, 1) = intParaCount & " - Page: " & lngPage
    objSheet.Cells(intRowCount, 2).Select
    objSheet.Paste
    intRowCount = intRowCount + 1
    End If
    Loop While .Found
    End With
    Next intParaCount
    If Not objSheet Is Nothing Then
    objSheet.usedrange.Columns.AutoFit
    appExcel.workbooks(1).Close True
    appExcel.Quit
    Set objSheet = Nothing
    Set appExcel = Nothing
    End If
    Set aRange = Nothing
    End Sub

  19. #19
    Moderator VBAX Sage SamT's Avatar
    Joined
    Oct 2006
    Location
    Near Columbia
    Posts
    7,814
    Location
    Greg,

    PM me when this is done and I will close the thread

    Sam
    I expect the student to do their homework and find all the errrors I leeve in.


    Please take the time to read the Forum FAQ

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •