Consulting

Page 1 of 2 1 2 LastLast
Results 1 to 20 of 22

Thread: Find different expressions in same text (wildcard)

  1. #1
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location

    Exclamation Find different expressions in same text (wildcard)

    Hi, I would like to find expressions in a word document and extract them to a excel file.
    The text have some terms that can be useful to find the expressions between two parts.
    Like: 1. name Paul Sartre 2. id 20202-2 2.1 smoke ( x) yes ( ) no 4. ( x ) one day a week ( ) two days a week ( ) seven days a month.
    Thank you for answering this.

    I need to extract something like.

    find "name * 2. id" - result -> Paul Sartre
    find "2. id * 2.1" - result ->20202-2
    find "smoke * no 4." - result -> ( x) yes ( ) no
    find "4. ( * Thank you" - result -> ( x ) one day a week ( ) two days a week ( ) seven days a month.

    I find something useful, but I do not now how to put it in excel and how to add another expressions to search

    https://www.datanumen.com/blogs/extr...ument-another/

    Sub ExtractContentsBetweenTwoWords()
    Dim strFirstWord As String
    Dim strLastWord As String
    Dim objDoc As Document
    Dim objDocAdd As Document
    Dim objRange As Range


    ' Initialize and create a new blank document.
    Set objDoc = ActiveDocument
    Set objDocAdd = Documents.Add
    objDoc.Activate

    ' Enter the first and last words.
    strFirstWord = name
    strLastWord = 2. id


    ' Find and extract contents and insert them into the new document.
    With Selection
    .HomeKey Unit:=wdStory
    With Selection.Find
    .ClearFormatting
    .Text = strFirstWord & "*" & strLastWord
    .MatchWildcards = True
    .MatchWholeWord = True

    Do While .Execute
    Selection.MoveStart Unit:=wdCharacter, Count:=Len(strFirstWord)
    Selection.MoveEnd Unit:=wdCharacter, Count:=-Len(strLastWord)

    objDocAdd.Range.InsertAfter Selection.Range & vbNewLine
    Selection.Collapse wdCollapseEnd
    Loop
    End With
    End With
    End Sub

  2. #2
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    It is far from apparent how your data are structured. Without a consistent structure - and knowing how the data are delineated, it's impossible to write any meaningful code.


    Can you attach a document to a post with some representative data (delete anything sensitive)? You do this via the paperclip symbol on the 'Go Advanced' tab at the bottom of this screen.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  3. #3
    If the document is as you describe, with the data all in one row, the following will extract the data from it to a worksheet (which it will create if it doesn't exist).
    If the document is not as you describe, then you will find examples of how to extract data from forms and documents on my web site and a utility add-in for extracting data from forms.

    Option Explicit
    'Graham Mayor - https://www.gmayor.com - Last updated - 11 Apr 2020 
    Private Const strPath As String = "C:\DataPath\"
    Private Const strWorkbook As String = "C:\DataPath\DataWorkbookName.xlsx"
    Private Const strSheet As String = "Sheet1"
    
    Sub ExtractData()
    Dim orng As Range
    Dim strText As String
    Dim vText As Variant
    Dim i As Integer
    Dim bTrue As Boolean
        strText = "": bTrue = False
        Set orng = ActiveDocument.Range
        With orng.Find
            Do While .Execute("1. name ")
                orng.Collapse 0
                orng.MoveEndUntil "0123456789"
                strText = Trim(orng.Text) & "', '"
                Exit Do
            Loop
        End With
        Set orng = ActiveDocument.Range
        With orng.Find
            Do While .Execute("smoke ")
                orng.Collapse 0
                orng.MoveEndUntil "("
                orng.Collapse 0
                orng.MoveEndUntil ")"
                orng.End = orng.End + 1
                If InStr(1, orng.Text, "x") > 0 Then
                    strText = strText & "True" & "', '"
                    bTrue = True
                Else
                    strText = strText & "False" & "', '"
                End If
                Exit Do
            Loop
        End With
        Set orng = ActiveDocument.Range
        With orng.Find
            Do While .Execute(" no 4. ")
                orng.Collapse 0
                For i = 1 To 3
                    orng.MoveEndUntil ")"
                    orng.End = orng.End + 1
                Next i
                orng.End = orng.End + 1
                vText = Split(orng.Text, ")")
                If bTrue = False Then
                    strText = strText & "0"
                Else
                    For i = 0 To 2
                        If InStr(vText(i), "x") > 0 Then
                            Select Case i
                                Case 0: strText = strText & "1"
                                Case 1: strText = strText & "2"
                                Case 2: strText = strText & "7"
                                Case Else: strText = strText & "0"
                            End Select
                        End If
                    Next i
                End If
                Exit Do
            Loop
            WriteToXL strText
        End With
    lbl_Exit:
        Set orng = Nothing
        Exit Sub
    End Sub
    
    Sub WriteToXL(strValues As String)
    'Graham Mayor - https://www.gmayor.com - Last updated - 11 Apr 2020 
    Dim xlApp As Object
    Dim xlWB As Object
    Dim bXLStarted As Boolean
        CreateFolders strPath
        If Not FileExists(strWorkbook) Then
            On Error Resume Next
            Set xlApp = GetObject(, "Excel.Application")
            If Err <> 0 Then
                Set xlApp = CreateObject("Excel.Application")
                bXLStarted = True
            End If
            On Error GoTo 0
            'Open the workbook to input the data
            Set xlWB = xlApp.Workbooks.Add
            With xlWB.Sheets(1)
                .Range("A1") = "Name"
                .Range("B1") = "Smokes"
                .Range("C1") = "Frequency"
                .Range("A1:C1").Style = "Accent1"
                .Columns(1).ColumnWidth = 16
                .Columns(1).NumberFormat = "General"
                .Columns(2).ColumnWidth = 16
                .Columns(2).NumberFormat = "General"
                .Columns(3).ColumnWidth = 16
                .Columns(3).NumberFormat = "General"
            End With
            xlWB.SaveAs strWorkbook
            xlWB.Close 1
            If bXLStarted Then
                xlApp.Quit
                Set xlApp = Nothing
                Set xlWB = Nothing
            End If
        End If
        WriteToWorksheet strWorkbook:=strWorkbook, strRange:="Sheet1", strValues:=strValues
        DoEvents
    lbl_Exit:
        Exit Sub
    End Sub
    
    Private Function WriteToWorksheet(strWorkbook As String, _
                                      strRange As String, _
                                      strValues As String)
    Dim ConnectionString As String
    Dim strSQL As String
    Dim CN As Object
        ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" & _
                           "Data Source=" & strWorkbook & ";" & _
                           "Extended Properties=""Excel 12.0 Xml;HDR=YES;"";"
        strSQL = "INSERT INTO [" & strRange & "$] VALUES('" & strValues & "')"
        Set CN = CreateObject("ADODB.Connection")
        Call CN.Open(ConnectionString)
        Call CN.Execute(strSQL, , 1 Or 128)
        CN.Close
        Set CN = Nothing
    lbl_Exit:
        Exit Function
    End Function
    
    Private Sub CreateFolders(strPath As String)
    'A Graham Mayor/Greg Maxey AddIn Utility Macro
    Dim oFSO As Object
    Dim lng_PathSep As Long
    Dim lng_PS As Long
        If Right(strPath, 1) <> "\" Then strPath = strPath & "\"
        lng_PathSep = InStr(3, strPath, "\")
        If lng_PathSep = 0 Then GoTo lbl_Exit
        Set oFSO = CreateObject("Scripting.FileSystemObject")
        Do
            lng_PS = lng_PathSep
            lng_PathSep = InStr(lng_PS + 1, strPath, "\")
            If lng_PathSep = 0 Then Exit Do
            If Len(Dir(Left(strPath, lng_PathSep), vbDirectory)) = 0 Then Exit Do
        Loop
        Do Until lng_PathSep = 0
            If Not oFSO.FolderExists(Left(strPath, lng_PathSep)) Then
                oFSO.CreateFolder Left(strPath, lng_PathSep)
            End If
            lng_PS = lng_PathSep
            lng_PathSep = InStr(lng_PS + 1, strPath, "\")
        Loop
    lbl_Exit:
        Set oFSO = Nothing
        Exit Sub
    End Sub
    
    Private Function FileExists(strFullName As String) As Boolean
    'Graham Mayor
    'strFullName is the name with path of the file to check
    Dim FSO As Object
        Set FSO = CreateObject("Scripting.FileSystemObject")
        If FSO.FileExists(strFullName) Then
            FileExists = True
        Else
            FileExists = False
        End If
    lbl_Exit:
        Set FSO = Nothing
        Exit Function
    End Function
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  4. #4
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    My document is not well structured.

    1. Name:
    2. Id:
    3. Smoke:

  5. #5
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    Hi, Thankyou very much!
    I'm in doubt yet because "MoveEndUntil" search any caracter. But, I would like to search expressions, like ID.

    "1. name Paul Sartre 2. id 20202-2 2.1 smoke ( x) yes ( ) no 4. ( x ) one day a week ( ) two days a week ( ) seven days a month.
    Thank you for answering this."

    I need to search from 2. id until 2.1 and have the result 20202-2.

    It's a very long document...

  6. #6
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Quote Originally Posted by Wandem View Post
    My document is not well structured.

    1. Name:
    2. Id:
    3. Smoke:
    It would be really helpful, then, if you attached a representative sample as requested.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  7. #7
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    It's a standard form with a lot of word cells, without patterns. There are about 20 cells and different information.
    I can copy all the form and paste without format.
    I understood Graham code, but i'm newbie in VBA and need now to understand how to extract string between two expressions.

    My current difficult yet is extract the name. I don't want to use MoveEndUntil to do this, because It will not be enough to extract adress, for exemple - because if I put "Do While .Execute("address:"): orng.Collapse 0: orng.MoveEndUntil "4"" and the address has a number 4, it will be a problem.

    After paste, without format, it's similar to this. It's not the real form, but it has the same method used in the real form.

    I – PERSONAL INFORMATION:

    1. Full name:
    2. ID: 3. SOCIAL INSURANCE NUMBER:
    4. Address: 4.1.Zip code:
    5. E-mail: 6. Phone number:
    7. Graduate:
    8. Previous job: ()
    8.1. Company name:


    II – DO YOU WANT TO ANSWER PROFISSIONAL REQUESTS? ( ) YES ( ) NO


    10. Is this your first job? ( ) YES ( ) NO
    Inform the date you started working: _____ /_____ / _____

    III – INFORMATION ABOUT HEALTH

    11. Type of previus disease:
    ( ) headache;
    ( ) bellyache;
    ( ) elbow;
    ( ) another.
    11.1. Describe syntoms:

    _________________________________________________________________________
    11.2. Name of hospital:

    _________________________________________________________________________ 11.3. Period:
    Date of sytoms started
    _____ /_____ / _____
    Date of syntonms end
    _____ /_____ / _____
    Last edited by Wandem; 04-11-2020 at 04:58 PM.

  8. #8
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    I haven't posted any code; Graham did. Your reference to cells makes the document structure even less clear. If you want help, you're going to have to attach an actual representative document to a post.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  9. #9
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    Thanks for advice! I will thanks Graham.

    Quote Originally Posted by gmayor View Post
    If the document is as you describe, with the data all in one row, the following will extract the data from it to a worksheet (which it will create if it doesn't exist).
    If the document is not as you describe, then you will find examples of how to extract data from forms and documents on my web site and a utility add-in for extracting data from forms.
    Thank you very much! It's advanced for my knowledge, but It will be very usefull to treat the information between parantheses.
    Attached Files Attached Files
    Last edited by macropod; 04-11-2020 at 05:07 PM.

  10. #10
    The document you have posted bears no relationship whatsoever to your previous request for help.

    Have you already sent out these forms? If not it would save you a lot of later time and effort if you used content controls for the data that you want to collect. You can then just read the controls into the worksheet. https://www.gmayor.com/ExtractDataFromForms.htm will help with that. I would suggest also that you run a spell check on your document.
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  11. #11
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    Quote Originally Posted by gmayor View Post
    The document you have posted bears no relationship whatsoever to your previous request for help.

    Have you already sent out these forms? If not it would save you a lot of later time and effort if you used content controls for the data that you want to collect. You can then just read the controls into the worksheet.https://www.gmayor.com/ExtractDataFromForms.htm will help with that. I would suggest also that you run a spell check on your document.
    thank you again. Sorry about the poor English. I translated with bad accuracy from portugueses and changed some information.

    I do not have control about the form type. It’s given.

    I do not need full vba code, because I will last this month to use the code you wrote. What you wrote to me will be very useful.

    but I need to understand how to search expressions inside text, between two other expressions that are standard.

    sorry for any inconvenience.

  12. #12
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    With a properly structured document, you don't have to search, you can extract the data explicitly. For example to extract the name & ID from a folder full of documents like your attachment:
    Sub GetData()
    'Note: this code requires a reference to the Word object model. See under the VBE's Tools|References.
    Application.ScreenUpdating = False
    Dim wdApp As New Word.Application, wdDoc As Word.Document
    Dim strFolder As String, strFile As String, WkSht As Worksheet, r As Long
    strFolder = GetFolder: If strFolder = "" Then GoTo ErrExit
    Set WkSht = ActiveSheet: r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
    'Disable any auto macros in the documents being processed
    wdApp.WordBasic.DisableAutoMacros
    strFile = Dir(strFolder & "\*.doc", vbNormal)
    While strFile <> ""
      Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
      r = r + 1
      With wdDoc
        With .Tables(1)
          WkSht.Cells(r, 1).Value = Trim(Split(Split(.Cell(1, 1).Range.Text, ":")(1), vbCr)(0))
          WkSht.Cells(r, 2).Value = Trim(Split(Split(.Cell(2, 1).Range.Text, ":")(1), vbCr)(0))
        End With
        .Close SaveChanges:=False
      End With
      strFile = Dir()
    Wend
    ErrExit:
    wdApp.Quit
    Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
    Application.ScreenUpdating = True
    End Sub
     
    Function GetFolder() As String
        Dim oFolder As Object
        GetFolder = ""
        Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
        If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
        Set oFolder = Nothing
    End Function
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  13. #13
    As Paul says, starting with a well constructed form is half the battle to collating data. If your document is representative, then there is nothing to stop users from wrecking the form and thus making data recovery efforts difficult or impossible. For the future, you need to ensure that whoever is responsible for this form takes on board the need to recover data from it. Using my add-in https://www.gmayor.com/insert_content_control_addin.htm it took but a few minutes to make your document more robust and provide a platform for ease of data extraction - see attached.

    In the meantime Paul has explained how you might extract data from the table, assuming no one has altered the table.
    Attached Files Attached Files
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

  14. #14
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    People, thanks a lot for the help. But I do not have control the way form is produced and it can have updates along the time.
    I need to search strings between expressions. I cannot advance.
    I would like something like described here ( I think I am making horrible mistakes, but I need to overcome this to go ahead).

    Set orng = ActiveDocument.Range (start:=selection.end, end:=activedocument.range.end)
    With orng.Find
    Do While .Execute("1. Full name:")
    orng.Collapse 
    .Execute (“2. ID:”)
    selection.end = orng.end
    
    
    strText = Trim(orng.Text) & "', '"
    Exit Do
    Loop
    End With
    Last edited by Wandem; 04-12-2020 at 05:05 AM.

  15. #15
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Quote Originally Posted by Wandem View Post
    I do not have control the way form is produced and it can have updates along the time.

    You're wasting your time if consistency cannot be assured.
    Quote Originally Posted by Wandem View Post
    I need to search strings between expressions. I cannot advance.
    That simply is not possible when the data are in a table - Find cannot span cell boundaries.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  16. #16
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    Quote Originally Posted by macropod View Post

    You're wasting your time if consistency cannot be assured.
    That simply is not possible when the data are in a table - Find cannot span cell boundaries.
    But I do not need data in a table. I can copy and paste without formatting.

  17. #17
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Regardless, as I have already demonstrated, there is no need for Find if the data are in a table.
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  18. #18
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    It's the first best choice, Macropod. But I need to discover the second best way to solve my problem. I will keep trying. These posts will be so helpful.

    Quote Originally Posted by macropod View Post
    Regardless, as I have already demonstrated, there is no need for Find if the data are in a table.

    I have just found a way to do this.

    Sub FindIt()
        Dim blnFound As Boolean
        Dim rng1 As Range
        Dim rng2 As Range
        Dim rngFound As Range
        Dim strTheText As Variant
        Dim firstWd
        Dim lastWd
        
        firstWd = Array("Full name:", "ID:", "SOCIAL INSURANCE NUMBER:", "ADDRESS:", "Zip code:", "E-mail:", "Phone number:")
        
        lastWd = Array("2. ID:", "3. SOCIAL INSURANCE NUMBER:", "4. ADDRESS:", "4.1 Zip code:", "5. E-mail", "6. Phone number", "7. Graduate")
        
     
        Application.ScreenUpdating = False
       
        
        For i = 0 To UBound(firstWd)
             Selection.HomeKey wdStory
             Selection.Find.Text = firstWd(i)
             blnFound = Selection.Find.Execute
                If blnFound Then
                    Selection.MoveRight wdWord
                    Set rng1 = Selection.Range
                    Selection.Find.Text = lastWd(i)
                    blnFound = Selection.Find.Execute
                    If blnFound Then
                        Set rng2 = Selection.Range
                        Set rngFound = ActiveDocument.Range(rng1.Start, rng2.Start)
                        strTheText = strTheText & "|" & rngFound.Text
                        
                    End If
                End If
            Next
                        
                        MsgBox strTheText
            
        'move back to beginning
        Selection.HomeKey wdStory
        Application.ScreenUpdating = True
    End Sub

    Now I need to put this in Excel with previous file that exists and is open.
    Last edited by Wandem; 04-12-2020 at 07:18 AM.

  19. #19
    VBAX Regular
    Joined
    Apr 2020
    Posts
    12
    Location
    Using the code that Graham posted, i'm trying to improve my macro, but I think I have a problem with reading a range in excel from word.

    Orng is being setting as nothing.

    I would like to know if I can search in this code for another string like "-" or "#"

    If InStr(1, orng.Text, "x") > 0 Then

    Sub imporTableDataWord()
    
    Dim wdApp As Object, wdDoc As Object
    Dim strDocName As String
    Dim orng As Word.Range
    Dim bTrue As Boolean
    On Error Resume Next
    
    
        Set wdApp = GetObject(, "Word.Application")
            If Err.Number = 429 Then
            Err.Clear
            
        Set wdApp = CreateObject("Word.Application")
            End If
        wdApp.Visible = False
        strDocName = "/Users/USER/Downloads/sample3.docx"
        If Dir(strDocName) = "" Then
        MsgBox "The File" & strDocName & vbCrLf & " was not found in the folder " & vbCrLf & "/Users/USER/Downloads/.", vbExclamation, "Document not found!"
        Exit Sub
        End If
        
        wdApp.Activate
            
        Set wdDoc = wdApp.Documents(strDocName)
        If wdDoc Is Nothing Then wdApp.Documents.Open (strDocName)
        
        wdDoc.Activate
        
        
       Set orng = ActiveDocument.Range
        
        Do
        With orng.Find
                .MatchWildcards = True
                .Execute ("([\(])*([\)])*(SIM)*([\(])")
                orng.Collapse 0
                orng.MoveStartUntil ")"
                orng.End = orng.End + 1
                If InStr(1, orng.Text, "x") > 0 Then
                    strText = strText & "True" & "', '"
                    bTrue = True
                Else
                    strText = strText & "False" & "', '"
                End If
                orng.Collapse 0
        End With
                
              If orng.Find.Found Then
    
    
                  Else
        
        Exit Do
        End If
        Loop
    
    End Sub

  20. #20
    'If wdDoc Is Nothing Then wdApp.Documents.Open (strDocName) 'this line is unnecessary in Word
        'wdDoc.Activate 'as is this
        'the following line 
        'Set orng = ActiveDocument.Range
        'should be
        Set orng = wdDoc.Range
    Graham Mayor - MS MVP (Word) 2002-2019
    Visit my web site for more programming tips and ready made processes
    http://www.gmayor.com

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •