PDA

View Full Version : Find different expressions in same text (wildcard)



Wandem
04-10-2020, 06:25 PM
Hi, I would like to find expressions in a word document and extract them to a excel file.
The text have some terms that can be useful to find the expressions between two parts.
Like: 1. name Paul Sartre 2. id 20202-2 2.1 smoke ( x) yes ( ) no 4. ( x ) one day a week ( ) two days a week ( ) seven days a month.
Thank you for answering this.

I need to extract something like.

find "name * 2. id" - result -> Paul Sartre
find "2. id * 2.1" - result ->20202-2
find "smoke * no 4." - result -> ( x) yes ( ) no
find "4. ( * Thank you" - result -> ( x ) one day a week ( ) two days a week ( ) seven days a month.

I find something useful, but I do not now how to put it in excel and how to add another expressions to search

https://www.datanumen.com/blogs/extract-contents-two-specific-words-one-word-document-another/

Sub ExtractContentsBetweenTwoWords()
Dim strFirstWord As String
Dim strLastWord As String
Dim objDoc As Document
Dim objDocAdd As Document
Dim objRange As Range


' Initialize and create a new blank document.
Set objDoc = ActiveDocument
Set objDocAdd = Documents.Add
objDoc.Activate

' Enter the first and last words.
strFirstWord = name
strLastWord = 2. id


' Find and extract contents and insert them into the new document.
With Selection
.HomeKey Unit:=wdStory
With Selection.Find
.ClearFormatting
.Text = strFirstWord & "*" & strLastWord
.MatchWildcards = True
.MatchWholeWord = True

Do While .Execute
Selection.MoveStart Unit:=wdCharacter, Count:=Len(strFirstWord)
Selection.MoveEnd Unit:=wdCharacter, Count:=-Len(strLastWord)

objDocAdd.Range.InsertAfter Selection.Range & vbNewLine
Selection.Collapse wdCollapseEnd
Loop
End With
End With
End Sub

macropod
04-10-2020, 08:45 PM
It is far from apparent how your data are structured. Without a consistent structure - and knowing how the data are delineated, it's impossible to write any meaningful code.


Can you attach a document to a post with some representative data (delete anything sensitive)? You do this via the paperclip symbol on the 'Go Advanced' tab at the bottom of this screen.

gmayor
04-10-2020, 09:35 PM
If the document is as you describe, with the data all in one row, the following will extract the data from it to a worksheet (which it will create if it doesn't exist).
If the document is not as you describe, then you will find examples of how to extract data from forms and documents on my web site and a utility add-in for extracting data from forms.


Option Explicit
'Graham Mayor - https://www.gmayor.com - Last updated - 11 Apr 2020
Private Const strPath As String = "C:\DataPath\"
Private Const strWorkbook As String = "C:\DataPath\DataWorkbookName.xlsx"
Private Const strSheet As String = "Sheet1"

Sub ExtractData()
Dim orng As Range
Dim strText As String
Dim vText As Variant
Dim i As Integer
Dim bTrue As Boolean
strText = "": bTrue = False
Set orng = ActiveDocument.Range
With orng.Find
Do While .Execute("1. name ")
orng.Collapse 0
orng.MoveEndUntil "0123456789"
strText = Trim(orng.Text) & "', '"
Exit Do
Loop
End With
Set orng = ActiveDocument.Range
With orng.Find
Do While .Execute("smoke ")
orng.Collapse 0
orng.MoveEndUntil "("
orng.Collapse 0
orng.MoveEndUntil ")"
orng.End = orng.End + 1
If InStr(1, orng.Text, "x") > 0 Then
strText = strText & "True" & "', '"
bTrue = True
Else
strText = strText & "False" & "', '"
End If
Exit Do
Loop
End With
Set orng = ActiveDocument.Range
With orng.Find
Do While .Execute(" no 4. ")
orng.Collapse 0
For i = 1 To 3
orng.MoveEndUntil ")"
orng.End = orng.End + 1
Next i
orng.End = orng.End + 1
vText = Split(orng.Text, ")")
If bTrue = False Then
strText = strText & "0"
Else
For i = 0 To 2
If InStr(vText(i), "x") > 0 Then
Select Case i
Case 0: strText = strText & "1"
Case 1: strText = strText & "2"
Case 2: strText = strText & "7"
Case Else: strText = strText & "0"
End Select
End If
Next i
End If
Exit Do
Loop
WriteToXL strText
End With
lbl_Exit:
Set orng = Nothing
Exit Sub
End Sub

Sub WriteToXL(strValues As String)
'Graham Mayor - https://www.gmayor.com - Last updated - 11 Apr 2020
Dim xlApp As Object
Dim xlWB As Object
Dim bXLStarted As Boolean
CreateFolders strPath
If Not FileExists(strWorkbook) Then
On Error Resume Next
Set xlApp = GetObject(, "Excel.Application")
If Err <> 0 Then
Set xlApp = CreateObject("Excel.Application")
bXLStarted = True
End If
On Error GoTo 0
'Open the workbook to input the data
Set xlWB = xlApp.Workbooks.Add
With xlWB.Sheets(1)
.Range("A1") = "Name"
.Range("B1") = "Smokes"
.Range("C1") = "Frequency"
.Range("A1:C1").Style = "Accent1"
.Columns(1).ColumnWidth = 16
.Columns(1).NumberFormat = "General"
.Columns(2).ColumnWidth = 16
.Columns(2).NumberFormat = "General"
.Columns(3).ColumnWidth = 16
.Columns(3).NumberFormat = "General"
End With
xlWB.SaveAs strWorkbook
xlWB.Close 1
If bXLStarted Then
xlApp.Quit
Set xlApp = Nothing
Set xlWB = Nothing
End If
End If
WriteToWorksheet strWorkbook:=strWorkbook, strRange:="Sheet1", strValues:=strValues
DoEvents
lbl_Exit:
Exit Sub
End Sub

Private Function WriteToWorksheet(strWorkbook As String, _
strRange As String, _
strValues As String)
Dim ConnectionString As String
Dim strSQL As String
Dim CN As Object
ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" & _
"Data Source=" & strWorkbook & ";" & _
"Extended Properties=""Excel 12.0 Xml;HDR=YES;"";"
strSQL = "INSERT INTO [" & strRange & "$] VALUES('" & strValues & "')"
Set CN = CreateObject("ADODB.Connection")
Call CN.Open(ConnectionString)
Call CN.Execute(strSQL, , 1 Or 128)
CN.Close
Set CN = Nothing
lbl_Exit:
Exit Function
End Function

Private Sub CreateFolders(strPath As String)
'A Graham Mayor/Greg Maxey AddIn Utility Macro
Dim oFSO As Object
Dim lng_PathSep As Long
Dim lng_PS As Long
If Right(strPath, 1) <> "\" Then strPath = strPath & "\"
lng_PathSep = InStr(3, strPath, "\")
If lng_PathSep = 0 Then GoTo lbl_Exit
Set oFSO = CreateObject("Scripting.FileSystemObject")
Do
lng_PS = lng_PathSep
lng_PathSep = InStr(lng_PS + 1, strPath, "\")
If lng_PathSep = 0 Then Exit Do
If Len(Dir(Left(strPath, lng_PathSep), vbDirectory)) = 0 Then Exit Do
Loop
Do Until lng_PathSep = 0
If Not oFSO.FolderExists(Left(strPath, lng_PathSep)) Then
oFSO.CreateFolder Left(strPath, lng_PathSep)
End If
lng_PS = lng_PathSep
lng_PathSep = InStr(lng_PS + 1, strPath, "\")
Loop
lbl_Exit:
Set oFSO = Nothing
Exit Sub
End Sub

Private Function FileExists(strFullName As String) As Boolean
'Graham Mayor
'strFullName is the name with path of the file to check
Dim FSO As Object
Set FSO = CreateObject("Scripting.FileSystemObject")
If FSO.FileExists(strFullName) Then
FileExists = True
Else
FileExists = False
End If
lbl_Exit:
Set FSO = Nothing
Exit Function
End Function

Wandem
04-11-2020, 01:31 PM
My document is not well structured.

1. Name:
2. Id:
3. Smoke:

Wandem
04-11-2020, 01:33 PM
Hi, Thankyou very much!
I'm in doubt yet because "MoveEndUntil" search any caracter. But, I would like to search expressions, like ID.

"1. name Paul Sartre 2. id 20202-2 2.1 smoke ( x) yes ( ) no 4. ( x ) one day a week ( ) two days a week ( ) seven days a month.
Thank you for answering this."

I need to search from 2. id until 2.1 and have the result 20202-2.

It's a very long document...

macropod
04-11-2020, 03:23 PM
My document is not well structured.

1. Name:
2. Id:
3. Smoke:
It would be really helpful, then, if you attached a representative sample as requested.

Wandem
04-11-2020, 04:40 PM
It's a standard form with a lot of word cells, without patterns. There are about 20 cells and different information.
I can copy all the form and paste without format.
I understood Graham code, but i'm newbie in VBA and need now to understand how to extract string between two expressions.

My current difficult yet is extract the name. I don't want to use MoveEndUntil to do this, because It will not be enough to extract adress, for exemple - because if I put "Do While .Execute("address:"): orng.Collapse 0: orng.MoveEndUntil "4"" and the address has a number 4, it will be a problem.

After paste, without format, it's similar to this. It's not the real form, but it has the same method used in the real form.

I – PERSONAL INFORMATION:

1. Full name:
2. ID: 3. SOCIAL INSURANCE NUMBER:
4. Address: 4.1.Zip code:
5. E-mail: 6. Phone number:
7. Graduate:
8. Previous job: ()
8.1. Company name:


II – DO YOU WANT TO ANSWER PROFISSIONAL REQUESTS? ( ) YES ( ) NO


10. Is this your first job? ( ) YES ( ) NO
Inform the date you started working: _____ /_____ / _____

III – INFORMATION ABOUT HEALTH

11. Type of previus disease:
( ) headache;
( ) bellyache;
( ) elbow;
( ) another.
11.1. Describe syntoms:

_________________________________________________________________________
11.2. Name of hospital:

_________________________________________________________________________ 11.3. Period:
Date of sytoms started
_____ /_____ / _____
Date of syntonms end
_____ /_____ / _____

macropod
04-11-2020, 04:47 PM
I haven't posted any code; Graham did. Your reference to cells makes the document structure even less clear. If you want help, you're going to have to attach an actual representative document to a post.

Wandem
04-11-2020, 04:55 PM
Thanks for advice! I will thanks Graham.


If the document is as you describe, with the data all in one row, the following will extract the data from it to a worksheet (which it will create if it doesn't exist).
If the document is not as you describe, then you will find examples of how to extract data from forms and documents on my web site and a utility add-in for extracting data from forms.

Thank you very much! It's advanced for my knowledge, but It will be very usefull to treat the information between parantheses.

gmayor
04-11-2020, 08:34 PM
The document you have posted bears no relationship whatsoever to your previous request for help. :banghead:

Have you already sent out these forms? If not it would save you a lot of later time and effort if you used content controls for the data that you want to collect. You can then just read the controls into the worksheet. https://www.gmayor.com/ExtractDataFromForms.htm (http://www.gmayor.com/ExtractDataFromForms.htm) will help with that. I would suggest also that you run a spell check on your document.

Wandem
04-11-2020, 08:52 PM
The document you have posted bears no relationship whatsoever to your previous request for help. :banghead:

Have you already sent out these forms? If not it would save you a lot of later time and effort if you used content controls for the data that you want to collect. You can then just read the controls into the worksheet.https://www.gmayor.com/ExtractDataFromForms.htm will help with that. I would suggest also that you run a spell check on your document.

thank you again. Sorry about the poor English. I translated with bad accuracy from portugueses and changed some information.

I do not have control about the form type. It’s given.

I do not need full vba code, because I will last this month to use the code you wrote. What you wrote to me will be very useful.

but I need to understand how to search expressions inside text, between two other expressions that are standard.

sorry for any inconvenience.

macropod
04-11-2020, 10:34 PM
With a properly structured document, you don't have to search, you can extract the data explicitly. For example to extract the name & ID from a folder full of documents like your attachment:

Sub GetData()
'Note: this code requires a reference to the Word object model. See under the VBE's Tools|References.
Application.ScreenUpdating = False
Dim wdApp As New Word.Application, wdDoc As Word.Document
Dim strFolder As String, strFile As String, WkSht As Worksheet, r As Long
strFolder = GetFolder: If strFolder = "" Then GoTo ErrExit
Set WkSht = ActiveSheet: r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
'Disable any auto macros in the documents being processed
wdApp.WordBasic.DisableAutoMacros
strFile = Dir(strFolder & "\*.doc", vbNormal)
While strFile <> ""
Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
r = r + 1
With wdDoc
With .Tables(1)
WkSht.Cells(r, 1).Value = Trim(Split(Split(.Cell(1, 1).Range.Text, ":")(1), vbCr)(0))
WkSht.Cells(r, 2).Value = Trim(Split(Split(.Cell(2, 1).Range.Text, ":")(1), vbCr)(0))
End With
.Close SaveChanges:=False
End With
strFile = Dir()
Wend
ErrExit:
wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub

Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function

gmayor
04-11-2020, 10:57 PM
As Paul says, starting with a well constructed form is half the battle to collating data. If your document is representative, then there is nothing to stop users from wrecking the form and thus making data recovery efforts difficult or impossible. For the future, you need to ensure that whoever is responsible for this form takes on board the need to recover data from it. Using my add-in (http://www.gmayor.com/insert_content_control_addin.htm)https://www.gmayor.com/insert_content_control_addin.htm it took but a few minutes to make your document more robust and provide a platform for ease of data extraction - see attached.

In the meantime Paul has explained how you might extract data from the table, assuming no one has altered the table.

Wandem
04-12-2020, 04:19 AM
People, thanks a lot for the help. But I do not have control the way form is produced and it can have updates along the time.
I need to search strings between expressions. I cannot advance.
I would like something like described here ( I think I am making horrible mistakes, but I need to overcome this to go ahead).


Set orng = ActiveDocument.Range (start:=selection.end, end:=activedocument.range.end)
With orng.Find
Do While .Execute("1. Full name:")
orng.Collapse
.Execute (“2. ID:”)
selection.end = orng.end


strText = Trim(orng.Text) & "', '"
Exit Do
Loop
End With

macropod
04-12-2020, 04:33 AM
I do not have control the way form is produced and it can have updates along the time.
You're wasting your time if consistency cannot be assured.

I need to search strings between expressions. I cannot advance.
That simply is not possible when the data are in a table - Find cannot span cell boundaries.

Wandem
04-12-2020, 04:56 AM
You're wasting your time if consistency cannot be assured.

That simply is not possible when the data are in a table - Find cannot span cell boundaries.
But I do not need data in a table. I can copy and paste without formatting.

macropod
04-12-2020, 05:02 AM
Regardless, as I have already demonstrated, there is no need for Find if the data are in a table.

Wandem
04-12-2020, 05:06 AM
It's the first best choice, Macropod. But I need to discover the second best way to solve my problem. I will keep trying. These posts will be so helpful.


Regardless, as I have already demonstrated, there is no need for Find if the data are in a table.


I have just found a way to do this.




Sub FindIt()
Dim blnFound As Boolean
Dim rng1 As Range
Dim rng2 As Range
Dim rngFound As Range
Dim strTheText As Variant
Dim firstWd
Dim lastWd

firstWd = Array("Full name:", "ID:", "SOCIAL INSURANCE NUMBER:", "ADDRESS:", "Zip code:", "E-mail:", "Phone number:")

lastWd = Array("2. ID:", "3. SOCIAL INSURANCE NUMBER:", "4. ADDRESS:", "4.1 Zip code:", "5. E-mail", "6. Phone number", "7. Graduate")


Application.ScreenUpdating = False


For i = 0 To UBound(firstWd)
Selection.HomeKey wdStory
Selection.Find.Text = firstWd(i)
blnFound = Selection.Find.Execute
If blnFound Then
Selection.MoveRight wdWord
Set rng1 = Selection.Range
Selection.Find.Text = lastWd(i)
blnFound = Selection.Find.Execute
If blnFound Then
Set rng2 = Selection.Range
Set rngFound = ActiveDocument.Range(rng1.Start, rng2.Start)
strTheText = strTheText & "|" & rngFound.Text

End If
End If
Next

MsgBox strTheText

'move back to beginning
Selection.HomeKey wdStory
Application.ScreenUpdating = True
End Sub




Now I need to put this in Excel with previous file that exists and is open.

Wandem
04-15-2020, 06:26 PM
Using the code that Graham posted, i'm trying to improve my macro, but I think I have a problem with reading a range in excel from word.

Orng is being setting as nothing.

I would like to know if I can search in this code for another string like "-" or "#"


If InStr(1, orng.Text, "x") > 0 Then



Sub imporTableDataWord()

Dim wdApp As Object, wdDoc As Object
Dim strDocName As String
Dim orng As Word.Range
Dim bTrue As Boolean
On Error Resume Next


Set wdApp = GetObject(, "Word.Application")
If Err.Number = 429 Then
Err.Clear

Set wdApp = CreateObject("Word.Application")
End If
wdApp.Visible = False
strDocName = "/Users/USER/Downloads/sample3.docx"
If Dir(strDocName) = "" Then
MsgBox "The File" & strDocName & vbCrLf & " was not found in the folder " & vbCrLf & "/Users/USER/Downloads/.", vbExclamation, "Document not found!"
Exit Sub
End If

wdApp.Activate

Set wdDoc = wdApp.Documents(strDocName)
If wdDoc Is Nothing Then wdApp.Documents.Open (strDocName)

wdDoc.Activate


Set orng = ActiveDocument.Range

Do
With orng.Find
.MatchWildcards = True
.Execute ("([\(])*([\)])*(SIM)*([\(])")
orng.Collapse 0
orng.MoveStartUntil ")"
orng.End = orng.End + 1
If InStr(1, orng.Text, "x") > 0 Then
strText = strText & "True" & "', '"
bTrue = True
Else
strText = strText & "False" & "', '"
End If
orng.Collapse 0
End With

If orng.Find.Found Then


Else

Exit Do
End If
Loop

End Sub

gmayor
04-15-2020, 09:08 PM
'If wdDoc Is Nothing Then wdApp.Documents.Open (strDocName) 'this line is unnecessary in Word
'wdDoc.Activate 'as is this
'the following line
'Set orng = ActiveDocument.Range
'should be
Set orng = wdDoc.Range

Wandem
04-16-2020, 03:03 AM
I'm using this macro in excel, not in word. And I discovered that it is a mac problem. Because this not happen in windows.

gmayor
04-16-2020, 08:28 PM
I don't know anything about Macs, but you are running Word from Excel VBA.