PDA

View Full Version : [SOLVED] import contents of a textbox in a document header into excel



ammarhm
07-09-2017, 02:45 PM
I have previoulsy opened a questin (solved now) about importing the contents of multiple word documents with tables into an excel file.
I have a similar but slightly different question.
So now, if each of the documents also has a textbox in the header section of the document, inside the textbox are the following info

Given Name: ******
Surname: ***XX
DOB: 11-22-1999


How do i import these contents into an . excel file please

gmayor
07-09-2017, 09:32 PM
The following Word macro code will read the text content of any text boxes in the header of the first section and if it matches your example it will write the values to three text variables. You can do what you like with those variables. You should be able to incorporate this method into your other macro.



Sub Macro1()
Dim oheader As HeaderFooter
Dim oShp As Shape
Dim oRng As Range
Dim oPara As Paragraph
Dim oParaRng As Range
Dim sName As String, sSurname As String, sDOB As String
For Each oheader In ActiveDocument.Sections(1).Headers
If oheader.Exists Then
For Each oShp In oheader.Range.ShapeRange
If oShp.Type = msoTextBox Then
Set oRng = oShp.TextFrame.TextRange
For Each oPara In oRng.Paragraphs
Set oParaRng = oPara.Range
oParaRng.End = oParaRng.End - 1
Select Case True
Case InStr(1, oParaRng.Text, "Given Name:") > 0
sName = Trim(Split(oParaRng.Text, ":")(1))
Case InStr(1, oParaRng.Text, "Surname:") > 0
sSurname = Trim(Split(oParaRng.Text, ":")(1))
Case InStr(1, oParaRng.Text, "DOB:") > 0
sDOB = Trim(Split(oParaRng.Text, ":")(1))
End Select
Next oPara
GoTo Values
End If
Next oShp
End If
Next oheader
Values:
'do something with sName, sSurname and sDOB
MsgBox sName & vbCr & sSurname & vbCr & sDOB
lbl_Exit:
Set oRng = Nothing
Set oParaRng = Nothing
Set oPara = Nothing
Set oShp = Nothing
Set oheader = Nothing
Exit Sub
End Sub

ammarhm
07-09-2017, 11:03 PM
Thank you gmayor
Here is how I am using your suggestion now, however it is returning an error:
I simply added your macro to my previous working code and just expected a message box to come with the variables , as i mentioned there is an error





Option Explicit

Dim j As Long
Dim Wrd As Object

Sub Test()
Dim pth As Variant
Dim Arr As Variant
Dim i As Integer
ActiveSheet.Range("A2:AZ2").ClearContents
pth = """C:\Users\home\Desktop\Uploaded\*.doc*"""

'Cells.ClearContents
Range("A2:AZ10000").ClearContents
j = 1
Arr = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & pth & " /b /a-d /s").stdout.readall, vbCrLf), ".")

On Error Resume Next
Set Wrd = GetObject(, "Word.Application")
If Wrd Is Nothing Then Set Wrd = CreateObject("Word.Application")

Do
DoEvents
Loop Until Not Wrd Is Nothing

On Error GoTo 0
For i = LBound(Arr) To UBound(Arr)
Call ImportWordTable(Arr(i))
Next i
Wrd.Quit
Set Wrd = Nothing
End Sub


Sub ImportWordTable(wdFileName As Variant)
Dim wdDoc As Object
Dim tableNo As Integer 'table number in Word
Dim iRow As Long 'row index in Excel
Dim iCol As Integer 'column index in Excel
Dim x As Integer
Dim resultRow As Long
Dim tableStart As Integer
Dim tableTot As Integer
Dim oheader As HeaderFooter
Dim oShp As Shape
Dim oRng As Range
Dim oPara As Paragraph
Dim oParaRng As Range
Dim sName As String, sSurname As String, sDOB As String

Dim Arr, i
On Error Resume Next
If wdFileName = False Then Exit Sub
'MsgBox (wdFileName)
Set wdDoc = GetObject(wdFileName)
'Set wdDoc = Wrd.Documents.Open(wdFileName) 'Sometimes error here
' If tableNo = 1 Then

If wdDoc.tables.Count > 0 Then


With wdDoc
tableTot = wdDoc.tables.Count

With .tables(1)
For tableStart = 1 To tableTot
i = .Rows.Count * .Columns.Count
x = 1
ReDim Arr(1 To i)
For iRow = 1 To .Rows.Count
For iCol = 1 To .Columns.Count
On Error Resume Next
Arr(x) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
On Error GoTo 0
x = x + 1
Next iCol
Next iRow
Next tableStart
End With


For Each oheader In ActiveDocument.Sections(1).Headers
If oheader.Exists Then
For Each oShp In oheader.Range.ShapeRange
If oShp.Type = msoTextBox Then
Set oRng = oShp.TextFrame.TextRange
For Each oPara In oRng.Paragraphs
Set oParaRng = oPara.Range
oParaRng.End = oParaRng.End - 1
Select Case True
Case InStr(1, oParaRng.Text, "Given Name:") > 0
sName = Trim(Split(oParaRng.Text, ":")(1))
Case InStr(1, oParaRng.Text, "Surname:") > 0
sSurname = Trim(Split(oParaRng.Text, ":")(1))
Case InStr(1, oParaRng.Text, "DOB:") > 0
sDOB = Trim(Split(oParaRng.Text, ":")(1))
End Select
Next oPara
GoTo Values
End If
Next oShp
End If
Next oheader
Values:
'do something with sName, sSurname and sDOB
MsgBox sName & vbCr & sSurname & vbCr & sDOB
lbl_Exit:
Set oRng = Nothing
Set oParaRng = Nothing
Set oPara = Nothing
Set oShp = Nothing
Set oheader = Nothing








End With



wdDoc.Close
Set wdDoc = Nothing
j = j + 1
Cells(j, 1).Resize(, i).Value = Arr
End If
Set wdDoc = Nothing
End Sub








The error message is pointing to oParaRng.End = oParaRng.End - 1

Any idea on how to solve this?

gmayor
07-10-2017, 04:05 AM
If you are running the code from Excel the Word variables should be defined as Objects


Dim oheader As Object
Dim oShp As Object
Dim oRng As Object
Dim oPara As Object
Dim oParaRng As Object

and the reference to ActiveDocument should be a reference to wdDoc

For Each oheader In wdDoc.Sections(1).Headers
The rest of the code I haven't checked.

ammarhm
07-10-2017, 05:23 AM
Thank you gmayor
Even though the code is not returning an error, the variables are not being filled
Even though oheader.Exists is returning true the rest of the code is not working to pull out the data from the textbox
I dont know if it helps to see the actual document, here is a link to a similar document (though the contents in the text box are author name instead of given bane etc

https://1drv.ms/w/s!AuL_dYmx22kl9RD61BfWGIgUur8P

Appreciate your help

mdmackillop
07-10-2017, 08:24 AM
Given Name: ******
Surname: ***XX
DOB: 11-22-1999

The subjects listed above do not appear in your sample and the Textbox is in the main body, not the header. Please ensure your samples match your requirements.

ammarhm
07-10-2017, 08:51 AM
Hi
First of all apologies for the mistake, I just realized that the textbox is not in the header section
I apologise if i wasted your time
I have update the document with the right details. As you can see the textbox is in the main body

https://1drv.ms/w/s!AuL_dYmx22kl9RD61BfWGIgUur8P

Kind regards

mdmackillop
07-10-2017, 10:07 AM
Try changing

For Each oheader In ActiveDocument.Sections(1).Headers
If oheader.Exists Then
For Each oShp In oheader.Range.ShapeRange
If oShp.Type = msoTextBox Then
to

For Each oShp In wdDoc.Shapes
If oShp.Type = msoTextBox Then

ammarhm
07-10-2017, 11:12 AM
Thank you mdmackillop
I am almost there with the code, here is the final combination:









Option Explicit

Dim j As Long
Dim Wrd As Object

Sub Test()
Dim pth As Variant
Dim Arr As Variant
Dim i As Integer
ActiveSheet.Range("A2:AZ2").ClearContents
pth = """C:\Users\home\Desktop\Uploaded\*.doc*"""

'Cells.ClearContents
Range("A:AZ").ClearContents
j = 1
Arr = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & pth & " /b /a-d /s").stdout.readall, vbCrLf), ".")

On Error Resume Next
Set Wrd = GetObject(, "Word.Application")
If Wrd Is Nothing Then Set Wrd = CreateObject("Word.Application")

Do
DoEvents
Loop Until Not Wrd Is Nothing

On Error GoTo 0
For i = LBound(Arr) To UBound(Arr)
Call ImportWordTable(Arr(i))
Next i
Wrd.Quit
Set Wrd = Nothing
End Sub


Sub ImportWordTable(wdFileName As Variant)
Dim wdDoc As Object
Dim tableNo As Integer 'table number in Word
Dim iRow As Long 'row index in Excel
Dim iCol As Integer 'column index in Excel
Dim x As Integer
Dim resultRow As Long
Dim tableStart As Integer
Dim tableTot As Integer
Dim sName As String, sSurname As String, sDOB As String
Dim oheader As Object
Dim oShp As Object
Dim oRng As Object
Dim oPara As Object
Dim oParaRng As Object


Dim Arr, i
On Error Resume Next
If wdFileName = False Then Exit Sub


'Set wdDoc = GetObject(wdFileName)
Set wdDoc = Wrd.Documents.Open(wdFileName) 'Sometimes error here



If wdDoc.tables.Count > 0 Then


With wdDoc


tableTot = wdDoc.tables.Count
With .tables(1)
For tableStart = 1 To tableTot
i = .Rows.Count * .Columns.Count
x = 1
ReDim Arr(1 To i)
For iRow = 1 To .Rows.Count
For iCol = 1 To .Columns.Count
On Error Resume Next
Arr(x) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
On Error GoTo 0
x = x + 1
Next iCol
Next iRow
Next tableStart
End With




For Each oShp In wdDoc.Shapes
If oShp.Type = msoTextBox Then
Set oRng = oShp.TextFrame.TextRange
For Each oPara In oRng.Paragraphs
Set oParaRng = oPara.Range
oParaRng.End = oParaRng.End - 1
Select Case True
Case InStr(1, oParaRng.Text, "Given Names:") > 0
sName = Trim(Split(oParaRng.Text, ":")(1))
Case InStr(1, oParaRng.Text, "Surname:") > 0
sSurname = Trim(Split(oParaRng.Text, ":")(1))
Case InStr(1, oParaRng.Text, "DOB: ") > 0
sDOB = Trim(Split(oParaRng.Text, ":")(1))
End Select
Next oPara
GoTo Values
End If
Next oShp


Values:

'Cells(j, 2) = sName
lbl_Exit:
Set oRng = Nothing
Set oParaRng = Nothing
Set oPara = Nothing
Set oShp = Nothing


End With



wdDoc.Close
Set wdDoc = Nothing
j = j + 1
Cells(j, 1).Resize(, i).Value = Arr
'Cells(j, 2) = sName '
End If
Set wdDoc = Nothing
End Sub




my question now is:
Is there a way to take the text from the textbos and slipt it up by spaces/enter and add it to the Arr so the it will be print into the same excel row as the contect of the table in the document added in the following step


Cells(j, 1).Resize(, i).Value = Arr

so the final result would be like "Given name" "******" "Surname" "***XX" "DOB" ..... Arr
to be more specific the text in the textbox contains more than just given name, surname, dob... as an example it contains 2 dob actually, one is empty. So I want all the string contents split by spaces into excel cells before the "Arr" array content for the same document
I really appreciate your help

ammarhm
07-10-2017, 11:29 AM
Here is what i have done, it dose the job though a bit slow....








Option Explicit

Dim j As Long
Dim Wrd As Object

Sub Test()
Dim pth As Variant
Dim Arr As Variant
Dim i As Integer
ActiveSheet.Range("A2:AZ2").ClearContents
pth = """C:\Users\home\Desktop\Uploaded\*.doc*"""



'Cells.ClearContents
Range("A:AZ").ClearContents
Cells(1, 1) = "Date"
Cells(1, 2) = "TextBox"
Cells(1, 3) = "Exam"
Cells(1, 7) = "Name"
Cells(1, 9) = "MRN"
Cells(1, 11) = "Referring_Doctor"
Cells(1, 13) = "IBD_Phenotype"
Cells(1, 15) = "Indication"
Cells(1, 19) = "Technique"
Cells(1, 23) = "Results"
Cells(1, 27) = "Conclusion"
Cells(1, 31) = "Recommendation"
Cells(1, 37) = "Examiner"

j = 1
Arr = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & pth & " /b /a-d /s").stdout.readall, vbCrLf), ".")

On Error Resume Next
Set Wrd = GetObject(, "Word.Application")
If Wrd Is Nothing Then Set Wrd = CreateObject("Word.Application")

Do
DoEvents
Loop Until Not Wrd Is Nothing

On Error GoTo 0
For i = LBound(Arr) To UBound(Arr)
Call ImportWordTable(Arr(i))
Next i
Wrd.Quit
Set Wrd = Nothing
End Sub


Sub ImportWordTable(wdFileName As Variant)
Dim wdDoc As Object
Dim tableNo As Integer 'table number in Word
Dim iRow As Long 'row index in Excel
Dim iCol As Integer 'column index in Excel
Dim x As Integer
Dim resultRow As Long
Dim tableStart As Integer
Dim tableTot As Integer
Dim sName As String, sSurname As String, sDOB As String
Dim oheader As Object
Dim oShp As Object
Dim oRng As Object
Dim oPara As Object
Dim oParaRng As Object
Dim utu As String



Dim Arr, i
On Error Resume Next
If wdFileName = False Then Exit Sub


'Set wdDoc = GetObject(wdFileName)
Set wdDoc = Wrd.Documents.Open(wdFileName) 'Sometimes error here



If wdDoc.tables.Count > 0 Then


With wdDoc


tableTot = wdDoc.tables.Count
With .tables(1)
For tableStart = 1 To tableTot
i = .Rows.Count * .Columns.Count
x = 1
ReDim Arr(1 To i)
For iRow = 1 To .Rows.Count
For iCol = 1 To .Columns.Count
On Error Resume Next
Arr(x) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
On Error GoTo 0
x = x + 1
Next iCol
Next iRow
Next tableStart
End With




For Each oShp In wdDoc.Shapes
If oShp.Type = msoTextBox Then
Set oRng = oShp.TextFrame.TextRange
For Each oPara In oRng.Paragraphs
Set oParaRng = oPara.Range
oParaRng.End = oParaRng.End - 1
utu = utu & oParaRng.Text


Next oPara
GoTo Values
End If
Next oShp


Values:

'Cells(j, 2) = sName
lbl_Exit:
Set oRng = Nothing
Set oParaRng = Nothing
Set oPara = Nothing
Set oShp = Nothing




End With



wdDoc.Close
Set wdDoc = Nothing
j = j + 1
Cells(j, 1).Resize(, i).Value = Arr
Cells(j, 2) = utu
End If
Set wdDoc = Nothing
End Sub





any suggestions about further optimization please? I am just putting all the textbox text into one cell in excel, which is fine

mdmackillop
07-11-2017, 05:38 AM
If you only have 1 table you can omit the looping code and refer to the table directly.
If you have one textbox only try

With wdDoc.StoryRanges(5) '(wdTextFrameStory)
txt = .Text
End With

ammarhm
07-11-2017, 06:10 AM
Thank you gmayor and mdmackillop

Here is the final code which is much simplified thanks for your help. i am including it in case anyone else came to this thread and had similar question.







Option Explicit

Dim j As Long
Dim Wrd As Object

Sub Test()
Dim pth As Variant
Dim arr As Variant
Dim i As Integer
pth = """C:\Users\home\Desktop\Uploaded\*.doc*"""



j = 1
arr = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & pth & " /b /a-d /s").stdout.readall, vbCrLf), ".")

On Error Resume Next
Set Wrd = GetObject(, "Word.Application")
If Wrd Is Nothing Then Set Wrd = CreateObject("Word.Application")

Do
DoEvents
Loop Until Not Wrd Is Nothing

On Error GoTo 0
For i = LBound(arr) To UBound(arr)
Call ImportWordTable(arr(i))
Next i
Wrd.Quit
Set Wrd = Nothing
End Sub


Sub ImportWordTable(wdFileName As Variant)
Dim wdDoc As Object
Dim iRow As Long
Dim iCol As Integer
Dim x As Integer



Dim arr, i
On Error Resume Next
If wdFileName = False Then Exit Sub




Set wdDoc = Wrd.Documents.Open(wdFileName)



If wdDoc.tables.Count > 0 Then


With wdDoc



With .tables(1)

i = .Rows.Count * .Columns.Count
x = 1
ReDim arr(1 To i)
For iRow = 1 To .Rows.Count
For iCol = 1 To .Columns.Count
On Error Resume Next
arr(x) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
On Error GoTo 0
x = x + 1
Next iCol
Next iRow

End With



With wdDoc.StoryRanges(5)
arr(2) = .Text
End With



End With


wdDoc.Close

Set wdDoc = Nothing

Cells(j, 1).Resize(, i).Value = arr
j = j + 1
End If
Set wdDoc = Nothing
End Sub