PDA

View Full Version : [SOLVED:] Copying the tables and corresponding section header from a word using excel vba



tmathew
06-09-2019, 11:47 PM
Hi,I am trying to copy the tables and corresponding the section number and heading from word documents to an excel sheet.
I am able to copy the tables to the excel sheet. But the code is not finding the corresponding section and section header.
It will be helpful if anyone have any idea!!!

eg: My tables are always under the heading 3. So in the vba, i am trying to loop through the tables and search backward for the heading 3, which is not happening. below is my VBA code.



Public Sub exportTables()
Dim t
Dim r
Dim ID As String
Dim prefix As String
Dim xR As Integer
Dim xROld As Integer
Dim chapter As String
Dim header As String
Dim foundSomething As Boolean
Dim firstCellText As String
Dim secondCellText As String
Dim Doc_path As String
Dim docList As String
Dim Workbook As Object
Dim wordApp As Object
Dim DocApp As Object
MsgBox "Please close all the Microsoft Word Applications"
Do
On Error Resume Next
Set wordApp = GetObject(, "Word.Application")
If Not wordApp Is Nothing Then
wordApp.Quit
Set wordApp = Nothing
End If
Loop Until wordApp Is Nothing
' track Excel worksheet row number
xR = 2
Doc_path = "C:\Users\Mathew\Documents"
' get the list of all documents in the folder
docList = Dir(Doc_path & "\*.doc", vbNormal)
ThisWorkbook.Sheets("Word Data").Activate
' Loop through all Word documents in the folder to get texts in the tables
While docList <> ""
' Create a Word object.
Set wordApp = CreateObject("Word.Application")
wordApp.Visible = False ' Do not show the file.
' Create a DocApp object and open the Word file.
Set DocApp = wordApp.Documents.Open(Filename:=Doc_path & "\" & docList, AddToRecentFiles:=False, Visible:=False)
With wordApp.ActiveDocument
' Loop through all tables in active Word document
For Each t In DocApp.Tables
On Error Resume Next
t.Range.Select
' Setup search to find header and corresponding section number
t.Range.Selection.Find.ClearFormatting
t.Range.Selection.Find.Style = "Heading 3"
With t.Range.Selection.Find
.Text = ""
.Replacement.Text = ""
.Forward = False
.Wrap = wdFindContinue
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
foundSomething = t.Range.Selection.Find.Execute
If foundSomething = True Then
'Determine the string to be used for header(by trimming page break if any)
If InStr(t.Range.Selection.Selection.Text, Chr(12)) = 1 Then
header = Mid(t.Range.Selection.Text, 2, Len(t.Range.Selection.Text) - 2)
Else
header = Mid(t.Range.Selection.Text, 1, Len(t.Range.Selection.Text) - 1)
End If
' Determine the string for chapter
chapter = "Chapter " & t.Range.Selection.ListFormat.ListString
End If
' xR tracks the current row in the Excel worksheet
xROld = xR
' Loop through rows in current table
For Each r In t.Rows
' First cell's text
firstCellText = r.Cells(1).Range.Text
' Check if row is not empty
If Not (r Is Nothing) Then
' Get text from second cell of current row
secondCellText = r.Cells(2).Range.Text
' First column contains serial number
ThisWorkbook.ActiveSheet.Cells(xR, 1) = xR - 1
' Second column contains the document section no.
ThisWorkbook.ActiveSheet.Cells(xR, 3) = chapter
' Third column contains the header name
ThisWorkbook.ActiveSheet.Cells(xR, 4) = header
' fourth column contains the firstCellText
ThisWorkbook.ActiveSheet.Cells(xR, 5) = firstCellText
' fifth column contains the secondCellText
ThisWorkbook.ActiveSheet.Cells(xR, 6) = Left(secondCellText, Len(secondCellText) - 2)
' Increment to next row in Excel worksheet
xR = xR + 1
End If
Next r
Next t
End With
' Clean up.
DocApp.Close
wordApp.Quit
Set wordApp = Nothing
Set DocApp = Nothing
docList = Dir()
Wend
ThisWorkbook.ActiveSheet.Cells.EntireColumn.AutoFit
End Sub

macropod
06-10-2019, 06:15 PM
Since you're opening the document in a hidden state, there is no active document. There is also no need to kill any existing Word sessions. Your code could do with major improvements in other areas, too. Try something along the lines of (untested). Run with Word visible for testing, though.

Public Sub ImportWordTables()
Dim xR As Long
Dim xROld As Long
Dim r As Long
Dim ID As String
Dim prefix As String
Dim chapter As String
Dim header As String
Dim firstCellText As String
Dim secondCellText As String
Dim Doc_path As String
Dim docList As String
Dim xlSht As Worksheet
Dim wdApp As Object
Dim wdDoc As Object
Dim wdRng As Object
Dim wdTbl As Object
' track Excel worksheet row number
xR = 2
Set xlSht = ThisWorkbook.Sheets("Word Data")
Doc_path = "C:\Users\Mathew\Documents"
' get the list of all documents in the folder
docList = Dir(Doc_path & "\*.doc", vbNormal)
' Create a Word object.
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = False ' Do not show Word.
' Loop through all Word documents in the folder to get texts in the tables
Do While docList <> ""
Set wdDoc = wdApp.Documents.Open(Filename:=Doc_path & "\" & docList, AddToRecentFiles:=False)
With wdDoc
With .Range
With .Find
.ClearFormatting
.Text = ""
.Replacement.Text = ""
.Forward = False
.Wrap = wdFindStop
.Format = True
.Style = "Heading 3"
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Execute
End With
If .Find.Found = True Then
Set wdRng = .Duplicate.GoTo(What:=-1, Name:="\HeadingLevel") ' -1 = wdGoToBookmark
header = Split(wdRng.Paragraphs.First.Range.Text, vbCr)(0)
chapter = "Chapter " & wdRng.Paragraphs.First.Range.ListFormat.ListString
' Loop through all tables in active Word document
For Each wdTbl In wdRng.Tables
' xR tracks the current row in the Excel worksheet
xROld = xR
With wdTbl
' Loop through rows in current table
For r = 1 To .Rows.Count
' First cell's text
firstCellText = Split(.Cell(r, 1).Range.Text, vbCr)(0)
' Get text from second cell of current row
secondCellText = Split(.Cell(r, 2).Range.Text, vbCr)(0)
' First column contains serial number
xlSht.Cells(xR, 1) = xR - 1
' Second column contains the document section no.
xlSht.Cells(xR, 3) = chapter
' Third column contains the header name
xlSht.Cells(xR, 4) = header
' fourth column contains the firstCellText
xlSht.Cells(xR, 5) = firstCellText
' fifth column contains the secondCellText
xlSht.Cells(xR, 6) = secondCellText
' Increment to next row in Excel worksheet
xR = xR + 1
Next
End With
End If
End If
End With
.Close False
End With
docList = Dir()
Loop
' Clean up.
wdApp.Quit
Set wdTbl = Nothing: Set wdRng = Nothing: Set wdDoc = Nothing: Set wdApp = Nothing
ThisWorkbook.ActiveSheet.Cells.EntireColumn.AutoFit
End Sub