Consulting

Results 1 to 8 of 8

Thread: Extracting data in Word textboxes to Excel

  1. #1
    VBAX Newbie
    Joined
    Jul 2018
    Posts
    4
    Location

    Extracting data in Word textboxes to Excel

    Hello,

    I have a large amount of word documents (1000+) that have data stored in textboxes. I need to extract this data to individual cells in excel. Ideally, each word document would be recorded with a hyperlink to its path and a line for each data entry. I have found the following code written by Macropod that does pretty much exactly what I need, but it pulls from tables. See below

    Sub GetTableData()'Note: this code requires a reference to the Word object model.
    'See under the VBE's Tools|References.
    'Application.ScreenUpdating = False
    Dim wdApp As New Word.Application, wdDoc As Word.Document, wdTbl As Word.Table
    Dim strFolder As String, strFile As String, WkBk As Workbook, WkSht As Worksheet, r As Long
    strFolder = GetFolder: If strFolder = "" Then GoTo ErrExit
    Set WkBk = ActiveWorkbook
    'Disable any auto macros in the documents being processed
    wdApp.WordBasic.DisableAutoMacros
    strFile = Dir(strFolder & "\*.doc", vbNormal)
    While strFile <> ""
      Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
      Set WkSht = WkBk.Sheets.Add
      WkSht.Name = Split(strFile, ".doc")(0)
      With wdDoc
        For Each wdTbl In .Tables
          With wdTbl.Range.Find
            .ClearFormatting
            .Replacement.ClearFormatting
            .Text = "[^13^l]"
            .Replacement.Text = "¶"
            .Forward = True
            .Wrap = wdFindStop
            .Format = False
            .MatchWildcards = True
            .Execute Replace:=wdReplaceAll
          End With
          r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
          If r > 1 Then r = r + 2
          wdTbl.Range.Copy
          WkSht.Paste Destination:=WkSht.Range("A" & r)
        Next
        WkSht.UsedRange.Replace What:="¶", Replacement:=Chr(10), LookAt:=xlPart, SearchOrder:=xlByRows
        .Close SaveChanges:=False
      End With
      strFile = Dir()
    Wend
    ErrExit:
    wdApp.Quit
    Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing: Set WkBk = Nothing
    Application.ScreenUpdating = True
    End Sub
     
    Function GetFolder() As String
        Dim oFolder As Object
        GetFolder = ""
        Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
        If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
        Set oFolder = Nothing
    End Function
    Cheers,
    Mac

  2. #2
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Try:
    Sub GetTextBoxData()
    'Note: this code requires a reference to the Word object model.
    'See under the VBE's Tools|References.
    Application.ScreenUpdating = False
    Dim wdApp As New Word.Application, wdDoc As Word.Document, wdShp As Word.Shape
    Dim strFolder As String, strFile As String, WkBk As Workbook, WkSht As Worksheet, r As Long
    strFolder = GetFolder: If strFolder = "" Then GoTo ErrExit
    Set WkBk = ActiveWorkbook
    'Disable any auto macros in the documents being processed
    wdApp.WordBasic.DisableAutoMacros
    strFile = Dir(strFolder & "\*.doc", vbNormal)
    While strFile <> ""
      Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
      Set WkSht = WkBk.Sheets.Add
      WkSht.Name = Split(strFile, ".doc")(0)
      With wdDoc
        For Each wdShp In .Shapes
          With wdShp
            If .Type = msoTextBox Then
              With .TextFrame.TextRange.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "[^13^l]"
                .Replacement.Text = "¶"
                .Forward = True
                .Wrap = wdFindStop
                .Format = False
                .MatchWildcards = True
                .Execute Replace:=wdReplaceAll
              End With
              r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
              If r > 1 Then r = r + 2
              .TextFrame.TextRange.Copy
              WkSht.Paste Destination:=WkSht.Range("A" & r)
            End If
          End With
        Next
        WkSht.UsedRange.Replace What:="¶", Replacement:=Chr(10), LookAt:=xlPart, SearchOrder:=xlByRows
        .Close SaveChanges:=False
      End With
      strFile = Dir()
    Wend
    ErrExit:
    wdApp.Quit
    Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing: Set WkBk = Nothing
    Application.ScreenUpdating = True
    End Sub
     
    Function GetFolder() As String
        Dim oFolder As Object
        GetFolder = ""
        Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
        If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
        Set oFolder = Nothing
    End Function
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  3. #3
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    ​Cross-posted at: https://www.mrexcel.com/forum/excel-...xes-excel.html
    Please read VBA Express' policy on Cross-Posting in item 3 of the rules: http://www.vbaexpress.com/forum/faq...._new_faq_item3
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  4. #4
    VBAX Newbie
    Joined
    Jul 2018
    Posts
    4
    Location
    Thank you Macropod. My apologies for the cross-posting. The code definitely runs smoother now, but the only shapes it is pulling are the pictures in the report. If it could pull the picture with the associated textbox, that would be amazing. However, it is really only the textbox's text that is important.
    See attached example report

    example.doc
    Last edited by macropod; 07-09-2018 at 03:36 PM.

  5. #5
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    Try changing:
    If r > 1 Then r = r + 2 .TextFrame.TextRange.Copy WkSht.Paste Destination:=WkSht.Range("A" & r)
    to:
              r = r + 1
              WkSht.Range("A" & r).Value = .TextFrame.TextRange.Text
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  6. #6
    VBAX Newbie
    Joined
    Jul 2018
    Posts
    4
    Location
    Thanks Paul, much appreciated. Works great.

    I never realized this but the first page of each report is actually a Table. I added more code (found another one of your posts on a different website) to deal with this table and it is working great so far. Where I am having a particular problem is with the GetFolder Function. I would like this Function to deal with a parent folder that has multiple child folders. I would like it to cycle though each of the child folders. For example:

    Parent folder
    • Child folder 1
    • Child folder 2
    • Child folder 3
    • Child folder ....


    Where the word docs would be in the Child folders. If this should be posted as a new thread, please let me know.

    Here's the code I have so far

    Sub GetTextBoxData()'Note: this code requires a reference to the Word object model.
    'See under the VBE's Tools|References.
    Application.ScreenUpdating = False
    Dim wdApp As New Word.Application, wdDoc As Word.Document, wdShp As Word.Shape, wdTbl As Word.Table
    Dim strFolder As String, strFile As String, wkbk As Workbook, wksht As Worksheet, r As Long, t As Long
    strFolder = GetFolder: If strFolder = "" Then GoTo ErrExit
    Set wkbk = ActiveWorkbook
    'Disable any auto macros in the documents being processed
    wdApp.WordBasic.DisableAutoMacros
    strFile = Dir(strFolder & "\*.doc", vbNormal)
    While strFile <> ""
      Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, ReadOnly:=True, AddToRecentFiles:=False, Visible:=False) 'ReadOnly=True if document is opened by someone else and to prevent any changes
      Set wksht = wkbk.Sheets.Add
        wksht.Name = Left(Split(strFile, ".doc")(0), 31) 'Excel will not allow sheet names to be >31 characters
        With wdDoc
        For Each wdTbl In .Tables
          Select Case 1
            Case 1
              With wdTbl.Range.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "[^13^l]"
                .Replacement.Text = "¶"
                .Forward = True
                .Wrap = wdFindStop
                .Format = False
                .MatchWildcards = True
                .Execute Replace:=wdReplaceAll
              End With
              r = wksht.Cells(wksht.Rows.count, 1).End(xlUp).row
              If r > 1 Then r = r + 2
              wdTbl.Range.Copy
              wksht.Paste Destination:=wksht.Range("A" & r)
          Exit For
          End Select
        Next
        For Each wdShp In .Shapes
          With wdShp
            If .Type = msoTextBox Then
              With .TextFrame.TextRange.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "[^13^l]"
                .Replacement.Text = "¶"
                .Forward = True
                .Wrap = wdFindStop
                .Format = False
                .MatchWildcards = True
                .Execute Replace:=wdReplaceAll
              End With
              r = wksht.Cells(wksht.Rows.count, 1).End(xlUp).row
              r = r + 1
              wksht.Range("A" & r).Value = .TextFrame.TextRange.Text
            End If
          End With
        Next
        wksht.UsedRange.Replace What:="¶", Replacement:=Chr(10), LookAt:=xlPart, SearchOrder:=xlByRows
        .Close SaveChanges:=False
      End With
      strFile = Dir()
    Wend
    ErrExit:
    wdApp.Quit
    Set wdDoc = Nothing: Set wdApp = Nothing: Set wksht = Nothing: Set wkbk = Nothing
    Application.ScreenUpdating = True
    End Sub
     
    Function GetFolder() As String
        Dim oFolder As Object
        GetFolder = ""
        Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
        If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
        Set oFolder = Nothing
    End Function
    Cheers,
    Mac

  7. #7
    Knowledge Base Approver VBAX Guru macropod's Avatar
    Joined
    Jul 2008
    Posts
    4,435
    Location
    It would have been helpful if you had mentioned the child folder at the outset, as that requires a significant re-write. For the required changes, see: http://www.msofficeforums.com/word-v...html#post47785
    Cheers
    Paul Edstein
    [Fmr MS MVP - Word]

  8. #8
    VBAX Newbie
    Joined
    Jul 2018
    Posts
    4
    Location
    That does the trick! Thanks again for all the help Paul

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •