Results 1 to 20 of 32

Thread: VBA code to extract images from word.

Threaded View

Previous Post Previous Post   Next Post Next Post
  1. #26
    Microsoft Word MVP 2003-2009 VBAX Guru gmaxey's Avatar
    Joined
    Sep 2005
    Posts
    3,411
    Location
    This is more complicated than I expected. While it doesn't occur in the document that you posted here, when I created and tested with a new document I discovered that when the HTML file is create that it will "typically" create two image files for each inlineshape in the active documet. This means if you have two images in the original file there will be four files in the extraction folder. I've managed to deal with a one for one count and a two for one count, but I can't be certain that there will always be two images files created for every inline shape. So, the following would fail in that situation.

    Just be sure that the caption text is in the paragraph immediately following the inlineshape. With the exception of the ":" which I've addressed in the code, don't use text that would be an invalid file name character.


    [vba]Sub ExtractImages()
    Dim strFileNameAndPath As String, strFileName As String, strName As String
    Dim strBasePath As String, strDate As String
    Dim strExtractionFolder As String, strImageName As String
    Dim bThumbnails As Boolean
    Dim lngIndex As Long
    Dim oRng As Word.Range
    Dim arrCaptions() As String
    Dim lngCount As Long

    'Store image caption names. The paragraph immediately following the shape paragraph defines the image caption.
    ReDim arrCaptions(ActiveDocument.InlineShapes.Count - 1)
    For lngIndex = 1 To ActiveDocument.InlineShapes.Count
    Set oRng = ActiveDocument.InlineShapes(lngIndex).Range
    oRng.Collapse wdCollapseEnd
    'Move range to start of caption paragraph text.
    oRng.Move wdParagraph, 1
    'Extend range to end of paragraph.
    oRng.MoveEndUntil Cset:=Chr(13), Count:=wdForward
    arrCaptions(lngIndex - 1) = oRng.Text
    Next lngIndex

    strFileNameAndPath = ActiveDocument.FullName
    'Define folder for extracted images.
    strBasePath = SpecialFolderPath & Application.PathSeparator
    strDate = Format(Now, "yyyy-mm-dd")
    strFileName = GetFileNameWithoutExtension(ActiveDocument.Name)
    strExtractionFolder = strDate & "_" & strFileName

    'Delete the folder if it exists.
    On Error Resume Next
    'Delete any files.
    Kill strBasePath & strExtractionFolder & "_files\*"
    RmDir strBasePath & strExtractionFolder & "_files"
    On Error GoTo 0

    'Save the current document.
    ActiveDocument.Save

    'Save document in HTML format. This creates the "_files\" folder in the Extraction Folder.
    ActiveDocument.SaveAs2 filename:=strBasePath & strExtractionFolder & ".html", FileFormat:=wdFormatHTML

    ActiveDocument.Close

    On Error Resume Next
    'Get rid of extraneous data files. Keep only the images.
    Kill strBasePath & strExtractionFolder & ".html"
    Kill strBasePath & strExtractionFolder & "_files\*.xml"
    Kill strBasePath & strExtractionFolder & "_files\*.html"
    Kill strBasePath & strExtractionFolder & "_files\*.thmx"
    On Error GoTo 0

    'Rename image files.
    'When converting to HTML format images in the original document may be duplicated as thumbnails in the HTML file.
    'We need to get if the image file count mathches the original InlineShape count of the document.
    lngCount = Count_Files(strBasePath & strExtractionFolder & "_files")
    Select Case True
    Case lngCount = UBound(arrCaptions) + 1
    'Image count matches so do a one for one rename.
    lngIndex = 0
    strName = Dir(strBasePath & strExtractionFolder & "_files\")
    While strName <> ""
    'Some characters are invalid in file names. A colon is invalid and could be used in the caption e.g., Screenshot 1: The Mona Lisa"
    strImageName = Replace(arrCaptions(lngIndex), ":", "-")
    Name strBasePath & strExtractionFolder & "_files\" & strName As strBasePath & strExtractionFolder & "_files\" & strImageName & ".png"
    strName = Dir()
    lngIndex = lngIndex + 1
    Wend
    Case lngCount = 2 * (UBound(arrCaptions) + 1)
    'Duplicates where created.
    lngIndex = 0
    bThumbnails = True
    strName = Dir(strBasePath & strExtractionFolder & "_files\")
    While strName <> ""
    'strExt = ".png"
    strImageName = Replace(arrCaptions(lngIndex), ":", "-")
    If bThumbnails Then
    Name strBasePath & strExtractionFolder & "_files\" & strName As strBasePath & strExtractionFolder & "_files\" & strImageName & ".png"
    Else
    Name strBasePath & strExtractionFolder & "_files\" & strName As strBasePath & strExtractionFolder & "_files\" & strImageName & " thumbnail" & ".png"
    End If
    strName = Dir()
    If Not bThumbnails Then lngIndex = lngIndex + 1
    bThumbnails = Not bThumbnails
    Wend
    Case Else
    MsgBox "File count mismatch occurred. Images were not renamed."
    End Select
    Word.Documents.Open (strFileNameAndPath)
    Word.Application.Visible = True
    Word.Application.Activate
    Exit Sub
    lbl_Exit:
    Exit Sub
    End Sub
    Function Count_Files(strFolder As String) As Long
    Dim strName As String
    Dim lngCount As Integer

    If Right(strFolder, 1) <> "\" Then strFolder = strFolder & "\"
    lngCount = 0
    strName = Dir(strFolder)
    While strName <> ""
    lngCount = lngCount + 1
    strName = Dir()
    Wend
    Count_Files = lngCount
    lbl_Exit:
    Exit Function
    End Function
    Function GetFileNameWithoutExtension(ByRef strFileName As String) As String
    On Error GoTo Err_NoExtension
    GetFileNameWithoutExtension = VBA.Left(strFileName, (InStrRev(strFileName, ".", -1, vbTextCompare) - 1))
    lbl_Exit:
    Exit Function
    Err_NoExtension:
    GetFileNameWithoutExtension = strFileName
    Resume lbl_Exit
    End Function
    Function SpecialFolderPath() As String
    Dim objWSHShell As Object
    Dim strSpecialFolderPath

    Set objWSHShell = CreateObject("WScript.Shell")
    SpecialFolderPath = objWSHShell.SpecialFolders("Desktop")
    Set objWSHShell = Nothing
    lbl_Exit:
    Exit Function
    End Function[/vba]
    Last edited by gmaxey; 05-12-2013 at 10:24 AM.
    Greg

    Visit my website: http://gregmaxey.com

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •