Consulting

Results 1 to 12 of 12

Thread: Search PDF files

  1. #1
    VBAX Expert
    Joined
    Jan 2005
    Posts
    574
    Location

    Search PDF files

    The piece of code below came from Dr J and im still impressed by it, It searches word docs for a key word and then lists the results in the spreadsheet

    Now im really asking but was wondering is it possible to do something similar with pdf acrobat files

    Being able to open pdf files from excel would be a start

    Anyway thought id ask and get you thinking cause this is over my head at the moment

    Thanks

    Gibbo


    Option Compare Text
    Option Explicit
     
    Private Const BIF_RETURNONLYFSDIRS As Long = &H1
    Private Const BIF_DONTGOBELOWDOMAIN As Long = &H2
    Private Const BIF_RETURNFSANCESTORS As Long = &H8
    Private Const BIF_BROWSEFORCOMPUTER As Long = &H1000
    Private Const BIF_BROWSEFORPRINTER As Long = &H2000
    Private Const BIF_BROWSEINCLUDEFILES As Long = &H4000
    Private Const MAX_PATH As Long = 260
    Type BrowseInfo
        hOwner                  As Long
        pidlRoot                As Long
        pszDisplayName          As String
        lpszINSTRUCTIONS        As String
        ulFlags                 As Long
        lpfn                    As Long
        lParam                  As Long
        iImage                  As Long
    End Type
    Type SHFILEOPSTRUCT
        hwnd                    As Long
        wFunc                   As Long
        pFrom                   As String
        pTo                     As String
        fFlags                  As Integer
        fAnyOperationsAborted   As Boolean
        hNameMappings           As Long
        lpszProgressTitle       As String
    End Type
    Declare Function SHGetPathFromIDListA Lib "shell32.dll" ( _
    ByVal pidl As Long, _
    ByVal pszBuffer As String) As Long
    Declare Function SHBrowseForFolderA Lib "shell32.dll" ( _
    lpBrowseInfo As BrowseInfo) As Long
     
    Function BrowseFolder(Optional Caption As String = "") As String
    Dim BrowseInfo      As BrowseInfo
    Dim FolderName      As String
    Dim ID              As Long
    Dim Res             As Long
    With BrowseInfo
        .hOwner = 0
        .pidlRoot = 0
        .pszDisplayName = String$(MAX_PATH, vbNullChar)
        .lpszINSTRUCTIONS = Caption
        .ulFlags = BIF_RETURNONLYFSDIRS
        .lpfn = 0
    End With
    FolderName = String$(MAX_PATH, vbNullChar)
    ID = SHBrowseForFolderA(BrowseInfo)
    If ID Then
        Res = SHGetPathFromIDListA(ID, FolderName)
        If Res Then
            BrowseFolder = Left$(FolderName, InStr(FolderName, _
            vbNullChar) - 1)
        End If
    End If
    End Function
     
    Sub ListText()
    Dim AppWrd          As New Word.Application
    Dim Doc             As Word.Document
    Dim Search          As String
    Dim Prompt          As String
    Dim Title           As String
    Dim PageX()         As Long
    Dim LineX()         As Long
    Dim FPath()         As String
    Dim FName()         As String
    Dim Row             As Long
    Dim Counter         As Long
    Dim Pos             As Double
    Dim Path            As String
    Dim FileName        As String
    Dim MyResponse      As VbMsgBoxResult
    Dim StartLine       As Long
    Dim StartPage       As Long
    Dim WS              As Worksheet
    WordBasic.DisableAutoMacros True
    '*** Get folder from user ***
    Prompt = "Select the folder with the files that you want to search through."
    Title = "Folder Selection"
    MsgBox Prompt, vbInformation, Title
    Path = BrowseFolder("Select A Folder")
    If Path = "" Then
        Prompt = "You didn't select a folder. The procedure has been canceled."
        Title = "Procedure Canceled"
        MsgBox Prompt, vbCritical, Title
        GoTo Canceled:
    End If
    '*** This code works with XP only and is also used to pick a folder ***
    'Application.FileDialog(msoFileDialogFolderPicker) .Show
    'Path = CurDir
    Prompt = "What do you want to search for?"
    Title = "Search Criteria"
    Search = InputBox(Prompt, Title)
    If Search = "" Then
        GoTo Canceled
    End If
    '*** Confirm the procedure before continuing ***
    Prompt = "Are you sure that you want to search all the files in the folder:" & _
        vbCrLf & Path & " for " & """" & Search & """" & "?"
    Title = "Confirm Procedure"
    MyResponse = MsgBox(Prompt, vbQuestion + vbYesNo, Title)
    If MyResponse = vbNo Then
        GoTo Canceled:
    End If
    Application.DisplayAlerts = False
    Application.ScreenUpdating = False
    '*** Loop through all Word documents and search each of
    '***them for the specified criteria***
    FileName = Dir(Path & "\*.doc", vbNormal)
    Do Until FileName = ""
        On Error Resume Next
        Set Doc = AppWrd.Documents.Open(Path & "\" & FileName, ReadOnly:=True, _
        PasswordDocument:="DRJWasHere")
        If Err <> 0 Then
            GoTo NextLoop:
        End If
        On Error GoTo 0
        With Doc
        AppWrd.Selection.Find.ClearFormatting
            With AppWrd.Selection.Find
            .Text = Search
            .Replacement.Text = ""
            .Forward = True
            .Wrap = wdFindContinue
            .Format = False
            .MatchCase = False
            .MatchWholeWord = False
            .MatchWildcards = False
            .MatchSoundsLike = False
            .MatchAllWordForms = False
        End With
        AppWrd.Selection.Find.Execute
        If AppWrd.Selection.Range.Text <> Search Then
            GoTo NextLoop:
        End If
    Pos = _
    AppWrd.Selection.Range.Information( _
    wdHorizontalPositionRelativeToPage)
    Counter = Counter + 1
    ReDim Preserve LineX(1 To Counter)
    ReDim Preserve PageX(1 To Counter)
    ReDim Preserve FPath(1 To Counter)
    ReDim Preserve FName(1 To Counter)
    LineX(Counter) = AppWrd.Selection.Range.Information( _
    wdFirstCharacterLineNumber)
    PageX(Counter) = AppWrd.Selection.Range.Information( _
    wdActiveEndPageNumber)
    FPath(Counter) = Doc.Path
    FName(Counter) = Doc.Name
    StartLine = AppWrd.Selection.Range.Information( _
    wdFirstCharacterLineNumber)
    StartPage = AppWrd.Selection.Range.Information( _
    wdActiveEndPageNumber)
    AppWrd.Selection.Find.Execute
    Do While Pos <> AppWrd.Selection.Range.Information( _
    wdHorizontalPositionRelativeToPage) Or _
    StartLine <> AppWrd.Selection.Range.Information( _
    wdFirstCharacterLineNumber) Or _
    StartPage <> AppWrd.Selection.Range.Information( _
    wdActiveEndPageNumber)
    Counter = Counter + 1
    ReDim Preserve LineX(1 To Counter)
    ReDim Preserve PageX(1 To Counter)
    ReDim Preserve FPath(1 To Counter)
    ReDim Preserve FName(1 To Counter)
    If LineX(Counter - 1) = _
    AppWrd.Selection.Range.Information( _
    wdFirstCharacterLineNumber) And _
    PageX(Counter - 1) = _
    AppWrd.Selection.Range.Information( _
    wdActiveEndPageNumber) And _
    Pos = _
    AppWrd.Selection.Range.Information( _
    wdHorizontalPositionRelativeToPage) Then
    Exit Do
    Else
    LineX(Counter) = _
    AppWrd.Selection.Range.Information( _
    wdFirstCharacterLineNumber)
    PageX(Counter) = _
    AppWrd.Selection.Range.Information( _
    wdActiveEndPageNumber)
    FPath(Counter) = Doc.Path
    FName(Counter) = Doc.Name
    Pos = _
    AppWrd.Selection.Range.Information( _
    wdHorizontalPositionRelativeToPage)
    AppWrd.Selection.Find.Execute
    End If
    Loop
    NextLoop:
    End With
    Doc.Close False
    Set Doc = Nothing
    On Error GoTo 0
    FileName = Dir()
    Loop
    If Counter = 0 Then
        MsgBox Search & " was not found.", vbInformation, "Zero Results For Search"
        GoTo Canceled:
    End If
    Set WS = ThisWorkbook.Sheets("Sheet1")
    With WS
        .Range("A1").Value = "Occurrences of the word " & """" & Search & """"
        .Range("A1:C1").Merge
        .Range("A2").Value = "Document Path"
        .Range("B2").Value = "Page Number"
        .Range("C2").Value = "Line Number"
        .Range("A1:C2").Font.Bold = True
        .Range("A1:C2").HorizontalAlignment = xlCenter
    For Row = UBound(LineX) To 1 Step -1
    If PageX(Row) = 0 Or LineX(Row) = 0 Then
    Else
    .Range("B" & Row + 2).Value = PageX(Row)
    .Range("C" & Row + 2).Value = LineX(Row)
    .Hyperlinks.Add Anchor:=.Range("A" & Row + 2), _
    Address:=FPath(Row) & "\" & FName(Row), TextToDisplay:=FName(Row)
    End If
    Next Row
    .Range("A:C").EntireColumn.AutoFit
    End With
    Canceled:
    AppWrd.Quit
    Set Doc = Nothing
    Set AppWrd = Nothing
    WordBasic.DisableAutoMacros False
    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
    End Sub

  2. #2
    Administrator
    VP-Knowledge Base
    VBAX Master
    Joined
    Jan 2005
    Location
    Porto Alegre - RS - Brasil
    Posts
    1,219
    Location
    Hi Gibbo,

    in that code Dr J is instanciating a Word App to do the search using Microsoft Word as it can be "commanded" by VBA, but Adobe Acrobat Reader doesnt support VBA, nor any other macro language and none out of Microsoft Office Apps can read a pdf document so, it cant be done unless you find some kind of add-in that allow any office application to read a pdf file.

    This is the dept of my knowledge, maybe someone else knows a way to do it,
    Best Regards,

    Carlos Paleo.

    To every problem there is a solution, even if I dont know it, so this posting is provided "AS IS" with no warranties.

    If Debugging is harder than writing a program and your code is as good as you can possibly make
    it, then by definition you're not smart enough to debug it.




    http://www.mugrs.org

  3. #3
    Administrator
    Chat VP VBAX Guru johnske's Avatar
    Joined
    Jul 2004
    Location
    Townsville, Australia
    Posts
    2,872
    Location
    Quote Originally Posted by gibbo1715
    .....Being able to open pdf files from excel would be a start....

    Hi Gibbo,

    You can use the hyperlink method to open most things. For example, this opens a PDF document on my PC called "General_2004" that's in the same folder as the workbook calling it. All you need to do is set the path from your book to your PDF doc.

    (I dont know whether you can use the above code with it, though. However there are several programs around that convert PDF to Word - in rich text format - that may assist here also)

    Sub OpenWord_FollowHyperlink()
    On Error GoTo 1
    ActiveWorkbook.FollowHyperlink ActiveWorkbook.Path & _
    "\General_2004.pdf", NewWindow:=True
    Application.WindowState = xlNormal
    Exit Sub
    1: MsgBox Err.Description
    End Sub
    You know you're really in trouble when the light at the end of the tunnel turns out to be the headlight of a train hurtling towards you

    The major part of getting the right answer lies in asking the right question...


    Made your code more readable, use VBA tags (this automatically inserts [vba] at the start of your code, and [/vba ] at the end of your code) | Help those helping you by marking your thread solved when it is.

  4. #4
    Administrator
    VP-Knowledge Base VBAX Master
    Joined
    Jan 2005
    Location
    Porto Alegre - RS - Brasil
    Posts
    1,219
    Location
    Great John,

    well, if its converted to rtf then can easily be searched by word and if its converted from rtf to txt even by excel. Right John?
    Best Regards,

    Carlos Paleo.

    To every problem there is a solution, even if I dont know it, so this posting is provided "AS IS" with no warranties.

    If Debugging is harder than writing a program and your code is as good as you can possibly make
    it, then by definition you're not smart enough to debug it.




    http://www.mugrs.org

  5. #5
    Administrator
    Chat VP VBAX Guru johnske's Avatar
    Joined
    Jul 2004
    Location
    Townsville, Australia
    Posts
    2,872
    Location
    Quote Originally Posted by Paleo
    Great John,

    well, if its converted to rtf then can easily be searched by word and if its converted from rtf to txt even by excel. Right John?

    Without trying it - probably. However these conversion programs (such as the one I have) are usually very slow, it'd probably be best to just convert it into an RTF doc first...

    John
    You know you're really in trouble when the light at the end of the tunnel turns out to be the headlight of a train hurtling towards you

    The major part of getting the right answer lies in asking the right question...


    Made your code more readable, use VBA tags (this automatically inserts [vba] at the start of your code, and [/vba ] at the end of your code) | Help those helping you by marking your thread solved when it is.

  6. #6
    Site Admin
    Jedi Master
    VBAX Guru Jacob Hilderbrand's Avatar
    Joined
    Jun 2004
    Location
    Roseville, CA
    Posts
    3,712
    Location
    Try this.

    Option Explicit
    Option Compare Text
    Option Explicit
     
    Private Const BIF_RETURNONLYFSDIRS As Long = &H1
    Private Const BIF_DONTGOBELOWDOMAIN As Long = &H2
    Private Const BIF_RETURNFSANCESTORS As Long = &H8
    Private Const BIF_BROWSEFORCOMPUTER As Long = &H1000
    Private Const BIF_BROWSEFORPRINTER As Long = &H2000
    Private Const BIF_BROWSEINCLUDEFILES As Long = &H4000
    Private Const MAX_PATH As Long = 260
     
    Type BrowseInfo
        hOwner As Long
        pidlRoot As Long
        pszDisplayName As String
        lpszINSTRUCTIONS As String
        ulFlags As Long
        lpfn As Long
        lParam As Long
        iImage As Long
    End Type
    Type SHFILEOPSTRUCT
        hwnd As Long
        wFunc As Long
        pFrom As String
        pTo As String
        fFlags As Integer
        fAnyOperationsAborted As Boolean
        hNameMappings As Long
        lpszProgressTitle As String
    End Type
     
    Declare Function SHGetPathFromIDListA Lib "shell32.dll" ( _
    ByVal pidl As Long, _
    ByVal pszBuffer As String) As Long
     
    Declare Function SHBrowseForFolderA Lib "shell32.dll" ( _
    lpBrowseInfo As BrowseInfo) As Long
     
    Function BrowseFolder(Optional Caption As String = "") As String
    Dim BrowseInfo      As BrowseInfo
        Dim FolderName      As String
        Dim ID              As Long
        Dim Res             As Long
    With BrowseInfo
            .hOwner = 0
            .pidlRoot = 0
            .pszDisplayName = String$(MAX_PATH, vbNullChar)
            .lpszINSTRUCTIONS = Caption
            .ulFlags = BIF_RETURNONLYFSDIRS
            .lpfn = 0
        End With
        FolderName = String$(MAX_PATH, vbNullChar)
        ID = SHBrowseForFolderA(BrowseInfo)
        If ID Then
            Res = SHGetPathFromIDListA(ID, FolderName)
            If Res Then
                BrowseFolder = Left$(FolderName, InStr(FolderName, _
                vbNullChar) - 1)
            End If
        End If
    End Function
     
    Sub ListText()
    Dim AppWrd          As New Word.Application
        Dim Doc             As Word.Document
        Dim Search          As String
        Dim Prompt          As String
        Dim Title           As String
        Dim PageX()         As Long
        Dim LineX()         As Long
        Dim FPath()         As String
        Dim FName()         As String
        Dim Row             As Long
        Dim Counter         As Long
        Dim Pos             As Double
        Dim Path            As String
        Dim FileName        As String
        Dim MyResponse      As VbMsgBoxResult
        Dim StartLine       As Long
        Dim StartPage       As Long
        Dim WS              As Worksheet
    '*** Get folder from user ***
        Prompt = "Select the folder with the files that you want to search through."
        Title = "Folder Selection"
        MsgBox Prompt, vbInformation, Title
    Path = BrowseFolder("Select A Folder")
        If Path = "" Then
            Prompt = "You didn't select a folder. The procedure has been canceled."
            Title = "Procedure Canceled"
            MsgBox Prompt, vbCritical, Title
    GoTo Canceled:
        End If
    '*** This code works with XP only and is also used to pick a folder ***
         'Application.FileDialog(msoFileDialogFolderPicker)  .Show
         'Path = CurDir
    Prompt = "What do you want to search for?"
        Title = "Search Criteria"
        Search = InputBox(Prompt, Title)
        If Search = "" Then
    GoTo Canceled:
        End If
    '*** Confirm the procedure before continuing ***
        Prompt = "Are you sure that you want to search all the files in the folder:" & _
        vbCrLf & Path & " for " & """" & Search & """" & "?"
        Title = "Confirm Procedure"
        MyResponse = MsgBox(Prompt, vbQuestion + vbYesNo, Title)
        If MyResponse = vbNo Then
    GoTo Canceled:
        End If
    Application.DisplayAlerts = False
        Application.ScreenUpdating = False
    '*** Loop through all Word documents and search
         '***each of them for the specified criteria***
        FileName = Dir(Path & "\*.pdf", vbNormal)
        Do Until FileName = ""
            Set Doc = AppWrd.Documents.Add
            On Error Resume Next
            AppWrd.Selection.InsertFile FileName:=Path & "\" & FileName, _
            Range:="", ConfirmConversions:=False, Link:=False, Attachment:=False
            If Err <> 0 Then
    GoTo NextLoop:
            End If
            On Error GoTo 0
    With Doc
                AppWrd.Selection.Find.ClearFormatting
                With AppWrd.Selection.Find
                    .Text = Search
                    .Replacement.Text = ""
                    .Forward = True
                    .Wrap = wdFindContinue
                    .Format = False
                    .MatchCase = False
                    .MatchWholeWord = False
                    .MatchWildcards = False
                    .MatchSoundsLike = False
                    .MatchAllWordForms = False
                End With
                AppWrd.Selection.Find.Execute
                If AppWrd.Selection.Range.Text <> Search Then
    GoTo NextLoop:
                End If
                Pos = AppWrd.Selection.Range.Information( _
                wdHorizontalPositionRelativeToPage)
                Counter = Counter + 1
                ReDim Preserve LineX(1 To Counter)
                ReDim Preserve PageX(1 To Counter)
                ReDim Preserve FPath(1 To Counter)
                ReDim Preserve FName(1 To Counter)
                LineX(Counter) = AppWrd.Selection.Range.Information( _
                wdFirstCharacterLineNumber)
                PageX(Counter) = AppWrd.Selection.Range.Information( _
                wdActiveEndPageNumber)
                FPath(Counter) = Path & "\"
                FName(Counter) = FileName
                StartLine = AppWrd.Selection.Range.Information( _
                wdFirstCharacterLineNumber)
                StartPage = AppWrd.Selection.Range.Information( _
                wdActiveEndPageNumber)
                AppWrd.Selection.Find.Execute
                Do While Pos <> AppWrd.Selection.Range.Information( _
                wdHorizontalPositionRelativeToPage) Or _
                    StartLine <> AppWrd.Selection.Range.Information( _
                    wdFirstCharacterLineNumber) Or _
                    StartPage <> AppWrd.Selection.Range.Information( _
                    wdActiveEndPageNumber)
                    Counter = Counter + 1
                    ReDim Preserve LineX(1 To Counter)
                    ReDim Preserve PageX(1 To Counter)
                    ReDim Preserve FPath(1 To Counter)
                    ReDim Preserve FName(1 To Counter)
    If LineX(Counter - 1) = _
                    AppWrd.Selection.Range.Information( _
                    wdFirstCharacterLineNumber) And _
                    PageX(Counter - 1) = _
                    AppWrd.Selection.Range.Information( _
                    wdActiveEndPageNumber) And _
                    Pos = _
                    AppWrd.Selection.Range.Information( _
                    wdHorizontalPositionRelativeToPage) Then
                        Exit Do
                    Else
                        LineX(Counter) = _
                        AppWrd.Selection.Range.Information( _
                        wdFirstCharacterLineNumber)
                        PageX(Counter) = _
                        AppWrd.Selection.Range.Information( _
                        wdActiveEndPageNumber)
                        FPath(Counter) = Path & "\"
                        FName(Counter) = FileName
                        Pos = _
                        AppWrd.Selection.Range.Information( _
                        wdHorizontalPositionRelativeToPage)
                        AppWrd.Selection.Find.Execute
                    End If
    Loop
    NextLoop:
    End With
            Doc.Close False
            Set Doc = Nothing
            On Error GoTo 0
            FileName = Dir()
        Loop
    If Counter = 0 Then
            MsgBox Search & " was not found.", vbInformation, "Zero Results For Search"
    GoTo Canceled:
        End If
    Set WS = ThisWorkbook.Sheets("Sheet1")
        With WS
            .Range("A1").Value = "Occurrences of the word " & """" & Search & """"
            .Range("A1:C1").Merge
            .Range("A2").Value = "Document Path"
            .Range("B2").Value = "Page Number"
            .Range("C2").Value = "Line Number"
            .Range("A1:C2").Font.Bold = True
            .Range("A1:C2").HorizontalAlignment = xlCenter
    For Row = UBound(LineX) To 1 Step -1
                If PageX(Row) = 0 Or LineX(Row) = 0 Then
                Else
                    .Range("B" & Row + 2).Value = PageX(Row)
                    .Range("C" & Row + 2).Value = LineX(Row)
                    .Hyperlinks.Add Anchor:=.Range("A" & Row + 2), _
                    Address:=FPath(Row) & "\" & FName(Row), TextToDisplay:=FName(Row)
                End If
            Next Row
            .Range("A:C").EntireColumn.AutoFit
        End With
    Canceled:
    AppWrd.Quit
        Set Doc = Nothing
        Set AppWrd = Nothing
    Application.DisplayAlerts = True
        Application.ScreenUpdating = True
    End Sub

  7. #7
    VBAX Expert
    Joined
    Jan 2005
    Posts
    574
    Location
    Well I thought this might be impossible but you solved it in a few hours, you guys really are the fountain of all knowledge!!!!!

    thankyou

  8. #8
    Site Admin
    Jedi Master
    VBAX Guru Jacob Hilderbrand's Avatar
    Joined
    Jun 2004
    Location
    Roseville, CA
    Posts
    3,712
    Location
    You're Welcome

    Now it's up to you to find something to challenge us.

    Take Care

  9. #9
    VBAX Expert
    Joined
    Jan 2005
    Posts
    574
    Location
    I'll try next time Jake, thought that one might challenge u!!!

    Just quickly before I leave you in peace how do I activate the hyperlinklink from my listbox on my userform?

  10. #10
    Site Admin
    Jedi Master
    VBAX Guru Jacob Hilderbrand's Avatar
    Joined
    Jun 2004
    Location
    Roseville, CA
    Posts
    3,712
    Location
    [VBA]Range("A1").Hyperlinks(1).Follow NewWindow:=False, AddHistory:=True[/VBA]

    Of course you will have to determine the actual range of the hyperlink and replace "A1" with that cell.

  11. #11
    VBAX Master Killian's Avatar
    Joined
    Nov 2004
    Location
    London
    Posts
    1,132
    Location

    Controlling Acrobat from Office

    but Adobe Acrobat Reader doesnt support VBA
    Well I think that depends on what you have installed. With just Acrobat reader installed, I don't think there's much available but with other versions, part of the Acrobat object model is exposed. Have a look in your references list in VBA, you may be surprised to see what you can get hold of. I posted a reply to a (vaguely) similar question:
    http://www.vbaexpress.com/forum/showthread.php?t=1554

    www.planetpdf.com is also a good resource.

    Enjoy
    K :-)

  12. #12
    Administrator
    VP-Knowledge Base VBAX Master
    Joined
    Jan 2005
    Location
    Porto Alegre - RS - Brasil
    Posts
    1,219
    Location
    Hi Killian,

    this is great, thanks.
    Best Regards,

    Carlos Paleo.

    To every problem there is a solution, even if I dont know it, so this posting is provided "AS IS" with no warranties.

    If Debugging is harder than writing a program and your code is as good as you can possibly make
    it, then by definition you're not smart enough to debug it.




    http://www.mugrs.org

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •