PDA

View Full Version : Convert PDF data into Excel or txt using VBA



mleblanc001
01-05-2018, 10:34 PM
Hi Gurus,
I found this code online which looks great but when I try to run it, I get the following error message. Can you please tell me why I am getting this error message. I figured out that it has to do with the library but I can't figure out what to check in the library to fix it. Your help would be greatly appreciated.

The error message:
Microsoft Visual Basic for applications
Compile error:
Can't find project or library

The code is as follows:


Option Explicit
Sub Main_Import()
frm_pdfimp.Show
End Sub

Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)
'This procedure get the PDF data into excel by following way
'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
sheets or single sheet as defined in Each_Sheet Parameter
Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area
Dim WS_PDF As Worksheet
Dim RW_Ct As Long 'row count
Dim Col_Num As Integer 'column count
Dim Li_Row As Long 'Maximum rows limit for one column
Dim Yes_Fir As Boolean 'to identify beginning of page
Li_Row = Rows.Count
Dim Ct_Page As Long 'count pages in pdf file
Dim i As Long, j As Long, k As Long 'looping variables
Dim T_Str As String
Dim Hld_Txt As Variant 'get PDF total text into array
RW_Ct = 0 'set the intial value
Col_Num = 1 'set the intial value
Application.ScreenUpdating = False
Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList
'set maximum selection area of PDF page
AC_Hi.Add 0, 32767
With AC_PD
'open PDF file
.Open PDF_File
'get the number of pages of PDF file
Ct_Page = .GetNumPages
'if get pages is failed exit sub
If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
.Close
GoTo h_end
End If
'add sheet only one time if Data retrive in one sheet
If Each_Sheet = False Then
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
WS_PDF.Name = "PDF2Text"
End If
'looping through sheets
For i = 1 To Ct_Page
T_Str = ""
'get the page
Set AC_PG = .AcquirePage(i - 1)
'get the full page selection
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
'if text selected successfully get the all the text into T_Str string
If Not AC_PGTxt Is Nothing Then
With AC_PGTxt
For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j
End With
End If
If Each_Sheet = True Then
'add each sheet for each page
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
End If
'transfer PDF data into sheet
With WS_PDF
If Each_Sheet = True Then
.Name = "Page-" & I
'get the PDF data into each sheet for each PDF page
'if text accessed successfully then split T_Str by VbCrLf
'and get into array Hld_Txt and looping through array and fill sheet with PDF data
If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)
For k = 0 To UBound(Hld_Txt)
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(k + 1, 1).Value = T_Str
Next k
Else
'information if text not retrive from PDF page
.Cells(1, 1).Value = "No text found in page " & I
End If
Else
'get the pdf data into single sheet
If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)
Yes_Fir = True
For k = 0 To UBound(Hld_Txt)
RW_Ct = RW_Ct + 1
'check begining of page if yes enter PDF page number for any idenfication
If Yes_Fir Then
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "Text In Page - " & I
RW_Ct = RW_Ct + 2
Yes_Fir = False
End If
'check for maximum rows if exceeds start from next column
If RW_Ct > Li_Row Then
RW_Ct = 1
Col_Num = Col_Num + 1
End If
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(RW_Ct, Col_Num).Value = T_Str
Next k
Else
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "No text found in page " & I
RW_Ct = RW_Ct + 1
End If
End If
End With
Next i
.Close
End With
Application.ScreenUpdating = True
MsgBox "Imported"
h_end:
Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
End Sub

Sub Imp_Into_Txt(T_PDF_File As String, Fol_Path As String, Each_Page As Boolean)
'same as above procedure instead of sheets use text files
Dim AC_PD As Acrobat.AcroPDDoc
Dim AC_Hi As Acrobat.AcroHiliteList
Dim AC_PG As Acrobat.AcroPDPage
Dim AC_PGTxt As Acrobat.AcroPDTextSelect
Dim OS_FSO As Object
Dim OS_TxtFile As Object
Set OS_FSO = CreateObject("Scripting.filesystemobject")
Dim Ct_Page As Long
Dim i As Long, j As Long, k As Long
Dim T_Str As String
Dim Hld_Txt As Variant
Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList
AC_Hi.Add 0, 32767
With AC_PD
.Open T_PDF_File
Ct_Page = .GetNumPages
If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & T_PDF_File & "'"
.Close
GoTo h_end
End If
If Each_Page = False Then
Set OS_TxtFile = OS_FSO.createtextfile(Fol_Path & "\pdf2text.txt")
End If
For i = 1 To Ct_Page
T_Str = ""
Set AC_PG = .AcquirePage(i - 1)
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
If Not AC_PGTxt Is Nothing Then
With AC_PGTxt
For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j
End With
End If
If T_Str = "" Then T_Str = "No text found in page " & I
If Each_Page = True Then
Set OS_TxtFile = OS_FSO.createtextfile(Fol_Path & "\Page-" & i & ".txt")
OS_TxtFile.write T_Str
OS_TxtFile.Close
Set OS_TxtFile = Nothing
Else
T_Str = vbCrLf & vbCrLf & "Text In Page - " & i & vbCrLf & vbCrLf & T_Str
OS_TxtFile.write T_Str
End If
Next i
If Each_Page = False Then OS_TxtFile.Close
.Close
End With
MsgBox "Imported"
h_end:
Set OS_TxtFile = Nothing
Set OS_FSO = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
End Sub

macropod
01-06-2018, 04:12 AM
The code you posted requires you to have Adobe Acrobat Pro installed, not just the free Reader. If you have Office 2013 or later, you could automate Word and use that to open the file for data extraction.

PS: When posting code, please use the code tags, indicated by the# button on the posting menu. Without them, your code loses whateverstructure it had.

mleblanc001
01-06-2018, 08:33 AM
Is there an example of Word to open the file for data extraction?

macropod
01-06-2018, 12:52 PM
There are many posts on this board for automating Word from Excel. You might start by looking in: http://www.vbaexpress.com/forum/forumdisplay.php?21-Integration-Automation-of-Office-Applications-Help