PDA

View Full Version : PDF Data to Excel Adobe XI



patterson198
02-04-2016, 03:21 AM
Hi,


i am using this code



Option Explicit


Sub Main_Import()


frm_pdfimp.Show


End Sub


Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)


'This procedure get the PDF data into excel by following way


'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
sheets or single sheet as defined in Each_Sheet Parameter




Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area


Dim WS_PDF As Worksheet
Dim RW_Ct As Long 'row count
Dim Col_Num As Integer 'column count
Dim Li_Row As Long 'Maximum rows limit for one column
Dim Yes_Fir As Boolean 'to identify beginning of page


Li_Row = Rows.Count


Dim Ct_Page As Long 'count pages in pdf file
Dim i As Long, j As Long, k As Long 'looping variables
Dim T_Str As String


Dim Hld_Txt As Variant 'get PDF total text into array


RW_Ct = 0 'set the intial value
Col_Num = 1 'set the intial value


Application.ScreenUpdating = False


Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList


'set maximum selection area of PDF page


AC_Hi.Add 0, 32767


With AC_PD

'open PDF file

.Open PDF_File

'get the number of pages of PDF file

Ct_Page = .GetNumPages

'if get pages is failed exit sub

If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
.Close
GoTo h_end
End If

'add sheet only one time if Data retrive in one sheet

If Each_Sheet = False Then
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
WS_PDF.Name = "PDF2Text"
End If

'looping through sheets

For i = 1 To Ct_Page

T_Str = ""
'get the page
Set AC_PG = .AcquirePage(i - 1)

'get the full page selection
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)

'if text selected successfully get the all the text into T_Str string

If Not AC_PGTxt Is Nothing Then

With AC_PGTxt

For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j

End With

End If


If Each_Sheet = True Then

'add each sheet for each page

Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))

End If

'transfer PDF data into sheet

With WS_PDF

If Each_Sheet = True Then

.Name = "Page-" & i

'get the PDF data into each sheet for each PDF page

'if text accessed successfully then split T_Str by VbCrLf
'and get into array Hld_Txt and looping through array and fill sheet with PDF data

If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)

For k = 0 To UBound(Hld_Txt)
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(k + 1, 1).Value = T_Str
Next k
Else

'information if text not retrive from PDF page

.Cells(1, 1).Value = "No text found in page " & i
End If

Else

'get the pdf data into single sheet

If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)

Yes_Fir = True

For k = 0 To UBound(Hld_Txt)

RW_Ct = RW_Ct + 1

'check begining of page if yes enter PDF page number for any idenfication

If Yes_Fir Then
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "Text In Page - " & i
RW_Ct = RW_Ct + 2
Yes_Fir = False
End If

'check for maximum rows if exceeds start from next column

If RW_Ct > Li_Row Then
RW_Ct = 1
Col_Num = Col_Num + 1
End If

T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(RW_Ct, Col_Num).Value = T_Str

Next k

Else

RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "No text found in page " & i
RW_Ct = RW_Ct + 1

End If

End If

End With
Next i

.Close

End With

Application.ScreenUpdating = True


MsgBox "Imported"

h_end:

Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing

End Sub




Sub Imp_Into_Txt(T_PDF_File As String, Fol_Path As String, Each_Page As Boolean)


'same as above procedure instead of sheets use text files


Dim AC_PD As Acrobat.AcroPDDoc
Dim AC_Hi As Acrobat.AcroHiliteList
Dim AC_PG As Acrobat.AcroPDPage
Dim AC_PGTxt As Acrobat.AcroPDTextSelect


Dim OS_FSO As Object
Dim OS_TxtFile As Object


Set OS_FSO = CreateObject("Scripting.filesystemobject")


Dim Ct_Page As Long
Dim i As Long, j As Long, k As Long
Dim T_Str As String


Dim Hld_Txt As Variant


Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList
AC_Hi.Add 0, 32767


With AC_PD

.Open T_PDF_File
Ct_Page = .GetNumPages

If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & T_PDF_File & "'"
.Close
GoTo h_end
End If

If Each_Page = False Then
Set OS_TxtFile = OS_FSO.createtextfile(Fol_Path & "\pdf2text.txt")
End If

For i = 1 To Ct_Page

T_Str = ""
Set AC_PG = .AcquirePage(i - 1)

Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)

If Not AC_PGTxt Is Nothing Then

With AC_PGTxt

For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j

End With

End If

If T_Str = "" Then T_Str = "No text found in page " & i

If Each_Page = True Then
Set OS_TxtFile = OS_FSO.createtextfile(Fol_Path & "\Page-" & i & ".txt")
OS_TxtFile.write T_Str
OS_TxtFile.Close
Set OS_TxtFile = Nothing
Else

T_Str = vbCrLf & vbCrLf & "Text In Page - " & i & vbCrLf & vbCrLf & T_Str
OS_TxtFile.write T_Str
End If
Next i

If Each_Page = False Then OS_TxtFile.Close
.Close

End With


MsgBox "Imported"


h_end:


Set OS_TxtFile = Nothing
Set OS_FSO = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing


End Sub













but i get a 429 error and ActiveX component cant create the object


this is the line causing it


Set AC_PD = New Acrobat.AcroPDDoc



please can someone assist?