PDA

View Full Version : extract html data (not in table) from website to excel



b.hill
05-05-2012, 10:06 PM
I am trying to extract data within tags <makeid></makeid> and <date></date> from a webpage. I would then like to use the makeid info to either select that worksheet or if there is not one then to create a new worksheet with the makeid as the name. The date data I would just paste in cell A1. Can anyone help me with this?

Kenneth Hobs
05-06-2012, 01:29 PM
You did not provide specific enough data to give you detailed help. Besides the brute strength method below, one can use MSIE object methods or similar method and get the data back as xml and use xml parsing methods.

'http://www.vbaexpress.com/forum/showthread.php?t=26305

Sub FillDistTimes()
Dim theDistance As Double, theHours As Integer, theMinutes As Integer
Dim r As Range
Set r = Range("A1")
Do Until IsEmpty(r.Offset(1))
Set r = r.Offset(1)
D1to2 r.Value, r.Offset(0, 1).Value, theDistance, theHours, theMinutes
r.Offset(0, 2).Value = theDistance
r.Offset(0, 3).Value = theHours
r.Offset(0, 4).Value = theMinutes
Loop
End Sub


'early binding
Sub D1to2(z1, z2, ByRef dist As Double, ByRef tHour As Integer, ByRef tMin As Integer)
'requires reference to winhttp.dll in Microsoft WinHTTP Services, version 5.1
Dim Request As New WinHttpRequest, s As String
Dim sa() As String

'CountryCode=1 is United Kingdom
Request.Open "GET", _
"http://www.multimap.com/directions/?qs_1=" & CStr(z1) & _
"&countryCode_1=GB&qs_2=" & CStr(z2) & _
"&countryCode_2=GB&mode=driving&optimizeFor=time", _
False
Request.Send
Request.WaitForResponse

'Get body of text and parse to byref variables
s = Request.ResponseText
dist = MidStr(s, ":", "," & """miles") 'Distance in km
sa = Split(Mid(MidStr(s, "duration""" & ":{", "},""bounds"), 10), ":")
tHour = GetNumber(sa(1))
tMin = GetNumber(sa(2))
'tday = GetNumber(sa(3)) 'How to get days
End Sub


'Finds mid string from sTo and then back to sFrom. So, make sTo unique.
Function MidStr(str As String, sFrom As String, sTo As String, Optional toOffset As Integer = 0) As String
Dim strSub As String, sBegPos As Long, sEndPos As Long

sEndPos = InStr(str, sTo) - toOffset
strSub = Left(str, sEndPos)
sBegPos = InStrRev(strSub, sFrom) + 1

MidStr = Mid(strSub, sBegPos, sEndPos - sBegPos)
End Function


'Derk, ozgrid.com, 65763
Function GetNumber(s As String)
Dim j As Long
While Not IsNumeric(Left(s, 1))
If Len(s) <= 1 Then
Exit Function
Else
s = Mid(s, 2)
End If
Wend
GetNumber = Val(s)
End Function

b.hill
05-08-2012, 08:11 AM
Thank you Kenneth. Sometimes I forget you guys are miracle workers, not mind readers. I wish I could provide more information, but seeing as this is for work and behind a password protected site I do not think it is a good ideqa

The text I am trying to pull into cell A1 of sheet 1 of "autoanalytics.xlsm" is (in this case) 724316 so it looks like this in the web page html:


<makeid>724316</makeid>

Would using one of innertext, innerhtml, outertext, or outerhtml to extract the data work or are those functions for something else?

Kenneth Hobs
05-08-2012, 11:30 AM
Using the method that I posted, obviously you need to change the url from multipmap to your website to get source code. Of course this method requires that the string that you want is in the source code.

For the MSIE object method that you are referencing, something along these lines I would think. See after this code block another method for parsing strings from source code.

'http://www.vbaexpress.com/forum/showthread.php?t=34422
Sub ClickIE()
Dim ie As Object, doc As Object, lnk As Object, i As Long

Set ie = CreateObject("internetexplorer.application")

ie.Visible = True

ie.Navigate "google.com"

'wait for page to load
Do While ie.Busy And Not ie.readyState = 4:
Application.Wait (Now + TimeValue("0:00:02"))
DoEvents
Loop

'write search keyword (for example apple)
ie.document.getElementById("q").Value = "apple"

'click on search button
ie.document.getElementById("btnG").Click

'wait for page to load
Do While ie.Busy And Not ie.readyState = 4:
Application.Wait (Now + TimeValue("0:00:02"))
DoEvents
Loop

'Put frame names into immediate window.
For i = 0 To ie.document.frames.Length - 1
Debug.Print ie.document.frames(i).Name
Next i
'GoTo Cleanup

'Set the docuement for the frame with links.
Set doc = ie.document.frames("wgjf")
'MsgBox doc.formname
'Write link information to the current sheet from row 1 to end.
i = 0
For Each lnk In ie.document.frames("wgjf").document.body.Links
i = i + 1
Range("A" & i).Value = lnk.Classname
Range("B" & i).Value = lnk.innerText
Range("C" & i).Value = lnk
Next lnk

'Quit ie and cleanup.
Cleanup:
' ie.Quit
Set lnk = Nothing
Set doc = Nothing
Set ie = Nothing

MsgBox "End macro"

End Sub

Sub Test_pGoog()
Dim s As String, sDistance As String, sDuration As String
s = "<duration>" & vbCrLf & _
"<value>16</value>" & vbCrLf & _
"<text>1 min</text>" & vbCrLf & _
"</duration>" & vbCrLf & _
"<html_instructions>Make a &lt;b&gt;U-turn&lt;/b&gt; at &lt;b&gt;Palm Dr&lt;/b&gt;&lt;div style=&quot;font-size:0.9em&quot;&gt;Destination will be on the right&lt;/div&gt;</html_instructions>" & vbCrLf & _
"<distance>" & vbCrLf & _
"<value>44</value>" & vbCrLf & _
"<text>144 ft/text>" & vbCrLf & _
"</distance>" & vbCrLf & _
"</step>" & vbCrLf & _
"<duration>" & vbCrLf & _
"<value>164061</value>" & vbCrLf & _
"<text>1 day 22 hours</text>" & vbCrLf & _
"</duration>" & vbCrLf & _
"<distance>" & vbCrLf & _
"<value>4553964</value>" & vbCrLf & _
"<text>2,830 mi</text>" & vbCrLf & _
"</distance>" & vbCrLf & _
"<start_location>"
sDistance = pGoog("distance", s)
'sDuration = pGoog("duration", s)
MsgBox sDistance
End Sub

Private Function pGoog(strSearch As String, strHTML As String) As String
Dim s As String
s = pRevTags(strSearch, strHTML)
pGoog = pRevTags("text", s)
End Function

Private Function pRevTags(strSearch As String, strHTML As String) As String
Dim s As String, p1 As Long, p2 As Long, lss As Integer
p1 = InStrRev(strHTML, "<" & strSearch & ">")
If p1 = 0 Then
pRevTags = "Not Found"
Exit Function
End If
p2 = InStrRev(strHTML, "</" & strSearch & ">")
lss = Len(strSearch)
s = Mid(strHTML, p1 + lss + 2, p2 - p1 - 2 - lss)
pRevTags = s
End Function


'Old method:
Private Function parseGoog(strSearch As String, strHTML As String) As String
strSearch = strSearch & ":'"
If InStr(1, strHTML, strSearch) = 0 Then parseGoog = "Not Found": Exit Function
parseGoog = Mid(strHTML, InStr(1, strHTML, strSearch) + Len(strSearch))
parseGoog = Mid(parseGoog, 1, InStr(1, parseGoog, "'") - 1)
End Function

Function NumberPart(aString As String) As Long
Dim s As String, i As Integer, mc As String
For i = 1 To Len(aString)
mc = Mid(aString, i, 1)
If Asc(mc) >= 48 And Asc(mc) <= 57 Then s = s & mc
Next i
NumberPart = CLng(s)
End Function

Sub Test_Numberpart()
Dim s As String
s = "A5BC123"
Debug.Print s, NumberPart(s)
End Sub

Kenneth Hobs
05-09-2012, 04:26 PM
Here is another DOM example. It shows how to login to Yahoo mail.

Sub Test_LoginYahoo()
LoginYahoo ThisWorkbook.Worksheets("Main").Range("A1").Value2, _
ThisWorkbook.Worksheets("Main").Range("A2").Value2
End Sub

' Add references in Tools > References for:
' Microsoft HTML Object Library
' Microsoft Forms 2.0 Object Library
Sub LoginYahoo(username As String, password As String)
Const strURL_c As String = "http://mail.yahoo.com"
Dim objIE As SHDocVw.InternetExplorer
Dim ieDoc As MSHTML.HTMLDocument
Dim tbxPwdFld As MSHTML.HTMLInputElement
Dim tbxUsrFld As MSHTML.HTMLInputElement
Dim btnSubmit As MSHTML.HTMLInputElement

Excel.Application.Cursor = xlWait
If InStr(username, "@") = 0 Then username = username & "@yahoo.com"

On Error GoTo Err_Hnd

'Create Internet Explorer Object
Set objIE = New SHDocVw.InternetExplorer
'Navigate the URL
objIE.Navigate strURL_c
objIE.Visible = False
'Wait for page to load
Do Until objIE.ReadyState = READYSTATE_COMPLETE: Loop
'Do While objIE.Busy Or objIE.ReadyState <> READYSTATE_COMPLETE
' DoEvents
'Loop
'Set document object
Set ieDoc = objIE.Document
ieDoc.getElementsByName("passwd").Item(0).Value = password
ieDoc.getElementsByName("username").Item(0).Value = username
ieDoc.forms("Login_form").Submit

Err_Hnd: '(Fail gracefully)
objIE.Visible = True
On Error GoTo 0
Excel.Application.Cursor = xlDefault
End Sub