Consulting

Results 1 to 5 of 5

Thread: extract html data (not in table) from website to excel

  1. #1
    VBAX Regular
    Joined
    Apr 2012
    Posts
    22
    Location

    extract html data (not in table) from website to excel

    I am trying to extract data within tags <makeid></makeid> and <date></date> from a webpage. I would then like to use the makeid info to either select that worksheet or if there is not one then to create a new worksheet with the makeid as the name. The date data I would just paste in cell A1. Can anyone help me with this?

  2. #2
    VBAX Guru Kenneth Hobs's Avatar
    Joined
    Nov 2005
    Location
    Tecumseh, OK
    Posts
    4,956
    Location
    You did not provide specific enough data to give you detailed help. Besides the brute strength method below, one can use MSIE object methods or similar method and get the data back as xml and use xml parsing methods.

    [VBA]'http://www.vbaexpress.com/forum/showthread.php?t=26305

    Sub FillDistTimes()
    Dim theDistance As Double, theHours As Integer, theMinutes As Integer
    Dim r As Range
    Set r = Range("A1")
    Do Until IsEmpty(r.Offset(1))
    Set r = r.Offset(1)
    D1to2 r.Value, r.Offset(0, 1).Value, theDistance, theHours, theMinutes
    r.Offset(0, 2).Value = theDistance
    r.Offset(0, 3).Value = theHours
    r.Offset(0, 4).Value = theMinutes
    Loop
    End Sub


    'early binding
    Sub D1to2(z1, z2, ByRef dist As Double, ByRef tHour As Integer, ByRef tMin As Integer)
    'requires reference to winhttp.dll in Microsoft WinHTTP Services, version 5.1
    Dim Request As New WinHttpRequest, s As String
    Dim sa() As String

    'CountryCode=1 is United Kingdom
    Request.Open "GET", _
    "http://www.multimap.com/directions/?qs_1=" & CStr(z1) & _
    "&countryCode_1=GB&qs_2=" & CStr(z2) & _
    "&countryCode_2=GB&mode=driving&optimizeFor=time", _
    False
    Request.Send
    Request.WaitForResponse

    'Get body of text and parse to byref variables
    s = Request.ResponseText
    dist = MidStr(s, ":", "," & """miles") 'Distance in km
    sa = Split(Mid(MidStr(s, "duration""" & ":{", "},""bounds"), 10), ":")
    tHour = GetNumber(sa(1))
    tMin = GetNumber(sa(2))
    'tday = GetNumber(sa(3)) 'How to get days
    End Sub


    'Finds mid string from sTo and then back to sFrom. So, make sTo unique.
    Function MidStr(str As String, sFrom As String, sTo As String, Optional toOffset As Integer = 0) As String
    Dim strSub As String, sBegPos As Long, sEndPos As Long

    sEndPos = InStr(str, sTo) - toOffset
    strSub = Left(str, sEndPos)
    sBegPos = InStrRev(strSub, sFrom) + 1

    MidStr = Mid(strSub, sBegPos, sEndPos - sBegPos)
    End Function


    'Derk, ozgrid.com, 65763
    Function GetNumber(s As String)
    Dim j As Long
    While Not IsNumeric(Left(s, 1))
    If Len(s) <= 1 Then
    Exit Function
    Else
    s = Mid(s, 2)
    End If
    Wend
    GetNumber = Val(s)
    End Function
    [/VBA]

  3. #3
    VBAX Regular
    Joined
    Apr 2012
    Posts
    22
    Location
    Thank you Kenneth. Sometimes I forget you guys are miracle workers, not mind readers. I wish I could provide more information, but seeing as this is for work and behind a password protected site I do not think it is a good ideqa

    The text I am trying to pull into cell A1 of sheet 1 of "autoanalytics.xlsm" is (in this case) 724316 so it looks like this in the web page html:

    HTML Code:
    <makeid>724316</makeid>
    Would using one of innertext, innerhtml, outertext, or outerhtml to extract the data work or are those functions for something else?

  4. #4
    VBAX Guru Kenneth Hobs's Avatar
    Joined
    Nov 2005
    Location
    Tecumseh, OK
    Posts
    4,956
    Location
    Using the method that I posted, obviously you need to change the url from multipmap to your website to get source code. Of course this method requires that the string that you want is in the source code.

    For the MSIE object method that you are referencing, something along these lines I would think. See after this code block another method for parsing strings from source code.

    [vba]'http://www.vbaexpress.com/forum/showthread.php?t=34422
    Sub ClickIE()
    Dim ie As Object, doc As Object, lnk As Object, i As Long

    Set ie = CreateObject("internetexplorer.application")

    ie.Visible = True

    ie.Navigate "google.com"

    'wait for page to load
    Do While ie.Busy And Not ie.readyState = 4:
    Application.Wait (Now + TimeValue("0:00:02"))
    DoEvents
    Loop

    'write search keyword (for example apple)
    ie.document.getElementById("q").Value = "apple"

    'click on search button
    ie.document.getElementById("btnG").Click

    'wait for page to load
    Do While ie.Busy And Not ie.readyState = 4:
    Application.Wait (Now + TimeValue("0:00:02"))
    DoEvents
    Loop

    'Put frame names into immediate window.
    For i = 0 To ie.document.frames.Length - 1
    Debug.Print ie.document.frames(i).Name
    Next i
    'GoTo Cleanup

    'Set the docuement for the frame with links.
    Set doc = ie.document.frames("wgjf")
    'MsgBox doc.formname
    'Write link information to the current sheet from row 1 to end.
    i = 0
    For Each lnk In ie.document.frames("wgjf").document.body.Links
    i = i + 1
    Range("A" & i).Value = lnk.Classname
    Range("B" & i).Value = lnk.innerText
    Range("C" & i).Value = lnk
    Next lnk

    'Quit ie and cleanup.
    Cleanup:
    ' ie.Quit
    Set lnk = Nothing
    Set doc = Nothing
    Set ie = Nothing

    MsgBox "End macro"

    End Sub[/vba]

    [VBA]Sub Test_pGoog()
    Dim s As String, sDistance As String, sDuration As String
    s = "<duration>" & vbCrLf & _
    "<value>16</value>" & vbCrLf & _
    "<text>1 min</text>" & vbCrLf & _
    "</duration>" & vbCrLf & _
    "<html_instructions>Make a &lt;b&gt;U-turn&lt;/b&gt; at &lt;b&gt;Palm Dr&lt;/b&gt;&lt;div style=&quot;font-size:0.9em&quot;&gt;Destination will be on the right&lt;/div&gt;</html_instructions>" & vbCrLf & _
    "<distance>" & vbCrLf & _
    "<value>44</value>" & vbCrLf & _
    "<text>144 ft/text>" & vbCrLf & _
    "</distance>" & vbCrLf & _
    "</step>" & vbCrLf & _
    "<duration>" & vbCrLf & _
    "<value>164061</value>" & vbCrLf & _
    "<text>1 day 22 hours</text>" & vbCrLf & _
    "</duration>" & vbCrLf & _
    "<distance>" & vbCrLf & _
    "<value>4553964</value>" & vbCrLf & _
    "<text>2,830 mi</text>" & vbCrLf & _
    "</distance>" & vbCrLf & _
    "<start_location>"
    sDistance = pGoog("distance", s)
    'sDuration = pGoog("duration", s)
    MsgBox sDistance
    End Sub

    Private Function pGoog(strSearch As String, strHTML As String) As String
    Dim s As String
    s = pRevTags(strSearch, strHTML)
    pGoog = pRevTags("text", s)
    End Function

    Private Function pRevTags(strSearch As String, strHTML As String) As String
    Dim s As String, p1 As Long, p2 As Long, lss As Integer
    p1 = InStrRev(strHTML, "<" & strSearch & ">")
    If p1 = 0 Then
    pRevTags = "Not Found"
    Exit Function
    End If
    p2 = InStrRev(strHTML, "</" & strSearch & ">")
    lss = Len(strSearch)
    s = Mid(strHTML, p1 + lss + 2, p2 - p1 - 2 - lss)
    pRevTags = s
    End Function


    'Old method:
    Private Function parseGoog(strSearch As String, strHTML As String) As String
    strSearch = strSearch & ":'"
    If InStr(1, strHTML, strSearch) = 0 Then parseGoog = "Not Found": Exit Function
    parseGoog = Mid(strHTML, InStr(1, strHTML, strSearch) + Len(strSearch))
    parseGoog = Mid(parseGoog, 1, InStr(1, parseGoog, "'") - 1)
    End Function

    Function NumberPart(aString As String) As Long
    Dim s As String, i As Integer, mc As String
    For i = 1 To Len(aString)
    mc = Mid(aString, i, 1)
    If Asc(mc) >= 48 And Asc(mc) <= 57 Then s = s & mc
    Next i
    NumberPart = CLng(s)
    End Function

    Sub Test_Numberpart()
    Dim s As String
    s = "A5BC123"
    Debug.Print s, NumberPart(s)
    End Sub

    [/VBA]

  5. #5
    VBAX Guru Kenneth Hobs's Avatar
    Joined
    Nov 2005
    Location
    Tecumseh, OK
    Posts
    4,956
    Location
    Here is another DOM example. It shows how to login to Yahoo mail.

    [VBA]Sub Test_LoginYahoo()
    LoginYahoo ThisWorkbook.Worksheets("Main").Range("A1").Value2, _
    ThisWorkbook.Worksheets("Main").Range("A2").Value2
    End Sub

    ' Add references in Tools > References for:
    ' Microsoft HTML Object Library
    ' Microsoft Forms 2.0 Object Library
    Sub LoginYahoo(username As String, password As String)
    Const strURL_c As String = "http://mail.yahoo.com"
    Dim objIE As SHDocVw.InternetExplorer
    Dim ieDoc As MSHTML.HTMLDocument
    Dim tbxPwdFld As MSHTML.HTMLInputElement
    Dim tbxUsrFld As MSHTML.HTMLInputElement
    Dim btnSubmit As MSHTML.HTMLInputElement

    Excel.Application.Cursor = xlWait
    If InStr(username, "@") = 0 Then username = username & "@yahoo.com"

    On Error GoTo Err_Hnd

    'Create Internet Explorer Object
    Set objIE = New SHDocVw.InternetExplorer
    'Navigate the URL
    objIE.Navigate strURL_c
    objIE.Visible = False
    'Wait for page to load
    Do Until objIE.ReadyState = READYSTATE_COMPLETE: Loop
    'Do While objIE.Busy Or objIE.ReadyState <> READYSTATE_COMPLETE
    ' DoEvents
    'Loop
    'Set document object
    Set ieDoc = objIE.Document
    ieDoc.getElementsByName("passwd").Item(0).Value = password
    ieDoc.getElementsByName("username").Item(0).Value = username
    ieDoc.forms("Login_form").Submit

    Err_Hnd: '(Fail gracefully)
    objIE.Visible = True
    On Error GoTo 0
    Excel.Application.Cursor = xlDefault
    End Sub[/VBA]

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •