Consulting

Results 1 to 2 of 2

Thread: Fetch search result from javascripted web page?

  1. #1

    Fetch search result from javascripted web page?

    Code will enter search term, activate the search but cannot copy the results back. Webpage appears to be Javascripted. Here's my code below: what I've tried previously is commented out. Below that is an example of a search result.

     Sub GetASTM()
        ' had to activate library Microsoft HTML object library (MSHTML.TLB) via
        ' "Tools" > "References".  Then get the IHTML items
        
        Dim ie As Object
        Dim Std As Object
        Dim ElementCol As Object
        Dim btnInput As Object
        Dim el, f
        Dim s As String
        Dim r As Range
        Dim Element As IHTMLElement
        Dim Elements As IHTMLElementCollection
        Dim doc1 As MSHTML.HTMLDocument
        Dim xxml As MSXML2.xmlHTTP
        
        Set xxml = GetMSXML
        Set ie = CreateObject("InternetExplorer.Application")
        With ie
            .navigate "http://compass.astm.org/CUSTOMERS/fi....cgi?index.frm"
            .Visible = True
        End With
        Do While ie.Busy
        Loop
        Set Std = ie.Document.getElementsByName("search")
        Std(0).Value = "B33"
        
        Set el = ie.Document.getElementById("main-search-btn")
        el.Click
        
        Do While ie.Busy
        Loop
        
        
            .Open
    '    s = ie.Document.getElementsBy
    '    s = doc1.body.innerText
        
    '    Set Elements = ie.Document.getElementById("var")
    '    For Each Element In Elements
    '        Debug.Print Element.innerText'
    '    Next Element
        
    '    s = ie.Document.All("mc_results").innerText
    '    s = ie.Document.getElementsByName("title").innerText
    '    Set r = s
        
    '    With r.Find
    '        .Text = "Standard Specification for"
    '        .Forward = True
    '        .Wrap = wdFindStop
    '        .Execute
    '    End With
    '    With r
    '        .MoveEndUntil Cset:=""", Count:=wdForward"
    '    End With
            
    End Sub
    Resulting web page content:

    HTML Code:
    <!
    DOCTYPE html>
    <
    html>
    <head>
     
    <title>ASTM Compass</title>
            
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
            
    <meta charset="utf-8" />
            
    <link rel="shortcut icon" href="/favicon.ico?v2" />
            
    <link rel="stylesheet" type="text/css" href="/CUSTOMERS/css/combined.css?v1a" />
            
    <!--[if IE]>
            <link rel="stylesheet" type="text/css" href="/css/ie.css" />
            <![endif]-->
            
    <!--[if lte IE 7]>
            <link rel="stylesheet" type="text/css" href="/css/ie-7.css" />
            <![endif]-->
            
    <script type="text/javascript" src="/CUSTOMERS/js/complete-lite.js?v2a"></script>
    <script type="text/javascript" src="/CUSTOMERS/search/js/search-lite.js?r3g"></script>
    <script type="text/javascript">
    The above is the top part of the web page result. Here's what I need to copy:
    HTML Code:
     
    
    var 
    
     mc_results = {"resSet":[    {    "results":[{
     
     
    
    "res" 
    :{    "url":"http://compass.astm.org/BioDiesel/B33.htm",
     
    
         
    "title":"Standard Specification for Tin-Coated Soft or Annealed Copper Wire for Electrical Purposes",
     
    
         
    "snippet":"1 | Scope Previous  Next  |  Top  Bottom 1.1 This specification covers tin-coated, round, soft, or annealed copper wire for electrical purposes. 1.2 The values stated in inch-pound units are to be regarded as standard. The values given in parentheses are mathematical conversions to SI units that are provided for information only and are not considered standard. 1.2.1 Exceptions—The ",
     
    
         
    "meta":{"gs_designation":"B33","mc_section":"02","mc_maincomm":"B01","mc_date":"2014","title":"Standard Specification for Tin-Coated Soft or Annealed Copper Wire for Electrical Purposes","mc_dltype":"allstd,active","mc_doctype":"Active Standard","mc_addtocart":"PDF-B33","gs_year":"10

  2. #2

    Can scrape web page only once, how to clear previous entry and re-enter add'l terms?

    Okay, I've managed to figure out how to parse the information I need ONCE however I will have several items to look up. I need to clear the entry that was previously input ("Std.Value") and enter the next one. I've tried SENDKEYS BKSP which was pretty hilarious as it sent the BKSP to the debugger window. I've tried Std.Value = "" and Nothing and " ". How do I clear the entry on the web page? Whiteout?

    Sub Notmuch()
    Dim temparr(4) As String
    Dim ResMtrx1() As String
    temparr(0) = "ASTM C31"
    temparr(1) = "ASTM C33"
    temparr(2) = "ASTM C309"
    temparr(3) = "ASTM C595"
    temparr(4) = "ASTM D1752"
    Call ASTM(temparr(), ResMtrx1())
    End Sub
    Function ASTM(temparr() As String, ResMtrx() As String) As String
        ' different web page.
        ' had to activate library Microsoft HTML object library (MSHTML.TLB) via
        ' "Tools" > "References".  Then get the IHTML items
        
        Dim Std As Object
        Dim el As Object
        Dim s As String
        Dim Element As IHTMLElement 'IHTMLElement
        Dim Elements As IHTMLElementCollection ' IHTMLElementCollection
        Dim r() As String, Stdarr(0) As String, Rat() As String
        Dim i As Long, j As Long, k As Long, l As Long
        
        l = 0 ' counter for result
        ReDim Rat(5)
        Set browser = CreateObject("InternetExplorer.Application")
        Set browser = New InternetExplorer
        browser.Visible = True
        browser.navigate "http://www.astm.org/Standard/standar...lications.html"
        
        Set ojbb = CreateObject("WScript.shell")
        ReDim ResMtrx(UBound(temparr), 2)
        
        On Error GoTo Errhandler
        Do While browser.Busy
        Loop
        For j = LBound(temparr) To UBound(temparr)
        
            Set Std = browser.Document.getElementsByName("query")
            Do While browser.Busy
            Loop
        
    Start:
            If j <> 0 Then
                Std(j).Value = "  "
    '            browser.SendKeys "{BKSP 6}", True
            End If
            Std(j).Value = temparr(j)
        
            For Each el In browser.Document.getElementsByClassName("primary-button nofloat")
                el.Click
            Next el
            Do While browser.Busy
            Loop
            
            Set Elements = browser.Document.getElementsByTagName("script")
            Do While browser.Busy
            Loop
            
            If Elements Is Nothing Then GoTo Start
        
            Do While browser.Busy
            Loop
            
            For Each Element In Elements
                Do While browser.Busy
                Loop
                
                ' get the info off the internet and into an array
                ' try to avoid Error 70
                If Element.innerText <> "" Then
                    Rat(i) = Element.innerText
                    If i = UBound(Rat) Then
                        ReDim Preserve Rat(i * 2)
                    End If
                    i = i + 1
                End If
            Next Element
            
    Errhandler:
            Select Case Err
                Case 70
                    Do While Err.Number = 70
                    Loop
                    GoTo Start
                    Resume
            End Select
            
            For i = LBound(Rat) To UBound(Rat)
                r = Split(Rat(i), Chr(34), -1, vbBinaryCompare) 'split on quotation mark, Chr(34)
                q = LBound(r) ' want a variable as is a sequential search. Don't need to go back _
                  through old.
                
                For k = LBound(r) To UBound(r)
                    If InStr(1, r(k), "Standard Specification for ", vbBinaryCompare) > 0 Then
                        ResMtrx(l, 0) = Mid(r(k), 27, Len(r(k)) - 26)
                    ElseIf InStr(1, r(k), "Standard Test Method for ") > 0 Then
                        ResMtrx(l, 0) = Mid(r(k), 26, Len(r(k)) - 25)
                    ElseIf InStr(1, r(k), "Standard Practice for ") > 0 Then
                        ResMtrx(l, 0) = Mid(r(k), 23, Len(r(k)) - 22)
                    ElseIf r(k) = "gs_designation" Then
                        ResMtrx(l, 1) = r(k + 2)
                    ElseIf r(k) = "mc_date" Then
                        ResMtrx(l, 2) = r(k + 2)
                    ElseIf ResMtrx(l, 2) <> "" Then 'array row is full, exit, get new std
                        Exit For
                    End If
                Next k
                If ResMtrx(l, 2) <> "" Then
                    l = l + 1
                    Exit For
                End If
            Next i
        Next j
        ResMtrx1 = ResMtrx()
        
    End Function

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •