Consulting

Results 1 to 7 of 7

Thread: Web scraping

  1. #1
    VBAX Newbie
    Joined
    Jun 2018
    Posts
    4
    Location

    Web scraping

    Hi all

    Very new to the art of VBA and trying to develop some web scraping over a supermarket site to return current prices from a spreadsheet list of items. I have the below but can't get the data to return properly - am seeing the Runtime Error 438 at the point I'm trying to get data from the class element "pricePerUnit". Any help gratefully received!

    Thanks

    Sub test()

    Dim objIE As InternetExplorer 'special object variable representing the IE browser
    Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
    Dim bEle As HTMLLinkElement 'special object variable for an <b> (link) element
    Dim y As Integer 'integer variable we'll use as a counter
    Dim result As String 'string variable that will hold our result link




    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://www.sainsburys.co.uk/webapp/wcs/stores/servlet/gb/groceries"
    '"search?query=" & Sheets("Sheet1").Range("A" & y).Value

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    Application.ScreenUpdating = False

    With Worksheets("Prices")

    row_no = 2

    Do Until .Cells(row_no, 2) = ""

    Application.Wait (Now + TimeValue("0:00:03"))
    'in the search box put cell "A2" value, the word "in" and cell "C1" value
    objIE.document.getElementById("search").Value = _
    Sheets("Prices").Range("G" & row_no).Value

    'click the 'go' button
    'objIE.document.getElementsByClass("submit").Click

    objIE.document.forms(0).submit


    'wait again for the browser
    Do While objIE.readyState <> 4: DoEvents: Loop

    Application.Wait (Now + TimeValue("0:00:03"))

    'for each <a> element in the collection of objects with class of 'result__a'...
    For Each aEle In objIE.document.getElementsByClassName("pricePerUnit")


    '...get the text within the element and print it to the sheet in col D
    Sheets("Prices").Range("H" & row_no).Value = aEle.innerText
    Debug.Print aEle.innerText

    Next
    Application.Wait (Now + TimeValue("0:00:03"))

    'for each <a> element in the collection of objects with class of 'result__a'...

    For Each bEle In objIE.document.getElementsByClassName("pricePerMeasure")

    '...get the text within the element and print it to the sheet in col D
    Sheets("Prices").Range("I" & row_no).Value = bEle.innerText
    Debug.Print bEle.innerText

    Next

    row_no = row_no + 1

    Loop

    End With

    'close the browser
    objIE.Quit



    End Sub

  2. #2
    Moderator VBAX Master georgiboy's Avatar
    Joined
    Mar 2008
    Location
    Kent, England
    Posts
    1,188
    Location
    Hi there, welcome to the forum.

    Give this a try, should help:
    Public Declare Sub Sleep Lib "kernel32" (ByVal Milliseconds As Long)
    
    Sub GetVal()
        Dim ie As Object
        Dim url As String
        Dim Doc As HTMLDocument
    
        url = "https://www.sainsburys.co.uk/webapp/wcs/stores/servlet/gb/groceries"
        Set ie = CreateObject("InternetExplorer.Application")
    
        With ie
            .Visible = True
            .navigate url
            Do While ie.readyState <> 4: Sleep (200): Loop
            Sleep (2000)
            ie.document.getElementById("search").Value = "Cheese"
            ie.document.forms(0).submit
            Do While ie.readyState <> 4: Sleep (200): Loop
            Sleep (2000)
        End With
        
        Set Doc = ie.document
        Set myPoints = Doc.getElementsByClassName("pricePerUnit")
        
        For x = 1 To myPoints.Length
            Sheet1.Range("A" & x).Value = myPoints(x - 1).innerText
        Next x
        
        ie.Quit
        Set ie = Nothing
    
    End Sub
    Hope this helps
    Click here for a guide on how to add code tags
    Click here for a guide on how to mark a thread as solved

    Excel 365, Version 2403, Build 17425.20146

  3. #3
    VBAX Newbie
    Joined
    Jun 2018
    Posts
    4
    Location
    Thank you - very kind!

  4. #4
    VBAX Newbie
    Joined
    Jun 2018
    Posts
    4
    Location
    georgiboy, have been able to put your code into my project with a couple of tweaks to reference cells on my sheet - works really well with this example thanks. However, when I try to clone for a different url (see below), I'm back to the runtime error on the first variable I'm trying to get from the website... I'm not understanding the difference and what I need to change - any chance you could help again please???

    Public Declare Sub Sleep Lib "kernel32" (ByVal Milliseconds As Long)
    Sub GetVal()
    Dim ie As Object
    Dim url As String
    Dim Doc As HTMLDocument
    url = "https://www.tesco.com/groceries/en-GB/"
    Set ie = CreateObject("InternetExplorer.Application")
    With ie
    .Visible = True
    .navigate url
    Do While ie.readyState <> 4: Sleep (200): Loop
    Sleep (2000)
    End With

    With Worksheets("Prices")
    row_no = 2
    Do Until .Cells(row_no, 2) = ""

    ie.document.getElementById("search-input").Value = _
    Sheets("Prices").Range("B" & row_no).Value
    ie.document.forms(0).submit
    Do While ie.readyState <> 4: Sleep (200): Loop
    Sleep (2000)


    Set Doc = ie.document
    Set myPoints = Doc.getElementsByClassName("price-per-sellable-unit")

    Sheets("Prices").Range("C" & row_no).Value = myPoints.innerText

    Set myPoints = Doc.getElementsByClassName("price-per-quantity-weight")

    Sheets("Prices").Range("D" & row_no).Value = myPoints.innerText

    row_no = row_no + 1
    Loop
    End With

    ie.Quit
    Set ie = Nothing

  5. #5
    Moderator VBAX Master georgiboy's Avatar
    Joined
    Mar 2008
    Location
    Kent, England
    Posts
    1,188
    Location
    myPoints is holding an array, therefore you will not be able to use "myPoints.innerText"

    In my original code:
    For x = 1 To myPoints.Length    
        Sheet1.Range("A" & x).Value = myPoints(x - 1).innerText
    Next x


    "myPoints(x - 1).innerText" needs to have the number in brackets in order to work.

    "Doc.getElementsByClassName("price-per-sellable-unit")" contains every "price-per-sellable-unit" loaded on that page.

    "
    myPoints.Length" counts how many "price-per-sellable-unit" there are in myPoints.

    This is why we have the loop "
    For x = 1 To myPoints.Length"

    You may also need to change pages and load the next set of "price-per-sellable-unit"

    Worth noting that web scraping is tricky and can be unreliable in the sense that these websites do get updated regularly and can cause issues with your code.

    Hope this helps

    Click here for a guide on how to add code tags
    Click here for a guide on how to mark a thread as solved

    Excel 365, Version 2403, Build 17425.20146

  6. #6
    VBAX Newbie
    Joined
    Jun 2018
    Posts
    4
    Location
    Thanks for the explanation around arrays - makes sense and now having popped that code in it works well. Appreciate it.

  7. #7
    If you are looking for help with thesis writing then I've got just the website for you - it's cheap, it's fast, it've got amazing tech support and professionals working on your projects, so you should feel safe and assured while using this.

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •