Private Sub btnRetrieveData_Click()
' using Selenium to grab the date from the website
' requires Selenium be installed from https://github.com/florentbr/SeleniumBasic/releases/tag/v2.0.9.0
' and a reference to Selenium Type Librarybe made
Dim cDriver As chromeDriver
Dim rfqDatesDict As Scripting.Dictionary
Dim rfqDate As Variant
Dim rfqResult As String
Dim logRow As Long
' is the date to run from an actual date?
On Error Resume Next
If CDate(Sheet1.Cells(2, 2).Value) > Date Then
If Err > 0 Then
MsgBox "The 'Since' date is invalid"
Exit Sub
End If
End If
On Error GoTo 0
' where to start writing the progress information
logRow = Sheet1.Cells(Sheet1.Rows.Count, "A").End(xlUp).Row + 1
' create this and use this one instance throught the code
Set cDriver = New chromeDriver
' get the dates and their associated links, any error here stops everything
On Error Resume Next
Sheet1.Cells(logRow, 1).Value = Format(Date, "MM-DD-YYYY") & " checking for RFQ dates from " & Sheet1.Cells(2, 2).Value
Sheet1.Cells(logRow, 1).Font.Bold = True
logRow = logRow + 1
DoEvents
Set rfqDatesDict = GetRFQDates(cDriver)
If Err <> 0 Then
MsgBox "Error collecting the RFQ dates! Error: " & Error(Err), vbCritical, "ERROR"
cDriver.Quit
Set cDriver = Nothing
Exit Sub
End If
Sheet1.Cells(logRow, 1).Value = Format(Date, "MM-DD-YYYY") & " checking for RFQ dates since " & Sheet1.Cells(2, 2).Value & " found " & CStr(rfqDatesDict.Count)
On Error GoTo 0
' loop the links to extract the data from their page into
' a new sheet named as the date
For Each rfqDate In rfqDatesDict.Keys
' show the progress
Sheet1.Range("A" & CStr(logRow)).EntireRow.Font.Color = vbBlack ' reset font color
Sheet1.Cells(logRow, 2).Value = rfqDate
Sheet1.Cells(logRow, 3).Value = "Reading date's data..."
DoEvents
' call the function that retrieves the RFQ info & creates a new sheet
rfqResult = GetRFQDatesData(rfqDatesDict(rfqDate), cDriver, rfqDate)
' anything but "" is an error
If rfqResult <> "" Then
' error returned, write it to the log and color it red
Sheet1.Cells(logRow, 3).Value = rfqResult
Sheet1.Range("A" & CStr(logRow)).EntireRow.Font.Color = vbRed
Else
' successful processing
Sheet1.Cells(logRow, 3).Value = "New sheet created for " & rfqDate
End If
DoEvents
logRow = logRow + 1
Next rfqDate
' clean up
On Error Resume Next
cDriver.Close
Set cDriver = Nothing
Set rfqDatesDict = Nothing
Sheet1.Cells(logRow, 1).Value = "All dates available have been processed"
Sheet1.Cells(logRow, 1).Font.Bold = True
DoEvents
MsgBox "Done."
End Sub
Private Function GetRFQDates(cDriver As chromeDriver) As Scripting.Dictionary
' first process is to gather the dates and their associated URLs
Dim tblRow As WebElement
Dim tblData As WebElement
Dim rfqDateURLs As Scripting.Dictionary
Dim fromDate As Date
Dim pageDate As Date
cDriver.Get "https://www.dibbs.bsm.dla.mil/RFQ/RfqDates.aspx?category=close"
' wait 5 seconds to allow for chrome to load and display the page
cDriver.Wait 5000
cDriver.FindElementById("butAgree").Click ' try to click the Ok button
' wait 5 seconds to allow for chrome move to the next page
cDriver.Wait 5000
fromDate = CDate(Sheet1.Cells(2, 2).Value)
Set rfqDateURLs = New Scripting.Dictionary
' get the table rows, loop through them to retrieve the data
For Each tblRow In cDriver.FindElementById("ctl00_cph1_dtlDateList").FindElementsByTag("tr")
' get each row of data from the web page table
For Each tblData In tblRow.FindElementsByTag("td")
' if the date is equal to or great than the date
' specified on sheet1 add it to the dictionary for processing
If Trim(tblData.Text) <> "" Then
' there is a date in the table cell
pageDate = CDate(tblData.Text)
If pageDate >= fromDate Then
rfqDateURLs.Add Format(pageDate, "MM-DD-YYYY"), tblData.FindElementByTag("a").Attribute("href")
End If
End If
Next tblData
Next tblRow
Set GetRFQDates = rfqDateURLs
End Function
Private Function GetRFQDatesData(dateURL As Variant, cDriver As chromeDriver, sheetName As Variant) As String
' each dates URL is passed in, this creates a new sheet, and then
' writes the table data found on the URL
Dim rfqTable As WebElement
Dim resultString As String
cDriver.Get dateURL
' wait 5 seconds to allow for chrome to load and display the page
cDriver.Wait 5000
' get the number of records found for the date
Sheet1.Cells(Sheet1.Cells(Sheet1.Rows.Count, "C").End(xlUp).Row, "E").Value = cDriver.FindElementById("ctl00_cph1_lblRecCount").Text
' first look to see if the RFQ grid is available
On Error Resume Next
Set rfqTable = cDriver.FindElementById("ctl00_cph1_grdRfqSearch")
If rfqTable Is Nothing Then
' the required table isn't available, is the okay button in the way?
cDriver.FindElementById("butAgree").Click ' try to click the Ok button
If Err = 0 Then
' wait 5 seconds to allow for chrome to load and display the page
cDriver.Wait 5000
' successfully clicked the okay button, try to get the table again to move on
Set rfqTable = cDriver.FindElementById("ctl00_cph1_grdRfqSearch")
If Not rfqTable Is Nothing Then
' all good go for it
resultString = CreateSheetWriteData(rfqTable, CStr(sheetName), cDriver)
Else
' give up on this page
resultString = "Issue during processing no Okay button or RFQ data grid found! Error: " & Error(Err)
GoTo UnableToProcessTheURLData
End If
Else
' encountered an unknown page or the link is wrong
resultString = "Issue during processing no Okay button or RFQ data grid found! Error: " & Error(Err)
GoTo UnableToProcessTheURLData
End If
Else
' the expected table is available
resultString = CreateSheetWriteData(rfqTable, CStr(sheetName), cDriver)
End If
Set rfqTable = Nothing
UnableToProcessTheURLData:
' there was a problem trying to process the information on the page
' passed in for the RFQ date
If Err <> 0 And Err <> 7 Then resultString = "Encountered an error : " & Error(Err)
GetRFQDatesData = resultString
End Function
Private Function CreateSheetWriteData(rfqTable As WebElement, sheetName As String, cDriver As chromeDriver) As String
Dim rowNum As Long
Dim columCount As Integer
Dim tblRow As WebElement
Dim rfqDateWS As Worksheet
Dim rfqTablePages As WebElement
Dim cl As Range
' create a new sheet for the dates data
On Error Resume Next
Set rfqDateWS = ThisWorkbook.Sheets(sheetName)
If Err <> 0 Then
' no sheet with this name exists, create it
Set rfqDateWS = ThisWorkbook.Sheets.Add(After:=ThisWorkbook.Sheets(ThisWorkbook.Sheets.Count))
rfqDateWS.Name = sheetName
' display sheet1 again
Sheet1.Activate
DoEvents
Else
' there is a sheet in this workbook with this date
If MsgBox("A sheet named '" & sheetName & "' exists. Delete all its data and rewrite the data?", vbYesNo, "Existing Sheet") = vbNo Then
' the user has selected to not overwrite the existing sheets data
CreateSheetWriteData = "User cancelled overwrite of sheet: " & sheetName
Set rfqDateWS = Nothing
Exit Function
Else
' clear out the current data on the existing worksheet
rfqDateWS.Range("A1:I" & Sheet1.Cells(Sheet1.Rows.Count, "A").End(xlUp).Row).Clear
End If
End If
' format the table header row
rfqDateWS.Cells(1, 1).ColumnWidth = 10
rfqDateWS.Cells(1, 2).ColumnWidth = 20
rfqDateWS.Cells(1, 3).ColumnWidth = 30
rfqDateWS.Cells(1, 4).ColumnWidth = 15
rfqDateWS.Cells(1, 5).ColumnWidth = 17
rfqDateWS.Cells(1, 6).ColumnWidth = 15
rfqDateWS.Cells(1, 7).ColumnWidth = 15
rfqDateWS.Cells(1, 8).ColumnWidth = 12
rfqDateWS.Cells(1, 9).ColumnWidth = 12
For Each cl In rfqDateWS.Range("A1:I1").Cells
cl.Interior.Color = RGB(22, 54, 92)
cl.Font.Color = vbWhite
Next cl
' loop through the tables rows to find the data to write to the sheet
Sheet1.Cells(Sheet1.Cells(Sheet1.Rows.Count, "C").End(xlUp).Row, 3).Value = "Reading Page 1"
DoEvents
rowNum = 1
For Each tblRow In rfqTable.FindElementsByTag("tr")
' there are specific table rows to get data from
If tblRow.Attribute("class") = "AwdRecs" Or tblRow.Attribute("class") = "BgWhite" Or tblRow.Attribute("class") = "BgSilver" Then
' get the column headers from the web page table
On Error GoTo 0
columCount = 1
For Each tblHeader In tblRow.FindElementsByTag("th")
rfqDateWS.Cells(rowNum, columCount).Value = tblHeader.Text
columCount = columCount + 1
Next tblHeader
' get each row of data from the web page table
columCount = 1
For Each tblData In tblRow.FindElementsByTag("td")
rfqDateWS.Cells(rowNum, columCount).Value = tblData.Text
columCount = columCount + 1
Next tblData
rowNum = rowNum + 1
End If
Next tblRow
Set rfqTablesPages = Nothing
' check for pages on the RFQ data sheet
Set rfqTablesPages = cDriver.FindElementByClass("pagination")
If Not IsEmpty(rfqTablesPages) Then
' there is a page table present
' call the procedure that will handle writting each page to the current sheet
' starting at the last row used above
WriteDataFromPages cDriver, rfqDateWS, rowNum, 2
Set rfqTablesPages = Nothing
End If
Set rfqTablesPages = Nothing
Set rfqDateWS = Nothing
On Error GoTo 0
CreateSheetWriteData = ""
End Function
Private Function WriteDataFromPages(cDriver As chromeDriver, rfqDateWS As Worksheet, rowNum As Long, rfqLoadPageNumber As Integer) As String
' the page for the RFQ date has a table that contains multiple pages
Dim rfqTablesPages As WebElement
Dim rfqPage As WebElement
Dim rfqTable As WebElement
Dim tblRow As WebElement
Dim columCount As Integer
Dim resultString As String
Dim ignoreFirstElipse As Boolean
On Error GoTo PageProcessingError:
ignoreFirstElipse = False
Sheet1.Cells(Sheet1.Cells(Sheet1.Rows.Count, "C").End(xlUp).Row, 3).Value = "Reading Page " & CStr(rfqLoadPageNumber)
DoEvents
Set rfqTablesPages = cDriver.FindElementByClass("pagination")
For Each rfqPage In rfqTablesPages.FindElementsByTag("td")
If rfqPage.Text = CStr(rfqLoadPageNumber) Then
' make the website load the pagination page
Set rfqTablesPages = Nothing
rfqPage.FindElementByTag("a").Click
cDriver.Wait 5000
' get the rows from the refreshed table for the page number
Set rfqTable = cDriver.FindElementById("ctl00_cph1_grdRfqSearch")
' skipping the TH elements because they are already in the sheet
For Each tblRow In rfqTable.FindElementsByTag("tr")
' only include the rows of data as indicated by their background
If tblRow.Attribute("class") = "BgWhite" Or tblRow.Attribute("class") = "BgSilver" Then
' get each row of data from the web page table
columCount = 1
For Each tblData In tblRow.FindElementsByTag("td")
rfqDateWS.Cells(rowNum, columCount).Value = tblData.Text
columCount = columCount + 1
Next tblData
rowNum = rowNum + 1
End If
Next tblRow
Set rfqTable = Nothing
rfqLoadPageNumber = rfqLoadPageNumber + 1
Set rfqTablesPages = Nothing
WriteDataFromPages cDriver, rfqDateWS, rowNum, rfqLoadPageNumber
ElseIf rfqPage.Text = "..." And ignoreFirstElipse = False Then
' encountered the "next group of pages" button
rfqPage.FindElementByTag("a").Click
cDriver.Wait 5000
rfqLoadPageNumber = rfqLoadPageNumber + 1
Set rfqTablesPages = Nothing
WriteDataFromPages cDriver, rfqDateWS, rowNum, rfqLoadPageNumber
ElseIf rfqPage.Text = "First" Then
' there is sometimes an elipses after the "First" navigation button, this can ge ignored
ignoreFirstElipse = True
End If
Next rfqPage
Set rfqTablesPages = Nothing
Exit Function
PageProcessingError:
resultString = Error(Err)
End Function