Consulting

Results 1 to 2 of 2

Thread: Getting <Title> out of HTML source code

  1. #1
    VBAX Newbie
    Joined
    Apr 2008
    Posts
    2
    Location

    Getting <Title> out of HTML source code

    Hello

    Kind of new to VBA and need some tips on how to get what is in between <Title>....</Title> tags in a text file containing HTML source code into column 3. The VBA code searches through many folders and subfolders finding any file that is .html and reading it. I tried parsing but can't seem to get it to work. Any ideas? Here is the code that I have (that doesn't work)

    Sub CheckTextFilesForHREFs()
    MsgBox "Press OK to begin report"
    Dim WholeLine As String
    Dim myPath As String
    Dim workfile As String
    Dim myR As Long

    myPath = "C:\Exelon\"
    workfile = Dir(myPath & "*.html")
    'sLine = WholeLine

    Set fs = Application.FileSearch
    With fs
    .LookIn = "C:\Exelon"
    .Filename = ".html"
    .SearchSubFolders = True
    '.FileType = mosFileTypeAllFiles
    If .Execute(SortBy:=msoSortByFileName, _
    SortOrder:=msoSortOrderAscending) > 0 Then
    MsgBox "There were " & .FoundFiles.Count & _
    " file(s) found."
    For i = 1 To .FoundFiles.Count
    ParseURL .FoundFiles(i)
    ParseTitle .FoundFiles(i)
    ParseLink .FoundFiles(i)
    Next i

    Else
    MsgBox "There were no files found."
    End If
    End With

    Sub ParseTitle(strFile As String)
    Dim strTxt As String, lngTxt As Long, i As Long, oMatches
    Dim ws As Worksheet, j As Long, k As Long, m As Long, oMatches2
    Dim reg, oMatches3, reg2
    i = FreeFile
    'strFile = "c:\Users\Richard\Documents\Htmltest.html"
    lngTxt = FileLen(strFile)
    strTxt = Space(lngTxt)
    Open strFile For Binary Access Read As #i
    Get #i, , strTxt
    Close #i
    Debug.Print strTxt
    With CreateObject("vbscript.regexp")
    .Global = True
    .ignorecase = True
    .Pattern = vbCrLf & ".*?title.*?(?=" & vbCrLf & ")"
    If .test(strTxt) Then
    Set oMatches = .Execute(strTxt)
    For i = 0 To oMatches.Count - 1
    Set reg = CreateObject("vbscript.regexp")
    With reg
    .Global = True
    .ignorecase = True
    .Pattern = "<title>\""(.*?)\"""
    k = Cells(Rows.Count, 1).End(xlUp).Offset(1).Row
    Cells(k, 1).Value = strFile
    If .test(oMatches(i)) Then
    Set oMatches2 = .Execute(oMatches(i))
    For j = 0 To oMatches2.Count - 1
    Cells(k, j + 3) = .Replace(oMatches2(j), "$1")
    Next j
    End If
    End With
    Next i
    End If
    End With
    End Sub

  2. #2
    Distinguished Lord of VBAX VBAX Grand Master Bob Phillips's Avatar
    Joined
    Apr 2005
    Posts
    25,453
    Location
    Check out Find in VBA help.
    ____________________________________________
    Nihil simul inventum est et perfectum

    Abusus non tollit usum

    Last night I dreamed of a small consolation enjoyed only by the blind: Nobody knows the trouble I've not seen!
    James Thurber

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •