Consulting

Results 1 to 8 of 8

Thread: Solved: Check the file encode type?

  1. #1
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    5
    Location

    Question Solved: Check the file encode type?

    Is there any way to check the file encode type? (ANSI or Unicode / UTF=8 etc)

    This can be simply checked by open the file with NOTEPAD and click SAVEAS, then check in the "ENCODE" field.

    Just don't know how to check it with vba, I have tried "ADODB.Stream.Charset" but it always return "Unicode" for all files with different encoder. And "Scripting.FileSystemObject" also can't do that.

    Any idea?

  2. #2
    Knowledge Base Approver VBAX Master Oorang's Avatar
    Joined
    Jan 2007
    Posts
    1,135
    Location
    Great question, here is a rudimentary way to do this:
    [vba]Option Explicit

    Public Enum abCharsets
    abError = 0
    abANSI = 1
    abUnicode = 2
    abUnicodeBigEndian = 3
    abUTF8 = 4
    ebUnknown = 5
    End Enum

    Public Sub Example()
    Dim objStream As ADODB.Stream
    Dim strPath As String
    strPath = "C:\test\Charset_Test\UTF8.txt"
    Set objStream = New ADODB.Stream
    With objStream
    .Open
    .Charset = CharsetToString(ReturnCharset(strPath))
    .LoadFromFile "C:\test\Charset_Test\UTF8.txt"
    MsgBox .ReadText
    End With
    End Sub

    Public Function ReturnCharset(ByVal filePath As String, Optional verifyANSI As Boolean = True) As abCharsets
    Const bytByte0Unicode_c As Byte = 255
    Const bytByte1Unicode_c As Byte = 254
    Const bytByte0UnicodeBigEndian_c As Byte = 254
    Const bytByte1UnicodeBigEndian_c As Byte = 255
    Const bytByte0UTF8_c As Byte = 239
    Const bytByte1UTF8_c As Byte = 187
    Const bytByte2UTF8_c As Byte = 191
    Const lngByte0 As Long = 0
    Const lngByte1 As Long = 1
    Const lngByte2 As Long = 2
    Dim bytHeader() As Byte
    Dim eRtnVal As abCharsets
    On Error GoTo Err_Hnd
    bytHeader() = GetFileBytes(filePath, lngByte2)
    Select Case bytHeader(lngByte0)
    Case bytByte0Unicode_c
    If bytHeader(lngByte1) = bytByte1Unicode_c Then
    eRtnVal = abCharsets.abUnicode
    End If
    Case bytByte0UnicodeBigEndian_c
    If bytHeader(lngByte1) = bytByte1UnicodeBigEndian_c Then
    eRtnVal = abCharsets.abUnicodeBigEndian
    End If
    Case bytByte0UTF8_c
    If bytHeader(lngByte1) = bytByte1UTF8_c Then
    If bytHeader(lngByte2) = bytByte2UTF8_c Then
    eRtnVal = abCharsets.abUTF8
    End If
    End If
    End Select
    If Not CBool(eRtnVal) Then
    If verifyANSI Then
    If IsANSI(filePath) Then
    eRtnVal = abCharsets.abANSI
    Else
    eRtnVal = abCharsets.ebUnknown
    End If
    Else
    eRtnVal = abCharsets.abANSI
    End If
    End If
    Exit_Proc:
    On Error Resume Next
    Erase bytHeader
    ReturnCharset = eRtnVal
    Exit Function
    Err_Hnd:
    eRtnVal = abCharsets.abError
    Resume Exit_Proc
    End Function

    Private Function IsANSI(ByVal filePath As String) As Boolean
    Const lngKeyCodeNullChar_c As Long = 0
    Dim bytFile() As Byte
    Dim lngIndx As Long
    Dim lngUprBnd As Long
    bytFile = GetFileBytes(filePath)
    lngUprBnd = UBound(bytFile)
    For lngIndx = 0 To lngUprBnd
    If bytFile(lngIndx) = lngKeyCodeNullChar_c Then
    Exit For
    End If
    Next
    Erase bytFile
    IsANSI = (lngIndx > lngUprBnd)
    End Function

    Public Function GetFileBytes(ByVal path As String, Optional ByVal truncateToByte As Long = -1) As Byte()
    Dim lngFileNum As Long
    Dim bytRtnVal() As Byte
    If truncateToByte < 0 Then
    truncateToByte = FileLen(path) - 1
    End If
    lngFileNum = FreeFile
    If FileExists(path) Then
    Open path For Binary Access Read As lngFileNum
    ReDim bytRtnVal(truncateToByte) As Byte
    Get lngFileNum, , bytRtnVal
    Close lngFileNum
    End If
    GetFileBytes = bytRtnVal
    Erase bytRtnVal
    End Function

    Public Function FileExists(ByVal filePath As String) As Boolean
    FileExists = CBool(LenB(Dir(filePath, vbHidden + vbNormal + vbSystem + vbReadOnly + vbArchive)))
    End Function
    Public Function CharsetToString(ByVal value As abCharsets) As String
    Dim strRtnVal As String
    Select Case value
    Case abCharsets.abANSI
    strRtnVal = "us-ascii"
    Case abCharsets.abUTF8
    strRtnVal = "utf-8"
    Case Else
    strRtnVal = "Unicode"
    End Select
    CharsetToString = strRtnVal
    End Function
    [/vba]
    Cordially,
    Aaron



    Keep Our Board Clean!
    • Please Mark your thread "Solved" if you get an acceptable response (under thread tools).
    • Enclose your code in VBA tags then it will be formatted as per the VBIDE to improve readability.

  3. #3
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    5
    Location
    Thanks you so much, I've tested with the files and here below's the result:

    XML File:
    ANSI / Unicode: 5


    TEXT File:
    ANSI: 5
    Unicode: 2
    UTF-8: 4

    According to the result code definetion, the code works well with TXT file (Except ANSI) but not XML file. And if strPath is pointing to a file doesn't exist, a file with same name and zero size will be created (This can be solved by "(Dir(strPath) > "")" check at beginning)

    abANSI = 1
    abUnicode = 2
    abUnicodeBigEndian = 3
    abUTF8 = 4
    ebUnknown = 5

  4. #4
    Knowledge Base Approver VBAX Master Oorang's Avatar
    Joined
    Jan 2007
    Posts
    1,135
    Location
    Good catches. Fixed
    Cordially,
    Aaron



    Keep Our Board Clean!
    • Please Mark your thread "Solved" if you get an acceptable response (under thread tools).
    • Enclose your code in VBA tags then it will be formatted as per the VBIDE to improve readability.

  5. #5
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    5
    Location
    Thanks so much! It really works for XML and TXT files now.

    I also find some problems are from my XML files which are auto-generated and zipped from SUN Microsystems. I have to unzip, open and save (without change) to make VBA return correct charset.


    I am sure if I should open a new post: (I will if you suggest)

    (1) Just quick open and save files:
    Use ADODB.Stream right? or any better suggestion?

    (2) Save with another encoder
    Is "ADODB.Stream.Charset" the best way to do it?

  6. #6
    Knowledge Base Approver VBAX Master Oorang's Avatar
    Joined
    Jan 2007
    Posts
    1,135
    Location
    I've updated the code in above to include an example of how to make it all work with an ado stream. As far as "better" goes, it depends on the requirements. The ado stream object should be fine for most of your needs. Unless you find plan on optimizing for speed, I'd stick with it But ado should be fast enough for the majority of tasks I could forsee.
    Cordially,
    Aaron



    Keep Our Board Clean!
    • Please Mark your thread "Solved" if you get an acceptable response (under thread tools).
    • Enclose your code in VBA tags then it will be formatted as per the VBIDE to improve readability.

  7. #7
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    5
    Location
    Great works !

  8. #8
    Thanks for some great code!

    I only needed one change.

    I guess in europe this line:
    strRtnVal = "us-ascii"
    should be:
    strRtnVal = "windows-1252"

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •