Page 1 of 1

Save textbox text to unicode and utf8

Posted: 21 Mar 2014, 15:15
by bsekirarski
How to!
Save textbox - text to unicode and utf8 files

Thanks a lot

Re: Save textbox text to unicode and utf8

Posted: 22 Mar 2014, 16:27
by TiKu
Hi,

The following program requires two command buttons and a text box. It reads the file C:\MyFile.txt, which can be either UTF-16 with BOM, UTF-8 with Signature or UTF-8. And it writes the content of the text box to three different files - one for each encoding.

Code: Select all

Option Explicit
  
  Private Const FILE_ATTRIBUTE_NORMAL = &H80
  Private Const FILE_SHARE_READ = &H1
  Private Const GENERIC_READ = &H80000000
  Private Const GENERIC_WRITE = &H40000000
  Private Const INVALID_HANDLE_VALUE = -1
  Private Const CREATE_ALWAYS = 2
  Private Const OPEN_EXISTING = 3
  
  
  Private Declare Function CloseHandle Lib "kernel32.dll" (ByVal hObject As Long) As Long
  Private Declare Function CreateFile Lib "kernel32.dll" Alias "CreateFileW" (ByVal lpFileName As Long, ByVal dwDesiredAccess As Long, ByVal dwShareMode As Long, ByVal lpSecurityAttributes As Long, ByVal dwCreationDisposition As Long, ByVal dwFlagsAndAttributes As Long, ByVal hTemplateFile As Long) As Long
  Private Declare Function GetFileSize Lib "kernel32.dll" (ByVal hFile As Long, ByVal lpFileSizeHigh As Long) As Long
  Private Declare Function MultiByteToWideChar Lib "kernel32.dll" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cbMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
  Private Declare Function lstrcpyn Lib "kernel32.dll" Alias "lstrcpynW" (ByVal lpString1 As Long, ByVal lpString2 As Long, ByVal iMaxLength As Long) As Long
  Private Declare Function lstrlen Lib "kernel32.dll" Alias "lstrlenW" (ByVal lpString As Long) As Long
  Private Declare Function ReadFile Lib "kernel32.dll" (ByVal hFile As Long, ByVal lpBuffer As Long, ByVal nNumberOfBytesToRead As Long, ByVal lpNumberOfBytesRead As Long, ByVal lpOverlapped As Long) As Long
  Private Declare Function SetFilePointer Lib "kernel32.dll" (ByVal hFile As Long, ByVal lDistanceToMove As Long, ByVal lpDistanceToMoveHigh As Long, ByVal dwMoveMethod As Long) As Long
  Private Declare Function WideCharToMultiByte Lib "kernel32.dll" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByVal lpMultiByteStr As Long, ByVal cbMultiByte As Long, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long
  Private Declare Function WriteFile Lib "kernel32.dll" (ByVal hFile As Long, ByVal lpBuffer As Long, ByVal nNumberOfBytesToWrite As Long, ByVal lpNumberOfBytesWritten As Long, ByVal lpOverlapped As Long) As Long

Private Sub cmdLoad_Click()
  Const CP_UTF8 As Long = 65001
  Const FILE_BEGIN As Long = 0
  Dim bytesRead As Long
  Dim filePath As String
  Dim fileSize As Long
  Dim hFile As Long
  Dim buffer() As Byte
  Dim txt As String
  
  filePath = "C:\MyFile.txt"
  hFile = CreateFile(StrPtr(filePath), GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0)
  If hFile <> INVALID_HANDLE_VALUE Then
    fileSize = GetFileSize(hFile, 0)
    If fileSize > 0 Then
      ReDim buffer(0 To fileSize - 1) As Byte
      ' try to read the BOM/UTF-8 signature
      Call ReadFile(hFile, VarPtr(buffer(0)), 3, VarPtr(bytesRead), 0)
      If bytesRead = 3 Then
        If buffer(0) = 255 And buffer(1) = 254 Then
          ' UTF-16 (LE) - just read directly to the String variable
          Call SetFilePointer(hFile, 2, 0, FILE_BEGIN)
          txt = String$(fileSize - 2, 0)     ' skip the BOM
          Call ReadFile(hFile, StrPtr(txt), fileSize - 2, VarPtr(bytesRead), 0)
        ElseIf buffer(0) = 239 And buffer(1) = 187 And buffer(2) = 191 Then
          ' UTF-8 - needs conversion
          Call ReadFile(hFile, VarPtr(buffer(0)), fileSize - 3, VarPtr(bytesRead), 0)
          txt = String$(fileSize - 3, 0)     ' skip the BOM
          Call MultiByteToWideChar(CP_UTF8, 0, VarPtr(buffer(0)), bytesRead, StrPtr(txt), fileSize - 3)
        End If
      End If
      If Len(txt) = 0 Then
        ' assume UTF-8 without signature
        Call SetFilePointer(hFile, 0, 0, FILE_BEGIN)
        Call ReadFile(hFile, VarPtr(buffer(0)), fileSize, VarPtr(bytesRead), 0)
        txt = String$(fileSize, 0)
        Call MultiByteToWideChar(CP_UTF8, 0, VarPtr(buffer(0)), bytesRead, StrPtr(txt), fileSize)
      End If
    End If
    Call CloseHandle(hFile)
  End If
  
  TextBox1.Text = txt
End Sub

Private Sub cmdSave_Click()
  Const CP_UTF8 As Long = 65001
  Dim bytesWritten As Long
  Dim filePath As String
  Dim hFile As Long
  Dim buffer() As Byte
  Dim txt As String
  Dim l As Long
  
  txt = TextBox1.Text
  
  filePath = "D:\UTF16 BOM.txt"
  hFile = CreateFile(StrPtr(filePath), GENERIC_WRITE, FILE_SHARE_READ, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0)
  If hFile <> INVALID_HANDLE_VALUE Then
    ' UTF-16 (LE), write the BOM
    ReDim buffer(0 To 1) As Byte
    buffer(0) = 255
    buffer(1) = 254
    Call WriteFile(hFile, VarPtr(buffer(0)), 2, VarPtr(bytesWritten), 0)
    ' now write the text
    Call WriteFile(hFile, StrPtr(txt), LenB(txt), VarPtr(bytesWritten), 0)
    Call CloseHandle(hFile)
  End If
  
  filePath = "D:\UTF8 Signature.txt"
  hFile = CreateFile(StrPtr(filePath), GENERIC_WRITE, FILE_SHARE_READ, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0)
  If hFile <> INVALID_HANDLE_VALUE Then
    ' UTF-8, write the signature
    ReDim buffer(0 To 2) As Byte
    buffer(0) = 239
    buffer(1) = 187
    buffer(2) = 191
    Call WriteFile(hFile, VarPtr(buffer(0)), 3, VarPtr(bytesWritten), 0)
    ' now convert the text
    l = WideCharToMultiByte(CP_UTF8, 0, StrPtr(txt), Len(txt), 0, 0, 0, 0)
    ReDim buffer(0 To l - 1) As Byte
    Call WideCharToMultiByte(CP_UTF8, 0, StrPtr(txt), Len(txt), VarPtr(buffer(0)), l, 0, 0)
    ' and write it to the file
    Call WriteFile(hFile, VarPtr(buffer(0)), l, VarPtr(bytesWritten), 0)
    Call CloseHandle(hFile)
  End If
  
  filePath = "D:\UTF8.txt"
  hFile = CreateFile(StrPtr(filePath), GENERIC_WRITE, FILE_SHARE_READ, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0)
  If hFile <> INVALID_HANDLE_VALUE Then
    ' UTF-8 without signature
    ' convert the text
    l = WideCharToMultiByte(CP_UTF8, 0, StrPtr(txt), Len(txt), 0, 0, 0, 0)
    ReDim buffer(0 To l - 1) As Byte
    Call WideCharToMultiByte(CP_UTF8, 0, StrPtr(txt), Len(txt), VarPtr(buffer(0)), l, 0, 0)
    ' and write it to the file
    Call WriteFile(hFile, VarPtr(buffer(0)), l, VarPtr(bytesWritten), 0)
    Call CloseHandle(hFile)
  End If
End Sub
Regards
TiKu