// ============================================================================
// Functions for character set conversion
// Copyright (c) 2002, Juergen Haible. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ============================================================================

unit uCharsets;

interface

uses Windows, SysUtils, Classes, uCharmaps8, uUnicoding;

type
   Char8  = AnsiChar;  String8  = AnsiString;
   Char16 = WideChar;  String16 = WideString;

const
   // marker for undefined positions
   UNDEFINED_CHAR16 = Char16( $FFFF );

type
   // list of charset names (SP separated, preferred name first)
   TCharsetNamesList = string;

   // base class for all charset handlers
   TCustomCharsetHandler = class
      protected
         FBitsPerChar: Integer;
         FNamesList  : TCharsetNamesList;

         function GetPreferredName: String;

      public
         property PreferredName: String  read GetPreferredName;
         property NamesList    : String  read FNamesList;
         property BitsPerChar  : Integer read FBitsPerChar;

         function To16  ( const In8  : String8;
                          out   Out16: String16 ): Boolean; virtual; abstract;
         function From16( const In16 : String16;
                          out   Out8 : String8  ): Boolean; virtual; abstract;

         constructor Create( const ANamesList: TCharsetNamesList );
   end;

   TCharsets = class
      private
         FSetNames  : TStringList; // of ( Name, TCustomCharsetHandler )
         FHandlers  : TList;       // of TCustomCharsetHandler
         FACPHandler: TCustomCharsetHandler;

         procedure InitCharsets;
         function GetHandler( const CharsetName: String ): TCustomCharsetHandler;
         function GetWindowsACPHandler: TCustomCharsetHandler;

      public
         property CharsetsList: TStringList read FSetNames;
         property HandlersList: TList       read FHandlers;
         property Handler[ const CharsetName: String ]: TCustomCharsetHandler read GetHandler;
         property WindowsACPHandler: TCustomCharsetHandler read GetWindowsACPHandler;

         function Convert( const InCSH : TCustomCharsetHandler;
                           const OutCSH: TCustomCharsetHandler;
                           var   Str8  : String8 ): Boolean; overload;
         function Convert( const InCSN : String;
                           const OutCSN: String;
                           var   Str8  : String8 ): Boolean; overload;
         function Convert( const InCSN : String;
                           const OutCSH: TCustomCharsetHandler;
                           var   Str8  : String8 ): Boolean; overload;
         function Convert( const InCSH : TCustomCharsetHandler;
                           const OutCSN: String;
                           var   Str8  : String8 ): Boolean; overload;

         constructor Create;
         destructor Destroy; override;
   end;

function Charsets: TCharsets;

implementation

uses uCharsets8, uCharsetsU, uCharsetsW;

var
   FCharsets: TCharsets = nil;

function Charsets: TCharsets;
begin
   if not Assigned( FCharsets ) then FCharsets := TCharsets.Create;
   Result := FCharsets;
end;


{ TCustomCharsetHandler }

constructor TCustomCharsetHandler.Create( const ANamesList: TCharsetNamesList );
begin
   inherited Create;
   FNamesList := ANamesList;
   FBitsPerChar := 8;
end;

function TCustomCharsetHandler.GetPreferredName: String;
var  i: Integer;
begin
   i := Pos( ' ', FNamesList );
   if i > 0 then Result := copy( FNamesList, 1, i-1 )
            else Result := FNamesList;
end;

{ TCharsets }

function TCharsets.Convert( const InCSH, OutCSH: TCustomCharsetHandler;
                            var   Str8: String8 ): Boolean;
var  Tmp16: String16;
begin
   Result := InCSH.To16( Str8, Tmp16 );
   Result := Result and OutCSH.From16( Tmp16, Str8 );
end;

function TCharsets.Convert( const InCSN, OutCSN: String;
                            var Str8: String8 ): Boolean;
var  InCSH, OutCSH: TCustomCharsetHandler;
begin
   InCSH  := Handler[ InCSN  ];
   OutCSH := Handler[ OutCSN ];
   if Assigned( InCSH ) and Assigned( OutCSH ) then begin
      Result := Convert( InCSH, OutCSH, Str8 );
   end else begin
      Result := False;
   end;
end;

function TCharsets.Convert( const InCSN: String;
                            const OutCSH: TCustomCharsetHandler;
                            var Str8: String8 ): Boolean;
var  InCSH: TCustomCharsetHandler;
begin
   InCSH  := Handler[ InCSN  ];
   if Assigned( InCSH ) and Assigned( OutCSH ) then begin
      Result := Convert( InCSH, OutCSH, Str8 );
   end else begin
      Result := False;
   end;
end;

function TCharsets.Convert( const InCSH: TCustomCharsetHandler;
                            const OutCSN: String;
                            var Str8: String8 ): Boolean;
var  OutCSH: TCustomCharsetHandler;
begin
   OutCSH := Handler[ OutCSN ];
   if Assigned( InCSH ) and Assigned( OutCSH ) then begin
      Result := Convert( InCSH, OutCSH, Str8 );
   end else begin
      Result := False;
   end;
end;

constructor TCharsets.Create;
begin
   inherited;

   FHandlers := TList.Create;

   FSetNames := TStringList.Create;
   FSetNames.Sorted := True;

   FACPHandler := nil;

   InitCharsets;
end;

destructor TCharsets.Destroy;
var  i: Integer;
begin
   for i:=0 to FHandlers.Count-1 do TCustomCharsetHandler(FHandlers[i]).Free;
   FHandlers.Free;
   FSetNames.Free;
   inherited;
end;

function TCharsets.GetHandler( const CharsetName: String ): TCustomCharsetHandler;
var  i: Integer;
begin
   i := FSetNames.IndexOf( CharsetName );
   if i >= 0 then Result := TCustomCharsetHandler( FSetNames.Objects[ i ] )
             else Result := nil;
end;

function TCharsets.GetWindowsACPHandler: TCustomCharsetHandler;
var  acp: Integer;
begin
   if not Assigned( FACPHandler ) then begin
      acp := GetACP;
      FACPHandler := Handler[ 'windows-' + inttostr(acp) ];
      if not Assigned(FACPHandler) then FACPHandler := Handler['windows-1252'];
   end;

   Result := FACPHandler;
end;

procedure TCharsets.InitCharsets;

   procedure CheckAdd( const CH: TCustomCharsetHandler );
   var  Used: Boolean;
        s, h: String;
        i, k: Integer;
   begin
      Used := False;

      try
         if CH.PreferredName <> '' then begin

            s := CH.NamesList;
            while length( s ) > 0 do begin
               i := Pos( ' ', s );
               if i > 0 then begin
                  h := copy( s, 1, i-1 );
                  s := Trim( copy( s, i+1, MaxInt ) );
               end else begin
                  h := s;
                  s := '';
               end;

               k := FSetNames.IndexOf( h );
               if k < 0 then begin // name not in list yet
                  if not Used then FHandlers.Add( CH );
                  Used := True;
                  FSetNames.AddObject( h, CH );
               end;
            end;

         end;
      finally
         if not Used then CH.Free;
      end;
   end;

var  SR: TSearchRec;
     CodePageNumbers: TStringList;
     i, cp: Integer;
begin
   // add UTF internal ones
   CheckAdd( TUTF7CharsetHandler   .Create );
   CheckAdd( TUTF8CharsetHandler   .Create );
   CheckAdd( TUTF16CharsetHandler  .Create );
   CheckAdd( TUTF16BECharsetHandler.Create );
   CheckAdd( TUTF16LECharsetHandler.Create );

   // add external ones (.csd files in app's directory)
   if FindFirst( '*.csd', faAnyFile, SR ) = 0 then try
      repeat
         CheckAdd( TByteCharsetHandler.Create( SR.Name ) );
      until FindNext( SR ) <> 0;
   finally FindClose( SR ) end;

   // add internal ones, unless already added as external ones
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_US_ASCII,     LOWERMAP_US_ASCII, HALFMAP_UNDEFINED     ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1250, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1250 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1251, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1251 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1252, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1252 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1253, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1253 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1254, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1254 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1255, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1255 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1256, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1256 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1257, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1257 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_WINDOWS_1258, LOWERMAP_US_ASCII, UPPERMAP_WINDOWS_1258 ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_1,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_1   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_2,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_2   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_3,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_3   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_4,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_4   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_5,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_5   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_6,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_6   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_7,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_7   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_8,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_8   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_9,   LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_9   ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_10,  LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_10  ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_13,  LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_13  ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_14,  LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_14  ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_15,  LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_15  ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_ISO_8859_16,  LOWERMAP_US_ASCII, UPPERMAP_ISO_8859_16  ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_IBM437,       LOWERMAP_US_ASCII, UPPERMAP_IBM437       ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_IBM850,       LOWERMAP_US_ASCII, UPPERMAP_IBM850       ) );
   CheckAdd( TByteCharsetHandler.Create( SETNAMES_KOI8_R,       LOWERMAP_US_ASCII, UPPERMAP_KOI8_R       ) );

   // add installed Windows codepages, unless already added above
   CodePageNumbers := TStringList.Create;
   try
      EnumInstalledWindowsCodePages( CodePageNumbers );
      for i := 0 to CodePageNumbers.Count - 1 do begin
         cp := strtointdef( CodePageNumbers[i], -1 );
         if cp > 0 then CheckAdd( TWindowsCharsetHandler.Create( cp, GetWindowsACPHandler ) );
      end;
   finally CodePageNumbers.Free end;
end;

initialization

finalization
   if Assigned( FCharsets ) then FCharsets.Free;


end.
