// ============================================================================
// Handler for 8-bit character sets
// Copyright (c) 2002, Juergen Haible. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ============================================================================

unit uCharsets8;

interface

uses SysUtils, Classes, uCharsets, uCharmaps8, uUnicoding;

type
   // mapping of byte charsets to unicode
   // Full table: one Char16 entry for every possible Char8 value.
   TByteCharsetMap      = array[ Char8 ] of Char16;
   // Half tables covering the byte ranges #0..#127 and #128..#255.
   TByteCharsetLowerMap = array[   #0 .. #127 ] of Char16;
   TByteCharsetUpperMap = array[ #128 .. #255 ] of Char16;

   // handler for 8 bit charsets (ISO-8859-1, windows-1252, ...)
   // Converts between String8 and String16 by looking bytes up in FMap
   // (To16) and by searching FMap for a matching entry (From16).
   TByteCharsetHandler = class( TCustomCharsetHandler )
      private
         // Byte -> Char16 table; entries without a mapping hold
         // UNDEFINED_CHAR16.
         FMap: TByteCharsetMap;

         // Loads names and mapping from an in-memory '.CS1' definition.
         procedure SetFromDef ( const DefList: TStringList );
         // Reads the first '.'-introduced definition from a text file and
         // passes it on to SetFromDef.
         procedure SetFromFile( const Filename: String );

      public
         // Read-only access to the byte -> Char16 table.
         property Map: TByteCharsetMap read FMap;

         // Returns False if at least one character could not be mapped.
         function To16  ( const In8  : String8;
                          out   Out16: String16 ): Boolean; override;
         function From16( const In16 : String16;
                          out   Out8 : String8  ): Boolean; override;

         // Create from a definition file.
         constructor Create( const Filename: String ); overload;
         // Create from an already loaded definition.
         constructor Create( const DefList: TStringList ); overload;
         // Create from a complete 256-entry table.
         constructor Create( const ANamesList: TCharsetNamesList;
                             const AMap      : TByteCharsetMap ); overload;
         // Create from two half tables (LMap is copied to the start of the
         // table, UMap to position #128).
         // NOTE(review): TByteCharsetHalfMap is not declared in this unit -
         // presumably it comes from one of the used units; confirm.
         constructor Create( const ANamesList: TCharsetNamesList;
                             const LMap, UMap: TByteCharsetHalfMap ); overload;
   end;


implementation

{ TByteCharsetHandler }

// Creates a handler for the given charset names from a complete
// 256-entry byte -> Char16 table. The table is copied, not referenced.
constructor TByteCharsetHandler.Create( const ANamesList: TCharsetNamesList;
                                        const AMap      : TByteCharsetMap );
begin
   inherited Create( ANamesList );

   // Both sides are the same static array type, so a plain assignment
   // copies every entry.
   FMap := AMap;
end;

// Creates a handler for the given charset names from two half tables:
// LMap fills the table from its first entry on, UMap from entry #128 on.
constructor TByteCharsetHandler.Create( const ANamesList: TCharsetNamesList;
                                        const LMap, UMap: TByteCharsetHalfMap);
const
   HALF_BYTES = SizeOf( TByteCharsetHalfMap );
begin
   inherited Create( ANamesList );

   // Raw block copies; each one transfers exactly one half table.
   Move( LMap, FMap[ #0 ],   HALF_BYTES );
   Move( UMap, FMap[ #128 ], HALF_BYTES );
end;

// Creates a handler from a charset definition file. The handler starts
// with an empty names list and a table in which every byte is unmapped;
// SetFromFile then fills in whatever the file provides.
constructor TByteCharsetHandler.Create( const Filename: String );
var  Slot: Char8;
begin
   inherited Create( '' );

   // Mark every byte as unmapped before loading.
   for Slot := Low( FMap ) to High( FMap ) do
      FMap[ Slot ] := UNDEFINED_CHAR16;

   SetFromFile( Filename );
end;

// Creates a handler from an already loaded charset definition. The
// handler starts with an empty names list and a table in which every
// byte is unmapped; SetFromDef then applies the definition.
constructor TByteCharsetHandler.Create( const DefList: TStringList );
var  Slot: Char8;
begin
   inherited Create( '' );

   // Mark every byte as unmapped before applying the definition.
   for Slot := Low( FMap ) to High( FMap ) do
      FMap[ Slot ] := UNDEFINED_CHAR16;

   SetFromDef( DefList );
end;

// Applies a '.CS1' charset definition to this handler.
//
// Expected layout of DefList:
//    line 0      marker '.CS1' (compared case-insensitively)
//    line 1      charset names (must not be empty)
//    line 2..17  16 rows, each holding 16 four-digit hex values at a
//                stride of five characters (value + one separator)
//
// If the layout check fails the procedure returns without touching the
// handler. A cell that is not valid hex makes StrToInt raise; names and
// the entries parsed so far have already been stored at that point.
procedure TByteCharsetHandler.SetFromDef( const DefList: TStringList );
const
   FIRST_ROW   = 2;
   LAST_ROW    = 17;
   CELLS       = 16;                 // values per row
   HEX_DIGITS  = 4;                  // digits per value
   CELL_STRIDE = HEX_DIGITS + 1;     // value plus one separator
   MIN_ROW_LEN = HEX_DIGITS + ( CELLS - 1 ) * CELL_STRIDE;
var  Row, Cell, Code: Integer;
     Line: String;
     Slot: Char8;
begin
   // Layout validation: marker, names and 16 sufficiently long rows.
   if DefList.Count <= LAST_ROW then exit;
   if UpperCase( DefList[0] ) <> '.CS1' then exit;
   if DefList[1] = '' then exit;
   for Row := FIRST_ROW to LAST_ROW do
      if Length( DefList[ Row ] ) < MIN_ROW_LEN then exit;

   FNamesList := DefList[1];

   for Slot := Low( FMap ) to High( FMap ) do
      FMap[ Slot ] := UNDEFINED_CHAR16;

   // Row r, cell c describes byte (r - FIRST_ROW) * 16 + c.
   for Row := FIRST_ROW to LAST_ROW do begin
      Line := DefList[ Row ];
      for Cell := 0 to CELLS - 1 do begin
         Code := StrToInt( '$' + Copy( Line, Cell * CELL_STRIDE + 1, HEX_DIGITS ) );
         FMap[ chr( ( Row - FIRST_ROW ) * CELLS + Cell ) ] := Char16( Code and $FFFF );
      end;
   end;
end;

// Loads a charset definition from a text file and applies it through
// SetFromDef.
//
// File format as read here:
//    - lines starting with '#' and empty lines are ignored
//    - a line starting with '.' opens a definition; everything before the
//      first such line is skipped
//    - a second line starting with '.' ends the definition, so only the
//      first definition in the file is used
//
// A file that cannot be loaded is ignored on purpose (best effort): the
// handler then simply keeps its current state.
procedure TByteCharsetHandler.SetFromFile( const Filename: String );
var  slFile, slDef: TStringList;
     iFile: Integer;
     InDef: Boolean;
     s: String;
begin
   // slDef is created inside the protected region so that slFile is
   // released even if the second constructor raises; Free is safe on nil.
   slDef  := nil;
   slFile := TStringList.Create;

   try
      slDef := TStringList.Create;

      try
         slFile.LoadFromFile( Filename );
      except
         exit; // deliberate: unreadable file leaves the handler unchanged
      end;

      InDef := False;

      for iFile := 0 to slFile.Count - 1 do begin
         s := slFile[ iFile ];
         if copy( s, 1, 1 ) = '#' then s := ''; // comment line

         if s <> '' then begin
            if s[1] = '.' then begin
               if InDef then break; // next definition starts: stop here
               InDef := True;
            end;

            if InDef then slDef.Add( s );
         end;
      end;

      if slDef.Count > 0 then SetFromDef( slDef );

   finally
      slDef.Free;
      slFile.Free;
   end;
end;

// Converts an 8-bit string to 16-bit characters by looking every byte up
// in FMap. Out16 always receives one character per input byte.
// Returns False if at least one byte maps to UNDEFINED_CHAR16 (that
// marker is still written to Out16), True otherwise.
function TByteCharsetHandler.To16( const In8  : String8;
                                   out   Out16: String16 ): Boolean;
var  Pos, Count: Integer;
     Wide: Char16;
begin
   Count := Length( In8 );
   SetLength( Out16, Count );

   Result := True;
   for Pos := 1 to Count do begin
      Wide := FMap[ In8[ Pos ] ];
      Out16[ Pos ] := Wide;
      if Wide = UNDEFINED_CHAR16 then Result := False;
   end;
end;

// Converts 16-bit characters to an 8-bit string. Out8 always receives
// one byte per input character.
//
// Per character:
//    - code points up to $7F are copied as-is, without consulting FMap
//    - code points up to $FF are copied as-is if FMap maps that same byte
//      back to the code point
//    - otherwise FMap is searched from byte 255 down to 0 and the first
//      matching byte is used
// A character equal to UNDEFINED_CHAR16 or without any match becomes '?'
// and makes the function return False.
function TByteCharsetHandler.From16( const In16: String16;
                                     out   Out8: String8 ): Boolean;
var  Pos, Slot, Count: Integer;
     Code: UTF16;
     Direct, Mapped: Boolean;
begin
   Result := True;

   Count := Length( In16 );
   SetLength( Out8, Count );

   for Pos := 1 to Count do begin
      Code := UTF16( In16[ Pos ] );

      // Can the code point be used as the byte value directly?
      Direct := ( Code <= $7F );
      if ( not Direct ) and ( Code <= $FF ) then
         Direct := ( UTF16( FMap[ chr( Code ) ] ) = Code );

      if Direct then begin
         Out8[ Pos ] := chr( Code );
      end else begin
         Mapped := False;

         // The undefined marker never yields a byte, so it needs no scan.
         if Code <> UTF16( UNDEFINED_CHAR16 ) then begin
            Slot := 255;
            while ( Slot >= 0 ) and ( not Mapped ) do begin
               if UTF16( FMap[ chr( Slot ) ] ) = Code then begin
                  Out8[ Pos ] := chr( Slot );
                  Mapped := True;
               end;
               dec( Slot );
            end;
         end;

         if not Mapped then begin
            Out8[ Pos ] := '?';
            Result := False;
         end;
      end;
   end;
end;

end.
