{ restemplate/indy/Protocols/IdHeaderCoder2022JP.pas
  278 lines, 9.3 KiB, Plaintext }

unit IdHeaderCoder2022JP;
interface
{$i IdCompilerDefines.inc}
{RLebeau: TODO - move this logic into an IIdTextEncoding implementation}
uses
IdGlobal, IdHeaderCoderBase;
type
// Header coder for the 'ISO-2022-JP' charset. The class is registered with
// RegisterHeaderCoder in this unit's initialization section.
TIdHeaderCoder2022JP = class(TIdHeaderCoder)
public
// Converts ISO-2022-JP bytes (AData) to a String. Double-byte characters are
// returned as two Chars each (see the implementation); ACharSet is not used.
class function Decode(const ACharSet: string; const AData: TIdBytes): String; override;
// Converts AData to ISO-2022-JP bytes, inserting ESC $ B / ESC ( B sequences
// when switching between double-byte and single-byte output; ACharSet is not used.
class function Encode(const ACharSet, AData: String): TIdBytes; override;
// Returns True when ACharSet matches 'ISO-2022-JP' according to TextIsSame.
class function CanHandle(const ACharSet: String): Boolean; override;
end;
// RLebeau 4/17/10: this forces C++Builder to link to this unit so
// RegisterHeaderCoder can be called correctly at program startup...
{$IFDEF HAS_DIRECTIVE_HPPEMIT_LINKUNIT}
{$HPPEMIT LINKUNIT}
{$ELSE}
{$HPPEMIT '#pragma link "IdHeaderCoder2022JP"'}
{$ENDIF}
implementation
uses
SysUtils;
const
// RLebeau 1/7/09: using integers for #128-#255 because in D2009, the compiler
// may change characters >= #128 from their Ansi codepage value to their true
// Unicode codepoint value, depending on the codepage used for the source code.
// For instance, #128 may become #$20AC...

// kana_tbl: indexed by the ordinal (161..223) of a single-byte halfwidth
// katakana character. Each entry is the two-byte code that Encode emits for
// that character (high byte first, then low byte).
kana_tbl : array[161..223{#$A1..#$DF}] of Word = (
$2123,$2156,$2157,$2122,$2126,$2572,$2521,$2523,$2525,$2527,
$2529,$2563,$2565,$2567,$2543,$213C,$2522,$2524,$2526,$2528,
$252A,$252B,$252D,$252F,$2531,$2533,$2535,$2537,$2539,$253B,
$253D,$253F,$2541,$2544,$2546,$2548,$254A,$254B,$254C,$254D,
$254E,$254F,$2552,$2555,$2558,$255B,$255E,$255F,$2560,$2561,
$2562,$2564,$2566,$2568,$2569,$256A,$256B,$256C,$256D,$256F,
$2573,$212B,$212C);
// vkana_tbl: same index range as kana_tbl. A non-zero entry is the two-byte
// code Encode uses instead of the kana_tbl entry when the character is
// followed by the voiced mark ($DE). $0000 means the character has no
// combined voiced form. For entries in $2550..$255C, Encode uses entry + 1
// when the character is followed by the semi-voiced mark ($DF).
vkana_tbl : array[161..223{#$A1..#$DF}] of Word = (
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$2574,$0000,
$0000,$252C,$252E,$2530,$2532,$2534,$2536,$2538,$253A,$253C,
$253E,$2540,$2542,$2545,$2547,$2549,$0000,$0000,$0000,$0000,
$0000,$2550,$2553,$2556,$2559,$255C,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000);
// sj1_tbl: classifies a character with ordinal 128..255 as seen by Encode
// (presumably a Shift-JIS lead byte, going by the table name - TODO confirm):
//   $00 - invalid single-byte character; Encode skips it
//   $01 - halfwidth katakana; converted through kana_tbl / vkana_tbl
//   $02 - invalid double-byte lead; Encode skips it and the next character
//   any other value - valid double-byte lead; the value is the base of the
//         first output byte (the high byte of the sj2_tbl entry is added to it)
sj1_tbl : array[128..255{#128..#255}] of byte = (
$00,$21,$23,$25,$27,$29,$2B,$2D,$2F,$31,$33,$35,$37,$39,$3B,$3D,
$3F,$41,$43,$45,$47,$49,$4B,$4D,$4F,$51,$53,$55,$57,$59,$5B,$5D,
$00,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
$5F,$61,$63,$65,$67,$69,$6B,$6D,$6F,$71,$73,$75,$77,$79,$7B,$7D,
$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$00,$00,$00);
// sj2_tbl: indexed by the ordinal (0..255) of the character that follows a
// double-byte lead in Encode. $0000 marks an invalid second character (the
// pair is dropped). Otherwise the high byte ($00 or $01) is added to the
// sj1_tbl value to form the first output byte, and the low byte is emitted
// as the second output byte.
sj2_tbl : array[0..255{#0..#255}] of Word = (
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
$0000,$0000,$0000,$0000,$0021,$0022,$0023,$0024,$0025,$0026,
$0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,
$0031,$0032,$0033,$0034,$0035,$0036,$0037,$0038,$0039,$003A,
$003B,$003C,$003D,$003E,$003F,$0040,$0041,$0042,$0043,$0044,
$0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,$004E,
$004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,
$0059,$005A,$005B,$005C,$005D,$005E,$005F,$0000,$0060,$0061,
$0062,$0063,$0064,$0065,$0066,$0067,$0068,$0069,$006A,$006B,
$006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,$0075,
$0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$0121,
$0122,$0123,$0124,$0125,$0126,$0127,$0128,$0129,$012A,$012B,
$012C,$012D,$012E,$012F,$0130,$0131,$0132,$0133,$0134,$0135,
$0136,$0137,$0138,$0139,$013A,$013B,$013C,$013D,$013E,$013F,
$0140,$0141,$0142,$0143,$0144,$0145,$0146,$0147,$0148,$0149,
$014A,$014B,$014C,$014D,$014E,$014F,$0150,$0151,$0152,$0153,
$0154,$0155,$0156,$0157,$0158,$0159,$015A,$015B,$015C,$015D,
$015E,$015F,$0160,$0161,$0162,$0163,$0164,$0165,$0166,$0167,
$0168,$0169,$016A,$016B,$016C,$016D,$016E,$016F,$0170,$0171,
$0172,$0173,$0174,$0175,$0176,$0177,$0178,$0179,$017A,$017B,
$017C,$017D,$017E,$0000,$0000,$0000);
// Decodes ISO-2022-JP bytes into a String.
//
// Parameters:
//   ACharSet - not used by this coder.
//   AData    - the raw ISO-2022-JP bytes (may be empty).
//
// Returns a String in which
//   * bytes seen in single-byte mode are copied as one Char each, and
//   * every two-byte JIS X 0208 code is converted with the usual
//     JIS -> Shift-JIS arithmetic and appended as TWO Chars whose ordinals
//     are the resulting lead and trail byte values (i.e. the result is not
//     converted to Unicode code points here).
//
// Escape sequences handled (RFC 1468):
//   ESC $ @  and  ESC $ B   -> switch to double-byte mode
//   ESC ( B  and  ESC ( J   -> switch to single-byte mode
// Any other ESC + 2 bytes is skipped without changing the mode. An ESC that
// is not followed by at least two bytes is dropped and the remaining byte
// (if any) is processed as ordinary data.
class function TIdHeaderCoder2022JP.Decode(const ACharSet: String; const AData: TIdBytes): String;
var
  T: string;
  I, L: Integer;
  isK: Boolean;
  // Integer (not Byte) so that the arithmetic below cannot trip range
  // checking on malformed input; results are masked to 8 bits explicitly.
  K1, K2, K3: Integer;
begin
  T := ''; {Do not Localize}
  isK := False;
  L := Length(AData);
  I := 0;
  while I < L do
  begin
    if AData[I] = 27 then
    begin
      Inc(I);
      if (I+1) < L then
      begin
        if AData[I] = Ord('$') then begin {do not localize}
          // '@' = JIS C 6226-1978, 'B' = JIS X 0208-1983
          if (AData[I+1] = Ord('@')) or (AData[I+1] = Ord('B')) then begin {do not localize}
            isK := True;
          end;
        end
        else if AData[I] = Ord('(') then begin {do not localize}
          // 'B' = ASCII, 'J' = JIS X 0201 Roman (treated like ASCII)
          if (AData[I+1] = Ord('B')) or (AData[I+1] = Ord('J')) then begin {do not localize}
            isK := False;
          end;
        end;
        Inc(I, 2);
      end;
    end
    else if isK then
    begin
      if (I+1) < L then
      begin
        K1 := AData[I];
        K2 := AData[I+1];

        // lead byte: two JIS rows share one Shift-JIS lead byte
        K3 := ((K1 - 1) shr 1) and $FF;
        if K1 < 95 then begin
          K3 := (K3 + 113) and $FF;
        end else begin
          K3 := (K3 + 177) and $FF;
        end;

        // trail byte: odd rows use the lower half, even rows the upper half
        if Odd(K1) then
        begin
          if K2 < 96 then begin
            K2 := (K2 + 31) and $FF;
          end else begin
            K2 := (K2 + 32) and $FF;
          end;
        end
        else begin
          K2 := (K2 + 126) and $FF;
        end;

        T := T + Char(K3) + Char(K2);
        Inc(I, 2);
      end
      else begin
        Inc(I); { invalid DBCS: lone byte at the end of the data }
      end;
    end
    else
    begin
      T := T + Char(AData[I]);
      Inc(I);
    end;
  end;
  Result := T;
end;
// Encodes AData as ISO-2022-JP bytes.
//
// Parameters:
//   ACharSet - not used by this coder.
//   AData    - the text to encode. Characters below #128 are copied as-is.
//              Characters #128..#255 are classified through sj1_tbl: either
//              halfwidth katakana (converted via kana_tbl / vkana_tbl) or
//              the lead of a two-character pair whose second character is
//              looked up in sj2_tbl.
//
// Returns the encoded bytes. ESC $ B is emitted before the first double-byte
// character of a run, ESC ( B before returning to single-byte output, and a
// final ESC ( B is appended if the data ends in double-byte mode.
//
// Invalid input is dropped silently: an invalid single character is skipped,
// an invalid lead or an invalid pair skips two characters. Characters whose
// ordinal is above 255 (possible when String is a Unicode string) cannot be
// looked up in the tables and are treated as invalid instead of indexing
// outside the table bounds.
class function TIdHeaderCoder2022JP.Encode(const ACharSet, AData: String): TIdBytes;
var
  T: TIdBytes;
  I, L: Integer;
  isK: Boolean;
  C, N: Integer;
  K1: Byte;
  K2, K3: Word;

  // Emits ESC $ B unless double-byte mode is already active.
  procedure ShiftToJIS;
  begin
    if not isK then begin
      AppendByte(T, 27);
      AppendByte(T, Ord('$')); {Do not Localize}
      AppendByte(T, Ord('B')); {Do not Localize}
      isK := True;
    end;
  end;

  // Emits ESC ( B if double-byte mode is currently active.
  procedure ShiftToASCII;
  begin
    if isK then begin
      AppendByte(T, 27);
      AppendByte(T, Ord('(')); {Do not Localize}
      AppendByte(T, Ord('B')); {Do not Localize}
      isK := False;
    end;
  end;

begin
  SetLength(T, 0);
  isK := False;
  L := Length(AData);
  I := 1;
  while I <= L do
  begin
    C := Ord(AData[I]);
    if C < 128 then
    begin
      ShiftToASCII;
      AppendByte(T, Byte(C));
      Inc(I);
    end
    else if C > 255 then
    begin
      // outside the range of sj1_tbl: treat like an invalid single character
      Inc(I);
    end
    else
    begin
      K1 := sj1_tbl[C];
      case K1 of
        0: Inc(I);    { invalid SBCS }
        2: Inc(I, 2); { invalid DBCS }
        1:
          begin { halfwidth katakana }
            ShiftToJIS;
            { simple SBCS -> DBCS conversion }
            K2 := kana_tbl[C];
            if I < L then
            begin
              // Compare ordinals rather than Char literals so the test does
              // not depend on how the compiler maps #128..#255 literals.
              N := Ord(AData[I+1]);
              if N = $DE then
              begin { voiced mark: use the combined voiced kana if one exists }
                K3 := vkana_tbl[C];
                if K3 <> 0 then
                begin
                  K2 := K3;
                  Inc(I);
                end;
              end
              else if N = $DF then
              begin { semi-voiced mark: only defined for the $2550..$255C entries }
                K3 := vkana_tbl[C];
                if (K3 >= $2550) and (K3 <= $255C) then
                begin
                  K2 := K3 + 1;
                  Inc(I);
                end;
              end;
            end;
            AppendByte(T, K2 shr 8);
            AppendByte(T, K2 and $FF);
            Inc(I);
          end;
      else { DBCS }
        if I < L then
        begin
          N := Ord(AData[I+1]);
          if N <= 255 then begin
            K2 := sj2_tbl[N];
          end else begin
            K2 := 0; // outside the range of sj2_tbl: invalid second character
          end;
          if K2 <> 0 then
          begin
            ShiftToJIS;
            AppendByte(T, K1 + (K2 shr 8));
            AppendByte(T, K2 and $FF);
          end;
        end;
        Inc(I, 2);
      end;
    end;
  end;
  // always finish in single-byte mode
  ShiftToASCII;
  Result := T;
end;
// Reports whether this coder is responsible for ACharSet: the result is
// whatever TextIsSame returns when ACharSet is compared with 'ISO-2022-JP'
// (presumably a case-insensitive comparison - see IdGlobal.TextIsSame).
class function TIdHeaderCoder2022JP.CanHandle(const ACharSet: String): Boolean;
const
  cSupportedCharSet = 'ISO-2022-JP'; {do not localize}
begin
  Result := TextIsSame(ACharSet, cSupportedCharSet);
end;
initialization
// Register this coder when the unit is loaded so it can be selected for
// charsets accepted by CanHandle.
RegisterHeaderCoder(TIdHeaderCoder2022JP);
finalization
// Undo the registration made in the initialization section.
UnregisterHeaderCoder(TIdHeaderCoder2022JP);
end.