194 lines
5.7 KiB
Plaintext
194 lines
5.7 KiB
Plaintext
unit IdUriUtils;
|
|
|
|
interface
|
|
|
|
{$i IdCompilerDefines.inc}
|
|
|
|
{$IFDEF DOTNET}
|
|
{$DEFINE HAS_ConvertToUtf32}
|
|
{$ENDIF}
|
|
{$IFDEF HAS_TCharacter}
|
|
{$DEFINE HAS_ConvertToUtf32}
|
|
{$ENDIF}
|
|
{$IFDEF HAS_Character_TCharHelper}
|
|
{$DEFINE HAS_ConvertToUtf32}
|
|
{$ENDIF}
|
|
|
|
{$IFDEF DOTNET}
|
|
{$DEFINE HAS_String_IndexOf}
|
|
{$ENDIF}
|
|
{$IFDEF HAS_SysUtils_TStringHelper}
|
|
{$DEFINE HAS_String_IndexOf}
|
|
{$ENDIF}
|
|
|
|
uses
|
|
IdGlobal
|
|
{$IFNDEF DOTNET}
|
|
{$IFDEF HAS_ConvertToUtf32}
|
|
, Character
|
|
{$ELSE}
|
|
, IdException
|
|
{$ENDIF}
|
|
{$IFDEF HAS_String_IndexOf}
|
|
, SysUtils
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
;
|
|
|
|
{$IFNDEF HAS_ConvertToUtf32}
|
|
type
|
|
//for .NET, we use Char.ConvertToUtf32() as-is
|
|
//for XE3.5+, we use TCharHelper.ConvertToUtf32() as-is
|
|
//for D2009+, we use TCharacter.ConvertToUtf32() as-is
|
|
EIdUTF16Exception = class(EIdException);
|
|
EIdUTF16IndexOutOfRange = class(EIdUTF16Exception);
|
|
EIdUTF16InvalidHighSurrogate = class(EIdUTF16Exception);
|
|
EIdUTF16InvalidLowSurrogate = class(EIdUTF16Exception);
|
|
EIdUTF16MissingLowSurrogate = class(EIdUTF16Exception);
|
|
{$ENDIF}
|
|
|
|
// calculates character length, including surrogates
|
|
function CalcUTF16CharLength(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF}; const AIndex: Integer): Integer;
|
|
function WideCharIsInSet(const ASet: TIdUnicodeString; const AChar: WideChar): Boolean;
|
|
function GetUTF16Codepoint(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF}; const AIndex: Integer): Integer;
|
|
|
|
implementation
|
|
|
|
{$IFNDEF HAS_ConvertToUtf32}
|
|
uses
|
|
IdResourceStringsProtocols,
|
|
IdResourceStringsUriUtils;
|
|
{$ENDIF}
|
|
|
|
function CalcUTF16CharLength(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF};
|
|
const AIndex: Integer): Integer;
|
|
{$IFDEF DOTNET}
|
|
var
|
|
C: Integer;
|
|
{$ELSE}
|
|
{$IFDEF HAS_ConvertToUtf32}
|
|
{$IFDEF USE_INLINE}inline;{$ENDIF}
|
|
{$ELSE}
|
|
var
|
|
C: WideChar;
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
begin
|
|
{$IFDEF DOTNET}
|
|
C := System.Char.ConvertToUtf32(AStr, AIndex-1);
|
|
if (C >= #$10000) and (C <= #$10FFFF) then begin
|
|
Result := 2;
|
|
end else begin
|
|
Result := 1;
|
|
end;
|
|
{$ELSE}
|
|
{$IFDEF HAS_Character_TCharHelper}
|
|
Char.ConvertToUtf32(AStr, AIndex-1, Result);
|
|
{$ELSE}
|
|
{$IFDEF HAS_TCharacter}
|
|
TCharacter.ConvertToUtf32(AStr, AIndex, Result);
|
|
{$ELSE}
|
|
if (AIndex < {$IFDEF STRING_IS_UNICODE}1{$ELSE}0{$ENDIF}) or
|
|
(AIndex > (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF})) then
|
|
begin
|
|
raise EIdUTF16IndexOutOfRange.CreateResFmt(@RSUTF16IndexOutOfRange, [AIndex, Length(AStr)]);
|
|
end;
|
|
C := AStr[AIndex];
|
|
if (C >= #$D800) and (C <= #$DFFF) then
|
|
begin
|
|
if C > #$DBFF then begin
|
|
raise EIdUTF16InvalidHighSurrogate.CreateResFmt(@RSUTF16InvalidHighSurrogate, [AIndex]);
|
|
end;
|
|
if AIndex = (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF}) then begin
|
|
raise EIdUTF16MissingLowSurrogate.CreateRes(@RSUTF16MissingLowSurrogate);
|
|
end;
|
|
C := AStr[AIndex+1];
|
|
if (C < #$DC00) or (C > #$DFFF) then begin
|
|
raise EIdUTF16InvalidLowSurrogate.CreateResFmt(@RSUTF16InvalidLowSurrogate, [AIndex+1]);
|
|
end;
|
|
Result := 2;
|
|
end else begin
|
|
Result := 1;
|
|
end;
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
end;
|
|
|
|
function WideCharIsInSet(const ASet: TIdUnicodeString; const AChar: WideChar): Boolean;
|
|
{$IFDEF HAS_String_IndexOf}
|
|
{$IFDEF USE_INLINE}inline;{$ENDIF}
|
|
{$ELSE}
|
|
var
|
|
I: Integer;
|
|
{$ENDIF}
|
|
begin
|
|
{$IFDEF HAS_String_IndexOf}
|
|
Result := ASet.IndexOf(AChar) > -1;
|
|
{$ELSE}
|
|
// RLebeau 5/8/08: Calling Pos() with a Char as input creates a temporary
|
|
// String. Normally this is fine, but profiling reveils this to be a big
|
|
// bottleneck for code that makes a lot of calls to CharIsInSet(), so need
|
|
// to scan through ASet looking for the character without a conversion...
|
|
//
|
|
// Result := IndyPos(AString[ACharPos], ASet);
|
|
//
|
|
Result := False;
|
|
for I := 1 to Length(ASet) do begin
|
|
if ASet[I] = AChar then begin
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
end;
|
|
{$ENDIF}
|
|
end;
|
|
|
|
function GetUTF16Codepoint(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF};
|
|
const AIndex: Integer): Integer;
|
|
{$IFDEF HAS_ConvertToUtf32}
|
|
{$IFDEF USE_INLINE}inline;{$ENDIF}
|
|
{$ELSE}
|
|
var
|
|
C: WideChar;
|
|
LowSurrogate, HighSurrogate: Integer;
|
|
{$ENDIF}
|
|
begin
|
|
{$IFDEF DOTNET}
|
|
Result := System.Char.ConvertToUtf32(AStr, AIndex-1);
|
|
{$ELSE}
|
|
{$IFDEF HAS_Character_TCharHelper}
|
|
Result := Char.ConvertToUtf32(AStr, AIndex-1);
|
|
{$ELSE}
|
|
{$IFDEF HAS_TCharacter}
|
|
Result := TCharacter.ConvertToUtf32(AStr, AIndex);
|
|
{$ELSE}
|
|
if (AIndex < {$IFDEF STRING_IS_UNICODE}1{$ELSE}0{$ENDIF}) or
|
|
(AIndex > (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF})) then
|
|
begin
|
|
raise EIdUTF16IndexOutOfRange.CreateResFmt(@RSUTF16IndexOutOfRange, [AIndex, Length(AStr)]);
|
|
end;
|
|
C := AStr[AIndex];
|
|
if (C >= #$D800) and (C <= #$DFFF) then
|
|
begin
|
|
HighSurrogate := Integer(C);
|
|
if HighSurrogate > $DBFF then begin
|
|
raise EIdUTF16InvalidHighSurrogate.CreateResFmt(@RSUTF16InvalidHighSurrogate, [AIndex]);
|
|
end;
|
|
if AIndex = (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF}) then begin
|
|
raise EIdUTF16MissingLowSurrogate.CreateRes(@RSUTF16MissingLowSurrogate);
|
|
end;
|
|
LowSurrogate := Integer(AStr[AIndex+1]);
|
|
if (LowSurrogate < $DC00) or (LowSurrogate > $DFFF) then begin
|
|
raise EIdUTF16InvalidLowSurrogate.CreateResFmt(@RSUTF16InvalidLowSurrogate, [AIndex+1]);
|
|
end;
|
|
Result := ((HighSurrogate - $D800) shl 10) or (LowSurrogate - $DC00) + $10000;
|
|
end else begin
|
|
Result := Integer(C);
|
|
end;
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
end;
|
|
|
|
end.
|