{ $Project$ $Workfile$ $Revision$ $DateUTC$ $Id$ This file is part of the Indy (Internet Direct) project, and is offered under the dual-licensing agreement described on the Indy website. (http://www.indyproject.org/) Copyright: (c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved. } { $Log$ } { Rev 1.9 10.10.2004 13:46:00 ARybin dont add default port to URI Rev 1.8 2004.02.03 5:44:40 PM czhower Name changes Rev 1.7 2004.01.22 5:27:24 PM czhower Fixed compile errors. Rev 1.6 1/22/2004 4:06:56 PM SPerry fixed set problems Rev 1.5 10/5/2003 11:44:24 PM GGrieve Use IsLeadChar Rev 1.4 6/9/2003 9:35:58 PM BGooijen %00 is valid now too Rev 1.3 2003.05.09 10:30:16 PM czhower Rev 1.2 2003.04.11 9:41:34 PM czhower Rev 1.1 29/11/2002 9:56:10 AM SGrobety Version: 1.1 Changed URL encoding Rev 1.0 21/11/2002 12:42:52 PM SGrobety Version: Indy 10 Rev 1.0 11/13/2002 08:04:10 AM JPMugaas } unit IdURI; {Details of implementation ------------------------- 2002-Apr-14 Peter Mee - Fixed reset. Now resets FParams as well - wasn't before. 2001-Nov Doychin Bondzhev - Fixes in URLEncode. There is difference when encoding Path+Doc and Params 2001-Oct-17 Peter Mee - Minor speed improvement - removed use of NormalizePath in SetURI. - Fixed bug that was cutting off the first two chars of the host when a username / password present. - Fixed bug that prevented username and password being updated. - Fixed bug that was leaving the bookmark in the document when no ? or = parameters existed. 2001-Feb-18 Doychin Bondzhev - Added UserName and Password to support URI's like http://username:password@hostname:port/path/document#bookmark } interface {$i IdCompilerDefines.inc} uses IdException, IdGlobal; type TIdURIOptionalFields = (ofAuthInfo, ofBookmark); TIdURIOptionalFieldsSet = set of TIdURIOptionalFields; TIdURI = class protected FDocument: string; FProtocol: string; FURI: String; FPort: string; Fpath: string; FHost: string; FBookmark: string; FUserName: string; FPassword: string; FParams: string; FIPVersion: TIdIPVersion; // procedure SetURI(const Value: String); function GetURI: String; public constructor Create(const AURI: string = ''); virtual; {Do not Localize} function GetFullURI(const AOptionalFields: TIdURIOptionalFieldsSet = [ofAuthInfo, ofBookmark]): String; function GetPathAndParams: String; class procedure NormalizePath(var APath: string); class function URLDecode(ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ADestEncoding: IIdTextEncoding = nil{$ENDIF} ): string; class function URLEncode(const ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ASrcEncoding: IIdTextEncoding = nil{$ENDIF} ): string; class function ParamsEncode(const ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ASrcEncoding: IIdTextEncoding = nil{$ENDIF} ): string; class function PathEncode(const ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ASrcEncoding: IIdTextEncoding = nil{$ENDIF} ): string; // property Bookmark : string read FBookmark write FBookMark; property Document: string read FDocument write FDocument; property Host: string read FHost write FHost; property Password: string read FPassword write FPassword; property Path: string read FPath write FPath; property Params: string read FParams write FParams; property Port: string read FPort write FPort; property Protocol: string read FProtocol write FProtocol; property URI: string read GetURI write SetURI; property Username: string read FUserName write FUserName; property IPVersion : TIdIPVersion read FIPVersion write FIPVersion; end; EIdURIException = class(EIdException); implementation uses IdGlobalProtocols, IdResourceStringsProtocols, IdUriUtils, SysUtils; { TIdURI } constructor TIdURI.Create(const AURI: string = ''); {Do not Localize} begin inherited Create; if length(AURI) > 0 then begin URI := AURI; end; end; class procedure TIdURI.NormalizePath(var APath: string); var i: Integer; LChar: Char; {$IFDEF STRING_IS_IMMUTABLE} LSB: TIdStringBuilder; {$ENDIF} begin {$IFDEF STRING_IS_IMMUTABLE} LSB := nil; {$ENDIF} // Normalize the directory delimiters to follow the UNIX syntax // RLebeau 8/10/2010: only normalize within the actual path, // nothing outside of it... i := Pos(':', APath); {do not localize} if i > 0 then begin Inc(i); // if the path does not already begin with '//', then do not // normalize the first two characters if they would produce // '//', as that will change the semantics of the URL... if CharIsInSet(APath, I, '\/') and CharIsInSet(APath, I+1, '\/') then begin Inc(i, 2); end; end else begin i := 1; end; while i <= Length(APath) do begin LChar := APath[i]; {$IFDEF STRING_IS_ANSI} if IsLeadChar(LChar) then begin Inc(i, 2); Continue; end; {$ENDIF} if (LChar = '?') or (LChar = '#') then begin {Do not Localize} // stop normalizing at query/fragment portion of the URL Break; end; if LChar = '\' then begin {Do not Localize} {$IFDEF STRING_IS_IMMUTABLE} if LSB = nil then begin LSB := TIdStringBuilder.Create(APath); end; LSB[i-1] := '/'; {Do not Localize} {$ELSE} APath[i] := '/'; {Do not Localize} {$ENDIF} end; Inc(i); end; {$IFDEF STRING_IS_IMMUTABLE} if LSB <> nil then begin APath := LSB.ToString; end; {$ENDIF} end; procedure TIdURI.SetURI(const Value: String); var LBuffer: string; LTokenPos: Integer; LURI: string; begin FURI := Value; NormalizePath(FURI); LURI := FURI; FHost := ''; {Do not Localize} FProtocol := ''; {Do not Localize} FPath := ''; {Do not Localize} FDocument := ''; {Do not Localize} FPort := ''; {Do not Localize} FBookmark := ''; {Do not Localize} FUsername := ''; {Do not Localize} FPassword := ''; {Do not Localize} FParams := ''; {Do not localise} //Peter Mee FIPVersion := Id_IPv4; LTokenPos := IndyPos('://', LURI); {Do not Localize} if LTokenPos > 0 then begin // absolute URI // What to do when data don't match configuration ?? {Do not Localize} // Get the protocol FProtocol := Copy(LURI, 1, LTokenPos - 1); Delete(LURI, 1, LTokenPos + 2); // separate the path from the parameters LTokenPos := IndyPos('?', LURI); {Do not Localize} // RLebeau: this is BAD! It messes up JSP and similar URLs that use '=' characters in the document {if LTokenPos = 0 then begin LTokenPos := IndyPos('=', LURI); {Do not Localize end;} if LTokenPos > 0 then begin FParams := Copy(LURI, LTokenPos + 1, MaxInt); LURI := Copy(LURI, 1, LTokenPos - 1); // separate the bookmark from the parameters LTokenPos := IndyPos('#', FParams); {Do not Localize} if LTokenPos > 0 then begin {Do not Localize} FBookmark := FParams; FParams := Fetch(FBookmark, '#'); {Do not Localize} end; end else begin // separate the path from the bookmark LTokenPos := IndyPos('#', LURI); {Do not Localize} if LTokenPos > 0 then begin {Do not Localize} FBookmark := Copy(LURI, LTokenPos + 1, MaxInt); LURI := Copy(LURI, 1, LTokenPos - 1); end; end; // Get the user name, password, host and the port number LBuffer := Fetch(LURI, '/', True); {Do not Localize} // Get username and password LTokenPos := RPos('@', LBuffer); {Do not Localize} if LTokenPos > 0 then begin FPassword := Copy(LBuffer, 1, LTokenPos - 1); Delete(LBuffer, 1, LTokenPos); FUserName := Fetch(FPassword, ':'); {Do not Localize} // Ignore cases where there is only password (http://:password@host/pat/doc) if Length(FUserName) = 0 then begin FPassword := ''; {Do not Localize} end; end; // Get the host and the port number if (IndyPos('[', LBuffer) > 0) and (IndyPos(']', LBuffer) > IndyPos('[', LBuffer)) then begin {Do not Localize} //This is for IPv6 Hosts FHost := Fetch(LBuffer, ']'); {Do not Localize} Fetch(FHost, '['); {Do not Localize} Fetch(LBuffer, ':'); {Do not Localize} FIPVersion := Id_IPv6; end else begin FHost := Fetch(LBuffer, ':', True); {Do not Localize} end; FPort := LBuffer; // Get the path LTokenPos := RPos('/', LURI, -1); if LTokenPos > 0 then begin FPath := '/' + Copy(LURI, 1, LTokenPos); {Do not Localize} Delete(LURI, 1, LTokenPos); end else begin FPath := '/'; {Do not Localize} end; end else begin // received an absolute path, not an URI LTokenPos := IndyPos('?', LURI); {Do not Localize} // RLebeau: this is BAD! It messes up JSP and similar URLs that use '=' characters in the document {if LTokenPos = 0 then begin LTokenPos := IndyPos('=', LURI); {Do not Localize end;} if LTokenPos > 0 then begin // The case when there is parameters after the document name FParams := Copy(LURI, LTokenPos + 1, MaxInt); LURI := Copy(LURI, 1, LTokenPos - 1); // separate the bookmark from the parameters LTokenPos := IndyPos('#', FParams); {Do not Localize} if LTokenPos > 0 then begin FBookmark := FParams; FParams := Fetch(FBookmark, '#'); {Do not Localize} end; end else begin // separate the bookmark from the path LTokenPos := IndyPos('#', LURI); {Do not Localize} if LTokenPos > 0 then begin // The case when there is a bookmark after the document name FBookmark := Copy(LURI, LTokenPos + 1, MaxInt); LURI := Copy(LURI, 1, LTokenPos - 1); end; end; // Get the path LTokenPos := RPos('/', LURI, -1); {Do not Localize} if LTokenPos > 0 then begin FPath := Copy(LURI, 1, LTokenPos); Delete(LURI, 1, LTokenPos); end; end; // Get the document FDocument := LURI; end; function TIdURI.GetURI: String; begin FURI := GetFullURI; // Result must contain only the proto://host/path/document // If you need the full URI then you have to call GetFullURI Result := GetFullURI([]); end; class function TIdURI.URLDecode(ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ADestEncoding: IIdTextEncoding = nil{$ENDIF} ): string; var i: Integer; ESC: string; LChars: TIdWideChars; LBytes: TIdBytes; begin Result := ''; {Do not Localize} LChars := nil; LBytes := nil; EnsureEncoding(AByteEncoding, encUTF8); // S.G. 27/11/2002: Spaces is NOT to be encoded as "+". // S.G. 27/11/2002: "+" is a field separator in query parameter, space is... // S.G. 27/11/2002: well, a space // ASrc := ReplaceAll(ASrc, '+', ' '); {do not localize} i := 1; while i <= Length(ASrc) do begin if ASrc[i] <> '%' then begin {do not localize} AppendByte(LBytes, Ord(ASrc[i])); // Copy the char Inc(i); // Then skip it end else begin Inc(i); // skip the % char if not CharIsInSet(ASrc, i, 'uU') then begin {do not localize} // simple ESC char ESC := Copy(ASrc, i, 2); // Copy the escape code Inc(i, 2); // Then skip it. try AppendByte(LBytes, Byte(IndyStrToInt('$' + ESC))); {do not localize} except end; end else begin // unicode ESC code // RLebeau 5/10/2006: under Win32, the character will likely end // up as '?' in the Result when converted from Unicode to Ansi, // but at least the URL will be parsed properly ESC := Copy(ASrc, i+1, 4); // Copy the escape code Inc(i, 5); // Then skip it. try if LChars = nil then begin SetLength(LChars, 1); end; LChars[0] := WideChar(IndyStrToInt('$' + ESC)); {do not localize} AppendBytes(LBytes, AByteEncoding.GetBytes(LChars)); except end; end; end; end; {$IFDEF STRING_IS_ANSI} EnsureEncoding(ADestEncoding, encOSDefault); CheckByteEncoding(LBytes, AByteEncoding, ADestEncoding); SetString(Result, PAnsiChar(LBytes), Length(LBytes)); {$ELSE} Result := AByteEncoding.GetString(LBytes); {$ENDIF} end; class function TIdURI.ParamsEncode(const ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ASrcEncoding: IIdTextEncoding = nil{$ENDIF} ): string; const UnsafeChars: TIdUnicodeString = '*<>#%"{}|\^[]`'; {do not localize} var I, J, CharLen, ByteLen: Integer; Buf: TIdBytes; {$IFDEF STRING_IS_ANSI} LChars: TIdWideChars; {$ENDIF} LChar: WideChar; begin Result := ''; {Do not Localize} // keep the compiler happy Buf := nil; {$IFDEF STRING_IS_ANSI} LChars := nil; {$ENDIF} if ASrc = '' then begin Exit; end; EnsureEncoding(AByteEncoding, encUTF8); {$IFDEF STRING_IS_ANSI} EnsureEncoding(ASrcEncoding, encOSDefault); LChars := ASrcEncoding.GetChars(RawToBytes(ASrc[1], Length(ASrc))); {$ENDIF} // 2 Chars to handle UTF-16 surrogates SetLength(Buf, AByteEncoding.GetMaxByteCount(2)); I := 0; while I < Length({$IFDEF STRING_IS_UNICODE}ASrc{$ELSE}LChars{$ENDIF}) do begin LChar := {$IFDEF STRING_IS_UNICODE}ASrc[I+1]{$ELSE}LChars[I]{$ENDIF}; // S.G. 27/11/2002: Changed the parameter encoding: Even in parameters, a space // S.G. 27/11/2002: is much more likely to be meaning "space" than "this is // S.G. 27/11/2002: a new parameter" // S.G. 27/11/2002: ref: Message-ID: <3de30169@newsgroups.borland.com> borland.public.delphi.internet.winsock // S.G. 27/11/2002: Most low-ascii is actually Ok in parameters encoding. // RLebeau 1/7/09: using Char() for #128-#255 because in D2009, the compiler // may change characters >= #128 from their Ansi codepage value to their true // Unicode codepoint value, depending on the codepage used for the source code. // For instance, #128 may become #$20AC... if WideCharIsInSet(UnsafeChars, LChar) or (Ord(LChar) < 33) or (Ord(LChar) > 127) then begin CharLen := CalcUTF16CharLength( {$IFDEF STRING_IS_UNICODE}ASrc, I+1{$ELSE}LChars, I{$ENDIF} ); // calculate length including surrogates ByteLen := AByteEncoding.GetBytes( {$IFDEF STRING_IS_UNICODE}ASrc, I+1{$ELSE}LChars, I{$ENDIF}, CharLen, Buf, 0); // explicit Unicode->Ansi conversion for J := 0 to ByteLen-1 do begin Result := Result + '%' + IntToHex(Ord(Buf[J]), 2); {do not localize} end; Inc(I, CharLen); end else begin Result := Result + Char(LChar); Inc(I); end; end; end; class function TIdURI.PathEncode(const ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ASrcEncoding: IIdTextEncoding = nil{$ENDIF} ): string; const UnsafeChars = '*<>#%"{}|\^[]`+'; {do not localize} var I, J, CharLen, ByteLen: Integer; Buf: TIdBytes; {$IFDEF STRING_IS_ANSI} LChars: TIdWideChars; {$ENDIF} LChar: WideChar; begin Result := ''; {Do not Localize} // keep the compiler happy Buf := nil; {$IFDEF STRING_IS_ANSI} LChars := nil; {$ENDIF} if ASrc = '' then begin Exit; end; EnsureEncoding(AByteEncoding, encUTF8); {$IFDEF STRING_IS_ANSI} EnsureEncoding(ASrcEncoding, encOSDefault); LChars := ASrcEncoding.GetChars(RawToBytes(ASrc[1], Length(ASrc))); {$ENDIF} // 2 Chars to handle UTF-16 surrogates SetLength(Buf, AByteEncoding.GetMaxByteCount(2)); I := 0; while I < Length({$IFDEF STRING_IS_UNICODE}ASrc{$ELSE}LChars{$ENDIF}) do begin LChar := {$IFDEF STRING_IS_UNICODE}ASrc[I+1]{$ELSE}LChars[I]{$ENDIF}; if WideCharIsInSet(UnsafeChars, LChar) or (Ord(LChar) < 33) or (Ord(LChar) > 127) then begin CharLen := CalcUTF16CharLength( {$IFDEF STRING_IS_UNICODE}ASrc, I+1{$ELSE}LChars, I{$ENDIF} ); // calculate length including surrogates ByteLen := AByteEncoding.GetBytes( {$IFDEF STRING_IS_UNICODE}ASrc, I+1{$ELSE}LChars, I{$ENDIF}, CharLen, Buf, 0); // explicit Unicode->Ansi conversion for J := 0 to ByteLen-1 do begin Result := Result + '%' + IntToHex(Ord(Buf[J]), 2); {do not localize} end; Inc(I, CharLen); end else begin Result := Result + Char(LChar); Inc(I); end; end; end; class function TIdURI.URLEncode(const ASrc: string; AByteEncoding: IIdTextEncoding = nil {$IFDEF STRING_IS_ANSI}; ASrcEncoding: IIdTextEncoding = nil{$ENDIF} ): string; var LUri: TIdURI; begin LUri := TIdURI.Create(ASrc); try LUri.Path := PathEncode(LUri.Path, AByteEncoding {$IFDEF STRING_IS_ANSI}, ASrcEncoding{$ENDIF} ); LUri.Document := PathEncode(LUri.Document, AByteEncoding {$IFDEF STRING_IS_ANSI}, ASrcEncoding{$ENDIF} ); LUri.Params := ParamsEncode(LUri.Params, AByteEncoding {$IFDEF STRING_IS_ANSI}, ASrcEncoding{$ENDIF} ); Result := LUri.URI; finally LUri.Free; end; end; function TIdURI.GetFullURI(const AOptionalFields: TIdURIOptionalFieldsSet): String; var LURI: String; begin if FProtocol = '' then begin raise EIdURIException.Create(RSURINoProto); end; if FHost = '' then begin raise EIdURIException.Create(RSURINoHost); end; LURI := FProtocol + '://'; {Do not Localize} if (FUserName <> '') and (ofAuthInfo in AOptionalFields) then begin LURI := LURI + FUserName; if FPassword <> '' then begin LURI := LURI + ':' + FPassword; {Do not Localize} end; LURI := LURI + '@'; {Do not Localize} end; if IPVersion = Id_IPv6 then begin LURI := LURI + '[' + FHost + ']'; {Do not Localize} end else begin LURI := LURI + FHost; end; if FPort <> '' then begin case PosInStrArray(FProtocol, ['HTTP', 'HTTPS', 'FTP'], False) of {Do not Localize} 0: begin if FPort <> '80' then begin LURI := LURI + ':' + FPort; {Do not Localize} end; end; 1: begin if FPort <> '443' then begin LURI := LURI + ':' + FPort; {Do not Localize} end; end; 2: begin if FPort <> '21' then begin LURI := LURI + ':' + FPort; {Do not Localize} end; end; else begin LURI := LURI + ':' + FPort; {Do not Localize} end; end; end; LURI := LURI + GetPathAndParams; if (FBookmark <> '') and (ofBookmark in AOptionalFields) then begin LURI := LURI + '#' + FBookmark; {Do not Localize} end; Result := LURI; end; function TIdURI.GetPathAndParams: String; begin Result := FPath + FDocument; if FParams <> '' then begin Result := Result + '?' + FParams; {Do not Localize} end; end; end.