restemplate/indy/Protocols/IdCoderHeader.pas

601 lines
19 KiB
Plaintext

{
$Project$
$Workfile$
$Revision$
$DateUTC$
$Id$
This file is part of the Indy (Internet Direct) project, and is offered
under the dual-licensing agreement described on the Indy website.
(http://www.indyproject.org/)
Copyright:
(c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
$Log$
Rev 1.13 9/8/2004 8:55:46 PM JPMugaas
Fix for compile problem where a char is being compared with an incompatible
type in some compilers.
Rev 1.12 02/07/2004 21:59:28 CCostelloe
Bug fix
Rev 1.11 17/06/2004 14:19:00 CCostelloe
Bug fix for long subject lines that have characters needing CharSet encoding
Rev 1.10 23/04/2004 20:33:04 CCostelloe
Minor change to support From headers holding multiple addresses
Rev 1.9 2004.02.03 5:44:58 PM czhower
Name changes
Rev 1.8 24/01/2004 19:08:14 CCostelloe
Cleaned up warnings
Rev 1.7 1/22/2004 3:56:38 PM SPerry
fixed set problems
Rev 1.6 2004.01.22 2:34:58 PM czhower
TextIsSame + D8 bug workaround
Rev 1.5 10/16/2003 11:11:02 PM DSiders
Added localization comments.
Rev 1.4 10/8/2003 9:49:36 PM GGrieve
Use IdDelete
Rev 1.3 6/10/2003 5:48:46 PM SGrobety
DotNet updates
Rev 1.2 04/09/2003 20:35:28 CCostelloe
Parameter AUseAddressForNameIfNameMissing (defaulting to False to preserve
existing code) added to EncodeAddressItem
Rev 1.1 2003.06.23 9:46:52 AM czhower
Russian, Ukranian support for headers.
Rev 1.0 11/14/2002 02:14:46 PM JPMugaas
}
unit IdCoderHeader;
//refer http://www.faqs.org/rfcs/rfc2047.html
//TODO: Optimize and restructure code
//TODO: Redo this unit to fit with the new coders and use the exisiting MIME stuff
{
2002-08-21 JM Berg
- brought in line with the RFC regarding
whitespace between encoded words
- added logic so that lines that already seem encoded are really encoded again
(so that if a user types =?iso8859-1?Q?======?= its really encoded again
and displayed like that on the other side)
2001-Nov-18 Peter Mee
- Fixed multiple QP decoding in single header.
11-10-2001 - J. Peter Mugaas
- tiny fix for 8bit header encoding suggested by Andrew P.Rybin
}
interface
{$i IdCompilerDefines.inc}
uses
Classes,
IdComponent,
IdEMailAddress,
IdHeaderCoderBase;
// Procs
function EncodeAddressItem(EmailAddr: TIdEmailAddressItem; const HeaderEncoding: Char;
const MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string;
function EncodeHeader(const Header: string; Specials: String; const HeaderEncoding: Char;
const MimeCharSet: string): string;
function EncodeAddress(EmailAddr: TIdEMailAddressList; const HeaderEncoding: Char;
const MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string;
function DecodeHeader(const Header: string): string;
procedure DecodeAddress(EMailAddr: TIdEmailAddressItem);
procedure DecodeAddresses(AEMails: String; EMailAddr: TIdEmailAddressList);
implementation
uses
IdException,
IdGlobal,
IdGlobalProtocols,
IdAllHeaderCoders,
SysUtils;
const
csAddressSpecials: String = '()[]<>:;.,@\"'; {Do not Localize}
base64_tbl: array [0..63] of Char = (
'A','B','C','D','E','F','G','H', {Do not Localize}
'I','J','K','L','M','N','O','P', {Do not Localize}
'Q','R','S','T','U','V','W','X', {Do not Localize}
'Y','Z','a','b','c','d','e','f', {Do not Localize}
'g','h','i','j','k','l','m','n', {Do not Localize}
'o','p','q','r','s','t','u','v', {Do not Localize}
'w','x','y','z','0','1','2','3', {Do not Localize}
'4','5','6','7','8','9','+','/'); {Do not Localize}
function EncodeAddressItem(EmailAddr: TIdEmailAddressItem; const HeaderEncoding: Char;
const MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string;
var
S : string;
I : Integer;
NeedEncode : Boolean;
begin
if AUseAddressForNameIfNameMissing and (EmailAddr.Name = '') then begin
{CC: Use Address as Name...}
EmailAddr.Name := EmailAddr.Address;
end;
if EmailAddr.Name <> '' then {Do not Localize}
begin
NeedEncode := False;
for I := 1 to Length(EmailAddr.Name) do begin
if (EmailAddr.Name[I] < #32) or (EmailAddr.Name[I] >= #127) then
begin
NeedEncode := True;
Break;
end;
end;
if NeedEncode then begin
S := EncodeHeader(EmailAddr.Name, csAddressSpecials, HeaderEncoding, MimeCharSet);
end else begin
{ quoted string }
S := '"'; {Do not Localize}
for I := 1 to Length(EmailAddr.Name) do
begin { quote special characters }
if (EmailAddr.Name[I] = '\') or (EmailAddr.Name[I] = '"') then begin
S := S + '\'; {Do not Localize}
end;
S := S + EmailAddr.Name[I];
end;
S := S + '"'; {Do not Localize}
end;
Result := IndyFormat('%s <%s>', [S, EmailAddr.Address]) {Do not Localize}
end
else begin
Result := IndyFormat('%s', [EmailAddr.Address]); {Do not Localize}
end;
end;
function B64(AChar: Char): Byte;
//TODO: Make this use the more efficient MIME Coder
begin
for Result := Low(base64_tbl) to High(base64_tbl) do begin
if AChar = base64_tbl[Result] then begin
Exit;
end;
end;
Result := 0;
end;
function DecodeHeader(const Header: string): string;
var
HeaderCharSet, HeaderEncoding, HeaderData, S: string;
LDecoded: Boolean;
LStartPos, LLength, LEncodingStartPos, LEncodingEndPos, LLastStartPos: Integer;
LLastWordWasEncoded: Boolean;
Buf: TIdBytes;
function ExtractEncoding(const AHeader: string; const AStartPos: Integer;
var VStartPos, VEndPos: Integer; var VCharSet, VEncoding, VData: String): Boolean;
var
LCharSet, LEncoding, LData, LDataEnd: Integer;
begin
Result := False;
//we need a '=? followed by 2 question marks followed by a '?='. {Do not Localize}
//to find the end of the substring, we can't just search for '?=', {Do not Localize}
//example: '=?ISO-8859-1?Q?=E4?=' {Do not Localize}
LCharSet := PosIdx('=?', AHeader, AStartPos); {Do not Localize}
if (LCharSet = 0) or (LCharSet > VEndPos) then begin
Exit;
end;
Inc(LCharSet, 2);
LEncoding := PosIdx('?', AHeader, LCharSet); {Do not Localize}
if (LEncoding = 0) or (LEncoding > VEndPos) then begin
Exit;
end;
Inc(LEncoding);
LData := PosIdx('?', AHeader, LEncoding); {Do not Localize}
if (LData = 0) or (LData > VEndPos) then begin
Exit;
end;
Inc(LData);
LDataEnd := PosIdx('?=', AHeader, LData); {Do not Localize}
if (LDataEnd = 0) or (LDataEnd > VEndPos) then begin
Exit;
end;
Inc(LDataEnd);
VStartPos := LCharSet-2;
VEndPos := LDataEnd;
VCharSet := Copy(AHeader, LCharSet, LEncoding-LCharSet-1);
VEncoding := Copy(AHeader, LEncoding, LData-LEncoding-1);
VData := Copy(AHeader, LData, LDataEnd-LData-1);
Result := True;
end;
// TODO: use TIdCoderQuotedPrintable and TIdCoderMIME instead
function ExtractEncodedData(const AEncoding, AData: String; var VDecoded: TIdBytes): Boolean;
var
I, J: Integer;
a3: TIdBytes;
a4: array [0..3] of Byte;
begin
Result := False;
SetLength(VDecoded, 0);
case PosInStrArray(AEncoding, ['Q', 'B', '8'], False) of {Do not Localize}
0: begin // quoted-printable
I := 1;
while I <= Length(AData) do begin
if AData[i] = '_' then begin {Do not Localize}
AppendByte(VDecoded, Ord(' ')); {Do not Localize}
end
else if (AData[i] = '=') and (Length(AData) >= (i+2)) then begin //make sure we can access i+2
AppendByte(VDecoded, IndyStrToInt('$' + Copy(AData, i+1, 2), 32)); {Do not Localize}
Inc(I, 2);
end else
begin
AppendByte(VDecoded, Ord(AData[i]));
end;
Inc(I);
end;
Result := True;
end;
1: begin // base64
J := Length(AData) div 4;
if J > 0 then
begin
SetLength(a3, 3);
for I := 0 to J-1 do
begin
a4[0] := B64(AData[(I*4)+1]);
a4[1] := B64(AData[(I*4)+2]);
a4[2] := B64(AData[(I*4)+3]);
a4[3] := B64(AData[(I*4)+4]);
a3[0] := Byte((a4[0] shl 2) or (a4[1] shr 4));
a3[1] := Byte((a4[1] shl 4) or (a4[2] shr 2));
a3[2] := Byte((a4[2] shl 6) or (a4[3] shr 0));
if AData[(I*4)+4] = '=' then begin
if AData[(I*4)+3] = '=' then begin
AppendByte(VDecoded, a3[0]);
end else begin
AppendBytes(VDecoded, a3, 0, 2);
end;
Break;
end else begin
AppendBytes(VDecoded, a3, 0, 3);
end;
end;
end;
Result := True;
end;
2: begin // 8-bit
{$IFDEF STRING_IS_ANSI}
if AData <> '' then begin
VDecoded := RawToBytes(AData[1], Length(AData));
end;
{$ELSE}
VDecoded := IndyTextEncoding_8Bit.GetBytes(AData);
{$ENDIF}
Result := True;
end;
end;
end;
begin
Result := Header;
LStartPos := 1;
LLength := Length(Result);
LLastWordWasEncoded := False;
LLastStartPos := LStartPos;
while LStartPos <= LLength do
begin
// valid encoded words can not contain spaces
// if the user types something *almost* like an encoded word,
// and its sent as-is, we need to find this!!
LStartPos := FindFirstNotOf(LWS, Result, LLength, LStartPos);
if LStartPos = 0 then begin
Break;
end;
LEncodingEndPos := FindFirstOf(LWS, Result, LLength, LStartPos);
if LEncodingEndPos <> 0 then begin
Dec(LEncodingEndPos);
end else begin
LEncodingEndPos := LLength;
end;
if ExtractEncoding(Result, LStartPos, LEncodingStartPos, LEncodingEndPos, HeaderCharSet, HeaderEncoding, HeaderData) then
begin
LDecoded := False;
if ExtractEncodedData(HeaderEncoding, HeaderData, Buf) then begin
LDecoded := DecodeHeaderData(HeaderCharSet, Buf, S);
end;
if LDecoded then
begin
//replace old substring in header with decoded string,
// ignoring whitespace that separates encoded words:
if LLastWordWasEncoded then begin
Result := Copy(Result, 1, LLastStartPos - 1) + S + Copy(Result, LEncodingEndPos + 1, MaxInt);
LStartPos := LLastStartPos + Length(S);
end else begin
Result := Copy(Result, 1, LEncodingStartPos - 1) + S + Copy(Result, LEncodingEndPos + 1, MaxInt);
LStartPos := LEncodingStartPos + Length(S);
end;
end else
begin
// could not decode the data, so preserve it in case the user
// wants to do it manually. Though, they really should use the
// IdHeaderCoderBase.GHeaderDecodingNeeded hook for that instead...
LStartPos := LEncodingEndPos + 1;
end;
LLength := Length(Result);
LLastWordWasEncoded := True;
LLastStartPos := LStartPos;
end else
begin
LStartPos := FindFirstOf(LWS, Result, LLength, LStartPos);
if LStartPos = 0 then begin
Break;
end;
LLastWordWasEncoded := False;
end;
end;
end;
procedure DecodeAddress(EMailAddr : TIdEmailAddressItem);
begin
EMailAddr.Name := UnquotedStr(DecodeHeader(EMailAddr.Name));
end;
procedure DecodeAddresses(AEMails : String; EMailAddr: TIdEmailAddressList);
var
idx : Integer;
begin
EMailAddr.EMailAddresses := AEMails;
for idx := 0 to EMailAddr.Count-1 do begin
DecodeAddress(EMailAddr[idx]);
end;
end;
function EncodeAddress(EmailAddr: TIdEMailAddressList; const HeaderEncoding: Char;
const MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string;
var
idx : Integer;
begin
if EmailAddr.Count > 0 then begin
Result := EncodeAddressItem(EMailAddr[0], HeaderEncoding, MimeCharSet, AUseAddressForNameIfNameMissing);
for idx := 1 to EmailAddr.Count-1 do begin
Result := Result + ', ' + {Do not Localize}
EncodeAddressItem(EMailAddr[idx], HeaderEncoding, MimeCharSet, AUseAddressForNameIfNameMissing);
end;
end else begin
Result := ''; {Do not Localize}
end;
end;
{ encode a header field if non-ASCII characters are used }
function EncodeHeader(const Header: string; Specials: String; const HeaderEncoding: Char;
const MimeCharSet: string): string;
const
SPACES = [Ord(' '), 9, 13, 10]; {Do not Localize}
var
T: string;
Buf: TIdBytes;
L, P, Q, R: Integer;
B0, B1, B2: Integer;
InEncode: Integer;
NeedEncode: Boolean;
csNoEncode, csNoReqQuote, csSpecials: TIdBytes;
BeginEncode, EndEncode: string;
procedure EncodeWord(AP: Integer);
const
MaxEncLen = 75;
var
LQ: Integer;
EncLen: Integer;
Enc1: string;
begin
T := T + BeginEncode;
if L < AP then AP := L + 1;
LQ := InEncode;
InEncode := -1;
EncLen := Length(BeginEncode) + 2;
case PosInStrArray(HeaderEncoding, ['Q', 'B'], False) of {Do not Localize}
0: begin { quoted-printable }
while LQ < AP do
begin
if Buf[LQ] = Ord(' ') then begin {Do not Localize}
Enc1 := '_'; {Do not Localize}
end
else if (not ByteIsInSet(Buf, LQ, csNoReqQuote)) or ByteIsInSet(Buf, LQ, csSpecials) then begin
Enc1 := '=' + IntToHex(Buf[LQ], 2); {Do not Localize}
end
else begin
Enc1 := Char(Buf[LQ]);
end;
if (EncLen + Length(Enc1)) > MaxEncLen then begin
//T := T + EndEncode + #13#10#9 + BeginEncode;
//CC: The #13#10#9 above caused the subsequent call to FoldWrapText to
//insert an extra #13#10 which, being a blank line in the headers,
//was interpreted by email clients, etc., as the end of the headers
//and the start of the message body. FoldWrapText seems to look for
//and treat correctly the sequence #13#10 + ' ' however...
T := T + EndEncode + EOL + ' ' + BeginEncode;
EncLen := Length(BeginEncode) + 2;
end;
T := T + Enc1;
Inc(EncLen, Length(Enc1));
Inc(LQ);
end;
end;
1: begin { base64 }
while LQ < AP do begin
if (EncLen + 4) > MaxEncLen then begin
//T := T + EndEncode + #13#10#9 + BeginEncode;
//CC: The #13#10#9 above caused the subsequent call to FoldWrapText to
//insert an extra #13#10 which, being a blank line in the headers,
//was interpreted by email clients, etc., as the end of the headers
//and the start of the message body. FoldWrapText seems to look for
//and treat correctly the sequence #13#10 + ' ' however...
T := T + EndEncode + EOL + ' ' + BeginEncode;
EncLen := Length(BeginEncode) + 2;
end;
B0 := Buf[LQ];
case AP - LQ of
1:
begin
T := T + base64_tbl[B0 shr 2] + base64_tbl[B0 and $03 shl 4] + '=='; {Do not Localize}
end;
2:
begin
B1 := Buf[LQ + 1];
T := T + base64_tbl[B0 shr 2] +
base64_tbl[B0 and $03 shl 4 + B1 shr 4] +
base64_tbl[B1 and $0F shl 2] + '='; {Do not Localize}
end;
else
begin
B1 := Buf[LQ + 1];
B2 := Buf[LQ + 2];
T := T + base64_tbl[B0 shr 2] +
base64_tbl[B0 and $03 shl 4 + B1 shr 4] +
base64_tbl[B1 and $0F shl 2 + B2 shr 6] +
base64_tbl[B2 and $3F];
end;
end;
Inc(EncLen, 4);
Inc(LQ, 3);
end;
end;
end;
T := T + EndEncode;
end;
function CreateEncodeRange(AStart, AEnd: Byte): TIdBytes;
var
I: Integer;
begin
SetLength(Result, AEnd-AStart+1);
for I := 0 to Length(Result)-1 do begin
Result[I] := AStart+I;
end;
end;
begin
if Header = '' then begin
Result := '';
Exit;
end;
// TODO: this function needs to take encoded codeunits into account when
// deciding where to split the encoded data between adjacent encoded-words,
// so that a single encoded character does not get split between encoded-words
// thus corrupting that character...
Buf := EncodeHeaderData(MimeCharSet, Header);
{Suggested by Andrew P.Rybin for easy 8bit support}
if HeaderEncoding = '8' then begin {Do not Localize}
Result := BytesToStringRaw(Buf);
Exit;
end;//if
// RLebeau 1/7/09: using Char() for #128-#255 because in D2009, the compiler
// may change characters >= #128 from their Ansi codepage value to their true
// Unicode codepoint value, depending on the codepage used for the source code.
// For instance, #128 may become #$20AC...
// RLebeau 2/12/09: changed the logic to use "no-encode" sets instead, so
// that words containing codeunits outside the ASCII range are always
// encoded. This is easier to manage when Unicode data is involved.
csNoEncode := CreateEncodeRange(32, 126);
csNoReqQuote := CreateEncodeRange(33, 60);
AppendByte(csNoReqQuote, 62);
AppendBytes(csNoReqQuote, CreateEncodeRange(64, 94));
AppendBytes(csNoReqQuote, CreateEncodeRange(96, 126));
csSpecials := ToBytes(Specials, IndyTextEncoding_8Bit);
BeginEncode := '=?' + MimeCharSet + '?' + HeaderEncoding + '?'; {Do not Localize}
EndEncode := '?='; {Do not Localize}
// JMBERG: We want to encode stuff that the user typed
// as if it already is encoded!!
if DecodeHeader(Header) <> Header then begin
RemoveBytes(csNoEncode, 1, ByteIndex(Ord('='), csNoEncode));
end;
L := Length(Buf);
P := 0;
T := ''; {Do not Localize}
InEncode := -1;
while P < L do
begin
Q := P;
while (P < L) and (Buf[P] in SPACES) do begin
Inc(P);
end;
R := P;
NeedEncode := False;
while (P < L) and (not (Buf[P] in SPACES)) do begin
if (not ByteIsInSet(Buf, P, csNoEncode)) or ByteIsInSet(Buf, P, csSpecials) then begin
NeedEncode := True;
end;
Inc(P);
end;
if NeedEncode then begin
if InEncode = -1 then begin
T := T + BytesToString(Buf, Q, R - Q);
InEncode := R;
end;
end else
begin
if InEncode <> -1 then begin
EncodeWord(Q);
end;
T := T + BytesToString(Buf, Q, P - Q);
end;
end;
if InEncode <> -1 then begin
EncodeWord(P);
end;
Result := T;
end;
end.