512 lines
18 KiB
Plaintext
512 lines
18 KiB
Plaintext
{
|
|
$Project$
|
|
$Workfile$
|
|
$Revision$
|
|
$DateUTC$
|
|
$Id$
|
|
|
|
This file is part of the Indy (Internet Direct) project, and is offered
|
|
under the dual-licensing agreement described on the Indy website.
|
|
(http://www.indyproject.org/)
|
|
|
|
Copyright:
|
|
(c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
|
|
}
|
|
{
|
|
$Log$
|
|
}
|
|
{
|
|
Rev 1.7 10/6/2004 10:47:00 PM BGooijen
|
|
changed array indexer from 64 to 32 bit, it gave errors in dotnet, and making
|
|
>2GB arrays is not done anyways
|
|
|
|
Rev 1.6 2004.05.20 1:39:28 PM czhower
|
|
Last of the IdStream updates
|
|
|
|
Rev 1.5 2004.05.20 11:37:24 AM czhower
|
|
IdStreamVCL
|
|
|
|
Rev 1.4 2004.05.19 3:06:56 PM czhower
|
|
IdStream / .NET fix
|
|
|
|
Rev 1.3 2004.02.03 5:45:50 PM czhower
|
|
Name changes
|
|
|
|
Rev 1.2 1/21/2004 1:19:58 PM JPMugaas
|
|
InitComponent.
|
|
|
|
Rev 1.1 16/01/2004 18:00:26 CCostelloe
|
|
This is now working code.
|
|
|
|
Rev 1.0 14/01/2004 00:46:14 CCostelloe
|
|
An implementation of Apple's BinHex4 encoding. It is a "work-in-progress",
|
|
it does not yet work properly, only checked in as a placeholder.
|
|
}
|
|
|
|
unit IdCoderBinHex4;
|
|
|
|
{
|
|
Written by Ciaran Costelloe, ccostelloe@flogas.ie, December 2003.
|
|
Based on TIdCoderMIME, derived from TIdCoder3to4, derived from TIdCoder.
|
|
|
|
DESCRIPTION:
|
|
|
|
This is an implementation of the BinHex 4.0 decoder used particularly by Apple.
|
|
It is defined in RFC 1741. It is a variant of a 3-to-4 decoder, but it uses
|
|
character 90 for sequences of repeating characters, allowing some compression,
|
|
but thereby not allowing it to be mapped in as another 3-to-4 decoder.
|
|
Per the RFC, it must be encapsulated in a MIME part (it cannot be directly coded
|
|
inline in an email "body"), the part is strictly defined to have a header entry
|
|
(with the appropriate "myfile.ext"):
|
|
Content-Type: application/mac-binhex40; name="myfile.ext"
|
|
After the header, the part MUST start with the text (NOT indented):
|
|
(This file must be converted with BinHex 4.0)
|
|
This allows the option AND the ambiguity of identifying it by either the
|
|
Content-Type OR by the initial text line. However, it is also stated that any
|
|
text before the specified text line must be ignored, implying the line does not
|
|
have to be the first - an apparent contradiction.
|
|
The encoded file then follows, split with CRLFs (to avoid lines that are too long
|
|
for emails) that must be discarded.
|
|
The file starts with a colon (:), a header, followed by the file contents, and
|
|
ending in another colon.
|
|
There is also an interesting article on the web, "BinHex 4.0 Definition by Peter
|
|
N Lewis, Aug 1991", which has very useful information on what is implemeted in
|
|
practice, and seems to come with the good provenance of bitter experience.
|
|
|
|
From RFC 1741:
|
|
|
|
1) 8 bit encoding of the file:
|
|
|
|
Byte: Length of FileName (1->63)
|
|
Bytes: FileName ("Length" bytes)
|
|
Byte: Version
|
|
Long: Type
|
|
Long: Creator
|
|
Word: Flags (And $F800)
|
|
Long: Length of Data Fork
|
|
Long: Length of Resource Fork
|
|
Word: CRC
|
|
Bytes: Data Fork ("Data Length" bytes)
|
|
Word: CRC
|
|
Bytes: Resource Fork ("Rsrc Length" bytes)
|
|
Word: CRC
|
|
|
|
2) Compression of repetitive characters.
|
|
|
|
($90 is the marker, encoding is made for 3->255 characters)
|
|
|
|
00 11 22 33 44 55 66 77 -> 00 11 22 33 44 55 66 77
|
|
11 22 22 22 22 22 22 33 -> 11 22 90 06 33
|
|
11 22 90 33 44 -> 11 22 90 00 33 44
|
|
|
|
The whole file is considered as a stream of bits. This stream will
|
|
be divided in blocks of 6 bits and then converted to one of 64
|
|
characters contained in a table. The characters in this table have
|
|
been chosen for maximum noise protection. The format will start
|
|
with a ":" (first character on a line) and end with a ":".
|
|
There will be a maximum of 64 characters on a line. It must be
|
|
preceded, by this comment, starting in column 1 (it does not start
|
|
in column 1 in this document):
|
|
|
|
(This file must be converted with BinHex 4.0)
|
|
|
|
Any text before this comment is to be ignored.
|
|
|
|
The characters used are:
|
|
|
|
!"#$%&'()*+,- 012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr
|
|
|
|
IMPLEMENTATION NOTES:
|
|
|
|
There are older variants referred to in RFC 1741, but I have only come
|
|
across encodings in current use as separate MIME parts, which this
|
|
implementation is targetted at.
|
|
|
|
When encoding into BinHex4, you do NOT have to implement the run-length
|
|
encoding (the character 90 for sequences of repeating characters), and
|
|
this encoder does not do it. The CRC values generated in the header have
|
|
NOT been tested (because this decoder ignores them).
|
|
|
|
The decoder has to allow for the run-length encoding. The decoder works
|
|
irrespective of whether it is preceded by the identification string
|
|
or not (GBinHex4IdentificationString below). The string to be decoded must
|
|
include the starting and ending colons. It can deal with embedded CR and LFs.
|
|
Unlike base64 and quoted-printable, we cannot decode line-by-line cleanly,
|
|
because the lines do not contain a clean number of 4-byte blocks due to the
|
|
first line starting with a colon, leaving 63 bytes on that line, plus you have
|
|
the problem of dealing with the run-length encoding and stripping the header.
|
|
If the attachment only has a data fork, it is saved; if only a resource fork,
|
|
it is saved; if both, only the data fork is saved. The decoder does NOT
|
|
check that the CRC values are correct.
|
|
|
|
Indy units use the content-type to decide if the part is BinHex4:
|
|
Content-Type: application/mac-binhex40; name="myfile.ext"
|
|
|
|
WARNING: This code only implements BinHex4.0 when used as a part in a
|
|
MIME-encoded email. To have a part encoded, set the parts
|
|
ContentTransfer := 'binhex40'.
|
|
}
|
|
|
|
interface
|
|
|
|
{$i IdCompilerDefines.inc}
|
|
|
|
uses
|
|
Classes,
|
|
IdException,
|
|
IdCoder,
|
|
IdCoder3to4,
|
|
IdGlobal,
|
|
IdStream,
|
|
SysUtils;
|
|
|
|
type
|
|
TIdEncoderBinHex4 = class(TIdEncoder3to4)
|
|
protected
|
|
FFileName: String;
|
|
function GetCRC(const ABlock: TIdBytes; const AOffset: Integer = 0; const ASize: Integer = -1): Word;
|
|
procedure AddByteCRC(var ACRC: Word; AByte: Byte);
|
|
procedure InitComponent; override;
|
|
public
|
|
{$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
|
|
constructor Create(AOwner: TComponent); reintroduce; overload;
|
|
{$ENDIF}
|
|
procedure Encode(ASrcStream: TStream; ADestStream: TStream; const ABytes: Integer = -1); override;
|
|
//We need to specify this value before calling Encode...
|
|
property FileName: String read FFileName write FFileName;
|
|
end;
|
|
|
|
TIdDecoderBinHex4 = class(TIdDecoder4to3)
|
|
protected
|
|
procedure InitComponent; override;
|
|
public
|
|
{$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
|
|
constructor Create(AOwner: TComponent); reintroduce; overload;
|
|
{$ENDIF}
|
|
procedure Decode(ASrcStream: TStream; const ABytes: Integer = -1); override;
|
|
end;
|
|
|
|
const
|
|
//Note the 7th characeter is a ' which is represented in a string as ''
|
|
GBinHex4CodeTable: string = '!"#$%&''()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr'; {Do not Localize}
|
|
GBinHex4IdentificationString: string = '(This file must be converted with BinHex 4.0)'; {Do not Localize}
|
|
|
|
type
|
|
EIdMissingColon = class(EIdException);
|
|
EIdMissingFileName = class(EIdException);
|
|
|
|
var
|
|
GBinHex4DecodeTable: TIdDecodeTable;
|
|
|
|
implementation
|
|
|
|
uses
|
|
IdResourceStrings;
|
|
|
|
{ TIdDecoderBinHex4 }
|
|
|
|
{$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
|
|
constructor TIdDecoderBinHex4.Create(AOwner: TComponent);
|
|
begin
|
|
inherited Create(AOwner);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
procedure TIdDecoderBinHex4.InitComponent;
|
|
begin
|
|
inherited InitComponent;
|
|
FDecodeTable := GBinHex4DecodeTable;
|
|
FCodingTable := ToBytes(GBinHex4CodeTable);
|
|
FFillChar := '='; {Do not Localize}
|
|
end;
|
|
|
|
procedure TIdDecoderBinHex4.Decode(ASrcStream: TStream; const ABytes: Integer = -1);
|
|
var
|
|
LCopyToPos: integer;
|
|
LIn : TIdBytes;
|
|
LInSize: Integer;
|
|
LOut: TIdBytes;
|
|
LN: Integer;
|
|
LRepetition: Integer;
|
|
LForkLength: Integer;
|
|
begin
|
|
LInSize := IndyLength(ASrcStream, ABytes);
|
|
if LInSize <= 0 then begin
|
|
Exit;
|
|
end;
|
|
|
|
SetLength(LIn, LInSize);
|
|
TIdStreamHelper.ReadBytes(ASrcStream, LIn, LInSize);
|
|
|
|
//We don't need to check if the identification string is present, since the
|
|
//attachment is bounded by a : at the start and end, and the identification
|
|
//string may have been stripped off already.
|
|
//While we are at it, remove all the CRs and LFs...
|
|
LCopyToPos := -1;
|
|
for LN := 0 to LInSize-1 do begin
|
|
if LIn[LN] = 58 then begin //Ascii 58 is a colon :
|
|
if LCopyToPos = -1 then begin
|
|
//This is the start of the file...
|
|
LCopyToPos := 0;
|
|
end else begin
|
|
//This is the second :, i.e. the end of the file...
|
|
SetLength(LIn, LCopyToPos);
|
|
LCopyToPos := -2; //Flag that we got an end marker
|
|
Break;
|
|
end;
|
|
end else begin
|
|
if (LCopyToPos > -1) and (not ByteIsInEOL(LIn, LN)) then begin
|
|
LIn[LCopyToPos] := LIn[LN];
|
|
Inc(LCopyToPos);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
//did we get the initial colon?
|
|
if LCopyToPos = -1 then begin
|
|
raise EIdMissingColon.Create('Block passed to TIdDecoderBinHex4.Decode is missing a starting colon :'); {Do not Localize}
|
|
end;
|
|
//did we get the terminating colon?
|
|
if LCopyToPos <> -2 then begin
|
|
raise EIdMissingColon.Create('Block passed to TIdDecoderBinHex4.Decode is missing a terminating colon :'); {Do not Localize}
|
|
end;
|
|
|
|
if Length(LIn) = 0 then begin
|
|
Exit;
|
|
end;
|
|
|
|
LOut := InternalDecode(LIn);
|
|
|
|
// Now expand the run-length encoding.
|
|
// $90 is the marker, encoding is made for 3->255 characters
|
|
// 00 11 22 33 44 55 66 77 -> 00 11 22 33 44 55 66 77
|
|
// 11 22 22 22 22 22 22 33 -> 11 22 90 06 33
|
|
// 11 22 90 33 44 -> 11 22 90 00 33 44
|
|
LN := 0;
|
|
while LN < Length(LOut) do begin
|
|
if LOut[LN] = $90 then begin
|
|
LRepetition := LOut[LN+1];
|
|
if LRepetition = 0 then begin
|
|
//90 is by itself, so just remove the 00
|
|
//22 90 00 -> 22 90
|
|
RemoveBytes(LOut, LN+1, 1);
|
|
Inc(LN); //Move past the $90
|
|
end
|
|
else if LRepetition = 1 then begin
|
|
//Not allowed: 22 90 01 -> 22
|
|
//Throw an exception or deal with it? Deal with it.
|
|
RemoveBytes(LOut, LN, 2);
|
|
end
|
|
else if LRepetition = 2 then begin
|
|
//Not allowed: 22 90 02 -> 22 22
|
|
//Throw an exception or deal with it? Deal with it.
|
|
LOut[LN] := LOut[LN-1];
|
|
RemoveBytes(LOut, LN+1, 1);
|
|
Inc(LN);
|
|
end
|
|
else if LRepetition = 3 then begin
|
|
//22 90 03 -> 22 22 22
|
|
LOut[LN] := LOut[LN-1];
|
|
LOut[LN+1] := LOut[LN-1];
|
|
Inc(LN, 2);
|
|
end
|
|
else begin
|
|
//Repetition is 4 to 255: expand the sequence.
|
|
//22 90 04 -> 22 22 22 22
|
|
LOut[LN] := LOut[LN-1];
|
|
LOut[LN+1] := LOut[LN-1];
|
|
ExpandBytes(LOut, LN+2, LRepetition-2, LOut[LN-1]);
|
|
Inc(LN, LRepetition-1);
|
|
end;
|
|
end else begin
|
|
Inc(LN);
|
|
end;
|
|
end;
|
|
|
|
//We are not finished yet. Strip off the header, by calculating the offset
|
|
//of the start of the attachment and it's length.
|
|
LN := 1 + LOut[0]; //Length byte + length of filename
|
|
Inc(LN, 1 + 4 + 4 + 2); //Version, type, creator, flags
|
|
// TODO: use one of the BytesTo...() functions here instead?
|
|
LForkLength := (((((LOut[LN]*256)+LOut[LN+1])*256)+LOut[LN+2])*256)+LOut[LN+3];
|
|
Inc(LN, 4); //Go past the data fork length
|
|
if LForkLength = 0 then begin
|
|
//No data fork present, save the resource fork instead...
|
|
// TODO: use one of the BytesTo...() functions here instead?
|
|
LForkLength := (((((LOut[LN]*256)+LOut[LN+1])*256)+LOut[LN+2])*256)+LOut[LN+3];
|
|
end;
|
|
Inc(LN, 4); //Go past the resource fork length
|
|
Inc(LN, 2); //CRC
|
|
|
|
//At this point, LOut[LN] points to the actual data (the data fork, if there
|
|
//is one, or else the resource fork if there is no data fork).
|
|
if Assigned(FStream) then begin
|
|
TIdStreamHelper.Write(FStream, LOut, LForkLength, LN);
|
|
end;
|
|
end;
|
|
|
|
{ TIdEncoderBinHex4 }
|
|
|
|
{$IFDEF WORKAROUND_INLINE_CONSTRUCTORS}
|
|
constructor TIdEncoderBinHex4.Create(AOwner: TComponent);
|
|
begin
|
|
inherited Create(AOwner);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
procedure TIdEncoderBinHex4.InitComponent;
|
|
begin
|
|
inherited InitComponent;
|
|
FCodingTable := ToBytes(GBinHex4CodeTable);
|
|
FFillChar := '='; {Do not Localize}
|
|
end;
|
|
|
|
function TIdEncoderBinHex4.GetCRC(const ABlock: TIdBytes; const AOffset: Integer = 0;
|
|
const ASize: Integer = -1): Word;
|
|
var
|
|
LN: Integer;
|
|
LActual: Integer;
|
|
begin
|
|
Result := 0;
|
|
LActual := IndyLength(ABlock, ASize, AOffset);
|
|
if LActual > 0 then
|
|
begin
|
|
for LN := 0 to LActual-1 do begin
|
|
AddByteCRC(Result, ABlock[AOffset+LN]);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TIdEncoderBinHex4.AddByteCRC(var ACRC: Word; AByte: Byte);
|
|
//BinHex 4.0 uses a 16-bit CRC with an 0x1021 seed.
|
|
var
|
|
LWillShiftedOutBitBeA1: boolean;
|
|
LN: integer;
|
|
begin
|
|
for LN := 1 to 8 do begin
|
|
LWillShiftedOutBitBeA1 := (ACRC and $8000) <> 0;
|
|
//Shift the CRC left, and add the next bit from our byte...
|
|
ACRC := (ACRC shl 1) or (AByte shr 7);
|
|
if LWillShiftedOutBitBeA1 then begin
|
|
ACRC := ACRC xor $1021;
|
|
end;
|
|
AByte := (AByte shl 1) and $FF;
|
|
end;
|
|
end;
|
|
|
|
procedure TIdEncoderBinHex4.Encode(ASrcStream: TStream; ADestStream: TStream; const ABytes: Integer = -1);
|
|
var
|
|
LN: Integer;
|
|
LOffset: Integer;
|
|
LBlocks: Integer;
|
|
LOut: TIdBytes;
|
|
LSSize, LTemp: Integer;
|
|
LFileName: {$IFDEF HAS_AnsiString}AnsiString{$ELSE}TIdBytes{$ENDIF};
|
|
LCRC: word;
|
|
LRemainder: integer;
|
|
begin
|
|
if FFileName = '' then begin
|
|
raise EIdMissingFileName.Create('Data passed to TIdEncoderBinHex4.Encode is missing a filename'); {Do not Localize}
|
|
end;
|
|
//Read in the attachment first...
|
|
LSSize := IndyLength(ASrcStream, ABytes);
|
|
//BinHex4.0 allows filenames to be only 255 bytes long (because the length
|
|
//is stored in a byte), so truncate the filename to 255 bytes...
|
|
{$IFNDEF HAS_AnsiString}
|
|
LFileName := IndyTextEncoding_OSDefault.GetBytes(FFileName);
|
|
{$ELSE}
|
|
{$IFDEF STRING_IS_UNICODE}
|
|
LFileName := AnsiString(FFileName); // explicit convert to Ansi
|
|
{$ELSE}
|
|
LFileName := FFileName;
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
if Length(FFileName) > 255 then begin
|
|
SetLength(LFileName, 255);
|
|
end;
|
|
//Construct the header...
|
|
SetLength(LOut, 1+Length(LFileName)+1+4+4+2+4+4+2+LSSize+2);
|
|
LOut[0] := Length(LFileName); //Length of filename in 1st byte
|
|
for LN := 1 to Length(LFileName) do begin
|
|
LOut[LN] := {$IFNDEF HAS_AnsiString}LFileName[LN-1]{$ELSE}Byte(LFileName[LN]){$ENDIF};
|
|
end;
|
|
LOffset := 1+Length(LFileName); //Points to byte after filename
|
|
LOut[LOffset] := 0; //Version
|
|
Inc(LOffset);
|
|
for LN := 0 to 7 do begin
|
|
LOut[LOffset+LN] := 32; //Use spaces for Type & Creator
|
|
end;
|
|
Inc(LOffset, 8);
|
|
LOut[LOffset] := 0; //Flags
|
|
LOut[LOffset] := 0; //Flags
|
|
Inc(LOffset, 2);
|
|
LTemp := LSSize;
|
|
LOut[LOffset] := LTemp mod 256; //Length of data fork
|
|
LTemp := LTemp div 256;
|
|
LOut[LOffset+1] := LTemp mod 256; //Length of data fork
|
|
LTemp := LTemp div 256;
|
|
LOut[LOffset+2] := LTemp mod 256; //Length of data fork
|
|
LTemp := LTemp div 256;
|
|
LOut[LOffset+3] := LTemp; //Length of data fork
|
|
Inc(LOffset, 4);
|
|
LOut[LOffset] := 0; //Length of resource fork
|
|
LOut[LOffset+1] := 0; //Length of resource fork
|
|
LOut[LOffset+2] := 0; //Length of resource fork
|
|
LOut[LOffset+3] := 0; //Length of resource fork
|
|
Inc(LOffset, 4);
|
|
//Next comes the CRC for the header...
|
|
LCRC := GetCRC(LOut, 0, LOffset);
|
|
LOut[LOffset] := LCRC mod 256; //CRC of data fork
|
|
LCRC := LCRC div 256;
|
|
LOut[LOffset+1] := LCRC; //CRC of data fork
|
|
Inc(LOffset, 2);
|
|
//Next comes the data fork (we will not be using the resource fork)...
|
|
//Copy in the attachment...
|
|
TIdStreamHelper.ReadBytes(ASrcStream, LOut, LSSize, LOffset);
|
|
LCRC := GetCRC(LOut, LOffset, LSSize);
|
|
Inc(LOffset, LSSize);
|
|
LOut[LOffset] := LCRC mod 256; //CRC of data fork
|
|
LCRC := LCRC div 256;
|
|
LOut[LOffset+1] := LCRC; //CRC of data fork
|
|
Inc(LOffset, 2);
|
|
//To prepare for the 3to4 encoder, make sure our block is a multiple of 3...
|
|
LSSize := LOffset mod 3;
|
|
if LSSize > 0 then begin
|
|
ExpandBytes(LOut, LOffset, 3-LSSize);
|
|
end;
|
|
//We now need to 3to4 encode LOut...
|
|
//TODO: compress repetitive bytes to "<byte> $90 <run length>"
|
|
LOut := InternalEncode(LOut);
|
|
//Need to add a colon at the start & end of the block...
|
|
InsertByte(LOut, 58, 0);
|
|
AppendByte(LOut, 58);
|
|
//Expand any bare $90 to $90 $00
|
|
LN := 0;
|
|
while LN < Length(LOut) do begin
|
|
if LOut[LN] = $90 then begin
|
|
InsertByte(LOut, 0, LN+1);
|
|
Inc(LN);
|
|
end;
|
|
Inc(LN);
|
|
end;
|
|
|
|
WriteStringToStream(ADestStream, GBinHex4IdentificationString + EOL);
|
|
|
|
//Put back in our CRLFs. A max of 64 chars are allowed per line.
|
|
LBlocks := Length(LOut) div 64;
|
|
for LN := 0 to LBlocks-1 do begin
|
|
TIdStreamHelper.Write(ADestStream, LOut, 64, LN*64);
|
|
WriteStringToStream(ADestStream, EOL);
|
|
end;
|
|
LRemainder := Length(LOut) mod 64;
|
|
if LRemainder > 0 then begin
|
|
TIdStreamHelper.Write(ADestStream, LOut, LRemainder, LBlocks*64);
|
|
WriteStringToStream(ADestStream, EOL);
|
|
end;
|
|
end;
|
|
|
|
initialization
|
|
TIdDecoder4to3.ConstructDecodeTable(GBinHex4CodeTable, GBinHex4DecodeTable);
|
|
end.
|
|
|