restemplate/indy/Protocols/IdHashSHA1.pas

388 lines
13 KiB
Plaintext

{
$Project$
$Workfile$
$Revision$
$DateUTC$
$Id$
This file is part of the Indy (Internet Direct) project, and is offered
under the dual-licensing agreement described on the Indy website.
(http://www.indyproject.org/)
Copyright:
(c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
}
{
$Log$
}
{
Rev 1.6 2003-10-12 15:25:50 HHellström
Comments added
Rev 1.5 2003-10-12 03:08:24 HHellström
New implementation; copyright changed. The source code formatting has been
adjusted to fit the margins. The new implementation is faster on dotNet
compared to the old one, but is slightly slower on Win32.
Rev 1.4 2003-10-11 18:44:54 HHellström
Range checking and overflow checking disabled in the Coder method only. The
purpose of this setting is to force the arithmetic operations performed on
LongWord variables to be modulo $100000000. This hack entails reasonable
performance on both Win32 and dotNet.
Rev 1.3 10/10/2003 2:20:56 PM GGrieve
turn range checking off
Rev 1.2 2003-09-21 17:31:02 HHellström Version: 1.2
DotNET compatibility
Rev 1.1 2/16/2003 03:19:18 PM JPMugaas
Should now compile on D7 better.
Rev 1.0 11/13/2002 07:53:48 AM JPMugaas
}
unit IdHashSHA1;
interface
{$i IdCompilerDefines.inc}
uses
Classes,
IdGlobal, IdHash;
type
T5x4LongWordRecord = array[0..4] of UInt32;
T512BitRecord = array [0..63] of Byte;
TIdHashSHA1 = class(TIdHash)
protected
FCheckSum: T5x4LongWordRecord;
FCBuffer: TIdBytes;
procedure Coder;
function GetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes; override;
function HashToHex(const AHash: TIdBytes): String; override;
public
constructor Create; override;
end;
implementation
{$IFNDEF DOTNET}
uses
IdStreamVCL;
{$ENDIF}
{ TIdHashSHA1 }
function SwapLongWord(const AValue: UInt32): UInt32;
begin
Result := ((AValue and $FF) shl 24) or ((AValue and $FF00) shl 8) or ((AValue and $FF0000) shr 8) or ((AValue and $FF000000) shr 24);
end;
constructor TIdHashSHA1.Create;
begin
inherited Create;
SetLength(FCBuffer, 64);
end;
{$Q-,R-} // Operations performed modulo $100000000
procedure TIdHashSHA1.Coder;
var
T, A, B, C, D, E: UInt32;
{ The size of the W variable has been reduced to make the Coder method
consume less memory on dotNet. This change has been tested with the v1.1
framework and entails a general increase of performance by >50%. }
W: array [0..19] of UInt32;
i: UInt32;
begin
{ The first 16 W values are identical to the input block with endian
conversion. }
for i := 0 to 15 do
begin
W[i]:= (FCBuffer[i*4] shl 24) or
(FCBuffer[i*4+1] shl 16) or
(FCBuffer[i*4+2] shl 8) or
FCBuffer[i*4+3];
end;
{ In normal x86 code all of the remaining 64 W values would be calculated
here. Here only the four next values are calculated, to reduce the code
size of the first of the four loops below. }
for i := 16 to 19 do
begin
T := W[i-3] xor W[i-8] xor W[i-14] xor W[i-16];
W[i] := (T shl 1) or (T shr 31);
end;
A := FCheckSum[0];
B := FCheckSum[1];
C := FCheckSum[2];
D := FCheckSum[3];
E := FCheckSum[4];
{ The following loop could be expanded, but has been kept together to reduce
the code size. A small code size entails better performance due to CPU
caching.
Note that the code size could be reduced further by using the SHA-1
reference code:
for i := 0 to 19 do begin
T := E + (A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i];
Inc(T,$5A827999);
E := D;
D := C;
C := (B shl 30) + (B shr 2);
B := A;
A := T;
end;
The reference code is usually (at least partly) expanded, mostly because
the assignments that circle the state variables A, B, C, D and E are costly,
in particular on dotNET. (In x86 code further optimization can be achieved
by eliminating the loop variable, which occupies a CPU register that is
better used by one of the state variables, plus by expanding the W array
at the beginning.) }
i := 0;
repeat
Inc(E,(A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i+0]);
Inc(E,$5A827999);
B := (B shl 30) + (B shr 2);
Inc(D,(E shl 5) + (E shr 27) + (C xor (A and (B xor C))) + W[i+1]);
Inc(D,$5A827999);
A := (A shl 30) + (A shr 2);
Inc(C,(D shl 5) + (D shr 27) + (B xor (E and (A xor B))) + W[i+2]);
Inc(C,$5A827999);
E := (E shl 30) + (E shr 2);
Inc(B,(C shl 5) + (C shr 27) + (A xor (D and (E xor A))) + W[i+3]);
Inc(B,$5A827999);
D := (D shl 30) + (D shr 2);
Inc(A,(B shl 5) + (B shr 27) + (E xor (C and (D xor E))) + W[i+4]);
Inc(A,$5A827999);
C := (C shl 30) + (C shr 2);
Inc(i,5);
until i = 20;
{ The following three loops will only use the first 16 elements of the W
array in a circular, recursive pattern. The following assignments are a
trade-off to avoid having to split up the first loop. }
W[0] := W[16];
W[1] := W[17];
W[2] := W[18];
W[3] := W[19];
{ In the following three loops the recursive W array expansion is performed
"just in time" following a circular pattern. Using circular indicies (e.g.
(i+2) and $F) is not free, but the cost of declaring a large W array would
be higher on dotNET. Before attempting to optimize this code, please note
that the following language features are also costly:
* Assignments and moves/copies, in particular on dotNET
* Constant lookup tables, in particular on dotNET
* Sub functions, in particular on x86
* if..then and case..of. }
i := 20;
repeat
T := W[(i+13) and $F] xor W[(i+8) and $F];
T := T xor W[(i+2) and $F] xor W[i and $F];
T := (T shl 1) or (T shr 31);
W[i and $F] := T;
Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T + $6ED9EBA1);
B := (B shl 30) + (B shr 2);
T := W[(i+14) and $F] xor W[(i+9) and $F];
T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
T := (T shl 1) or (T shr 31);
W[(i+1) and $F] := T;
Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T + $6ED9EBA1);
A := (A shl 30) + (A shr 2);
T := W[(i+15) and $F] xor W[(i+10) and $F];
T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
T := (T shl 1) or (T shr 31);
W[(i+2) and $F] := T;
Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T + $6ED9EBA1);
E := (E shl 30) + (E shr 2);
T := W[i and $F] xor W[(i+11) and $F];
T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
T := (T shl 1) or (T shr 31);
W[(i+3) and $F] := T;
Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T + $6ED9EBA1);
D := (D shl 30) + (D shr 2);
T := W[(i+1) and $F] xor W[(i+12) and $F];
T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
T := (T shl 1) or (T shr 31);
W[(i+4) and $F] := T;
Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T + $6ED9EBA1);
C := (C shl 30) + (C shr 2);
Inc(i,5);
until i = 40;
{ Note that the constant $70E44324 = $100000000 - $8F1BBCDC has been selected
to slightly reduce the probability that the CPU flag C (Carry) is set. This
trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
It entails a marginal but measurable performance gain on some CPUs. }
i := 40;
repeat
T := W[(i+13) and $F] xor W[(i+8) and $F];
T := T xor W[(i+2) and $F] xor W[i and $F];
T := (T shl 1) or (T shr 31);
W[i and $F] := T;
Inc(E,(A shl 5) + (A shr 27) + ((B and C) or (D and (B or C))) + T);
Dec(E,$70E44324);
B := (B shl 30) + (B shr 2);
T := W[(i+14) and $F] xor W[(i+9) and $F];
T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
T := (T shl 1) or (T shr 31);
W[(i+1) and $F] := T;
Inc(D,(E shl 5) + (E shr 27) + ((A and B) or (C and (A or B))) + T);
Dec(D,$70E44324);
A := (A shl 30) + (A shr 2);
T := W[(i+15) and $F] xor W[(i+10) and $F];
T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
T := (T shl 1) or (T shr 31);
W[(i+2) and $F] := T;
Inc(C,(D shl 5) + (D shr 27) + ((E and A) or (B and (E or A))) + T);
Dec(C,$70E44324);
E := (E shl 30) + (E shr 2);
T := W[i and $F] xor W[(i+11) and $F];
T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
T := (T shl 1) or (T shr 31);
W[(i+3) and $F] := T;
Inc(B,(C shl 5) + (C shr 27) + ((D and E) or (A and (D or E))) + T);
Dec(B,$70E44324);
D := (D shl 30) + (D shr 2);
T := W[(i+1) and $F] xor W[(i+12) and $F];
T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
T := (T shl 1) or (T shr 31);
W[(i+4) and $F] := T;
Inc(A,(B shl 5) + (B shr 27) + ((C and D) or (E and (C or D))) + T);
Dec(A,$70E44324);
C := (C shl 30) + (C shr 2);
Inc(i,5);
until i = 60;
{ Note that the constant $359D3E2A = $100000000 - $CA62C1D6 has been selected
to slightly reduce the probability that the CPU flag C (Carry) is set. This
trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
It entails a marginal but measurable performance gain on some CPUs. }
repeat
T := W[(i+13) and $F] xor W[(i+8) and $F];
T := T xor W[(i+2) and $F] xor W[i and $F];
T := (T shl 1) or (T shr 31);
W[i and $F] := T;
Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T - $359D3E2A);
B := (B shl 30) + (B shr 2);
T := W[(i+14) and $F] xor W[(i+9) and $F];
T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
T := (T shl 1) or (T shr 31);
W[(i+1) and $F] := T;
Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T - $359D3E2A);
A := (A shl 30) + (A shr 2);
T := W[(i+15) and $F] xor W[(i+10) and $F];
T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
T := (T shl 1) or (T shr 31);
W[(i+2) and $F] := T;
Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T - $359D3E2A);
E := (E shl 30) + (E shr 2);
T := W[i and $F] xor W[(i+11) and $F];
T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
T := (T shl 1) or (T shr 31);
W[(i+3) and $F] := T;
Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T - $359D3E2A);
D := (D shl 30) + (D shr 2);
T := W[(i+1) and $F] xor W[(i+12) and $F];
T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
T := (T shl 1) or (T shr 31);
W[(i+4) and $F] := T;
Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T - $359D3E2A);
C := (C shl 30) + (C shr 2);
Inc(i,5);
until i = 80;
FCheckSum[0]:= FCheckSum[0] + A;
FCheckSum[1]:= FCheckSum[1] + B;
FCheckSum[2]:= FCheckSum[2] + C;
FCheckSum[3]:= FCheckSum[3] + D;
FCheckSum[4]:= FCheckSum[4] + E;
end;
function TIdHashSHA1.GetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes;
var
LSize: Integer;
LLenHi: UInt32;
LLenLo: UInt32;
I: Integer;
begin
Result := nil;
FCheckSum[0] := $67452301;
FCheckSum[1] := $EFCDAB89;
FCheckSum[2] := $98BADCFE;
FCheckSum[3] := $10325476;
FCheckSum[4] := $C3D2E1F0;
LLenHi := 0;
LLenLo := 0;
// Code the entire file in complete 64-byte chunks.
while ASize >= 64 do begin
LSize := ReadTIdBytesFromStream(AStream, FCBuffer, 64);
// TODO: handle stream read error
Inc(LLenLo, LSize * 8);
if LLenLo < UInt32(LSize * 8) then begin
Inc(LLenHi);
end;
Coder;
Dec(ASize, LSize);
end;
// Read the last set of bytes.
LSize := ReadTIdBytesFromStream(AStream, FCBuffer, ASize);
// TODO: handle stream read error
Inc(LLenLo, LSize * 8);
if LLenLo < UInt32(LSize * 8) then begin
Inc(LLenHi);
end;
FCBuffer[LSize] := $80;
if LSize >= 56 then begin
for I := (LSize + 1) to 63 do begin
FCBuffer[i] := 0;
end;
Coder;
LSize := -1;
end;
for I := (LSize + 1) to 55 do begin
FCBuffer[i] := 0;
end;
FCBuffer[56] := (LLenHi shr 24);
FCBuffer[57] := (LLenHi shr 16) and $FF;
FCBuffer[58] := (LLenHi shr 8) and $FF;
FCBuffer[59] := (LLenHi and $FF);
FCBuffer[60] := (LLenLo shr 24);
FCBuffer[61] := (LLenLo shr 16) and $FF;
FCBuffer[62] := (LLenLo shr 8) and $FF;
FCBuffer[63] := (LLenLo and $FF);
Coder;
FCheckSum[0] := SwapLongWord(FCheckSum[0]);
FCheckSum[1] := SwapLongWord(FCheckSum[1]);
FCheckSum[2] := SwapLongWord(FCheckSum[2]);
FCheckSum[3] := SwapLongWord(FCheckSum[3]);
FCheckSum[4] := SwapLongWord(FCheckSum[4]);
SetLength(Result, SizeOf(UInt32)*5);
for I := 0 to 4 do begin
CopyTIdUInt32(FCheckSum[I], Result, SizeOf(UInt32)*I);
end;
end;
function TIdHashSHA1.HashToHex(const AHash: TIdBytes): String;
begin
Result := LongWordHashToHex(AHash, 5);
end;
end.