{
  $Project$
  $Workfile$
  $Revision$
  $DateUTC$
  $Id$

  This file is part of the Indy (Internet Direct) project, and is offered
  under the dual-licensing agreement described on the Indy website.
  (http://www.indyproject.org/)

  Copyright:
   (c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
}
{
  $Log$
}
{
  Rev 1.6    2003-10-12 15:25:50  HHellström
  Comments added

  Rev 1.5    2003-10-12 03:08:24  HHellström
  New implementation; copyright changed. The source code formatting has been
  adjusted to fit the margins. The new implementation is faster on dotNet
  compared to the old one, but is slightly slower on Win32.

  Rev 1.4    2003-10-11 18:44:54  HHellström
  Range checking and overflow checking disabled in the Coder method only. The
  purpose of this setting is to force the arithmetic operations performed on
  UInt32 variables to be modulo $100000000. This hack entails reasonable
  performance on both Win32 and dotNet.

  Rev 1.3    10/10/2003 2:20:56 PM  GGrieve
  turn range checking off

  Rev 1.2    2003-09-21 17:31:02  HHellström  Version: 1.2
  DotNET compatibility

  Rev 1.1    2/16/2003 03:19:18 PM  JPMugaas
  Should now compile on D7 better.

  Rev 1.0    11/13/2002 07:53:48 AM  JPMugaas
}

unit IdHashSHA;

interface

{$i IdCompilerDefines.inc}

uses
  Classes,
  IdFIPS, IdGlobal,
  IdHash;

{
Microsoft.NET notes!!!!

In Microsoft.NET, there are some limitations that you need to be aware of.

1) In Microsoft.NET 1.1, 2.0, and 3.0, only the CryptoService SHA1 class is
FIPS-compliant.  Unfortunately, SHA1 will not be permitted after 2010.

2) In Microsoft.NET 3.5, there are more classes ending in
"CryptoServiceProvider" or "Cng" that are compliant.

3) SHA224 is not exposed.
}

type
  // Five 32-bit words: the running SHA-1 state kept in TIdHashSHA1.FCheckSum.
  T5x4LongWordRecord = array[0..4] of UInt32;
  // One 64-byte (512-bit) block.
  T512BitRecord = array [0..63] of Byte;

  // SHA-1 hash. Outside DOTNET the class carries its own native
  // implementation (Coder / NativeGetHashBytes) in addition to the hashing
  // interface reached through InitHash.
  {$IFNDEF DOTNET}
  TIdHashSHA1 = class(TIdHashNativeAndIntF)
  {$ELSE}
  TIdHashSHA1 = class(TIdHashIntF)
  {$ENDIF}
  protected
    {$IFNDEF DOTNET}
    // Running hash state, (re)initialised by NativeGetHashBytes.
    FCheckSum: T5x4LongWordRecord;
    // 64-byte working block, allocated in Create and consumed by Coder.
    FCBuffer: TIdBytes;
    procedure Coder;
    function NativeGetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes; override;
    function HashToHex(const AHash: TIdBytes): String; override;
    {$ENDIF}
    function InitHash : TIdHashIntCtx; override;
  public
    {$IFDEF DOTNET}
    class function IsAvailable : Boolean; override;
    {$ELSE}
    constructor Create; override;
    {$ENDIF}
    class function IsIntfAvailable: Boolean; override;
  end;

  // SHA-224 is only declared when DOTNET is not defined.
  {$IFNDEF DOTNET}
  TIdHashSHA224 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;
  {$ENDIF}

  TIdHashSHA256 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;

  TIdHashSHA384 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;

  TIdHashSHA512 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;

implementation

uses
  {$IFDEF DOTNET}
  IdStreamNET;
  {$ELSE}
  IdStreamVCL;
  {$ENDIF}

{ TIdHashSHA1 }

{$IFDEF DOTNET}

// NOTE(review): the class declaration above declares InitHash, not
// GetHashInst, for this branch - confirm that the DOTNET build still
// compiles against the current base class.
function TIdHashSHA1.GetHashInst : TIdHashInst;
begin
  // You can not use SHA256Managed for FIPS compliance.
  Result := System.Security.Cryptography.SHA1CryptoServiceProvider.Create;
end;

class function TIdHashSHA1.IsIntfAvailable : Boolean;
begin
  Result := True;
end;

class function TIdHashSHA1.IsAvailable : Boolean;
begin
  Result := True;
end;

{$ELSE}

// Returns AValue with its four bytes in reverse order.
function SwapLongWord(const AValue: UInt32): UInt32;
begin
  Result := ((AValue and $FF) shl 24) or ((AValue and $FF00) shl 8) or
            ((AValue and $FF0000) shr 8) or ((AValue and $FF000000) shr 24);
end;

// Allocates the 64-byte working block used by Coder.
constructor TIdHashSHA1.Create;
begin
  inherited Create;
  SetLength(FCBuffer, 64);
end;

function TIdHashSHA1.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA1HashInst;
end;

class function TIdHashSHA1.IsIntfAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and IsSHA1HashIntfAvail;
end;

// Operations performed modulo $100000000.
// No directive later in this unit switches the checks back on, so the
// setting also covers NativeGetHashBytes, whose bit-length counter relies on
// UInt32 wrap-around to detect the carry into the high word.
{$Q-,R-}

// Processes the 64-byte block currently held in FCBuffer and adds the
// resulting A..E working values into FCheckSum.
procedure TIdHashSHA1.Coder;
var
  T, A, B, C, D, E: UInt32;
  { The size of the W variable has been reduced to make the Coder method
    consume less memory on dotNet. This change has been tested with the v1.1
    framework and entails a general increase of performance by >50%. }
  W: array [0..19] of UInt32;
  i: UInt32;
begin
  { The first 16 W values are identical to the input block with endian
    conversion. }
  for i := 0 to 15 do
  begin
    W[i]:= (FCBuffer[i*4] shl 24) or
           (FCBuffer[i*4+1] shl 16) or
           (FCBuffer[i*4+2] shl 8) or
            FCBuffer[i*4+3];
  end;
  { In normal x86 code all of the remaining 64 W values would be calculated
    here. Here only the four next values are calculated, to reduce the code
    size of the first of the four loops below. }
  for i := 16 to 19 do
  begin
    T := W[i-3] xor W[i-8] xor W[i-14] xor W[i-16];
    W[i] := (T shl 1) or (T shr 31);
  end;

  A := FCheckSum[0];
  B := FCheckSum[1];
  C := FCheckSum[2];
  D := FCheckSum[3];
  E := FCheckSum[4];

  { The following loop could be expanded, but has been kept together to reduce
    the code size. A small code size entails better performance due to CPU
    caching.

    Note that the code size could be reduced further by using the SHA-1
    reference code:

    for i := 0 to 19 do begin
      T := E + (A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i];
      Inc(T,$5A827999);
      E := D;
      D := C;
      C := (B shl 30) + (B shr 2);
      B := A;
      A := T;
    end;

    The reference code is usually (at least partly) expanded, mostly because
    the assignments that circle the state variables A, B, C, D and E are costly,
    in particular on dotNET. (In x86 code further optimization can be achieved
    by eliminating the loop variable, which occupies a CPU register that is
    better used by one of the state variables, plus by expanding the W array
    at the beginning.) }

  i := 0;
  repeat
    Inc(E,(A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i+0]);
    Inc(E,$5A827999);
    B := (B shl 30) + (B shr 2);
    Inc(D,(E shl 5) + (E shr 27) + (C xor (A and (B xor C))) + W[i+1]);
    Inc(D,$5A827999);
    A := (A shl 30) + (A shr 2);
    Inc(C,(D shl 5) + (D shr 27) + (B xor (E and (A xor B))) + W[i+2]);
    Inc(C,$5A827999);
    E := (E shl 30) + (E shr 2);
    Inc(B,(C shl 5) + (C shr 27) + (A xor (D and (E xor A))) + W[i+3]);
    Inc(B,$5A827999);
    D := (D shl 30) + (D shr 2);
    Inc(A,(B shl 5) + (B shr 27) + (E xor (C and (D xor E))) + W[i+4]);
    Inc(A,$5A827999);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 20;

  { The following three loops will only use the first 16 elements of the W
    array in a circular, recursive pattern. The following assignments are a
    trade-off to avoid having to split up the first loop. }
  W[0] := W[16];
  W[1] := W[17];
  W[2] := W[18];
  W[3] := W[19];

  { In the following three loops the recursive W array expansion is performed
    "just in time" following a circular pattern. Using circular indices (e.g.
    (i+2) and $F) is not free, but the cost of declaring a large W array would
    be higher on dotNET. Before attempting to optimize this code, please note
    that the following language features are also costly:

    * Assignments and moves/copies, in particular on dotNET
    * Constant lookup tables, in particular on dotNET
    * Sub functions, in particular on x86
    * if..then and case..of. }

  i := 20;
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T + $6ED9EBA1);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T + $6ED9EBA1);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T + $6ED9EBA1);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T + $6ED9EBA1);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T + $6ED9EBA1);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 40;

  { Note that the constant $70E44324 = $100000000 - $8F1BBCDC has been selected
    to slightly reduce the probability that the CPU flag C (Carry) is set. This
    trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
    It entails a marginal but measurable performance gain on some CPUs. }

  i := 40;
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + ((B and C) or (D and (B or C))) + T);
    Dec(E,$70E44324);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + ((A and B) or (C and (A or B))) + T);
    Dec(D,$70E44324);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + ((E and A) or (B and (E or A))) + T);
    Dec(C,$70E44324);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + ((D and E) or (A and (D or E))) + T);
    Dec(B,$70E44324);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + ((C and D) or (E and (C or D))) + T);
    Dec(A,$70E44324);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 60;

  { Note that the constant $359D3E2A = $100000000 - $CA62C1D6 has been selected
    to slightly reduce the probability that the CPU flag C (Carry) is set. This
    trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
    It entails a marginal but measurable performance gain on some CPUs. }

  // i is already 60 here, carried over from the previous loop.
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T - $359D3E2A);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T - $359D3E2A);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T - $359D3E2A);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T - $359D3E2A);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T - $359D3E2A);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 80;

  FCheckSum[0]:= FCheckSum[0] + A;
  FCheckSum[1]:= FCheckSum[1] + B;
  FCheckSum[2]:= FCheckSum[2] + C;
  FCheckSum[3]:= FCheckSum[3] + D;
  FCheckSum[4]:= FCheckSum[4] + E;
end;

// Hashes ASize bytes read from AStream and returns the 20-byte digest.
//
// The data is consumed in 64-byte blocks through FCBuffer/Coder; the last
// (partial) block is padded with $80, zero bytes and the 64-bit message
// length in bits (LLenHi:LLenLo, most significant byte first).
//
// If the stream delivers fewer than 64 bytes while ASize still promises a
// full block, the bytes actually received are treated as the final partial
// block. Previously a read that returned 0 left ASize unchanged and the loop
// never terminated, and a short non-zero read hashed stale buffer contents.
function TIdHashSHA1.NativeGetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes;
var
  LSize: Integer;
  LLenHi: UInt32;
  LLenLo: UInt32;
  I: Integer;
  LShortRead: Boolean;
begin
  Result := nil;
  FCheckSum[0] := $67452301;
  FCheckSum[1] := $EFCDAB89;
  FCheckSum[2] := $98BADCFE;
  FCheckSum[3] := $10325476;
  FCheckSum[4] := $C3D2E1F0;
  LLenHi := 0;
  LLenLo := 0;
  LSize := 0;
  LShortRead := False;

  // Code the entire file in complete 64-byte chunks.
  while ASize >= 64 do
  begin
    LSize := ReadTIdBytesFromStream(AStream, FCBuffer, 64);
    if LSize < 64 then
    begin
      // The stream ended before ASize bytes were delivered: keep what was
      // read (if anything) as the final partial block and stop reading.
      if LSize < 0 then
      begin
        LSize := 0;
      end;
      LShortRead := True;
      Break;
    end;
    Inc(LLenLo, LSize * 8);
    // LLenLo wrapped around: carry into the high word of the bit count.
    if LLenLo < UInt32(LSize * 8) then
    begin
      Inc(LLenHi);
    end;
    Coder;
    Dec(ASize, LSize);
  end;

  // Read the last set of bytes.
  if not LShortRead then
  begin
    LSize := ReadTIdBytesFromStream(AStream, FCBuffer, ASize);
  end;
  Inc(LLenLo, LSize * 8);
  if LLenLo < UInt32(LSize * 8) then
  begin
    Inc(LLenHi);
  end;

  // Append the mandatory $80 marker directly after the data.
  FCBuffer[LSize] := $80;
  if LSize >= 56 then
  begin
    // No room left for the 8-byte length: zero-fill, code this block and
    // continue with a fresh, all-padding block.
    for I := (LSize + 1) to 63 do
    begin
      FCBuffer[i] := 0;
    end;
    Coder;
    LSize := -1;
  end;

  for I := (LSize + 1) to 55 do
  begin
    FCBuffer[i] := 0;
  end;
  FCBuffer[56] := (LLenHi shr 24);
  FCBuffer[57] := (LLenHi shr 16) and $FF;
  FCBuffer[58] := (LLenHi shr 8)  and $FF;
  FCBuffer[59] := (LLenHi and $FF);
  FCBuffer[60] := (LLenLo shr 24);
  FCBuffer[61] := (LLenLo shr 16) and $FF;
  FCBuffer[62] := (LLenLo shr 8)  and $FF;
  FCBuffer[63] := (LLenLo and $FF);
  Coder;

  FCheckSum[0] := SwapLongWord(FCheckSum[0]);
  FCheckSum[1] := SwapLongWord(FCheckSum[1]);
  FCheckSum[2] := SwapLongWord(FCheckSum[2]);
  FCheckSum[3] := SwapLongWord(FCheckSum[3]);
  FCheckSum[4] := SwapLongWord(FCheckSum[4]);

  SetLength(Result, SizeOf(UInt32)*5);
  for I := 0 to 4 do
  begin
    CopyTIdUInt32(FCheckSum[I], Result, SizeOf(UInt32)*I);
  end;
end;

function TIdHashSHA1.HashToHex(const AHash: TIdBytes): String;
begin
  Result := LongWordHashToHex(AHash, 5);
end;

{$ENDIF}

{$IFNDEF DOTNET}

{ TIdHashSHA224 }

function TIdHashSHA224.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA224HashInst;
end;

class function TIdHashSHA224.IsAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and IsSHA224HashIntfAvail;
end;

{$ENDIF}

{ TIdHashSHA256 }

function TIdHashSHA256.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA256HashInst;
end;

class function TIdHashSHA256.IsAvailable : Boolean;
begin
  Result := IsHashingIntfAvail and IsSHA256HashIntfAvail;
end;

{ TIdHashSHA384 }

function TIdHashSHA384.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA384HashInst;
end;

class function TIdHashSHA384.IsAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and IsSHA384HashIntfAvail;
end;

{ TIdHashSHA512 }

function TIdHashSHA512.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA512HashInst;
end;

class function TIdHashSHA512.IsAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and IsSHA512HashIntfAvail;
end;

end.