{
  $Project$
  $Workfile$
  $Revision$
  $DateUTC$
  $Id$

  This file is part of the Indy (Internet Direct) project, and is offered
  under the dual-licensing agreement described on the Indy website.
  (http://www.indyproject.org/)

  Copyright:
   (c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
}
{
  $Log$
}
{
  Rev 1.6    2003-10-12 15:25:50  HHellström
  Comments added

  Rev 1.5    2003-10-12 03:08:24  HHellström
  New implementation; copyright changed. The source code formatting has been
  adjusted to fit the margins. The new implementation is faster on dotNet
  compared to the old one, but is slightly slower on Win32.

  Rev 1.4    2003-10-11 18:44:54  HHellström
  Range checking and overflow checking disabled in the Coder method only. The
  purpose of this setting is to force the arithmetic operations performed on
  UInt32 variables to be modulo $100000000. This hack entails reasonable
  performance on both Win32 and dotNet.

  Rev 1.3    10/10/2003 2:20:56 PM  GGrieve
  turn range checking off

  Rev 1.2    2003-09-21 17:31:02  HHellström    Version: 1.2
  DotNET compatibility

  Rev 1.1    2/16/2003 03:19:18 PM  JPMugaas
  Should now compile on D7 better.

  Rev 1.0    11/13/2002 07:53:48 AM  JPMugaas
}

unit IdHashSHA;

interface
{$i IdCompilerDefines.inc}

uses
  Classes,
  IdFIPS,
  IdGlobal, IdHash;

{
Microsoft.NET notes!!!!

In Microsoft.NET, there are some limitations that you need to be aware of.

1) In Microsoft.NET 1.1, 2.0, and 3.0, only the CryptoService SHA1 class is
FIPS-complient.  Unfortunately, SHA1 will not be permitted after 2010.
2) In Microsoft.NET 3.5,There are more classes ending in CryptoServiceProvider" or
"Cng" that are complient.
3) SHA224 is not exposed.
}
type
  T5x4LongWordRecord = array[0..4] of UInt32;
  T512BitRecord = array [0..63] of Byte;
  {$IFNDEF DOTNET}
  TIdHashSHA1 = class(TIdHashNativeAndIntF)
  {$ELSE}
  TIdHashSHA1 = class(TIdHashIntF)
  {$ENDIF}
  protected

    {$IFNDEF DOTNET}
    FCheckSum: T5x4LongWordRecord;
    FCBuffer: TIdBytes;
    procedure Coder;
    function NativeGetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes; override;
    function HashToHex(const AHash: TIdBytes): String; override;

    {$ENDIF}
    function InitHash : TIdHashIntCtx; override;
  public
    {$IFDEF DOTNET}
    class function IsAvailable : Boolean; override;
    {$ELSE}
    constructor Create; override;
    {$ENDIF}
    class function IsIntfAvailable: Boolean; override;
  end;
  {$IFNDEF DOTNET}
  TIdHashSHA224 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;
  {$ENDIF}
  TIdHashSHA256 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;
  TIdHashSHA384 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;
  TIdHashSHA512 = class(TIdHashIntF)
  protected
    function InitHash : TIdHashIntCtx; override;
  public
    class function IsAvailable : Boolean; override;
  end;

implementation
uses
  {$IFDEF DOTNET}
  IdStreamNET;
  {$ELSE}
  IdStreamVCL;
  {$ENDIF}

{ TIdHashSHA1 }

{$IFDEF DOTNET}

function TIdHashSHA1.GetHashInst : TIdHashInst;
begin
//You can not use SHA256Managed for FIPS complience.
  Result := System.Security.Cryptography.SHA1CryptoServiceProvider.Create;
end;

class function TIdHashSHA1.IsIntfAvailable : Boolean;
begin
  Result := True;
end;

class function TIdHashSHA1.IsAvailable : Boolean; 
begin
  Result := True;
end;

{$ELSE}

function SwapLongWord(const AValue: UInt32): UInt32;
begin
  Result := ((AValue and $FF) shl 24) or ((AValue and $FF00) shl 8) or ((AValue and $FF0000) shr 8) or ((AValue and $FF000000) shr 24);
end;

constructor TIdHashSHA1.Create;
begin
  inherited Create;
  SetLength(FCBuffer, 64);
end;

function TIdHashSHA1.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA1HashInst;
end;

class function TIdHashSHA1.IsIntfAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and  IsSHA1HashIntfAvail;
end;

{$Q-,R-} // Operations performed modulo $100000000
procedure TIdHashSHA1.Coder;
var
  T, A, B, C, D, E: UInt32;
  { The size of the W variable has been reduced to make the Coder method
    consume less memory on dotNet. This change has been tested with the v1.1
    framework and entails a general increase of performance by >50%. }
  W: array [0..19] of UInt32;
  i: UInt32;
begin
  { The first 16 W values are identical to the input block with endian
    conversion. }
  for i := 0 to 15 do
  begin
    W[i]:= (FCBuffer[i*4] shl 24) or
            (FCBuffer[i*4+1] shl 16) or
            (FCBuffer[i*4+2] shl 8) or
            FCBuffer[i*4+3];
  end;
  { In normal x86 code all of the remaining 64 W values would be calculated
    here. Here only the four next values are calculated, to reduce the code
    size of the first of the four loops below. }
  for i := 16 to 19 do
  begin
    T := W[i-3] xor W[i-8] xor W[i-14] xor W[i-16];
    W[i] := (T shl 1) or (T shr 31);
  end;

  A := FCheckSum[0];
  B := FCheckSum[1];
  C := FCheckSum[2];
  D := FCheckSum[3];
  E := FCheckSum[4];

  { The following loop could be expanded, but has been kept together to reduce
    the code size. A small code size entails better performance due to CPU
    caching.

    Note that the code size could be reduced further by using the SHA-1
    reference code:

      for i := 0 to 19 do begin
        T := E + (A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i];
        Inc(T,$5A827999);
        E := D;
        D := C;
        C := (B shl 30) + (B shr 2);
        B := A;
        A := T;
      end;

    The reference code is usually (at least partly) expanded, mostly because
    the assignments that circle the state variables A, B, C, D and E are costly,
    in particular on dotNET. (In x86 code further optimization can be achieved
    by eliminating the loop variable, which occupies a CPU register that is
    better used by one of the state variables, plus by expanding the W array
    at the beginning.) }

  i := 0;
  repeat
    Inc(E,(A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i+0]);
    Inc(E,$5A827999);
    B := (B shl 30) + (B shr 2);
    Inc(D,(E shl 5) + (E shr 27) + (C xor (A and (B xor C))) + W[i+1]);
    Inc(D,$5A827999);
    A := (A shl 30) + (A shr 2);
    Inc(C,(D shl 5) + (D shr 27) + (B xor (E and (A xor B))) + W[i+2]);
    Inc(C,$5A827999);
    E := (E shl 30) + (E shr 2);
    Inc(B,(C shl 5) + (C shr 27) + (A xor (D and (E xor A))) + W[i+3]);
    Inc(B,$5A827999);
    D := (D shl 30) + (D shr 2);
    Inc(A,(B shl 5) + (B shr 27) + (E xor (C and (D xor E))) + W[i+4]);
    Inc(A,$5A827999);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 20;

  { The following three loops will only use the first 16 elements of the W
    array in a circular, recursive pattern. The following assignments are a
    trade-off to avoid having to split up the first loop. }
  W[0] := W[16];
  W[1] := W[17];
  W[2] := W[18];
  W[3] := W[19];

  { In the following three loops the recursive W array expansion is performed
    "just in time" following a circular pattern. Using circular indicies (e.g.
    (i+2) and $F) is not free, but the cost of declaring a large W array would
    be higher on dotNET. Before attempting to optimize this code, please note
    that the following language features are also costly:

      * Assignments and moves/copies, in particular on dotNET
      * Constant lookup tables, in particular on dotNET
      * Sub functions, in particular on x86
      * if..then and case..of. }

  i := 20;
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T + $6ED9EBA1);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T + $6ED9EBA1);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T + $6ED9EBA1);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T + $6ED9EBA1);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T + $6ED9EBA1);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 40;

  { Note that the constant $70E44324 = $100000000 - $8F1BBCDC has been selected
    to slightly reduce the probability that the CPU flag C (Carry) is set. This
    trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
    It entails a marginal but measurable performance gain on some CPUs. }

  i := 40;
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + ((B and C) or (D and (B or C))) + T);
    Dec(E,$70E44324);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + ((A and B) or (C and (A or B))) + T);
    Dec(D,$70E44324);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + ((E and A) or (B and (E or A))) + T);
    Dec(C,$70E44324);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + ((D and E) or (A and (D or E))) + T);
    Dec(B,$70E44324);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + ((C and D) or (E and (C or D))) + T);
    Dec(A,$70E44324);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 60;

  { Note that the constant $359D3E2A = $100000000 - $CA62C1D6 has been selected
    to slightly reduce the probability that the CPU flag C (Carry) is set. This
    trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
    It entails a marginal but measurable performance gain on some CPUs. }

  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T - $359D3E2A);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T - $359D3E2A);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T - $359D3E2A);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T - $359D3E2A);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T - $359D3E2A);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 80;

  FCheckSum[0]:= FCheckSum[0] + A;
  FCheckSum[1]:= FCheckSum[1] + B;
  FCheckSum[2]:= FCheckSum[2] + C;
  FCheckSum[3]:= FCheckSum[3] + D;
  FCheckSum[4]:= FCheckSum[4] + E;
end;

function TIdHashSHA1.NativeGetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes;
var
  LSize: Integer;
  LLenHi: UInt32;
  LLenLo: UInt32;
  I: Integer;
begin
  Result := nil;

  FCheckSum[0] := $67452301;
  FCheckSum[1] := $EFCDAB89;
  FCheckSum[2] := $98BADCFE;
  FCheckSum[3] := $10325476;
  FCheckSum[4] := $C3D2E1F0;

  LLenHi := 0;
  LLenLo := 0;

  // Code the entire file in complete 64-byte chunks.
  while ASize >= 64 do begin
    LSize := ReadTIdBytesFromStream(AStream, FCBuffer, 64);
    // TODO: handle stream read error
    Inc(LLenLo, LSize * 8);
    if LLenLo < UInt32(LSize * 8) then begin
      Inc(LLenHi);
    end;
    Coder;
    Dec(ASize, LSize);
  end;

  // Read the last set of bytes.
  LSize := ReadTIdBytesFromStream(AStream, FCBuffer, ASize);
  // TODO: handle stream read error
  Inc(LLenLo, LSize * 8);
  if LLenLo < UInt32(LSize * 8) then begin
    Inc(LLenHi);
  end;

  FCBuffer[LSize] := $80;
  if LSize >= 56 then begin
    for I := (LSize + 1) to 63 do begin
      FCBuffer[i] := 0;
    end;
    Coder;
    LSize := -1;
  end;

  for I := (LSize + 1) to 55 do begin
    FCBuffer[i] := 0;
  end;
  FCBuffer[56] := (LLenHi shr 24);
  FCBuffer[57] := (LLenHi shr 16) and $FF;
  FCBuffer[58] := (LLenHi shr 8) and $FF;
  FCBuffer[59] := (LLenHi and $FF);
  FCBuffer[60] := (LLenLo shr 24);
  FCBuffer[61] := (LLenLo shr 16) and $FF;
  FCBuffer[62] := (LLenLo shr 8) and $FF;
  FCBuffer[63] := (LLenLo and $FF);
  Coder;

  FCheckSum[0] := SwapLongWord(FCheckSum[0]);
  FCheckSum[1] := SwapLongWord(FCheckSum[1]);
  FCheckSum[2] := SwapLongWord(FCheckSum[2]);
  FCheckSum[3] := SwapLongWord(FCheckSum[3]);
  FCheckSum[4] := SwapLongWord(FCheckSum[4]);

  SetLength(Result, SizeOf(UInt32)*5);
  for I := 0 to 4 do begin
    CopyTIdUInt32(FCheckSum[I], Result, SizeOf(UInt32)*I);
  end;
end;

function TIdHashSHA1.HashToHex(const AHash: TIdBytes): String;
begin
  Result := LongWordHashToHex(AHash, 5);
end;
{$ENDIF}

{$IFNDEF DOTNET}
{ TIdHashSHA224 }

function TIdHashSHA224.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA224HashInst;
end;

class function TIdHashSHA224.IsAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and  IsSHA224HashIntfAvail;
end;
{$ENDIF}

{ TIdHashSHA256 }

function TIdHashSHA256.InitHash: TIdHashIntCtx;
begin
    Result := GetSHA256HashInst;
end;

class function TIdHashSHA256.IsAvailable : Boolean;
begin
  Result := IsHashingIntfAvail and  IsSHA256HashIntfAvail;
end;

{ TIdHashSHA384 }

function TIdHashSHA384.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA384HashInst;
end;

class function TIdHashSHA384.IsAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and  IsSHA384HashIntfAvail;
end;

{ TIdHashSHA512 }

function TIdHashSHA512.InitHash: TIdHashIntCtx;
begin
  Result := GetSHA512HashInst;
end;

class function TIdHashSHA512.IsAvailable: Boolean;
begin
  Result := IsHashingIntfAvail and  IsSHA512HashIntfAvail;
end;

end.