{
  $Project$
  $Workfile$
  $Revision$
  $DateUTC$
  $Id$

  This file is part of the Indy (Internet Direct) project, and is offered
  under the dual-licensing agreement described on the Indy website.
  (http://www.indyproject.org/)

  Copyright:
   (c) 1993-2005, Chad Z. Hower and the Indy Pit Crew. All rights reserved.
}
{
  $Log$
}
{
  Rev 1.6    2003-10-12 15:25:50  HHellström
  Comments added

  Rev 1.5    2003-10-12 03:08:24  HHellström
  New implementation; copyright changed. The source code formatting has been
  adjusted to fit the margins. The new implementation is faster on dotNet
  compared to the old one, but is slightly slower on Win32.

  Rev 1.4    2003-10-11 18:44:54  HHellström
  Range checking and overflow checking disabled in the Coder method only. The
  purpose of this setting is to force the arithmetic operations performed on
  LongWord variables to be modulo $100000000. This hack entails reasonable
  performance on both Win32 and dotNet.

  Rev 1.3    10/10/2003 2:20:56 PM  GGrieve
  turn range checking off

  Rev 1.2    2003-09-21 17:31:02  HHellström    Version: 1.2
  DotNET compatibility

  Rev 1.1    2/16/2003 03:19:18 PM  JPMugaas
  Should now compile on D7 better.

  Rev 1.0    11/13/2002 07:53:48 AM  JPMugaas
}

unit IdHashSHA1;

interface
{$i IdCompilerDefines.inc}

uses
  Classes,
  IdGlobal, IdHash;

type
  T5x4LongWordRecord = array[0..4] of UInt32;
  T512BitRecord = array [0..63] of Byte;

  TIdHashSHA1 = class(TIdHash)
  protected
    FCheckSum: T5x4LongWordRecord;
    FCBuffer: TIdBytes;
    procedure Coder;
    function GetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes; override;
    function HashToHex(const AHash: TIdBytes): String; override;
  public
    constructor Create; override;
  end;

implementation
  {$IFNDEF DOTNET}
uses
  IdStreamVCL;
  {$ENDIF}
{ TIdHashSHA1 }

function SwapLongWord(const AValue: UInt32): UInt32;
begin
  Result := ((AValue and $FF) shl 24) or ((AValue and $FF00) shl 8) or ((AValue and $FF0000) shr 8) or ((AValue and $FF000000) shr 24);
end;

constructor TIdHashSHA1.Create;
begin
  inherited Create;
  SetLength(FCBuffer, 64);
end;

{$Q-,R-} // Operations performed modulo $100000000
procedure TIdHashSHA1.Coder;
var
  T, A, B, C, D, E: UInt32;
  { The size of the W variable has been reduced to make the Coder method
    consume less memory on dotNet. This change has been tested with the v1.1
    framework and entails a general increase of performance by >50%. }
  W: array [0..19] of UInt32;
  i: UInt32;
begin
  { The first 16 W values are identical to the input block with endian
    conversion. }
  for i := 0 to 15 do
  begin
    W[i]:= (FCBuffer[i*4] shl 24) or
            (FCBuffer[i*4+1] shl 16) or
            (FCBuffer[i*4+2] shl 8) or
            FCBuffer[i*4+3];
  end;
  { In normal x86 code all of the remaining 64 W values would be calculated
    here. Here only the four next values are calculated, to reduce the code
    size of the first of the four loops below. }
  for i := 16 to 19 do
  begin
    T := W[i-3] xor W[i-8] xor W[i-14] xor W[i-16];
    W[i] := (T shl 1) or (T shr 31);
  end;

  A := FCheckSum[0];
  B := FCheckSum[1];
  C := FCheckSum[2];
  D := FCheckSum[3];
  E := FCheckSum[4];

  { The following loop could be expanded, but has been kept together to reduce
    the code size. A small code size entails better performance due to CPU
    caching.

    Note that the code size could be reduced further by using the SHA-1
    reference code:

      for i := 0 to 19 do begin
        T := E + (A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i];
        Inc(T,$5A827999);
        E := D;
        D := C;
        C := (B shl 30) + (B shr 2);
        B := A;
        A := T;
      end;

    The reference code is usually (at least partly) expanded, mostly because
    the assignments that circle the state variables A, B, C, D and E are costly,
    in particular on dotNET. (In x86 code further optimization can be achieved
    by eliminating the loop variable, which occupies a CPU register that is
    better used by one of the state variables, plus by expanding the W array
    at the beginning.) }

  i := 0;
  repeat
    Inc(E,(A shl 5) + (A shr 27) + (D xor (B and (C xor D))) + W[i+0]);
    Inc(E,$5A827999);
    B := (B shl 30) + (B shr 2);
    Inc(D,(E shl 5) + (E shr 27) + (C xor (A and (B xor C))) + W[i+1]);
    Inc(D,$5A827999);
    A := (A shl 30) + (A shr 2);
    Inc(C,(D shl 5) + (D shr 27) + (B xor (E and (A xor B))) + W[i+2]);
    Inc(C,$5A827999);
    E := (E shl 30) + (E shr 2);
    Inc(B,(C shl 5) + (C shr 27) + (A xor (D and (E xor A))) + W[i+3]);
    Inc(B,$5A827999);
    D := (D shl 30) + (D shr 2);
    Inc(A,(B shl 5) + (B shr 27) + (E xor (C and (D xor E))) + W[i+4]);
    Inc(A,$5A827999);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 20;

  { The following three loops will only use the first 16 elements of the W
    array in a circular, recursive pattern. The following assignments are a
    trade-off to avoid having to split up the first loop. }
  W[0] := W[16];
  W[1] := W[17];
  W[2] := W[18];
  W[3] := W[19];

  { In the following three loops the recursive W array expansion is performed
    "just in time" following a circular pattern. Using circular indicies (e.g.
    (i+2) and $F) is not free, but the cost of declaring a large W array would
    be higher on dotNET. Before attempting to optimize this code, please note
    that the following language features are also costly:

      * Assignments and moves/copies, in particular on dotNET
      * Constant lookup tables, in particular on dotNET
      * Sub functions, in particular on x86
      * if..then and case..of. }

  i := 20;
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T + $6ED9EBA1);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T + $6ED9EBA1);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T + $6ED9EBA1);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T + $6ED9EBA1);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T + $6ED9EBA1);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 40;

  { Note that the constant $70E44324 = $100000000 - $8F1BBCDC has been selected
    to slightly reduce the probability that the CPU flag C (Carry) is set. This
    trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
    It entails a marginal but measurable performance gain on some CPUs. }

  i := 40;
  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + ((B and C) or (D and (B or C))) + T);
    Dec(E,$70E44324);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + ((A and B) or (C and (A or B))) + T);
    Dec(D,$70E44324);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + ((E and A) or (B and (E or A))) + T);
    Dec(C,$70E44324);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + ((D and E) or (A and (D or E))) + T);
    Dec(B,$70E44324);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + ((C and D) or (E and (C or D))) + T);
    Dec(A,$70E44324);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 60;

  { Note that the constant $359D3E2A = $100000000 - $CA62C1D6 has been selected
    to slightly reduce the probability that the CPU flag C (Carry) is set. This
    trick is taken from the StreamSec(R) StrSecII(TM) implementation of SHA-1.
    It entails a marginal but measurable performance gain on some CPUs. }

  repeat
    T := W[(i+13) and $F] xor W[(i+8) and $F];
    T := T xor W[(i+2) and $F] xor W[i and $F];
    T := (T shl 1) or (T shr 31);
    W[i and $F] := T;
    Inc(E,(A shl 5) + (A shr 27) + (B xor C xor D) + T - $359D3E2A);
    B := (B shl 30) + (B shr 2);
    T := W[(i+14) and $F] xor W[(i+9) and $F];
    T := T xor W[(i+3) and $F] xor W[(i+1) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+1) and $F] := T;
    Inc(D,(E shl 5) + (E shr 27) + (A xor B xor C) + T - $359D3E2A);
    A := (A shl 30) + (A shr 2);
    T := W[(i+15) and $F] xor W[(i+10) and $F];
    T := T xor W[(i+4) and $F] xor W[(i+2) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+2) and $F] := T;
    Inc(C,(D shl 5) + (D shr 27) + (E xor A xor B) + T - $359D3E2A);
    E := (E shl 30) + (E shr 2);
    T := W[i and $F] xor W[(i+11) and $F];
    T := T xor W[(i+5) and $F] xor W[(i+3) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+3) and $F] := T;
    Inc(B,(C shl 5) + (C shr 27) + (D xor E xor A) + T - $359D3E2A);
    D := (D shl 30) + (D shr 2);
    T := W[(i+1) and $F] xor W[(i+12) and $F];
    T := T xor W[(i+6) and $F] xor W[(i+4) and $F];
    T := (T shl 1) or (T shr 31);
    W[(i+4) and $F] := T;
    Inc(A,(B shl 5) + (B shr 27) + (C xor D xor E) + T - $359D3E2A);
    C := (C shl 30) + (C shr 2);
    Inc(i,5);
  until i = 80;

  FCheckSum[0]:= FCheckSum[0] + A;
  FCheckSum[1]:= FCheckSum[1] + B;
  FCheckSum[2]:= FCheckSum[2] + C;
  FCheckSum[3]:= FCheckSum[3] + D;
  FCheckSum[4]:= FCheckSum[4] + E;
end;

function TIdHashSHA1.GetHashBytes(AStream: TStream; ASize: TIdStreamSize): TIdBytes;
var
  LSize: Integer;
  LLenHi: UInt32;
  LLenLo: UInt32;
  I: Integer;
begin
  Result := nil;

  FCheckSum[0] := $67452301;
  FCheckSum[1] := $EFCDAB89;
  FCheckSum[2] := $98BADCFE;
  FCheckSum[3] := $10325476;
  FCheckSum[4] := $C3D2E1F0;

  LLenHi := 0;
  LLenLo := 0;

  // Code the entire file in complete 64-byte chunks.
  while ASize >= 64 do begin
    LSize := ReadTIdBytesFromStream(AStream, FCBuffer, 64);
    // TODO: handle stream read error
    Inc(LLenLo, LSize * 8);
    if LLenLo < UInt32(LSize * 8) then begin
      Inc(LLenHi);
    end;
    Coder;
    Dec(ASize, LSize);
  end;

  // Read the last set of bytes.
  LSize := ReadTIdBytesFromStream(AStream, FCBuffer, ASize);
  // TODO: handle stream read error
  Inc(LLenLo, LSize * 8);
  if LLenLo < UInt32(LSize * 8) then begin
    Inc(LLenHi);
  end;

  FCBuffer[LSize] := $80;
  if LSize >= 56 then begin
    for I := (LSize + 1) to 63 do begin
      FCBuffer[i] := 0;
    end;
    Coder;
    LSize := -1;
  end;

  for I := (LSize + 1) to 55 do begin
    FCBuffer[i] := 0;
  end;
  FCBuffer[56] := (LLenHi shr 24);
  FCBuffer[57] := (LLenHi shr 16) and $FF;
  FCBuffer[58] := (LLenHi shr 8) and $FF;
  FCBuffer[59] := (LLenHi and $FF);
  FCBuffer[60] := (LLenLo shr 24);
  FCBuffer[61] := (LLenLo shr 16) and $FF;
  FCBuffer[62] := (LLenLo shr 8) and $FF;
  FCBuffer[63] := (LLenLo and $FF);
  Coder;

  FCheckSum[0] := SwapLongWord(FCheckSum[0]);
  FCheckSum[1] := SwapLongWord(FCheckSum[1]);
  FCheckSum[2] := SwapLongWord(FCheckSum[2]);
  FCheckSum[3] := SwapLongWord(FCheckSum[3]);
  FCheckSum[4] := SwapLongWord(FCheckSum[4]);

  SetLength(Result, SizeOf(UInt32)*5);
  for I := 0 to 4 do begin
    CopyTIdUInt32(FCheckSum[I], Result, SizeOf(UInt32)*I);
  end;
end;

function TIdHashSHA1.HashToHex(const AHash: TIdBytes): String;
begin
  Result := LongWordHashToHex(AHash, 5);
end;

end.