(***************************************************************
 *
 * Unit Name: SteakRef
 * Purpose  : Steak2 cipher reference implementation.
 *            This code is *not* optimized for speed. Use it as reference only.
 * Author   : Henrick Hellström, StreamSec
 * History  : First version, 2001-may-17
 *            Steak2 differs from Steak:
 *            a) in the algorithm used to set up of the s-boxes,
 *            b) in the domain of the the A-vector values,
 *            c) in the Steak derivate used for key expansion,
 *            d) in the order of the columns in the 32 times 32 representation
 *               of the linear MT-matrix, and
 *            e) because of the re-ordering of the MT-matrix columns, the ROL 4
 *               operation is replaced by a ROL 3 operation.
 *
 ****************************************************************)


(* Compiler directives: Turn off run-time range- and overflow checking.       *)
{$Q-,R-}
unit SteakRef;

  (****************************************************************************)
  (*  Declarations:                                                           *)
interface

  (*  Note:                                                                   *
   *  The LongWord type is an unsigned 32 bit integer. Signed 32 bit          *
   *  integers (LongInt in Delphi) would do as well, but using them would     *
   *  result in compiler warnings.                                            *)
                                                                                
  (****************************************************************************)
  (*  Declaration of constants:                                               *)
const
  (*  This value might be increased. VectorSize = 8 should be considerd as a  *
   *  minimum.                                                                *)
  VectorSize = 8;

  PermKeySize = 837;

  KeySize = PermKeySize + VectorSize * 6;

  (*  The 32 times 32 MT matrix is obtained from the 4 times 4 MDS matrix of  *
   *  TwoFish by the following procedure:                                     *
   *  1) Expand the 4 times 4 MDS matrix to its 4 times 256 table             *
   *     representation.                                                      *
   *  2) Sort rows 0,2,3 by arithmetic comparison in N.                       *
   *  3) Sort row 1 by arithmetic comparison in Z_{2^16}.                     *
   *  4) Extract the columns [i,2^j]. These columns will generate the entire  *
   *     table.                                                               *
   *  5) Apply a rotate right by one on each column.                          *
   *  6) Rotate the columns one position to the right (which is equivalent to *
   *     rotating the input one position left before linear transformation).  *)
  MT: array [0..31] of LongWord =
  ($810145BD, $02023FFB, $84047F42,
   $88084AB1, $101021E3, $A0204372, $C04032D1,
   $C4F88080, $3DC58101, $7B3F8202, $42FF0404,
   $31CA8808, $63219010, $72C32020, $51B2C040,
   $80C480F8, $813D8145, $827B023F, $0442847F,
   $8831884A, $90631021, $2072A043, $C051C032,
   $80C8A480, $0125C901, $024B2682, $0422CD04,
   $08452E88, $103EDD10, $207D0EA0, $404E9D40,
   $8080F8C4);

  (*  End of constant declaration.                                            *)
  (****************************************************************************)

                                                                                
  (****************************************************************************)
  (*  Declaration of types:                                                   *)
type
  (*  TSBox and TSBoxes are the types of the s-box varibales.                 *)
  TSBox = array [0..255] of Byte;
  TSBoxes = array [0..3] of TSBox;

  (*  The case statement inside a record declaration is a Pascal feature. It  *
   *  is called "variant record part declaration". In Delphi, the             *
   *  respective variant record parts will occupy the same physical memory,   *
   *  and that's usually the point in using them. In this case, we have that  *
   *  L = A + (B shl 8) + (C shl 16) + (D shl 24).                            *)
  TLongRec = record
    case Integer of
      0: (A, B, C, D: Byte);
      1: (L: LongWord);
  end;

  TVectorIndex = 0..VectorSize-1;
  TVector = array [TVectorIndex] of LongWord;
  TVectorRec = record
    case Integer of
      0: (L: TVector);
      1: (B: array [0..VectorSize*4-1] of Byte);
  end;

  (*  The TSteakRec type is the type of the internal state and key data       *
   *  variable.                                                               *)
  TSteakRec = record
    S: TSBoxes;
    A: TVector;
    V: TVectorRec;
  end;

  PByte = ^Byte;

  TByteDiv = 2..256;
  TShift = 0..31;

  (*  End of type declaration.                                                *)
  (****************************************************************************)

                                                                                
  (****************************************************************************)
  (*  Declaration of functions and procedures:                                *)

  (*  Note:                                                                   *
   *  Untyped parameters are a Delphi feature. They are usually treated as    *
   *  untyped pointers. If they are declared as const, the memory             *
   *  contents cannot (or should not) be altered by the procedure.            *)

  (*  Utility routines:                                                       *)
  function DivModByByte(Data: PByte; D: TByteDiv): Byte;
  function ROR(Value: LongWord; Shift: TShift): LongWord;
  function ROL(Value: LongWord; Shift: TShift): LongWord;

  (*  Key set up sub routines:                                                *)
  procedure ExpandKey1(const Buf; Count: Integer; var Data: PByte);
  procedure SetUpSBoxes(Data: PByte; var SBoxes: TSBoxes);
  procedure ExtractPermutation(Data: PByte; var SBox: TSBox);
  procedure ExpandKey2(Data: PByte);
  procedure SetUpA(Data: PByte; var A: TVector);
  procedure SetUpIV(Data: PByte; var V: TVectorRec);

  (*  Encryption / decryption sub functions:                                  *)
  function Substitute(const X: LongWord; const SBoxes: TSBoxes): LongWord;
  function Transform(X: LongWord): LongWord;
  function H(X: LongWord; Index: Integer; var KeyData: TSteakRec): LongWord;
  function G(const C: Byte; Index: Integer; var KeyData: TSteakRec): LongWord;
  function F(const C: Byte; var KeyData: TSteakRec): Byte;

  (*  User level functions:                                                   *)
  procedure Steak2_Encrypt_AllInOne(var Buf; Count: Integer; var KeyData: TSteakRec);
  procedure Steak2_Encrypt( var Buf; Count: Integer;
                     var KeyData: TSteakRec
                    );
  procedure Steak2_Decrypt( var Buf; Count: Integer;
                     var KeyData: TSteakRec
                    );
  procedure Steak2_SetUp(const Buf; Count: Integer; var SteakRec: TSteakRec);

  (*  End of function and procedure declaration.                              *)
  (****************************************************************************)


  (****************************************************************************)
  (*  Implementation:                                                         *)
implementation


(******************************************************************************)
(*  User level functions:                                                     *)
procedure Steak2_Encrypt(var Buf; Count: Integer; var KeyData: TSteakRec);
var
  Data: PByte;
  R: TLongRec;
  I, J: Integer;
  C: Byte;
begin
  (*  C_-1 := MostSignificantByte(V)  *)
  R.L := KeyData.V.L[VectorSize-1];
  C := R.D;

  (*  Main loop:  *)
  Data := @Buf;
  for I := 0 to Count - 1 do begin
    (*  C_i := P_i xor F(C_{i-1},KeyData)  *)
    C := Data^ xor F(C,KeyData);
    Data^ := C;
    Inc(Data);

    (*  V := V / 256  *)
    for J := 0 to VectorSize*4 - 2 do
      KeyData.V.B[J] := KeyData.V.B[J+1];
  end;
end;

procedure Steak2_Encrypt_AllInOne(var Buf; Count: Integer; var KeyData: TSteakRec);
var
  Data: PByte;
  R: TLongRec;
  X: LongWord;
  I, J, K: Integer;
  C: Byte;
begin
  (*  C_-1 := MostSignificantByte(V)  *)
  C := KeyData.V.B[VectorSize * 4 - 1];

  (*  Main loop:  *)
  Data := @Buf;
  for I := 0 to Count - 1 do begin
    (*  C_i := P_i xor F(C_{i-1},KeyData)  *)
    R.L := KeyData.V.L[0];
    KeyData.V.B[VectorSize * 4 - 1] := C;
    for J := VectorSize - 1 downto 0 do begin
      R.L := R.L + KeyData.A[J];

      (*  Substitute:  *)
      R.A := KeyData.S[0,R.A];
      R.B := KeyData.S[1,R.B];
      R.C := KeyData.S[2,R.C];
      R.D := KeyData.S[3,R.D];
      
      (*  Transform:  *)
      X := 0;
      for K := 0 to 31 do begin
        if Odd(R.L) then X := X xor MT[K];
        R.L := R.L shr 1;
      end;

      if J > 0 then R.L := ROL(X + KeyData.V.L[J],3)
      else R.L := X + KeyData.V.L[0];
      KeyData.V.L[J] := R.L;
    end;
    C := Data^ xor (X mod 256);
    Data^ := C;
    Inc(Data);

    (*  V := V / 256  *)
    for J := 0 to VectorSize*4 - 2 do
      KeyData.V.B[J] := KeyData.V.B[J+1];
  end;
end;

procedure Steak2_Decrypt(var Buf; Count: Integer; var KeyData: TSteakRec);
var
  Data: PByte;
  R: TLongRec;
  I, J: Integer;
  C, NextC: Byte;
begin
  (*  C_-1 := MostSignificantByte(V)  *)
  R.L := KeyData.V.L[VectorSize-1];
  C := R.D;

  (*  Main loop:  *)
  Data := @Buf;
  for I := 0 to Count - 1 do begin
    (*  P_i := C_i xor F(C_{i-1},KeyData)  *)
    NextC := Data^;
    Data^ := NextC xor F(C,KeyData);
    C := NextC;
    Inc(Data);

    (*  V := V / 256  *)
    for J := 0 to VectorSize*4 - 2 do
      KeyData.V.B[J] := KeyData.V.B[J+1];
  end;
end;

procedure Steak2_SetUp(const Buf; Count: Integer; var SteakRec: TSteakRec);
var
  Data: PByte;
begin
  (*  Expand the user key in Buf and copy it to Data:  *)
  ExpandKey1(Buf, Count, Data);
  try
    (*  Call set up sub routines:  *)
    SetUpSBoxes(Data,SteakRec.S);
    ExpandKey2(Data);
    SetUpA(Data,SteakRec.A);
    SetUpIV(Data,SteakRec.V);
  finally
    (*  Deallocate the Data variable:  *)
    FreeMem(Data);
  end;
end;

                                                                                
(******************************************************************************)
(*  Encryption / decryption sub functions:                                    *)

(*  Substitute: {0,1}^32 x [0..255!)^4 -> {0,1}^32  *)
function Substitute(const X: LongWord; const SBoxes: TSBoxes): LongWord;
var
  R: TLongRec;
begin
  R.L := X;
  R.A := SBoxes[0,R.A];
  R.B := SBoxes[1,R.B];
  R.C := SBoxes[2,R.C];
  R.D := SBoxes[3,R.D];
  Result := R.L;
end;

(*  Transform: {0,1}^32 -> {0,1}^32  *)
function Transform(X: LongWord): LongWord;
var
  I: TShift;
begin
  Result := 0;
  for I := 0 to 31 do begin
    if Odd(X) then Result := Result xor MT[I];
    X := X shr 1;
  end;
end;

(*  H: {0,1}^32 x
       [0..255!)^4 x
       {0,1}^32 x
       {0,1}^32
         ->
       {0,1}^32 x
       {0,1}^32
*)
function H(X: LongWord; Index: Integer; var KeyData: TSteakRec): LongWord;
begin
  (*  Static whitening: (keyed)  *)
  X := X + KeyData.A[VectorSize - Index - 1];

  (*  Byte-wise substitution: (keyed)  *)
  X := Substitute(X, KeyData.S);

  (*  Linear transformation: (un-keyed)  *)
  X := Transform(X);

  if Index = VectorSize - 1 then begin
    (*  Update internal state:  *)
    KeyData.V.L[0] := X + KeyData.V.L[0];

    (*  Assign return value:  *)
    Result := X;

  end else begin
    (*  Dynamic whitening: (keyed)  *)
    X := ROL(X + KeyData.V.L[VectorSize - Index - 1],3);

    (*  Update internal state:  *)
    KeyData.V.L[VectorSize - Index - 1] := X;

    (*  Assign return value:  *)
    Result := X;
  end;
end;

(*  G_i: {0,1}^8 x
         [0..255!)^4 x
         {{0,1}^32}^i x
         {0,1}^24 x {0,1}^32^i
           ->
         {0,1}^32 x
         {{0,1}^32}^i  *)
(*  G_VectorSize:
         {0,1}^8 x
         [0..255!)^4 x
         {{0,1}^32}^VectorSize x
         {0,1}^24 x {{0,1}^32}^(VectorSize-1)
           ->
         {0,1}^32 x
         {{0,1}^32}^VectorSize  *)
function G(const C: Byte; Index: Integer; var KeyData: TSteakRec): LongWord;
begin
  if Index = 0 then begin
    KeyData.V.B[VectorSize*4 - 1] := C;
    Result := H(KeyData.V.L[0],0,KeyData);
  end else
    Result := H(G(C,Index-1,KeyData),Index,KeyData);
end;

(*  F:   {0,1}^8 x
         [0..255!)^4 x
         {{0,1}^32}^VectorSize x
         {0,1}^24 x {{0,1}^32}^(VectorSize-1)
           ->
         {0,1}^8 x
         {{0,1}^32}^VectorSize  *)
function F(const C: Byte; var KeyData: TSteakRec): Byte;
begin
  Result := G(C,VectorSize - 1,KeyData) mod 256;
end;

                                                                                
(******************************************************************************)
(*  Key set up routines:                                                      *)

(*  Stretch key from Count bytes to 885 bytes.  *)
procedure ExpandKey1(const Buf; Count: Integer; var Data: PByte);
var
  I, J, K: Integer;
  tmp: array [0..27] of TSteakRec;
  C, X: Byte;
begin
  (*  Allocate memory for Data:  *)
  GetMem(Data,KeySize);

  (*  Clear Data:  *)
  FillChar(Data^,KeySize,0);

  (*  Move Buf to Data:  *)
  if Count < KeySize then
    Move(Buf,Data^,Count)
  else
    Move(Buf,Data^,KeySize);

  (*  Set up default KeyData for key expansion:  *)
  for K := 0 to 27 do begin
    for I := 0 to 3 do begin
      for J := 0 to 254 do
        tmp[K].S[I,J] := J + 1;
      tmp[K].S[I,255] := 0;
    end;
    for I := 0 to VectorSize-1 do begin
      tmp[K].A[I] := $AAAAAAAA;
      tmp[K].V.L[I] := $AAAAAAAA;
    end;
  end;

  (*  Expand key:  *)
  C := $AA;
  for I := 0 to KeySize - 1 do begin
    X := C;
    for K := 0 to 27 do
      X := F(X,tmp[K]);
    C := X xor Data^;
    Data^ := C;
    for K := 0 to 27 do
      for J := 0 to VectorSize * 4 - 2 do
        tmp[K].V.B[J] := tmp[K].V.B[J+1];
  end;
end;

(*  Extract 4 single cycle permutations from Data.  *)
procedure SetUpSBoxes(Data: PByte; var SBoxes: TSBoxes);
var
  I: Integer;
begin
  (*  Each s-box is a pseudo random single cycle permutation.  *)
  for I := 0 to 3 do
    ExtractPermutation(Data,SBoxes[I]);
end;

(*  Extract a single cycle permutation from Data.  *)
procedure ExtractPermutation(Data: PByte; var SBox: TSBox);
var
  I, J, X: Byte;
  tmp: TSBox;
begin
  (*  Extract a 256 element permutation tmp, with tmp[255] = 255: *)
  for I := 0 to 255 do
    tmp[I] := I;
  for I := 255 downto 2 do begin
    J := DivModByByte(Data,I);
    X := tmp[I];
    tmp[I] := tmp[J];
    tmp[J] := X;
  end;

  (*  Treat tmp as the single cycle of SBox and compute SBox accordingly:     *)
  J := 255;
  for I := 0 to 254 do begin
    X := tmp[I];
    SBox[J] := X;
    J := X;
  end;
  SBox[J] := 255;
end;

(*  Distort the key data a second time, before A and V are extracted.  *)
procedure ExpandKey2(Data: PByte);
var
  I, J: Integer;
  tmp: TSteakRec;
begin
  (*  Set up default KeyData:  *)
  for I := 0 to 3 do
    for J := 0 to 255 do
      tmp.S[I,J] := J;
  for I := 0 to VectorSize-1 do begin
    tmp.A[I] := $AAAAAAAA;
    tmp.V.L[I] := $AAAAAAAA;
  end;

  (*  Expand key:  *)
  Steak2_Encrypt(Data^,VectorSize * 6,tmp);
end;

(*  Extract A from Data.  *)
procedure SetUpA(Data: PByte; var A: TVector);
var
  I, J: Integer;
  D: Byte;
begin
  (*  A is such that:                                                         *
   *    if the bit at position 2*i is 1 then bit at position 2*i+1 is 0, and  *
   *    if the bit at position 2*i is 0 then bit at position 2*i+1 is 1.      *)
  for I := 0 to VectorSize - 1 do begin
    A[I] := 0;
    D := DivModByByte(Data,256);
    for J := 0 to 7 do begin
      if Odd(D) then
        A[I] := (A[I] shl 2) + 1
      else
        A[I] := (A[I] shl 2) + 2;
      D := D shr 1;
    end;
    D := DivModByByte(Data,256);
    for J := 0 to 7 do begin
      if Odd(D) then
        A[I] := (A[I] shl 2) + 1
      else
        A[I] := (A[I] shl 2) + 2;
      D := D shr 1;
    end;
  end;
end;

(*  Extract V from data.  *)
procedure SetUpIV(Data: PByte; var V: TVectorRec);
var
  I: Integer;
begin
  (*  V is "only" pseudo random.  *)
  for I := 0 to VectorSize * 4 - 1 do begin
    V.B[I] := Data^;
    Inc(Data);
  end;
end;

     
(******************************************************************************)
(*  Utility routines:                                                         *)
function DivModByByte(Data: PByte; D: TByteDiv): Byte;
var
  I: Integer;
  C: Word;
begin
  if D = 256 then begin
    Result := Data^;
    Move(Ptr(LongInt(Data) + 1)^,Data^,KeySize-1);
  end else begin
    // Make Data point to the end of its contents:
    Data := Ptr(LongInt(Data) + KeySize);
    // DivMod:
    C := 0;
    for I := KeySize - 1 downto 0 do begin
      Dec(Data);
      C := Data^ + (C shl 8);
      Data^ := C div D;
      C := C mod D;
    end;
    Result := C;
  end;
end;

function ROR(Value: LongWord; Shift: TShift): LongWord;
begin
  Result := (Value shr Shift) + (Value shl (32 - Shift));
end;

function ROL(Value: LongWord; Shift: TShift): LongWord;
begin
  Result := (Value shl Shift) + (Value shr (32 - Shift));
end;


end.