{ CentrED/Imaging/JpegLib/imjidctred.pas }

unit imjidctred;


{ This file contains inverse-DCT routines that produce reduced-size output:
  either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.

  The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
  algorithm used in jidctint.c.  We simply replace each 8-to-8 1-D IDCT step
  with an 8-to-4 step that produces the four averages of two adjacent outputs
  (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
  These steps were derived by computing the corresponding values at the end
  of the normal LL&M code, then simplifying as much as possible.

  1x1 is trivial: just take the DC coefficient divided by 8.

  See jidctint.c for additional comments. }


{ Original : jidctred.c ; Copyright (C) 1994-1998, Thomas G. Lane. }

interface

{$I imjconfig.inc}

uses
  imjmorecfg,
  imjinclude,
  imjpeglib,
  imjdct;         	{ Private declarations for DCT subsystem }

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 1x1 output block.

  cinfo      : decompressor state; supplies the sample range-limit table.
  compptr    : component info; its dct_table holds the dequantization
               multipliers.
  coef_block : the block of quantized DCT coefficients to transform.
  output_buf : array of output sample rows; only row 0 is written.
  output_col : column in that row which receives the single sample. }

{GLOBAL}
procedure jpeg_idct_1x1 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
	                 coef_block : JCOEFPTR;
	                 output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 2x2 output block.

  Parameters are as for jpeg_idct_1x1; rows 0..1 of output_buf are
  written, two samples each, starting at column output_col. }

{GLOBAL}
procedure jpeg_idct_2x2 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
	                 coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 4x4 output block.

  Parameters are as for jpeg_idct_1x1; rows 0..3 of output_buf are
  written, four samples each, starting at column output_col. }

{GLOBAL}
procedure jpeg_idct_4x4 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
	                 coef_block : JCOEFPTR;
	                 output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);

implementation

{ This module is specialized to the case DCTSIZE = 8. }

{$ifndef DCTSIZE_IS_8}
  Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }
{$endif}


{ Scaling is the same as in jidctint.c.

  CONST_BITS is the number of fractional bits carried by the FIX_xxx
  multipliers declared below.  PASS1_BITS is the number of extra
  fractional bits the column pass leaves in the workspace values; the
  row pass removes them again in its final DESCALE. }

{$ifdef BITS_IN_JSAMPLE_IS_8}
const
  CONST_BITS = 13;
  PASS1_BITS = 2;
{$else}
const
  CONST_BITS = 13;
  PASS1_BITS = 1;	{ lose a little precision to avoid overflow }
{$endif}

{ Fixed-point multipliers: FIX_a_bcd equals Round(a.bcd * 2^CONST_BITS).
  The number in the trailing comment on each line is the resulting
  integer for CONST_BITS = 13. }
const
  FIX_0_211164243 = INT32(Round((INT32(1) shl CONST_BITS) * 0.211164243)); {1730}
  FIX_0_509795579 = INT32(Round((INT32(1) shl CONST_BITS) * 0.509795579)); {4176}
  FIX_0_601344887 = INT32(Round((INT32(1) shl CONST_BITS) * 0.601344887)); {4926}
  FIX_0_720959822 = INT32(Round((INT32(1) shl CONST_BITS) * 0.720959822)); {5906}
  FIX_0_765366865 = INT32(Round((INT32(1) shl CONST_BITS) * 0.765366865)); {6270}
  FIX_0_850430095 = INT32(Round((INT32(1) shl CONST_BITS) * 0.850430095)); {6967}
  FIX_0_899976223 = INT32(Round((INT32(1) shl CONST_BITS) * 0.899976223)); {7373}
  FIX_1_061594337 = INT32(Round((INT32(1) shl CONST_BITS) * 1.061594337)); {8697}
  FIX_1_272758580 = INT32(Round((INT32(1) shl CONST_BITS) * 1.272758580)); {10426}
  FIX_1_451774981 = INT32(Round((INT32(1) shl CONST_BITS) * 1.451774981)); {11893}
  FIX_1_847759065 = INT32(Round((INT32(1) shl CONST_BITS) * 1.847759065)); {15137}
  FIX_2_172734803 = INT32(Round((INT32(1) shl CONST_BITS) * 2.172734803)); {17799}
  FIX_2_562915447 = INT32(Round((INT32(1) shl CONST_BITS) * 2.562915447)); {20995}
  FIX_3_624509785 = INT32(Round((INT32(1) shl CONST_BITS) * 3.624509785)); {29692}

{ Multiply(X, Y): product of a variable X and a constant Y, returned as
  an INT32.

  With 8-bit samples and the recommended scaling both operands fit in
  16 bits, so a 16x16->32 bit multiply is sufficient; that variant
  therefore accepts plain Integer operands and only widens for the
  product.  With 12-bit samples a full 32-bit multiplication is needed
  and both operands are INT32. }

{$ifdef BITS_IN_JSAMPLE_IS_8}

   { Disabled 16-bit assembler variant, kept for reference only: }
   {function Multiply(X, Y: Integer): integer; assembler;
   asm
     mov ax, X
     imul Y
     mov al, ah
     mov ah, dl
   end;}

   {MULTIPLY16C16(var,const)}
   function Multiply(X, Y: Integer): INT32;
   var
     wide_y : INT32;
   begin
     { Widen the constant operand so the product is formed as an INT32. }
     wide_y := INT32(Y);
     Multiply := X * wide_y;
   end;


{$else}
   function Multiply(X, Y: INT32): INT32;
   var
     product : INT32;
   begin
     product := X * Y;
     Multiply := product;
   end;
{$endif}


{ DEQUANTIZE(coef, quantval): scale one coefficient by its entry in the
  multiplier table and return the product as an int.  Both inputs and
  the result are 16 bits or less in this module, so either an int or a
  short multiply gives the same answer. }

function DEQUANTIZE(coef,quantval : int) : int;
var
  scaled_coef : ISLOW_MULT_TYPE;
begin
  { The coefficient is converted to the multiplier type before the
    multiplication, exactly as the inlined code in the IDCT routines does. }
  scaled_coef := ISLOW_MULT_TYPE(coef);
  Dequantize := (scaled_coef * quantval);
end;


{ Descale and correctly round an INT32 value that's scaled by N bits.

  x : the scaled value.
  n : number of fraction bits to remove.  The shift expressions below
      need n in 1..31; every call in this unit passes a constant in
      that range.

  Returns x / 2^n rounded to nearest.  Half of the divisor is added
  first and the sum is then shifted right; because the shift is meant to
  round towards minus infinity, that fudge factor is correct for either
  sign of x.

  When RIGHT_SHIFT_IS_UNSIGNED is defined, shr fills the vacated high
  bits with zeros, so for a negative sum the top n bits are set again
  by hand to reproduce an arithmetic shift.  Otherwise shr is assumed to
  be an arithmetic shift already. }

function DESCALE(x : INT32; n : int) : INT32;
var
  shift_temp : INT32;
begin
  { Rounding term: 2^(n-1) is one half in the scaled representation. }
  shift_temp := x + (INT32(1) shl (n-1));
{$ifdef RIGHT_SHIFT_IS_UNSIGNED}
  if shift_temp < 0 then
    Descale :=  (shift_temp shr n) or ((not INT32(0)) shl (32-n))
  else
    Descale :=  (shift_temp shr n);
{$else}
  { Previously this branch had an unbalanced parenthesis and could not
    compile; it now shifts the same rounded value as the branch above. }
  Descale := shift_temp shr n;
{$endif}
end;

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 4x4 output block.

  cinfo      : decompressor state; only sample_range_limit is read.
  compptr    : component info; dct_table is used as the table of
               dequantization multipliers.
  coef_block : the block of quantized coefficients (read only).
  output_buf : output sample rows; rows 0..3 are written.
  output_col : first column written in each of those rows; four
               consecutive samples are stored per row.

  The transform runs as two 1-D passes: pass 1 reduces each input
  column to 4 values kept in a local workspace, pass 2 reduces each of
  the 4 workspace rows to 4 range-limited output samples. }

{GLOBAL}
procedure jpeg_idct_4x4 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
	                 coef_block : JCOEFPTR;
	                 output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);
type
  PWorkspace = ^TWorkspace;
  TWorkspace = array[0..(DCTSIZE*4)-1] of int; { buffers data between passes }
var
  tmp0, tmp2, tmp10, tmp12 : INT32;
  z1, z2, z3, z4 : INT32;
  inptr : JCOEFPTR;
  quantptr : ISLOW_MULT_TYPE_FIELD_PTR;
  wsptr : PWorkspace;
  outptr : JSAMPROW;
  range_limit : JSAMPROW;
  ctr : int;
  workspace : TWorkspace;	{ buffers data between passes }
  {SHIFT_TEMPS}
var
  dcval : int;
var
  dcval_ : JSAMPLE;
begin
{ Each IDCT routine is responsible for range-limiting its results and
  converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
  be quite far out of range if the input data is corrupt, so a bulletproof
  range-limiting step is required.  We use a mask-and-table-lookup method
  to do the combined operations quickly.  See the comments with
  prepare_range_limit_table (in jdmaster.c) for more info. }

  { range_limit addresses the table starting at entry CENTERJSAMPLE; every
    output sample below is fetched as range_limit^[value and RANGE_MASK]. }
  range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));

  { Pass 1: process columns from input, store into work array. }

  { inptr, quantptr and wsptr all step forward by one element per
    iteration, so inside the loop index DCTSIZE*k addresses row k of the
    current column.  ctr counts down, hence ctr = DCTSIZE-k is column k. }
  inptr := coef_block;
  quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);
  wsptr := @workspace;
  for ctr := DCTSIZE downto 1 do
  begin
    { Don't bother to process column 4, because second pass won't use it }
    if (ctr = DCTSIZE-4) then
    begin
      { Only advance the pointers; workspace column 4 is left unwritten. }
      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;
    if (inptr^[DCTSIZE*1]=0) and (inptr^[DCTSIZE*2]=0) and (inptr^[DCTSIZE*3]=0) and
       (inptr^[DCTSIZE*5]=0) and (inptr^[DCTSIZE*6]=0) and (inptr^[DCTSIZE*7]=0) then
    begin
      { AC terms all zero; we need not examine term 4 for 4x4 output }
      { Shortcut: all four outputs of this column equal the dequantized
        DC term, scaled up by PASS1_BITS like the general path's results. }
      dcval := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) *
                      quantptr^[DCTSIZE*0]) shl PASS1_BITS;

      wsptr^[DCTSIZE*0] := dcval;
      wsptr^[DCTSIZE*1] := dcval;
      wsptr^[DCTSIZE*2] := dcval;
      wsptr^[DCTSIZE*3] := dcval;

      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;

    { Even part }

    { Dequantized DC term, raised to the CONST_BITS+1 scale used by the
      products it is combined with below. }
    tmp0 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) * quantptr^[DCTSIZE*0]);

    tmp0 := tmp0 shl (CONST_BITS+1);

    { Rows 2 and 6 are the only other even-indexed terms used; row 4 is
      never read. }
    z2 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*2]) * quantptr^[DCTSIZE*2]);
    z3 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*6]) * quantptr^[DCTSIZE*6]);

    tmp2 := MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);

    tmp10 := tmp0 + tmp2;
    tmp12 := tmp0 - tmp2;

    { Odd part }

    { Dequantize rows 7, 5, 3 and 1; tmp0 and tmp2 are reused from here on
      for the two odd-part sums. }
    z1 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*7]) * quantptr^[DCTSIZE*7];
    z2 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*5]) * quantptr^[DCTSIZE*5];
    z3 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*3]) * quantptr^[DCTSIZE*3];
    z4 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*1]) * quantptr^[DCTSIZE*1];

    tmp0 := MULTIPLY(z1, - FIX_0_211164243) { sqrt(2) * (c3-c1) }
	  + MULTIPLY(z2, FIX_1_451774981) { sqrt(2) * (c3+c7) }
	  + MULTIPLY(z3, - FIX_2_172734803) { sqrt(2) * (-c1-c5) }
	  + MULTIPLY(z4, FIX_1_061594337); { sqrt(2) * (c5+c7) }

    tmp2 := MULTIPLY(z1, - FIX_0_509795579) { sqrt(2) * (c7-c5) }
	  + MULTIPLY(z2, - FIX_0_601344887) { sqrt(2) * (c5-c1) }
	  + MULTIPLY(z3, FIX_0_899976223) { sqrt(2) * (c3-c7) }
	  + MULTIPLY(z4, FIX_2_562915447); { sqrt(2) * (c1+c3) }

    { Final output stage }

    { Removing CONST_BITS-PASS1_BITS+1 of the CONST_BITS+1 fraction bits
      leaves the stored values scaled up by PASS1_BITS, the same scale the
      all-zero shortcut above produces. }
    wsptr^[DCTSIZE*0] := int(DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1));
    wsptr^[DCTSIZE*3] := int(DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1));
    wsptr^[DCTSIZE*1] := int(DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1));
    wsptr^[DCTSIZE*2] := int(DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1));

    Inc(JCOEF_PTR(inptr));
    Inc(ISLOW_MULT_TYPE_PTR(quantptr));
    Inc(int_ptr(wsptr));
  end;

  { Pass 2: process 4 rows from work array, store into output array. }

  { wsptr now walks the workspace row by row; indexes 0..7 address the
    entries of the current row, and index 4 is never read. }
  wsptr := @workspace;
  for ctr := 0 to pred(4) do
  begin
    outptr := JSAMPROW(@ output_buf^[ctr]^[output_col]);
    { It's not clear whether a zero row test is worthwhile here ... }

{$ifndef NO_ZERO_ROW_TEST}
    if (wsptr^[1]=0) and (wsptr^[2]=0) and (wsptr^[3]=0) and
       (wsptr^[5]=0) and (wsptr^[6]=0) and (wsptr^[7]=0) then
    begin
      { AC terms all zero }
      { Shortcut: all four samples equal the row's first entry descaled by
        PASS1_BITS+3, the same net scaling the general path applies. }
      dcval_ := range_limit^[int(DESCALE(INT32(wsptr^[0]), PASS1_BITS+3))
				  and RANGE_MASK];

      outptr^[0] := dcval_;
      outptr^[1] := dcval_;
      outptr^[2] := dcval_;
      outptr^[3] := dcval_;

      Inc(int_ptr(wsptr), DCTSIZE);	{ advance pointer to next row }
      continue;
    end;
{$endif}

    { Even part }

    tmp0 := (INT32(wsptr^[0])) shl (CONST_BITS+1);

    tmp2 := MULTIPLY(INT32(wsptr^[2]), FIX_1_847759065)
	  + MULTIPLY(INT32(wsptr^[6]), - FIX_0_765366865);

    tmp10 := tmp0 + tmp2;
    tmp12 := tmp0 - tmp2;

    { Odd part }

    z1 := INT32(wsptr^[7]);
    z2 := INT32(wsptr^[5]);
    z3 := INT32(wsptr^[3]);
    z4 := INT32(wsptr^[1]);

    tmp0 := MULTIPLY(z1, - FIX_0_211164243) { sqrt(2) * (c3-c1) }
	  + MULTIPLY(z2, FIX_1_451774981) { sqrt(2) * (c3+c7) }
	  + MULTIPLY(z3, - FIX_2_172734803) { sqrt(2) * (-c1-c5) }
	  + MULTIPLY(z4, FIX_1_061594337); { sqrt(2) * (c5+c7) }

    tmp2 := MULTIPLY(z1, - FIX_0_509795579) { sqrt(2) * (c7-c5) }
	  + MULTIPLY(z2, - FIX_0_601344887) { sqrt(2) * (c5-c1) }
	  + MULTIPLY(z3, FIX_0_899976223) { sqrt(2) * (c3-c7) }
	  + MULTIPLY(z4, FIX_2_562915447); { sqrt(2) * (c1+c3) }

    { Final output stage }

    { The descale removes the CONST_BITS+1 fraction bits of this pass, the
      PASS1_BITS carried over from pass 1, and 3 further bits; the result
      is then masked and mapped through the range-limit table. }
    outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp2,
					  CONST_BITS+PASS1_BITS+3+1))
			    and RANGE_MASK];
    outptr^[3] := range_limit^[ int(DESCALE(tmp10 - tmp2,
					  CONST_BITS+PASS1_BITS+3+1))
			    and RANGE_MASK];
    outptr^[1] := range_limit^[ int(DESCALE(tmp12 + tmp0,
					  CONST_BITS+PASS1_BITS+3+1))
			    and RANGE_MASK];
    outptr^[2] := range_limit^[ int(DESCALE(tmp12 - tmp0,
				 	  CONST_BITS+PASS1_BITS+3+1))
			    and RANGE_MASK];

    Inc(int_ptr(wsptr), DCTSIZE);	{ advance pointer to next row }
  end;
end;


{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 2x2 output block.

  cinfo      : decompressor state; only sample_range_limit is read.
  compptr    : component info; dct_table is used as the table of
               dequantization multipliers.
  coef_block : the block of quantized coefficients (read only).
  output_buf : output sample rows; rows 0..1 are written.
  output_col : first column written in each of those rows; two
               consecutive samples are stored per row.

  The transform runs as two 1-D passes: pass 1 reduces each used input
  column to 2 values kept in a local workspace, pass 2 reduces each of
  the 2 workspace rows to 2 range-limited output samples.  Only the DC
  term and the odd-indexed terms contribute in either pass. }

{GLOBAL}
procedure jpeg_idct_2x2 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
	                 coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);
type
  PWorkspace = ^TWorkspace;
  TWorkspace = array[0..(DCTSIZE*2)-1] of int; { buffers data between passes }
var
  tmp0, tmp10, z1 : INT32;
  inptr : JCOEFPTR;
  quantptr : ISLOW_MULT_TYPE_FIELD_PTR;
  wsptr : PWorkspace;
  outptr : JSAMPROW;
  range_limit : JSAMPROW;
  ctr : int;
  workspace : TWorkspace;  { buffers data between passes }
  {SHIFT_TEMPS}
var
  dcval : int;
var
  dcval_ : JSAMPLE;
begin
{ Each IDCT routine is responsible for range-limiting its results and
  converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
  be quite far out of range if the input data is corrupt, so a bulletproof
  range-limiting step is required.  We use a mask-and-table-lookup method
  to do the combined operations quickly.  See the comments with
  prepare_range_limit_table (in jdmaster.c) for more info. }

  { range_limit addresses the table starting at entry CENTERJSAMPLE; every
    output sample below is fetched as range_limit^[value and RANGE_MASK]. }
  range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));
  { Pass 1: process columns from input, store into work array. }

  { inptr, quantptr and wsptr all step forward by one element per
    iteration, so inside the loop index DCTSIZE*k addresses row k of the
    current column.  ctr counts down, hence ctr = DCTSIZE-k is column k. }
  inptr := coef_block;
  quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);
  wsptr := @workspace;
  for ctr := DCTSIZE downto 1 do
  begin
    { Don't bother to process columns 2,4,6 }
    if (ctr = DCTSIZE-2) or (ctr = DCTSIZE-4) or (ctr = DCTSIZE-6) then
    begin
      { Only advance the pointers; these workspace columns stay unwritten
        and pass 2 never reads them. }
      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;
    if (inptr^[DCTSIZE*1]=0) and (inptr^[DCTSIZE*3]=0) and
       (inptr^[DCTSIZE*5]=0) and (inptr^[DCTSIZE*7]=0) then
    begin
      { AC terms all zero; we need not examine terms 2,4,6 for 2x2 output }
      { Shortcut: both outputs of this column equal the dequantized DC
        term, scaled up by PASS1_BITS like the general path's results. }
      dcval := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) *
                 quantptr^[DCTSIZE*0]) shl PASS1_BITS;

      wsptr^[DCTSIZE*0] := dcval;
      wsptr^[DCTSIZE*1] := dcval;

      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;

    { Even part }

    { Only the DC term contributes; it is raised to the CONST_BITS+2 scale. }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) * quantptr^[DCTSIZE*0]);

    tmp10 := z1 shl (CONST_BITS+2);

    { Odd part }

    { z1 is reused for each dequantized odd term (rows 7, 5, 3, 1) in
      turn; tmp0 accumulates the weighted sum. }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*7]) * quantptr^[DCTSIZE*7]);
    tmp0 := MULTIPLY(z1, - FIX_0_720959822); { sqrt(2) * (c7-c5+c3-c1) }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*5]) * quantptr^[DCTSIZE*5]);
    Inc(tmp0, MULTIPLY(z1, FIX_0_850430095)); { sqrt(2) * (-c1+c3+c5+c7) }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*3]) * quantptr^[DCTSIZE*3]);
    Inc(tmp0, MULTIPLY(z1, - FIX_1_272758580)); { sqrt(2) * (-c1+c3-c5-c7) }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*1]) * quantptr^[DCTSIZE*1]);
    Inc(tmp0, MULTIPLY(z1, FIX_3_624509785)); { sqrt(2) * (c1+c3+c5+c7) }

    { Final output stage }

    { Removing CONST_BITS-PASS1_BITS+2 of the CONST_BITS+2 fraction bits
      leaves the stored values scaled up by PASS1_BITS, the same scale the
      all-zero shortcut above produces. }
    wsptr^[DCTSIZE*0] := int (DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2));
    wsptr^[DCTSIZE*1] := int (DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2));

    Inc(JCOEF_PTR(inptr));
    Inc(ISLOW_MULT_TYPE_PTR(quantptr));
    Inc(int_ptr(wsptr));
  end;

  { Pass 2: process 2 rows from work array, store into output array. }

  { wsptr now walks the workspace row by row; only entries 0, 1, 3, 5 and
    7 of the current row are read. }
  wsptr := @workspace;
  for ctr := 0 to pred(2) do
  begin
    outptr := JSAMPROW(@ output_buf^[ctr]^[output_col]);
    { It's not clear whether a zero row test is worthwhile here ... }

{$ifndef NO_ZERO_ROW_TEST}
    if (wsptr^[1]=0) and (wsptr^[3]=0) and (wsptr^[5]=0) and (wsptr^[7]= 0) then
    begin
      { AC terms all zero }
      { Shortcut: both samples equal the row's first entry descaled by
        PASS1_BITS+3, the same net scaling the general path applies. }
      dcval_ := range_limit^[ int(DESCALE(INT32(wsptr^[0]), PASS1_BITS+3))
				  and RANGE_MASK];

      outptr^[0] := dcval_;
      outptr^[1] := dcval_;

      Inc(int_ptr(wsptr), DCTSIZE);	{ advance pointer to next row }
      continue;
    end;
{$endif}

    { Even part }

    tmp10 := (INT32 (wsptr^[0])) shl (CONST_BITS+2);

    { Odd part }

    tmp0 := MULTIPLY( INT32(wsptr^[7]), - FIX_0_720959822) { sqrt(2) * (c7-c5+c3-c1) }
	  + MULTIPLY( INT32(wsptr^[5]), FIX_0_850430095) { sqrt(2) * (-c1+c3+c5+c7) }
	  + MULTIPLY( INT32(wsptr^[3]), - FIX_1_272758580) { sqrt(2) * (-c1+c3-c5-c7) }
	  + MULTIPLY( INT32(wsptr^[1]), FIX_3_624509785); { sqrt(2) * (c1+c3+c5+c7) }

    { Final output stage }

    { The descale removes the CONST_BITS+2 fraction bits of this pass, the
      PASS1_BITS carried over from pass 1, and 3 further bits; the result
      is then masked and mapped through the range-limit table. }
    outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp0,
					  CONST_BITS+PASS1_BITS+3+2))
			    and RANGE_MASK];
    outptr^[1] := range_limit^[ int(DESCALE(tmp10 - tmp0,
					  CONST_BITS+PASS1_BITS+3+2))
			    and RANGE_MASK];

    Inc(int_ptr(wsptr), DCTSIZE);		{ advance pointer to next row }
  end;
end;


{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 1x1 output block.

  cinfo      : decompressor state; only sample_range_limit is read.
  compptr    : component info; dct_table is used as the table of
               dequantization multipliers.
  coef_block : the block of quantized coefficients; only entry 0 (the DC
               coefficient) is read.
  output_buf : output sample rows; only row 0 is written.
  output_col : column of row 0 that receives the single output sample. }

{GLOBAL}
procedure jpeg_idct_1x1 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
	                 coef_block : JCOEFPTR;
	                 output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);
var
  dc_term : int;
  mult_table : ISLOW_MULT_TYPE_FIELD_PTR;
  limit_table : JSAMPROW;
  {SHIFT_TEMPS}
begin
  { No real inverse DCT is needed here: the single output sample is the
    average pixel value of the block, i.e. one-eighth of the dequantized
    DC coefficient. }
  mult_table := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);
  dc_term := (ISLOW_MULT_TYPE(coef_block^[0]) * mult_table^[0]);

  { Divide by 8 with rounding. }
  dc_term := int (DESCALE( INT32(dc_term), 3));

  { As in the other IDCT routines, the result must be range-limited and
    converted to unsigned form (0..MAXJSAMPLE), because corrupt input can
    produce values far out of range.  This is done by masking the value
    and looking it up in the range-limit table, addressed from its
    CENTERJSAMPLE entry.  See the comments with prepare_range_limit_table
    (in jdmaster.c) for more info. }
  limit_table := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));

  output_buf^[0]^[output_col] := limit_table^[dc_term and RANGE_MASK];
end;

end.
* Unified line-endings (using hgeol) 2015-05-01 12:14:15 +02:00			`unit imjidctred;`


			`{ This file contains inverse-DCT routines that produce reduced-size output:`
			`either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.`

			`The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)`
			`algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step`
			`with an 8-to-4 step that produces the four averages of two adjacent outputs`
			`(or an 8-to-2 step producing two averages of four outputs, for 2x2 output).`
			`These steps were derived by computing the corresponding values at the end`
			`of the normal LL&M code, then simplifying as much as possible.`

			`1x1 is trivial: just take the DC coefficient divided by 8.`

			`See jidctint.c for additional comments. }`


			`{ Original : jidctred.c ; Copyright (C) 1994-1998, Thomas G. Lane. }`

			`interface`

			`{$I imjconfig.inc}`

			`uses`
			`imjmorecfg,`
			`imjinclude,`
			`imjpeglib,`
			`imjdct; { Private declarations for DCT subsystem }`

			`{ Perform dequantization and inverse DCT on one block of coefficients,`
			`producing a reduced-size 1x1 output block. }`

			`{GLOBAL}`
			`procedure jpeg_idct_1x1 (cinfo : j_decompress_ptr;`
			`compptr : jpeg_component_info_ptr;`
			`coef_block : JCOEFPTR;`
			`output_buf : JSAMPARRAY;`
			`output_col : JDIMENSION);`

			`{ Perform dequantization and inverse DCT on one block of coefficients,`
			`producing a reduced-size 2x2 output block. }`

			`{GLOBAL}`
			`procedure jpeg_idct_2x2 (cinfo : j_decompress_ptr;`
			`compptr : jpeg_component_info_ptr;`
			`coef_block : JCOEFPTR;`
			`output_buf : JSAMPARRAY;`
			`output_col : JDIMENSION);`

			`{ Perform dequantization and inverse DCT on one block of coefficients,`
			`producing a reduced-size 4x4 output block. }`

			`{GLOBAL}`
			`procedure jpeg_idct_4x4 (cinfo : j_decompress_ptr;`
			`compptr : jpeg_component_info_ptr;`
			`coef_block : JCOEFPTR;`
			`output_buf : JSAMPARRAY;`
			`output_col : JDIMENSION);`

			`implementation`

			`{ This module is specialized to the case DCTSIZE = 8. }`

			`{$ifndef DCTSIZE_IS_8}`
			`Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }`
			`{$endif}`


			`{ Scaling is the same as in jidctint.c. }`

			`{$ifdef BITS_IN_JSAMPLE_IS_8}`
			`const`
			`CONST_BITS = 13;`
			`PASS1_BITS = 2;`
			`{$else}`
			`const`
			`CONST_BITS = 13;`
			`PASS1_BITS = 1; { lose a little precision to avoid overflow }`
			`{$endif}`

			`const`
			`FIX_0_211164243 = INT32(Round((INT32(1) shl CONST_BITS) * 0.211164243)); {1730}`
			`FIX_0_509795579 = INT32(Round((INT32(1) shl CONST_BITS) * 0.509795579)); {4176}`
			`FIX_0_601344887 = INT32(Round((INT32(1) shl CONST_BITS) * 0.601344887)); {4926}`
			`FIX_0_720959822 = INT32(Round((INT32(1) shl CONST_BITS) * 0.720959822)); {5906}`
			`FIX_0_765366865 = INT32(Round((INT32(1) shl CONST_BITS) * 0.765366865)); {6270}`
			`FIX_0_850430095 = INT32(Round((INT32(1) shl CONST_BITS) * 0.850430095)); {6967}`
			`FIX_0_899976223 = INT32(Round((INT32(1) shl CONST_BITS) * 0.899976223)); {7373}`
			`FIX_1_061594337 = INT32(Round((INT32(1) shl CONST_BITS) * 1.061594337)); {8697}`
			`FIX_1_272758580 = INT32(Round((INT32(1) shl CONST_BITS) * 1.272758580)); {10426}`
			`FIX_1_451774981 = INT32(Round((INT32(1) shl CONST_BITS) * 1.451774981)); {11893}`
			`FIX_1_847759065 = INT32(Round((INT32(1) shl CONST_BITS) * 1.847759065)); {15137}`
			`FIX_2_172734803 = INT32(Round((INT32(1) shl CONST_BITS) * 2.172734803)); {17799}`
			`FIX_2_562915447 = INT32(Round((INT32(1) shl CONST_BITS) * 2.562915447)); {20995}`
			`FIX_3_624509785 = INT32(Round((INT32(1) shl CONST_BITS) * 3.624509785)); {29692}`


			`{ Multiply an INT32 variable by an INT32 constant to yield an INT32 result.`
			`For 8-bit samples with the recommended scaling, all the variable`
			`and constant values involved are no more than 16 bits wide, so a`
			`16x16->32 bit multiply can be used instead of a full 32x32 multiply.`
			`For 12-bit samples, a full 32-bit multiplication will be needed. }`

			`{$ifdef BITS_IN_JSAMPLE_IS_8}`

			`{function Multiply(X, Y: Integer): integer; assembler;`
			`asm`
			`mov ax, X`
			`imul Y`
			`mov al, ah`
			`mov ah, dl`
			`end;}`

			`{MULTIPLY16C16(var,const)}`
			`function Multiply(X, Y: Integer): INT32;`
			`begin`
			`Multiply := X*INT32(Y);`
			`end;`


			`{$else}`
			`function Multiply(X, Y: INT32): INT32;`
			`begin`
			`Multiply := X*Y;`
			`end;`
			`{$endif}`


			`{ Dequantize a coefficient by multiplying it by the multiplier-table`
			`entry; produce an int result. In this module, both inputs and result`
			`are 16 bits or less, so either int or short multiply will work. }`

			`function DEQUANTIZE(coef,quantval : int) : int;`
			`begin`
			`Dequantize := ( ISLOW_MULT_TYPE(coef) * quantval);`
			`end;`


			`{ Descale and correctly round an INT32 value that's scaled by N bits.`
			`We assume RIGHT_SHIFT rounds towards minus infinity, so adding`
			`the fudge factor is correct for either sign of X. }`

			`function DESCALE(x : INT32; n : int) : INT32;`
			`var`
			`shift_temp : INT32;`
			`begin`
			`{$ifdef RIGHT_SHIFT_IS_UNSIGNED}`
			`shift_temp := x + (INT32(1) shl (n-1));`
			`if shift_temp < 0 then`
			`Descale := (shift_temp shr n) or ((not INT32(0)) shl (32-n))`
			`else`
			`Descale := (shift_temp shr n);`
			`{$else}`
			`Descale := (x + (INT32(1) shl (n-1)) shr n;`
			`{$endif}`
			`end;`

			`{ Perform dequantization and inverse DCT on one block of coefficients,`
			`producing a reduced-size 4x4 output block. }`

			`{GLOBAL}`
			`procedure jpeg_idct_4x4 (cinfo : j_decompress_ptr;`
			`compptr : jpeg_component_info_ptr;`
			`coef_block : JCOEFPTR;`
			`output_buf : JSAMPARRAY;`
			`output_col : JDIMENSION);`
			`type`
			`PWorkspace = ^TWorkspace;`
			`TWorkspace = array[0..(DCTSIZE*4)-1] of int; { buffers data between passes }`
			`var`
			`tmp0, tmp2, tmp10, tmp12 : INT32;`
			`z1, z2, z3, z4 : INT32;`
			`inptr : JCOEFPTR;`
			`quantptr : ISLOW_MULT_TYPE_FIELD_PTR;`
			`wsptr : PWorkspace;`
			`outptr : JSAMPROW;`
			`range_limit : JSAMPROW;`
			`ctr : int;`
			`workspace : TWorkspace; { buffers data between passes }`
			`{SHIFT_TEMPS}`
			`var`
			`dcval : int;`
			`var`
			`dcval_ : JSAMPLE;`
			`begin`
			`{ Each IDCT routine is responsible for range-limiting its results and`
			`converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could`
			`be quite far out of range if the input data is corrupt, so a bulletproof`
			`range-limiting step is required. We use a mask-and-table-lookup method`
			`to do the combined operations quickly. See the comments with`
			`prepare_range_limit_table (in jdmaster.c) for more info. }`

			`range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));`

			`{ Pass 1: process columns from input, store into work array. }`

			`inptr := coef_block;`
			`quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);`
			`wsptr := @workspace;`
			`for ctr := DCTSIZE downto 1 do`
			`begin`
			`{ Don't bother to process column 4, because second pass won't use it }`
			`if (ctr = DCTSIZE-4) then`
			`begin`
			`Inc(JCOEF_PTR(inptr));`
			`Inc(ISLOW_MULT_TYPE_PTR(quantptr));`
			`Inc(int_ptr(wsptr));`

			`continue;`
			`end;`
			`if (inptr^[DCTSIZE1]=0) and (inptr^[DCTSIZE2]=0) and (inptr^[DCTSIZE*3]=0) and`
			`(inptr^[DCTSIZE5]=0) and (inptr^[DCTSIZE6]=0) and (inptr^[DCTSIZE*7]=0) then`
			`begin`
			`{ AC terms all zero; we need not examine term 4 for 4x4 output }`
			`dcval := (ISLOW_MULT_TYPE(inptr^[DCTSIZE0]) `
			`quantptr^[DCTSIZE*0]) shl PASS1_BITS;`

			`wsptr^[DCTSIZE*0] := dcval;`
			`wsptr^[DCTSIZE*1] := dcval;`
			`wsptr^[DCTSIZE*2] := dcval;`
			`wsptr^[DCTSIZE*3] := dcval;`

			`Inc(JCOEF_PTR(inptr));`
			`Inc(ISLOW_MULT_TYPE_PTR(quantptr));`
			`Inc(int_ptr(wsptr));`

			`continue;`
			`end;`

			`{ Even part }`

			`tmp0 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE0]) quantptr^[DCTSIZE*0]);`

			`tmp0 := tmp0 shl (CONST_BITS+1);`

			`z2 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE2]) quantptr^[DCTSIZE*2]);`
			`z3 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE6]) quantptr^[DCTSIZE*6]);`

			`tmp2 := MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);`

			`tmp10 := tmp0 + tmp2;`
			`tmp12 := tmp0 - tmp2;`

			`{ Odd part }`

			`z1 := ISLOW_MULT_TYPE(inptr^[DCTSIZE7]) quantptr^[DCTSIZE*7];`
			`z2 := ISLOW_MULT_TYPE(inptr^[DCTSIZE5]) quantptr^[DCTSIZE*5];`
			`z3 := ISLOW_MULT_TYPE(inptr^[DCTSIZE3]) quantptr^[DCTSIZE*3];`
			`z4 := ISLOW_MULT_TYPE(inptr^[DCTSIZE1]) quantptr^[DCTSIZE*1];`

			`tmp0 := MULTIPLY(z1, - FIX_0_211164243) { sqrt(2) * (c3-c1) }`
			`+ MULTIPLY(z2, FIX_1_451774981) { sqrt(2) * (c3+c7) }`
			`+ MULTIPLY(z3, - FIX_2_172734803) { sqrt(2) * (-c1-c5) }`
			`+ MULTIPLY(z4, FIX_1_061594337); { sqrt(2) * (c5+c7) }`

			`tmp2 := MULTIPLY(z1, - FIX_0_509795579) { sqrt(2) * (c7-c5) }`
			`+ MULTIPLY(z2, - FIX_0_601344887) { sqrt(2) * (c5-c1) }`
			`+ MULTIPLY(z3, FIX_0_899976223) { sqrt(2) * (c3-c7) }`
			`+ MULTIPLY(z4, FIX_2_562915447); { sqrt(2) * (c1+c3) }`

			`{ Final output stage }`

			`wsptr^[DCTSIZE*0] := int(DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1));`
			`wsptr^[DCTSIZE*3] := int(DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1));`
			`wsptr^[DCTSIZE*1] := int(DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1));`
			`wsptr^[DCTSIZE*2] := int(DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1));`

			`Inc(JCOEF_PTR(inptr));`
			`Inc(ISLOW_MULT_TYPE_PTR(quantptr));`
			`Inc(int_ptr(wsptr));`
			`end;`

			`{ Pass 2: process 4 rows from work array, store into output array. }`

			`wsptr := @workspace;`
			`for ctr := 0 to pred(4) do`
			`begin`
			`outptr := JSAMPROW(@ output_buf^[ctr]^[output_col]);`
			`{ It's not clear whether a zero row test is worthwhile here ... }`

			`{$ifndef NO_ZERO_ROW_TEST}`
			`if (wsptr^[1]=0) and (wsptr^[2]=0) and (wsptr^[3]=0) and`
			`(wsptr^[5]=0) and (wsptr^[6]=0) and (wsptr^[7]=0) then`
			`begin`
			`{ AC terms all zero }`
			`dcval_ := range_limit^[int(DESCALE(INT32(wsptr^[0]), PASS1_BITS+3))`
			`and RANGE_MASK];`

			`outptr^[0] := dcval_;`
			`outptr^[1] := dcval_;`
			`outptr^[2] := dcval_;`
			`outptr^[3] := dcval_;`

			`Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }`
			`continue;`
			`end;`
			`{$endif}`

			`{ Even part }`

			`tmp0 := (INT32(wsptr^[0])) shl (CONST_BITS+1);`

			`tmp2 := MULTIPLY(INT32(wsptr^[2]), FIX_1_847759065)`
			`+ MULTIPLY(INT32(wsptr^[6]), - FIX_0_765366865);`

			`tmp10 := tmp0 + tmp2;`
			`tmp12 := tmp0 - tmp2;`

			`{ Odd part }`

			`z1 := INT32(wsptr^[7]);`
			`z2 := INT32(wsptr^[5]);`
			`z3 := INT32(wsptr^[3]);`
			`z4 := INT32(wsptr^[1]);`

			`tmp0 := MULTIPLY(z1, - FIX_0_211164243) { sqrt(2) * (c3-c1) }`
			`+ MULTIPLY(z2, FIX_1_451774981) { sqrt(2) * (c3+c7) }`
			`+ MULTIPLY(z3, - FIX_2_172734803) { sqrt(2) * (-c1-c5) }`
			`+ MULTIPLY(z4, FIX_1_061594337); { sqrt(2) * (c5+c7) }`

			`tmp2 := MULTIPLY(z1, - FIX_0_509795579) { sqrt(2) * (c7-c5) }`
			`+ MULTIPLY(z2, - FIX_0_601344887) { sqrt(2) * (c5-c1) }`
			`+ MULTIPLY(z3, FIX_0_899976223) { sqrt(2) * (c3-c7) }`
			`+ MULTIPLY(z4, FIX_2_562915447); { sqrt(2) * (c1+c3) }`

			`{ Final output stage }`

			`outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp2,`
			`CONST_BITS+PASS1_BITS+3+1))`
			`and RANGE_MASK];`
			`outptr^[3] := range_limit^[ int(DESCALE(tmp10 - tmp2,`
			`CONST_BITS+PASS1_BITS+3+1))`
			`and RANGE_MASK];`
			`outptr^[1] := range_limit^[ int(DESCALE(tmp12 + tmp0,`
			`CONST_BITS+PASS1_BITS+3+1))`
			`and RANGE_MASK];`
			`outptr^[2] := range_limit^[ int(DESCALE(tmp12 - tmp0,`
			`CONST_BITS+PASS1_BITS+3+1))`
			`and RANGE_MASK];`

			`Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }`
			`end;`
			`end;`


			`{ Perform dequantization and inverse DCT on one block of coefficients,`
			`producing a reduced-size 2x2 output block. }`

			`{GLOBAL}`
			`procedure jpeg_idct_2x2 (cinfo : j_decompress_ptr;`
			`compptr : jpeg_component_info_ptr;`
			`coef_block : JCOEFPTR;`
			`output_buf : JSAMPARRAY;`
			`output_col : JDIMENSION);`
			`type`
			`PWorkspace = ^TWorkspace;`
			`TWorkspace = array[0..(DCTSIZE*2)-1] of int; { buffers data between passes }`
			`var`
			`tmp0, tmp10, z1 : INT32;`
			`inptr : JCOEFPTR;`
			`quantptr : ISLOW_MULT_TYPE_FIELD_PTR;`
			`wsptr : PWorkspace;`
			`outptr : JSAMPROW;`
			`range_limit : JSAMPROW;`
			`ctr : int;`
			`workspace : TWorkspace; { buffers data between passes }`
			`{SHIFT_TEMPS}`
			`var`
			`dcval : int;`
			`var`
			`dcval_ : JSAMPLE;`
			`begin`
			`{ Each IDCT routine is responsible for range-limiting its results and`
			`converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could`
			`be quite far out of range if the input data is corrupt, so a bulletproof`
			`range-limiting step is required. We use a mask-and-table-lookup method`
			`to do the combined operations quickly. See the comments with`
			`prepare_range_limit_table (in jdmaster.c) for more info. }`

			`range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));`
			`{ Pass 1: process columns from input, store into work array. }`

			`inptr := coef_block;`
			`quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);`
			`wsptr := @workspace;`
			`for ctr := DCTSIZE downto 1 do`
			`begin`
			`{ Don't bother to process columns 2,4,6 }`
			`if (ctr = DCTSIZE-2) or (ctr = DCTSIZE-4) or (ctr = DCTSIZE-6) then`
			`begin`
			`Inc(JCOEF_PTR(inptr));`
			`Inc(ISLOW_MULT_TYPE_PTR(quantptr));`
			`Inc(int_ptr(wsptr));`

			`continue;`
			`end;`
			`if (inptr^[DCTSIZE1]=0) and (inptr^[DCTSIZE3]=0) and`
			`(inptr^[DCTSIZE5]=0) and (inptr^[DCTSIZE7]=0) then`
			`begin`
			`{ AC terms all zero; we need not examine terms 2,4,6 for 2x2 output }`
			`dcval := (ISLOW_MULT_TYPE(inptr^[DCTSIZE0]) `
			`quantptr^[DCTSIZE*0]) shl PASS1_BITS;`

			`wsptr^[DCTSIZE*0] := dcval;`
			`wsptr^[DCTSIZE*1] := dcval;`

			`Inc(JCOEF_PTR(inptr));`
			`Inc(ISLOW_MULT_TYPE_PTR(quantptr));`
			`Inc(int_ptr(wsptr));`

			`continue;`
			`end;`

			`{ Even part }`

			`z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE0]) quantptr^[DCTSIZE*0]);`

			`tmp10 := z1 shl (CONST_BITS+2);`

			`{ Odd part }`

			`z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE7]) quantptr^[DCTSIZE*7]);`
			`tmp0 := MULTIPLY(z1, - FIX_0_720959822); { sqrt(2) * (c7-c5+c3-c1) }`
			`z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE5]) quantptr^[DCTSIZE*5]);`
			`Inc(tmp0, MULTIPLY(z1, FIX_0_850430095)); { sqrt(2) * (-c1+c3+c5+c7) }`
			`z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE3]) quantptr^[DCTSIZE*3]);`
			`Inc(tmp0, MULTIPLY(z1, - FIX_1_272758580)); { sqrt(2) * (-c1+c3-c5-c7) }`
			`z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE1]) quantptr^[DCTSIZE*1]);`
			`Inc(tmp0, MULTIPLY(z1, FIX_3_624509785)); { sqrt(2) * (c1+c3+c5+c7) }`

			`{ Final output stage }`

			`wsptr^[DCTSIZE*0] := int (DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2));`
			`wsptr^[DCTSIZE*1] := int (DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2));`

			`Inc(JCOEF_PTR(inptr));`
			`Inc(ISLOW_MULT_TYPE_PTR(quantptr));`
			`Inc(int_ptr(wsptr));`
			`end;`

			`{ Pass 2: process 2 rows from work array, store into output array. }`

			`wsptr := @workspace;`
			`for ctr := 0 to pred(2) do`
			`begin`
			`outptr := JSAMPROW(@ output_buf^[ctr]^[output_col]);`
			`{ It's not clear whether a zero row test is worthwhile here ... }`

			`{$ifndef NO_ZERO_ROW_TEST}`
			`if (wsptr^[1]=0) and (wsptr^[3]=0) and (wsptr^[5]=0) and (wsptr^[7]= 0) then`
			`begin`
			`{ AC terms all zero }`
			`dcval_ := range_limit^[ int(DESCALE(INT32(wsptr^[0]), PASS1_BITS+3))`
			`and RANGE_MASK];`

			`outptr^[0] := dcval_;`
			`outptr^[1] := dcval_;`

			`Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }`
			`continue;`
			`end;`
			`{$endif}`

			`{ Even part }`

			`tmp10 := (INT32 (wsptr^[0])) shl (CONST_BITS+2);`

			`{ Odd part }`

			`tmp0 := MULTIPLY( INT32(wsptr^[7]), - FIX_0_720959822) { sqrt(2) * (c7-c5+c3-c1) }`
			`+ MULTIPLY( INT32(wsptr^[5]), FIX_0_850430095) { sqrt(2) * (-c1+c3+c5+c7) }`
			`+ MULTIPLY( INT32(wsptr^[3]), - FIX_1_272758580) { sqrt(2) * (-c1+c3-c5-c7) }`
			`+ MULTIPLY( INT32(wsptr^[1]), FIX_3_624509785); { sqrt(2) * (c1+c3+c5+c7) }`

			`{ Final output stage }`

			`outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp0,`
			`CONST_BITS+PASS1_BITS+3+2))`
			`and RANGE_MASK];`
			`outptr^[1] := range_limit^[ int(DESCALE(tmp10 - tmp0,`
			`CONST_BITS+PASS1_BITS+3+2))`
			`and RANGE_MASK];`

			`Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }`
			`end;`
			`end;`


			`{ Perform dequantization and inverse DCT on one block of coefficients,`
			`producing a reduced-size 1x1 output block. }`

			`{GLOBAL}`
			`procedure jpeg_idct_1x1 (cinfo : j_decompress_ptr;`
			`compptr : jpeg_component_info_ptr;`
			`coef_block : JCOEFPTR;`
			`output_buf : JSAMPARRAY;`
			`output_col : JDIMENSION);`
			`var`
			`dcval : int;`
			`quantptr : ISLOW_MULT_TYPE_FIELD_PTR;`
			`range_limit : JSAMPROW;`
			`{SHIFT_TEMPS}`
			`begin`
			`{ Each IDCT routine is responsible for range-limiting its results and`
			`converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could`
			`be quite far out of range if the input data is corrupt, so a bulletproof`
			`range-limiting step is required. We use a mask-and-table-lookup method`
			`to do the combined operations quickly. See the comments with`
			`prepare_range_limit_table (in jdmaster.c) for more info. }`

			`range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));`
			`{ Pass 1: process columns from input, store into work array. }`

			`{ We hardly need an inverse DCT routine for this: just take the`
			`average pixel value, which is one-eighth of the DC coefficient. }`

			`quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);`
			`dcval := (ISLOW_MULT_TYPE(coef_block^[0]) * quantptr^[0]);`
			`dcval := int (DESCALE( INT32(dcval), 3));`

			`output_buf^[0]^[output_col] := range_limit^[dcval and RANGE_MASK];`
			`end;`

			`end.`