unit imjidctred;

{ This file contains inverse-DCT routines that produce reduced-size output:
  either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.

  The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
  algorithm used in jidctint.c.  We simply replace each 8-to-8 1-D IDCT step
  with an 8-to-4 step that produces the four averages of two adjacent outputs
  (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
  These steps were derived by computing the corresponding values at the end
  of the normal LL&M code, then simplifying as much as possible.

  1x1 is trivial: just take the DC coefficient divided by 8.

  See jidctint.c for additional comments. }

{ Original : jidctred.c ; Copyright (C) 1994-1998, Thomas G. Lane. }

interface

{$I imjconfig.inc}

uses
  imjmorecfg,
  imjinclude,
  imjpeglib,
  imjdct;                 { Private declarations for DCT subsystem }

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 1x1 output block. }

{GLOBAL}
procedure jpeg_idct_1x1 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
                         coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 2x2 output block. }

{GLOBAL}
procedure jpeg_idct_2x2 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
                         coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 4x4 output block. }

{GLOBAL}
procedure jpeg_idct_4x4 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
                         coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);

implementation

{ This module is specialized to the case DCTSIZE = 8. }

{$ifndef DCTSIZE_IS_8}
  Sorry, this code only copes with 8x8 DCTs. { deliberate syntax err }
{$endif}

{ Scaling is the same as in jidctint.c. }

{$ifdef BITS_IN_JSAMPLE_IS_8}
const
  CONST_BITS = 13;
  PASS1_BITS = 2;
{$else}
const
  CONST_BITS = 13;
  PASS1_BITS = 1;         { lose a little precision to avoid overflow }
{$endif}

{ Fixed-point multipliers: each real factor scaled by 2**CONST_BITS and
  rounded.  The trailing comment gives the value for CONST_BITS = 13. }
const
  FIX_0_211164243 = INT32(Round((INT32(1) shl CONST_BITS) * 0.211164243)); {1730}
  FIX_0_509795579 = INT32(Round((INT32(1) shl CONST_BITS) * 0.509795579)); {4176}
  FIX_0_601344887 = INT32(Round((INT32(1) shl CONST_BITS) * 0.601344887)); {4926}
  FIX_0_720959822 = INT32(Round((INT32(1) shl CONST_BITS) * 0.720959822)); {5906}
  FIX_0_765366865 = INT32(Round((INT32(1) shl CONST_BITS) * 0.765366865)); {6270}
  FIX_0_850430095 = INT32(Round((INT32(1) shl CONST_BITS) * 0.850430095)); {6967}
  FIX_0_899976223 = INT32(Round((INT32(1) shl CONST_BITS) * 0.899976223)); {7373}
  FIX_1_061594337 = INT32(Round((INT32(1) shl CONST_BITS) * 1.061594337)); {8697}
  FIX_1_272758580 = INT32(Round((INT32(1) shl CONST_BITS) * 1.272758580)); {10426}
  FIX_1_451774981 = INT32(Round((INT32(1) shl CONST_BITS) * 1.451774981)); {11893}
  FIX_1_847759065 = INT32(Round((INT32(1) shl CONST_BITS) * 1.847759065)); {15137}
  FIX_2_172734803 = INT32(Round((INT32(1) shl CONST_BITS) * 2.172734803)); {17799}
  FIX_2_562915447 = INT32(Round((INT32(1) shl CONST_BITS) * 2.562915447)); {20995}
  FIX_3_624509785 = INT32(Round((INT32(1) shl CONST_BITS) * 3.624509785)); {29692}

{ Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
  For 8-bit samples with the recommended scaling, all the variable
  and constant values involved are no more than 16 bits wide, so a
  16x16->32 bit multiply can be used instead of a full 32x32 multiply.
  For 12-bit samples, a full 32-bit multiplication will be needed. }

{$ifdef BITS_IN_JSAMPLE_IS_8}

{function Multiply(X, Y: Integer): integer; assembler;
asm
  mov ax, X
  imul Y
  mov al, ah
  mov ah, dl
end;}

{MULTIPLY16C16(var,const)}
function Multiply(X, Y: Integer): INT32;
begin
  Multiply := X*INT32(Y);
end;

{$else}

function Multiply(X, Y: INT32): INT32;
begin
  Multiply := X*Y;
end;

{$endif}

{ Dequantize a coefficient by multiplying it by the multiplier-table
  entry; produce an int result.  In this module, both inputs and result
  are 16 bits or less, so either int or short multiply will work. }

function DEQUANTIZE(coef,quantval : int) : int;
begin
  Dequantize := ( ISLOW_MULT_TYPE(coef) * quantval);
end;

{ Descale and correctly round an INT32 value that's scaled by N bits.
  We assume RIGHT_SHIFT rounds towards minus infinity, so adding
  the fudge factor is correct for either sign of X.

  x : value scaled by 2**n
  n : number of fraction bits to remove
  Returns (x + 2**(n-1)) shifted right by n bits.

  The rounded sum is formed once, ahead of the conditional section, and
  shared by both variants.  The previous text of the branch used when
  RIGHT_SHIFT_IS_UNSIGNED is not defined had an unbalanced parenthesis and
  could not be compiled. }

function DESCALE(x : INT32; n : int) : INT32;
var
  shift_temp : INT32;
begin
  shift_temp := x + (INT32(1) shl (n-1));
{$ifdef RIGHT_SHIFT_IS_UNSIGNED}
  { shr fills with zero bits here, so for a negative value the vacated
    high-order bits are set again by hand. }
  if shift_temp < 0 then
    Descale := (shift_temp shr n) or ((not INT32(0)) shl (32-n))
  else
    Descale := (shift_temp shr n);
{$else}
  Descale := shift_temp shr n;
{$endif}
end;

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 4x4 output block. }

{GLOBAL}
procedure jpeg_idct_4x4 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
                         coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);
type
  PWorkspace = ^TWorkspace;
  TWorkspace = array[0..(DCTSIZE*4)-1] of int; { buffers data between passes }
var
  tmp0, tmp2, tmp10, tmp12 : INT32;
  z1, z2, z3, z4 : INT32;
  inptr : JCOEFPTR;
  quantptr : ISLOW_MULT_TYPE_FIELD_PTR;
  wsptr : PWorkspace;
  outptr : JSAMPROW;
  range_limit : JSAMPROW;
  ctr : int;
  workspace : TWorkspace; { buffers data between passes }
  {SHIFT_TEMPS}
var
  dcval : int;
var
  dcval_ : JSAMPLE;
begin
  { Each IDCT routine is responsible for range-limiting its results and
    converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
    be quite far out of range if the input data is corrupt, so a bulletproof
    range-limiting step is required.  We use a mask-and-table-lookup method
    to do the combined operations quickly.  See the comments with
    prepare_range_limit_table (in jdmaster.c) for more info. }

  range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));

  { Pass 1: process columns from input, store into work array. }

  inptr := coef_block;
  quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);
  wsptr := @workspace;
  { ctr counts down from DCTSIZE while the three pointers step forward one
    column per iteration, so column k is handled when ctr = DCTSIZE-k. }
  for ctr := DCTSIZE downto 1 do
  begin
    { Don't bother to process column 4, because second pass won't use it }
    if (ctr = DCTSIZE-4) then
    begin
      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;

    if (inptr^[DCTSIZE*1]=0) and (inptr^[DCTSIZE*2]=0) and
       (inptr^[DCTSIZE*3]=0) and (inptr^[DCTSIZE*5]=0) and
       (inptr^[DCTSIZE*6]=0) and (inptr^[DCTSIZE*7]=0) then
    begin
      { AC terms all zero; we need not examine term 4 for 4x4 output }
      dcval := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) *
                quantptr^[DCTSIZE*0]) shl PASS1_BITS;

      wsptr^[DCTSIZE*0] := dcval;
      wsptr^[DCTSIZE*1] := dcval;
      wsptr^[DCTSIZE*2] := dcval;
      wsptr^[DCTSIZE*3] := dcval;

      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;

    { Even part }

    tmp0 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) * quantptr^[DCTSIZE*0]);

    tmp0 := tmp0 shl (CONST_BITS+1);

    z2 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*2]) * quantptr^[DCTSIZE*2]);
    z3 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*6]) * quantptr^[DCTSIZE*6]);

    tmp2 := MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);

    tmp10 := tmp0 + tmp2;
    tmp12 := tmp0 - tmp2;

    { Odd part }

    z1 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*7]) * quantptr^[DCTSIZE*7];
    z2 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*5]) * quantptr^[DCTSIZE*5];
    z3 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*3]) * quantptr^[DCTSIZE*3];
    z4 := ISLOW_MULT_TYPE(inptr^[DCTSIZE*1]) * quantptr^[DCTSIZE*1];

    tmp0 := MULTIPLY(z1, - FIX_0_211164243) { sqrt(2) * (c3-c1) }
          + MULTIPLY(z2, FIX_1_451774981)   { sqrt(2) * (c3+c7) }
          + MULTIPLY(z3, - FIX_2_172734803) { sqrt(2) * (-c1-c5) }
          + MULTIPLY(z4, FIX_1_061594337);  { sqrt(2) * (c5+c7) }

    tmp2 := MULTIPLY(z1, - FIX_0_509795579) { sqrt(2) * (c7-c5) }
          + MULTIPLY(z2, - FIX_0_601344887) { sqrt(2) * (c5-c1) }
          + MULTIPLY(z3, FIX_0_899976223)   { sqrt(2) * (c3-c7) }
          + MULTIPLY(z4, FIX_2_562915447);  { sqrt(2) * (c1+c3) }

    { Final output stage }

    wsptr^[DCTSIZE*0] := int(DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1));
    wsptr^[DCTSIZE*3] := int(DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1));
    wsptr^[DCTSIZE*1] := int(DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1));
    wsptr^[DCTSIZE*2] := int(DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1));

    Inc(JCOEF_PTR(inptr));
    Inc(ISLOW_MULT_TYPE_PTR(quantptr));
    Inc(int_ptr(wsptr));
  end;

  { Pass 2: process 4 rows from work array, store into output array. }

  wsptr := @workspace;
  for ctr := 0 to pred(4) do
  begin
    outptr := JSAMPROW(@ output_buf^[ctr]^[output_col]);
    { It's not clear whether a zero row test is worthwhile here ... }

{$ifndef NO_ZERO_ROW_TEST}
    if (wsptr^[1]=0) and (wsptr^[2]=0) and (wsptr^[3]=0) and
       (wsptr^[5]=0) and (wsptr^[6]=0) and (wsptr^[7]=0) then
    begin
      { AC terms all zero }
      dcval_ := range_limit^[int(DESCALE(INT32(wsptr^[0]), PASS1_BITS+3))
                             and RANGE_MASK];

      outptr^[0] := dcval_;
      outptr^[1] := dcval_;
      outptr^[2] := dcval_;
      outptr^[3] := dcval_;

      Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }
      continue;
    end;
{$endif}

    { Even part }

    tmp0 := (INT32(wsptr^[0])) shl (CONST_BITS+1);

    tmp2 := MULTIPLY(INT32(wsptr^[2]), FIX_1_847759065)
          + MULTIPLY(INT32(wsptr^[6]), - FIX_0_765366865);

    tmp10 := tmp0 + tmp2;
    tmp12 := tmp0 - tmp2;

    { Odd part }

    z1 := INT32(wsptr^[7]);
    z2 := INT32(wsptr^[5]);
    z3 := INT32(wsptr^[3]);
    z4 := INT32(wsptr^[1]);

    tmp0 := MULTIPLY(z1, - FIX_0_211164243) { sqrt(2) * (c3-c1) }
          + MULTIPLY(z2, FIX_1_451774981)   { sqrt(2) * (c3+c7) }
          + MULTIPLY(z3, - FIX_2_172734803) { sqrt(2) * (-c1-c5) }
          + MULTIPLY(z4, FIX_1_061594337);  { sqrt(2) * (c5+c7) }

    tmp2 := MULTIPLY(z1, - FIX_0_509795579) { sqrt(2) * (c7-c5) }
          + MULTIPLY(z2, - FIX_0_601344887) { sqrt(2) * (c5-c1) }
          + MULTIPLY(z3, FIX_0_899976223)   { sqrt(2) * (c3-c7) }
          + MULTIPLY(z4, FIX_2_562915447);  { sqrt(2) * (c1+c3) }

    { Final output stage }

    outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp2,
                                CONST_BITS+PASS1_BITS+3+1))
                                and RANGE_MASK];
    outptr^[3] := range_limit^[ int(DESCALE(tmp10 - tmp2,
                                CONST_BITS+PASS1_BITS+3+1))
                                and RANGE_MASK];
    outptr^[1] := range_limit^[ int(DESCALE(tmp12 + tmp0,
                                CONST_BITS+PASS1_BITS+3+1))
                                and RANGE_MASK];
    outptr^[2] := range_limit^[ int(DESCALE(tmp12 - tmp0,
                                CONST_BITS+PASS1_BITS+3+1))
                                and RANGE_MASK];

    Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }
  end;
end;

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 2x2 output block. }

{GLOBAL}
procedure jpeg_idct_2x2 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
                         coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);
type
  PWorkspace = ^TWorkspace;
  TWorkspace = array[0..(DCTSIZE*2)-1] of int; { buffers data between passes }
var
  tmp0, tmp10, z1 : INT32;
  inptr : JCOEFPTR;
  quantptr : ISLOW_MULT_TYPE_FIELD_PTR;
  wsptr : PWorkspace;
  outptr : JSAMPROW;
  range_limit : JSAMPROW;
  ctr : int;
  workspace : TWorkspace; { buffers data between passes }
  {SHIFT_TEMPS}
var
  dcval : int;
var
  dcval_ : JSAMPLE;
begin
  { Each IDCT routine is responsible for range-limiting its results and
    converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
    be quite far out of range if the input data is corrupt, so a bulletproof
    range-limiting step is required.  We use a mask-and-table-lookup method
    to do the combined operations quickly.  See the comments with
    prepare_range_limit_table (in jdmaster.c) for more info. }

  range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));

  { Pass 1: process columns from input, store into work array. }

  inptr := coef_block;
  quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);
  wsptr := @workspace;
  { ctr counts down from DCTSIZE while the three pointers step forward one
    column per iteration, so column k is handled when ctr = DCTSIZE-k. }
  for ctr := DCTSIZE downto 1 do
  begin
    { Don't bother to process columns 2,4,6 }
    if (ctr = DCTSIZE-2) or (ctr = DCTSIZE-4) or (ctr = DCTSIZE-6) then
    begin
      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;

    if (inptr^[DCTSIZE*1]=0) and (inptr^[DCTSIZE*3]=0) and
       (inptr^[DCTSIZE*5]=0) and (inptr^[DCTSIZE*7]=0) then
    begin
      { AC terms all zero; we need not examine terms 2,4,6 for 2x2 output }
      dcval := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) *
                quantptr^[DCTSIZE*0]) shl PASS1_BITS;

      wsptr^[DCTSIZE*0] := dcval;
      wsptr^[DCTSIZE*1] := dcval;

      Inc(JCOEF_PTR(inptr));
      Inc(ISLOW_MULT_TYPE_PTR(quantptr));
      Inc(int_ptr(wsptr));

      continue;
    end;

    { Even part }

    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*0]) * quantptr^[DCTSIZE*0]);

    tmp10 := z1 shl (CONST_BITS+2);

    { Odd part }

    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*7]) * quantptr^[DCTSIZE*7]);
    tmp0 := MULTIPLY(z1, - FIX_0_720959822);       { sqrt(2) * (c7-c5+c3-c1) }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*5]) * quantptr^[DCTSIZE*5]);
    Inc(tmp0, MULTIPLY(z1, FIX_0_850430095));      { sqrt(2) * (-c1+c3+c5+c7) }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*3]) * quantptr^[DCTSIZE*3]);
    Inc(tmp0, MULTIPLY(z1, - FIX_1_272758580));    { sqrt(2) * (-c1+c3-c5-c7) }
    z1 := (ISLOW_MULT_TYPE(inptr^[DCTSIZE*1]) * quantptr^[DCTSIZE*1]);
    Inc(tmp0, MULTIPLY(z1, FIX_3_624509785));      { sqrt(2) * (c1+c3+c5+c7) }

    { Final output stage }

    wsptr^[DCTSIZE*0] := int (DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2));
    wsptr^[DCTSIZE*1] := int (DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2));

    Inc(JCOEF_PTR(inptr));
    Inc(ISLOW_MULT_TYPE_PTR(quantptr));
    Inc(int_ptr(wsptr));
  end;

  { Pass 2: process 2 rows from work array, store into output array. }

  wsptr := @workspace;
  for ctr := 0 to pred(2) do
  begin
    outptr := JSAMPROW(@ output_buf^[ctr]^[output_col]);
    { It's not clear whether a zero row test is worthwhile here ... }

{$ifndef NO_ZERO_ROW_TEST}
    if (wsptr^[1]=0) and (wsptr^[3]=0) and (wsptr^[5]=0) and (wsptr^[7]= 0) then
    begin
      { AC terms all zero }
      dcval_ := range_limit^[ int(DESCALE(INT32(wsptr^[0]), PASS1_BITS+3))
                              and RANGE_MASK];

      outptr^[0] := dcval_;
      outptr^[1] := dcval_;

      Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }
      continue;
    end;
{$endif}

    { Even part }

    tmp10 := (INT32 (wsptr^[0])) shl (CONST_BITS+2);

    { Odd part }

    tmp0 := MULTIPLY( INT32(wsptr^[7]), - FIX_0_720959822) { sqrt(2) * (c7-c5+c3-c1) }
          + MULTIPLY( INT32(wsptr^[5]), FIX_0_850430095)   { sqrt(2) * (-c1+c3+c5+c7) }
          + MULTIPLY( INT32(wsptr^[3]), - FIX_1_272758580) { sqrt(2) * (-c1+c3-c5-c7) }
          + MULTIPLY( INT32(wsptr^[1]), FIX_3_624509785);  { sqrt(2) * (c1+c3+c5+c7) }

    { Final output stage }

    outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp0,
                                CONST_BITS+PASS1_BITS+3+2))
                                and RANGE_MASK];
    outptr^[1] := range_limit^[ int(DESCALE(tmp10 - tmp0,
                                CONST_BITS+PASS1_BITS+3+2))
                                and RANGE_MASK];

    Inc(int_ptr(wsptr), DCTSIZE); { advance pointer to next row }
  end;
end;

{ Perform dequantization and inverse DCT on one block of coefficients,
  producing a reduced-size 1x1 output block. }

{GLOBAL}
procedure jpeg_idct_1x1 (cinfo : j_decompress_ptr;
                         compptr : jpeg_component_info_ptr;
                         coef_block : JCOEFPTR;
                         output_buf : JSAMPARRAY;
                         output_col : JDIMENSION);
var
  dcval : int;
  quantptr : ISLOW_MULT_TYPE_FIELD_PTR;
  range_limit : JSAMPROW;
  {SHIFT_TEMPS}
begin
  { Each IDCT routine is responsible for range-limiting its results and
    converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
    be quite far out of range if the input data is corrupt, so a bulletproof
    range-limiting step is required.  We use a mask-and-table-lookup method
    to do the combined operations quickly.  See the comments with
    prepare_range_limit_table (in jdmaster.c) for more info. }

  range_limit := JSAMPROW(@(cinfo^.sample_range_limit^[CENTERJSAMPLE]));

  { We hardly need an inverse DCT routine for this: just take the
    average pixel value, which is one-eighth of the DC coefficient. }

  quantptr := ISLOW_MULT_TYPE_FIELD_PTR (compptr^.dct_table);
  dcval := (ISLOW_MULT_TYPE(coef_block^[0]) * quantptr^[0]);
  dcval := int (DESCALE( INT32(dcval), 3));

  output_buf^[0]^[output_col] := range_limit^[dcval and RANGE_MASK];
end;

end.