/*
 * Repository viewer metadata (not part of the original source file):
 * Path: blob/master/src/java.desktop/share/native/libjavajpeg/jidctflt.c
 */
/*1* reserved comment block2* DO NOT REMOVE OR ALTER!3*/4/*5* jidctflt.c6*7* Copyright (C) 1994-1998, Thomas G. Lane.8* This file is part of the Independent JPEG Group's software.9* For conditions of distribution and use, see the accompanying README file.10*11* This file contains a floating-point implementation of the12* inverse DCT (Discrete Cosine Transform). In the IJG code, this routine13* must also perform dequantization of the input coefficients.14*15* This implementation should be more accurate than either of the integer16* IDCT implementations. However, it may not give the same results on all17* machines because of differences in roundoff behavior. Speed will depend18* on the hardware's floating point capacity.19*20* A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT21* on each row (or vice versa, but it's more convenient to emit a row at22* a time). Direct algorithms are also available, but they are much more23* complex and seem not to be any faster when reduced to code.24*25* This implementation is based on Arai, Agui, and Nakajima's algorithm for26* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in27* Japanese, but the algorithm is described in the Pennebaker & Mitchell28* JPEG textbook (see REFERENCES section in file README). The following code29* is based directly on figure 4-8 in P&M.30* While an 8-point DCT cannot be done in less than 11 multiplies, it is31* possible to arrange the computation so that many of the multiplies are32* simple scalings of the final outputs. These multiplies can then be33* folded into the multiplications or divisions by the JPEG quantization34* table entries. The AA&N method leaves only 5 multiplies and 29 adds35* to be done in the DCT itself.36* The primary disadvantage of this method is that with a fixed-point37* implementation, accuracy is lost due to imprecise representation of the38* scaled quantization values. 
However, that problem does not arise if39* we use floating point arithmetic.40*/4142#define JPEG_INTERNALS43#include "jinclude.h"44#include "jpeglib.h"45#include "jdct.h" /* Private declarations for DCT subsystem */4647#ifdef DCT_FLOAT_SUPPORTED484950/*51* This module is specialized to the case DCTSIZE = 8.52*/5354#if DCTSIZE != 855Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */56#endif575859/* Dequantize a coefficient by multiplying it by the multiplier-table60* entry; produce a float result.61*/6263#define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval))646566/*67* Perform dequantization and inverse DCT on one block of coefficients.68*/6970GLOBAL(void)71jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,72JCOEFPTR coef_block,73JSAMPARRAY output_buf, JDIMENSION output_col)74{75FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;76FAST_FLOAT tmp10, tmp11, tmp12, tmp13;77FAST_FLOAT z5, z10, z11, z12, z13;78JCOEFPTR inptr;79FLOAT_MULT_TYPE * quantptr;80FAST_FLOAT * wsptr;81JSAMPROW outptr;82JSAMPLE *range_limit = IDCT_range_limit(cinfo);83int ctr;84FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */85SHIFT_TEMPS8687/* Pass 1: process columns from input, store into work array. */8889inptr = coef_block;90quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;91wsptr = workspace;92for (ctr = DCTSIZE; ctr > 0; ctr--) {93/* Due to quantization, we will usually find that many of the input94* coefficients are zero, especially the AC terms. We can exploit this95* by short-circuiting the IDCT calculation for any column in which all96* the AC terms are zero. 
In that case each output is equal to the97* DC coefficient (with scale factor as needed).98* With typical images and quantization tables, half or more of the99* column DCT calculations can be simplified this way.100*/101102if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&103inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&104inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&105inptr[DCTSIZE*7] == 0) {106/* AC terms all zero */107FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);108109wsptr[DCTSIZE*0] = dcval;110wsptr[DCTSIZE*1] = dcval;111wsptr[DCTSIZE*2] = dcval;112wsptr[DCTSIZE*3] = dcval;113wsptr[DCTSIZE*4] = dcval;114wsptr[DCTSIZE*5] = dcval;115wsptr[DCTSIZE*6] = dcval;116wsptr[DCTSIZE*7] = dcval;117118inptr++; /* advance pointers to next column */119quantptr++;120wsptr++;121continue;122}123124/* Even part */125126tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);127tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);128tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);129tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);130131tmp10 = tmp0 + tmp2; /* phase 3 */132tmp11 = tmp0 - tmp2;133134tmp13 = tmp1 + tmp3; /* phases 5-3 */135tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */136137tmp0 = tmp10 + tmp13; /* phase 2 */138tmp3 = tmp10 - tmp13;139tmp1 = tmp11 + tmp12;140tmp2 = tmp11 - tmp12;141142/* Odd part */143144tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);145tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);146tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);147tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);148149z13 = tmp6 + tmp5; /* phase 6 */150z10 = tmp6 - tmp5;151z11 = tmp4 + tmp7;152z12 = tmp4 - tmp7;153154tmp7 = z11 + z13; /* phase 5 */155tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */156157z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */158tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */159tmp12 = ((FAST_FLOAT) 
-2.613125930) * z10 + z5; /* -2*(c2+c6) */160161tmp6 = tmp12 - tmp7; /* phase 2 */162tmp5 = tmp11 - tmp6;163tmp4 = tmp10 + tmp5;164165wsptr[DCTSIZE*0] = tmp0 + tmp7;166wsptr[DCTSIZE*7] = tmp0 - tmp7;167wsptr[DCTSIZE*1] = tmp1 + tmp6;168wsptr[DCTSIZE*6] = tmp1 - tmp6;169wsptr[DCTSIZE*2] = tmp2 + tmp5;170wsptr[DCTSIZE*5] = tmp2 - tmp5;171wsptr[DCTSIZE*4] = tmp3 + tmp4;172wsptr[DCTSIZE*3] = tmp3 - tmp4;173174inptr++; /* advance pointers to next column */175quantptr++;176wsptr++;177}178179/* Pass 2: process rows from work array, store into output array. */180/* Note that we must descale the results by a factor of 8 == 2**3. */181182wsptr = workspace;183for (ctr = 0; ctr < DCTSIZE; ctr++) {184outptr = output_buf[ctr] + output_col;185/* Rows of zeroes can be exploited in the same way as we did with columns.186* However, the column calculation has created many nonzero AC terms, so187* the simplification applies less often (typically 5% to 10% of the time).188* And testing floats for zero is relatively expensive, so we don't bother.189*/190191/* Even part */192193tmp10 = wsptr[0] + wsptr[4];194tmp11 = wsptr[0] - wsptr[4];195196tmp13 = wsptr[2] + wsptr[6];197tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;198199tmp0 = tmp10 + tmp13;200tmp3 = tmp10 - tmp13;201tmp1 = tmp11 + tmp12;202tmp2 = tmp11 - tmp12;203204/* Odd part */205206z13 = wsptr[5] + wsptr[3];207z10 = wsptr[5] - wsptr[3];208z11 = wsptr[1] + wsptr[7];209z12 = wsptr[1] - wsptr[7];210211tmp7 = z11 + z13;212tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);213214z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */215tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */216tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */217218tmp6 = tmp12 - tmp7;219tmp5 = tmp11 - tmp6;220tmp4 = tmp10 + tmp5;221222/* Final output stage: scale down by a factor of 8 and range-limit */223224outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)225& RANGE_MASK];226outptr[7] = 
range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)227& RANGE_MASK];228outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)229& RANGE_MASK];230outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)231& RANGE_MASK];232outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)233& RANGE_MASK];234outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)235& RANGE_MASK];236outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)237& RANGE_MASK];238outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)239& RANGE_MASK];240241wsptr += DCTSIZE; /* advance pointer to next row */242}243}244245#endif /* DCT_FLOAT_SUPPORTED */246247248