Path: blob/master/src/java.desktop/share/native/libmlib_image/mlib_ImageConvMxN_Fp.c
41149 views
/*1* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/242526/*27* FUNCTION28* mlib_ImageConvMxN_Fp - image convolution with edge condition29*30* SYNOPSIS31* mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst,32* const mlib_image *src,33* const mlib_d64 *kernel,34* mlib_s32 m,35* mlib_s32 n,36* mlib_s32 dm,37* mlib_s32 dn,38* mlib_s32 cmask,39* mlib_edge edge)40*41* ARGUMENTS42* dst Pointer to destination image.43* src Pointer to source image.44* m Kernel width (m must be not less than 1).45* n Kernel height (n must be not less than 1).46* dm, dn Position of key element in convolution kernel.47* kernel Pointer to convolution kernel.48* cmask Channel mask to indicate the channels to be convolved.49* Each bit of which represents a channel in the image. The50* channels corresponded to 1 bits are those to be processed.51* edge Type of edge condition.52*53* DESCRIPTION54* 2-D convolution, MxN kernel.55*56* The center of the source image is mapped to the center of the57* destination image.58* The unselected channels are not overwritten. If both src and dst have59* just one channel, cmask is ignored.60*61* The edge condition can be one of the following:62* MLIB_EDGE_DST_NO_WRITE (default)63* MLIB_EDGE_DST_FILL_ZERO64* MLIB_EDGE_DST_COPY_SRC65* MLIB_EDGE_SRC_EXTEND66*67* RESTRICTION68* The src and the dst must be the same type and have same number69* of channels (1, 2, 3, or 4).70* m >= 1, n >= 1,71* 0 <= dm < m, 0 <= dn < n.72*/7374#include "mlib_image.h"75#include "mlib_ImageCheck.h"76#include "mlib_SysMath.h"77#include "mlib_ImageConv.h"7879/***************************************************************/80static void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst,81const mlib_f32 *src,82const mlib_d64 *kernel,83mlib_s32 n,84mlib_s32 m,85mlib_s32 nch,86mlib_s32 dnch);8788static void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst,89const mlib_f32 *src,90mlib_s32 n,91mlib_s32 nch,92mlib_s32 dx_l,93mlib_s32 dx_r);9495static void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst,96const mlib_d64 *src,97const mlib_d64 *kernel,98mlib_s32 n,99mlib_s32 m,100mlib_s32 nch,101mlib_s32 dnch);102103static void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst,104const mlib_d64 *src,105mlib_s32 n,106mlib_s32 nch,107mlib_s32 dx_l,108mlib_s32 dx_r);109110/***************************************************************/111#if 0112static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst,113mlib_f32 *vdst,114const mlib_f32 *src,115const mlib_d64 *hfilter,116const mlib_d64 *vfilter,117mlib_s32 n,118mlib_s32 m,119mlib_s32 nch,120mlib_s32 dnch);121122static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst,123mlib_d64 *vdst,124const mlib_d64 *src,125const mlib_d64 *hfilter,126const mlib_d64 *vfilter,127mlib_s32 n,128mlib_s32 m,129mlib_s32 nch,130mlib_s32 dnch);131#endif /* 0 */132133/***************************************************************/134mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst,135const mlib_image *src,136const mlib_d64 *kernel,137mlib_s32 m,138mlib_s32 n,139mlib_s32 dm,140mlib_s32 dn,141mlib_s32 cmask,142mlib_edge edge)143{144mlib_type type;145146MLIB_IMAGE_CHECK(dst);147type = mlib_ImageGetType(dst);148149if (type != MLIB_FLOAT && type != MLIB_DOUBLE)150return MLIB_FAILURE;151152return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge);153}154155/***************************************************************/156void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst,157const mlib_f32 *src,158const mlib_d64 *kernel,159mlib_s32 n,160mlib_s32 m,161mlib_s32 nch,162mlib_s32 dnch)163{164mlib_f32 *hdst1 = dst + dnch;165mlib_s32 i, j;166167for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {168const mlib_f32 *src2 = src + 2 * nch;169mlib_f32 hval0 = (mlib_f32) kernel[0];170mlib_f32 hval1 = (mlib_f32) kernel[1];171mlib_f32 hval2 = (mlib_f32) kernel[2];172mlib_f32 val0 = src[0];173mlib_f32 val1 = src[nch];174mlib_f32 hdvl = dst[0];175176for (i = 0; i < n; i++) {177mlib_f32 hdvl0 = val0 * hval0 + hdvl;178mlib_f32 val2 = src2[i * nch];179180hdvl = hdst1[i * dnch];181hdvl0 += val1 * hval1;182hdvl0 += val2 * hval2;183val0 = val1;184val1 = val2;185186dst[i * dnch] = hdvl0;187}188}189190if (j < m - 1) {191const mlib_f32 *src2 = src + 2 * nch;192mlib_f32 hval0 = (mlib_f32) kernel[0];193mlib_f32 hval1 = (mlib_f32) kernel[1];194mlib_f32 val0 = src[0];195mlib_f32 val1 = src[nch];196mlib_f32 hdvl = dst[0];197for (i = 0; i < n; i++) {198mlib_f32 hdvl0 = val0 * hval0 + hdvl;199mlib_f32 val2 = src2[i * nch];200201hdvl = hdst1[i * dnch];202hdvl0 += val1 * hval1;203val0 = val1;204val1 = val2;205206dst[i * dnch] = hdvl0;207}208209}210else if (j < m) {211const mlib_f32 *src2 = src + 2 * nch;212mlib_f32 hval0 = (mlib_f32) kernel[0];213mlib_f32 val0 = src[0];214mlib_f32 val1 = src[nch];215mlib_f32 hdvl = dst[0];216217for (i = 0; i < n; i++) {218mlib_f32 hdvl0 = val0 * hval0 + hdvl;219mlib_f32 val2 = src2[i * nch];220221hdvl = hdst1[i * dnch];222val0 = val1;223val1 = val2;224225dst[i * dnch] = hdvl0;226}227}228}229230/***************************************************************/231void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst,232const mlib_f32 *src,233mlib_s32 n,234mlib_s32 nch,235mlib_s32 dx_l,236mlib_s32 dx_r)237{238mlib_s32 i;239mlib_f32 val = src[0];240241for (i = 0; i < dx_l; i++)242dst[i] = val;243for (; i < n - dx_r; i++)244dst[i] = src[nch * (i - dx_l)];245val = dst[n - dx_r - 1];246for (; i < n; i++)247dst[i] = val;248}249250/***************************************************************/251mlib_status mlib_convMxNext_f32(mlib_image *dst,252const mlib_image *src,253const mlib_d64 *kernel,254mlib_s32 m,255mlib_s32 n,256mlib_s32 dx_l,257mlib_s32 dx_r,258mlib_s32 dy_t,259mlib_s32 dy_b,260mlib_s32 cmask)261{262mlib_d64 dspace[1024], *dsa = dspace;263mlib_s32 wid_e = mlib_ImageGetWidth(src);264mlib_f32 *fsa;265mlib_f32 *da = mlib_ImageGetData(dst);266mlib_f32 *sa = mlib_ImageGetData(src);267mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2;268mlib_s32 slb = mlib_ImageGetStride(src) >> 2;269mlib_s32 dw = mlib_ImageGetWidth(dst);270mlib_s32 dh = mlib_ImageGetHeight(dst);271mlib_s32 nch = mlib_ImageGetChannels(dst);272mlib_s32 i, j, j1, k;273274if (3 * wid_e + m > 1024) {275dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));276277if (dsa == NULL)278return MLIB_FAILURE;279}280281fsa = (mlib_f32 *) dsa;282283for (j = 0; j < dh; j++, da += dlb) {284for (k = 0; k < nch; k++)285if (cmask & (1 << (nch - 1 - k))) {286const mlib_f32 *sa1 = sa + k;287mlib_f32 *da1 = da + k;288const mlib_d64 *kernel1 = kernel;289290for (i = 0; i < dw; i++)291da1[i * nch] = 0.f;292for (j1 = 0; j1 < n; j1++, kernel1 += m) {293mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r);294mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch);295296if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))297sa1 += slb;298}299}300301if ((j >= dy_t) && (j < dh + n - dy_b - 2))302sa += slb;303}304305if (dsa != dspace)306mlib_free(dsa);307return MLIB_SUCCESS;308}309310/***************************************************************/311#if 0312313void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst,314mlib_f32 *vdst,315const mlib_f32 *src,316const mlib_d64 *hfilter,317const mlib_d64 *vfilter,318mlib_s32 n,319mlib_s32 m,320mlib_s32 nch,321mlib_s32 dnch)322{323mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;324mlib_s32 i, j;325326for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {327mlib_f32 *src2 = src + 2 * nch;328mlib_f32 hval0 = (mlib_f32) hfilter[0];329mlib_f32 vval0 = (mlib_f32) vfilter[0];330mlib_f32 hval1 = (mlib_f32) hfilter[1];331mlib_f32 vval1 = (mlib_f32) vfilter[1];332mlib_f32 hval2 = (mlib_f32) hfilter[2];333mlib_f32 vval2 = (mlib_f32) vfilter[2];334mlib_f32 val0 = src[0];335mlib_f32 val1 = src[nch];336mlib_f32 hdvl = hdst[0];337mlib_f32 vdvl = vdst[0];338339for (i = 0; i < n; i++) {340mlib_f32 hdvl0 = val0 * hval0 + hdvl;341mlib_f32 vdvl0 = val0 * vval0 + vdvl;342mlib_f32 val2 = src2[i * nch];343344hdvl = hdst1[i * dnch];345vdvl = vdst1[i * dnch];346hdvl0 += val1 * hval1;347vdvl0 += val1 * vval1;348hdvl0 += val2 * hval2;349vdvl0 += val2 * vval2;350val0 = val1;351val1 = val2;352353hdst[i * dnch] = hdvl0;354vdst[i * dnch] = vdvl0;355}356}357358if (j < m - 1) {359mlib_f32 *src2 = src + 2 * nch;360mlib_f32 hval0 = (mlib_f32) hfilter[0];361mlib_f32 vval0 = (mlib_f32) vfilter[0];362mlib_f32 hval1 = (mlib_f32) hfilter[1];363mlib_f32 vval1 = (mlib_f32) vfilter[1];364mlib_f32 val0 = src[0];365mlib_f32 val1 = src[nch];366mlib_f32 hdvl = hdst[0];367mlib_f32 vdvl = vdst[0];368369for (i = 0; i < n; i++) {370mlib_f32 hdvl0 = val0 * hval0 + hdvl;371mlib_f32 vdvl0 = val0 * vval0 + vdvl;372mlib_f32 val2 = src2[i * nch];373374hdvl = hdst1[i * dnch];375vdvl = vdst1[i * dnch];376hdvl0 += val1 * hval1;377vdvl0 += val1 * vval1;378val0 = val1;379val1 = val2;380381hdst[i * dnch] = hdvl0;382vdst[i * dnch] = vdvl0;383}384385}386else if (j < m) {387mlib_f32 *src2 = src + 2 * nch;388mlib_f32 hval0 = (mlib_f32) hfilter[0];389mlib_f32 vval0 = (mlib_f32) vfilter[0];390mlib_f32 val0 = src[0];391mlib_f32 val1 = src[nch];392mlib_f32 hdvl = hdst[0];393mlib_f32 vdvl = vdst[0];394395for (i = 0; i < n; i++) {396mlib_f32 hdvl0 = val0 * hval0 + hdvl;397mlib_f32 vdvl0 = val0 * vval0 + vdvl;398mlib_f32 val2 = src2[i * nch];399400hdvl = hdst1[i * dnch];401vdvl = vdst1[i * dnch];402val0 = val1;403val1 = val2;404405hdst[i * dnch] = hdvl0;406vdst[i * dnch] = vdvl0;407}408}409}410411/***************************************************************/412void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst,413mlib_d64 *vdst,414const mlib_d64 *src,415const mlib_d64 *hfilter,416const mlib_d64 *vfilter,417mlib_s32 n,418mlib_s32 m,419mlib_s32 nch,420mlib_s32 dnch)421{422mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;423mlib_s32 i, j;424425for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {426mlib_d64 *src2 = src + 2 * nch;427mlib_d64 hval0 = hfilter[0];428mlib_d64 vval0 = vfilter[0];429mlib_d64 hval1 = hfilter[1];430mlib_d64 vval1 = vfilter[1];431mlib_d64 hval2 = hfilter[2];432mlib_d64 vval2 = vfilter[2];433mlib_d64 val0 = src[0];434mlib_d64 val1 = src[nch];435mlib_d64 hdvl = hdst[0];436mlib_d64 vdvl = vdst[0];437438for (i = 0; i < n; i++) {439mlib_d64 hdvl0 = val0 * hval0 + hdvl;440mlib_d64 vdvl0 = val0 * vval0 + vdvl;441mlib_d64 val2 = src2[i * nch];442443hdvl = hdst1[i * dnch];444vdvl = vdst1[i * dnch];445hdvl0 += val1 * hval1;446vdvl0 += val1 * vval1;447hdvl0 += val2 * hval2;448vdvl0 += val2 * vval2;449val0 = val1;450val1 = val2;451452hdst[i * dnch] = hdvl0;453vdst[i * dnch] = vdvl0;454}455}456457if (j < m - 1) {458mlib_d64 *src2 = src + 2 * nch;459mlib_d64 hval0 = hfilter[0];460mlib_d64 vval0 = vfilter[0];461mlib_d64 hval1 = hfilter[1];462mlib_d64 vval1 = vfilter[1];463mlib_d64 val0 = src[0];464mlib_d64 val1 = src[nch];465mlib_d64 hdvl = hdst[0];466mlib_d64 vdvl = vdst[0];467468for (i = 0; i < n; i++) {469mlib_d64 hdvl0 = val0 * hval0 + hdvl;470mlib_d64 vdvl0 = val0 * vval0 + vdvl;471mlib_d64 val2 = src2[i * nch];472473hdvl = hdst1[i * dnch];474vdvl = vdst1[i * dnch];475hdvl0 += val1 * hval1;476vdvl0 += val1 * vval1;477val0 = val1;478val1 = val2;479480hdst[i * dnch] = hdvl0;481vdst[i * dnch] = vdvl0;482}483484}485else if (j < m) {486mlib_d64 *src2 = src + 2 * nch;487mlib_d64 hval0 = hfilter[0];488mlib_d64 vval0 = vfilter[0];489mlib_d64 val0 = src[0];490mlib_d64 val1 = src[nch];491mlib_d64 hdvl = hdst[0];492mlib_d64 vdvl = vdst[0];493494for (i = 0; i < n; i++) {495mlib_d64 hdvl0 = val0 * hval0 + hdvl;496mlib_d64 vdvl0 = val0 * vval0 + vdvl;497mlib_d64 val2 = src2[i * nch];498499hdvl = hdst1[i * dnch];500vdvl = vdst1[i * dnch];501val0 = val1;502val1 = val2;503504hdst[i * dnch] = hdvl0;505vdst[i * dnch] = vdvl0;506}507}508}509510#endif /* 0 */511512/***************************************************************/513void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst,514const mlib_d64 *src,515const mlib_d64 *kernel,516mlib_s32 n,517mlib_s32 m,518mlib_s32 nch,519mlib_s32 dnch)520{521mlib_d64 *hdst1 = dst + dnch;522mlib_s32 i, j;523524for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {525const mlib_d64 *src2 = src + 2 * nch;526mlib_d64 hval0 = kernel[0];527mlib_d64 hval1 = kernel[1];528mlib_d64 hval2 = kernel[2];529mlib_d64 val0 = src[0];530mlib_d64 val1 = src[nch];531mlib_d64 hdvl = dst[0];532533for (i = 0; i < n; i++) {534mlib_d64 hdvl0 = val0 * hval0 + hdvl;535mlib_d64 val2 = src2[i * nch];536537hdvl = hdst1[i * dnch];538hdvl0 += val1 * hval1;539hdvl0 += val2 * hval2;540val0 = val1;541val1 = val2;542543dst[i * dnch] = hdvl0;544}545}546547if (j < m - 1) {548const mlib_d64 *src2 = src + 2 * nch;549mlib_d64 hval0 = kernel[0];550mlib_d64 hval1 = kernel[1];551mlib_d64 val0 = src[0];552mlib_d64 val1 = src[nch];553mlib_d64 hdvl = dst[0];554555for (i = 0; i < n; i++) {556mlib_d64 hdvl0 = val0 * hval0 + hdvl;557mlib_d64 val2 = src2[i * nch];558559hdvl = hdst1[i * dnch];560hdvl0 += val1 * hval1;561val0 = val1;562val1 = val2;563564dst[i * dnch] = hdvl0;565}566567}568else if (j < m) {569const mlib_d64 *src2 = src + 2 * nch;570mlib_d64 hval0 = kernel[0];571mlib_d64 val0 = src[0];572mlib_d64 val1 = src[nch];573mlib_d64 hdvl = dst[0];574575for (i = 0; i < n; i++) {576mlib_d64 hdvl0 = val0 * hval0 + hdvl;577mlib_d64 val2 = src2[i * nch];578579hdvl = hdst1[i * dnch];580val0 = val1;581val1 = val2;582583dst[i * dnch] = hdvl0;584}585}586}587588/***************************************************************/589void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst,590const mlib_d64 *src,591mlib_s32 n,592mlib_s32 nch,593mlib_s32 dx_l,594mlib_s32 dx_r)595{596mlib_s32 i;597mlib_d64 val = src[0];598599for (i = 0; i < dx_l; i++)600dst[i] = val;601for (; i < n - dx_r; i++)602dst[i] = src[nch * (i - dx_l)];603val = dst[n - dx_r - 1];604for (; i < n; i++)605dst[i] = val;606}607608/***************************************************************/609mlib_status mlib_convMxNext_d64(mlib_image *dst,610const mlib_image *src,611const mlib_d64 *kernel,612mlib_s32 m,613mlib_s32 n,614mlib_s32 dx_l,615mlib_s32 dx_r,616mlib_s32 dy_t,617mlib_s32 dy_b,618mlib_s32 cmask)619{620mlib_d64 dspace[1024], *dsa = dspace;621mlib_s32 wid_e = mlib_ImageGetWidth(src);622mlib_d64 *da = mlib_ImageGetData(dst);623mlib_d64 *sa = mlib_ImageGetData(src);624mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3;625mlib_s32 slb = mlib_ImageGetStride(src) >> 3;626mlib_s32 dw = mlib_ImageGetWidth(dst);627mlib_s32 dh = mlib_ImageGetHeight(dst);628mlib_s32 nch = mlib_ImageGetChannels(dst);629mlib_s32 i, j, j1, k;630631if (3 * wid_e + m > 1024) {632dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));633634if (dsa == NULL)635return MLIB_FAILURE;636}637638for (j = 0; j < dh; j++, da += dlb) {639for (k = 0; k < nch; k++)640if (cmask & (1 << (nch - 1 - k))) {641mlib_d64 *sa1 = sa + k;642mlib_d64 *da1 = da + k;643const mlib_d64 *kernel1 = kernel;644645for (i = 0; i < dw; i++)646da1[i * nch] = 0.;647for (j1 = 0; j1 < n; j1++, kernel1 += m) {648mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r);649mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch);650651if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))652sa1 += slb;653}654}655656if ((j >= dy_t) && (j < dh + n - dy_b - 2))657sa += slb;658}659660if (dsa != dspace)661mlib_free(dsa);662return MLIB_SUCCESS;663}664665/***************************************************************/666667668