Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c
41152 views
1
/*
 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26
27
/*
 * FUNCTION
 *      Internal functions for mlib_ImageConv* on U8/S16/U16 types and
 *      MLIB_EDGE_DST_NO_WRITE mask
 */
32
33
#include "mlib_image.h"
34
#include "mlib_ImageConv.h"
35
#include "mlib_c_ImageConv.h"
36
37
/*
 * This define switches between functions of different data types:
 *   1 - mlib_u8, 2 - mlib_s16, 3 - mlib_u16.
 *
 * For the selected type the block below provides:
 *   DTYPE           pixel element type
 *   CONV_FUNC(K)    name of the floating-point convolution entry point
 *   CONV_FUNC_I(K)  name of the integer convolution entry point
 *   DSCALE          initial value of the kernel scale factor (see fscale)
 *   FROM_S32(x)     maps a saturated 32-bit accumulator back to a pixel value
 *   S64TOS32(x)     keeps only the low 32 bits where sign extension could
 *                   otherwise leak into the packed upper word (LOAD_BUFF)
 *   SAT_OFF         bias appended to the operand inside D2I(); empty for s16
 */
#define IMG_TYPE 1

/***************************************************************/
#if IMG_TYPE == 1

#define DTYPE             mlib_u8
#define CONV_FUNC(KERN)   mlib_c_conv##KERN##nw_u8
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u8
#define DSCALE            (1 << 24)
#define FROM_S32(x)       (((x) >> 24) ^ 128)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#elif IMG_TYPE == 2

#define DTYPE             mlib_s16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_s16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_s16
#define DSCALE            65536.0
#define FROM_S32(x)       ((x) >> 16)
#define S64TOS32(x)       ((x) & 0xffffffff)
#define SAT_OFF

#elif IMG_TYPE == 3

#define DTYPE             mlib_u16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_u16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u16
#define DSCALE            65536.0
#define FROM_S32(x)       (((x) >> 16) ^ 0x8000)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#endif /* IMG_TYPE == 1 */
74
75
/***************************************************************/
76
/* Number of elements in the on-stack scratch buffers of the routines below. */
#define BUFF_SIZE   1600

/* Byte budget mlib_ImageConv1xN uses to choose how many rows form a strip. */
#define CACHE_SIZE  (64*1024)

/***************************************************************/
/* Floating-point type used for kernel coefficients and accumulators. */
#define FTYPE mlib_d64

#ifndef MLIB_USE_FTOI_CLAMPING

/* Convert x to mlib_s32, saturating at MLIB_S32_MIN / MLIB_S32_MAX. */
#define CLAMP_S32(x)                                            \
  (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))

#else

/*
 * Plain cast.
 * NOTE(review): presumably only valid where the float-to-int conversion
 * itself saturates - confirm against the build that sets this flag.
 */
#define CLAMP_S32(x) ((mlib_s32)(x))

#endif /* MLIB_USE_FTOI_CLAMPING */

/***************************************************************/
/* Append the per-type SAT_OFF bias to x, then convert to a clamped mlib_s32. */
#define D2I(x) CLAMP_S32((x) SAT_OFF)
96
97
/***************************************************************/
98
/*
 * STORE2(res0, res1): store a pair of results to dp[0] and dp[chan1].
 * The order of the pair is swapped when _LITTLE_ENDIAN is defined.
 * Expects 'dp' and 'chan1' to be in scope at the point of use.
 */
#ifdef _LITTLE_ENDIAN

#define STORE2(res0, res1)                                      \
  do {                                                          \
    dp[0    ] = res1;                                           \
    dp[chan1] = res0;                                           \
  } while (0)

#else

#define STORE2(res0, res1)                                      \
  do {                                                          \
    dp[0    ] = res0;                                           \
    dp[chan1] = res1;                                           \
  } while (0)

#endif /* _LITTLE_ENDIAN */

/***************************************************************/
/*
 * LOAD_BUFF(buff): copy the two source pixels sp[0] and sp[chan1] into
 * buff[i] and buff[i + 1]. Expects 'sp', 'chan1' and 'i' to be in scope.
 *
 * Without _NO_LONGLONG both pixels are packed into one mlib_s64 and written
 * with a single 64-bit store through a cast pointer; which pixel goes into
 * the upper 32 bits depends on _LITTLE_ENDIAN.
 */
#ifdef _NO_LONGLONG

#define LOAD_BUFF(buff)                                         \
  do {                                                          \
    buff[i    ] = sp[0];                                        \
    buff[i + 1] = sp[chan1];                                    \
  } while (0)

#else /* _NO_LONGLONG */

#ifdef _LITTLE_ENDIAN

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])

#else /* _LITTLE_ENDIAN */

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])

#endif /* _LITTLE_ENDIAN */
#endif /* _NO_LONGLONG */
133
134
/***************************************************************/
135
typedef union {
136
mlib_d64 d64;
137
struct {
138
mlib_s32 i0;
139
mlib_s32 i1;
140
} i32s;
141
struct {
142
mlib_s32 f0;
143
mlib_s32 f1;
144
} f32s;
145
} d64_2x32;
146
147
/***************************************************************/
148
/*
 * DEF_VARS(type): declare the common locals of a convolution routine.
 *   adr_src/adr_dst  image data base pointers
 *   sl/dl, sp/dp     current line and current pixel pointers
 *   pbuff            scratch buffer, initially the local array 'buff'
 *                    (which must be declared before this macro is used)
 *   wid, hgt         image size; sll, dll line strides in elements
 *   nchannel, chan1  channel count and per-pixel element step
 *   i, j, c          loop counters
 * The expansion has no trailing ';' - the caller supplies it.
 */
#define DEF_VARS(type)                                          \
  type     *adr_src, *sl, *sp = NULL;                           \
  type     *adr_dst, *dl, *dp = NULL;                           \
  FTYPE    *pbuff = buff;                                       \
  mlib_s32 wid, hgt, sll, dll;                                  \
  mlib_s32 nchannel, chan1;                                     \
  mlib_s32 i, j, c

/***************************************************************/
/*
 * GET_SRC_DST_PARAMETERS(type): fill the DEF_VARS locals from the images
 * 'src' and 'dst'. Height, width and channel count are taken from src;
 * the byte strides are converted to strides in units of 'type'.
 */
#define GET_SRC_DST_PARAMETERS(type)                            \
  hgt = mlib_ImageGetHeight(src);                               \
  wid = mlib_ImageGetWidth(src);                                \
  nchannel = mlib_ImageGetChannels(src);                        \
  sll = mlib_ImageGetStride(src) / sizeof(type);                \
  dll = mlib_ImageGetStride(dst) / sizeof(type);                \
  adr_src = (type *)mlib_ImageGetData(src);                     \
  adr_dst = (type *)mlib_ImageGetData(dst)
165
166
/***************************************************************/
167
/*
 * CLAMP_STORE(dst, val): store the integer 'val' into 'dst', saturated to
 * the range of the pixel type selected by IMG_TYPE. 'val' is evaluated more
 * than once, so it must be free of side effects. Each variant expands to a
 * single statement; the caller supplies the trailing ';'.
 */
#if IMG_TYPE == 1

/* Test for the presence of any "1" bit in bits
   8 to 31 of val. If present, then val is either
   negative or >255. If over/underflows of 8 bits
   are uncommon, then this technique can be a win,
   since only a single test, rather than two, is
   necessary to determine if clamping is needed.
   On the other hand, if over/underflows are common,
   it adds an extra test.
*/
#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) & 0xffffff00) {                                   \
      if ((val) < MLIB_U8_MIN)                                  \
        (dst) = MLIB_U8_MIN;                                    \
      else                                                      \
        (dst) = MLIB_U8_MAX;                                    \
    } else {                                                    \
      (dst) = (mlib_u8)(val);                                   \
    }                                                           \
  } while (0)

#elif IMG_TYPE == 2

#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) >= MLIB_S16_MAX)                                  \
      (dst) = MLIB_S16_MAX;                                     \
    else if ((val) <= MLIB_S16_MIN)                             \
      (dst) = MLIB_S16_MIN;                                     \
    else                                                        \
      (dst) = (mlib_s16)(val);                                  \
  } while (0)

#elif IMG_TYPE == 3

#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) >= MLIB_U16_MAX)                                  \
      (dst) = MLIB_U16_MAX;                                     \
    else if ((val) <= MLIB_U16_MIN)                             \
      (dst) = MLIB_U16_MIN;                                     \
    else                                                        \
      (dst) = (mlib_u16)(val);                                  \
  } while (0)

#endif /* IMG_TYPE == 1 */
209
210
/***************************************************************/
211
#define MAX_KER 7
212
#define MAX_N 15
213
214
static mlib_status mlib_ImageConv1xN(mlib_image *dst,
215
const mlib_image *src,
216
const mlib_d64 *k,
217
mlib_s32 n,
218
mlib_s32 dn,
219
mlib_s32 cmask)
220
{
221
FTYPE buff[BUFF_SIZE];
222
mlib_s32 off, kh;
223
mlib_s32 d0, d1;
224
const FTYPE *pk;
225
FTYPE k0, k1, k2, k3;
226
FTYPE p0, p1, p2, p3, p4;
227
DEF_VARS(DTYPE);
228
DTYPE *sl_c, *dl_c, *sl0;
229
mlib_s32 l, hsize, max_hsize;
230
GET_SRC_DST_PARAMETERS(DTYPE);
231
232
hgt -= (n - 1);
233
adr_dst += dn*dll;
234
235
max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll;
236
237
if (!max_hsize) max_hsize = 1;
238
239
if (max_hsize > BUFF_SIZE) {
240
pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
241
}
242
243
chan1 = nchannel;
244
245
sl_c = adr_src;
246
dl_c = adr_dst;
247
248
for (l = 0; l < hgt; l += hsize) {
249
hsize = hgt - l;
250
251
if (hsize > max_hsize) hsize = max_hsize;
252
253
for (c = 0; c < nchannel; c++) {
254
if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
255
256
sl = sl_c + c;
257
dl = dl_c + c;
258
259
for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
260
261
for (i = 0; i < wid; i++) {
262
sl0 = sl;
263
264
for (off = 0; off < (n - 4); off += 4) {
265
pk = k + off;
266
sp = sl0;
267
268
k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
269
p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
270
sp += 3*sll;
271
272
for (j = 0; j < hsize; j += 2) {
273
p0 = p2; p1 = p3; p2 = p4;
274
p3 = sp[0];
275
p4 = sp[sll];
276
277
pbuff[j ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
278
pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
279
280
sp += 2*sll;
281
}
282
283
sl0 += 4*sll;
284
}
285
286
pk = k + off;
287
sp = sl0;
288
289
k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
290
p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
291
292
dp = dl;
293
kh = n - off;
294
295
if (kh == 4) {
296
sp += 3*sll;
297
298
for (j = 0; j <= (hsize - 2); j += 2) {
299
p0 = p2; p1 = p3; p2 = p4;
300
p3 = sp[0];
301
p4 = sp[sll];
302
303
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
304
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
305
306
dp[0 ] = FROM_S32(d0);
307
dp[dll] = FROM_S32(d1);
308
309
pbuff[j] = 0;
310
pbuff[j + 1] = 0;
311
312
sp += 2*sll;
313
dp += 2*dll;
314
}
315
316
if (j < hsize) {
317
p0 = p2; p1 = p3; p2 = p4;
318
p3 = sp[0];
319
320
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
321
322
pbuff[j] = 0;
323
324
dp[0] = FROM_S32(d0);
325
}
326
327
} else if (kh == 3) {
328
sp += 2*sll;
329
330
for (j = 0; j <= (hsize - 2); j += 2) {
331
p0 = p2; p1 = p3;
332
p2 = sp[0];
333
p3 = sp[sll];
334
335
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
336
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
337
338
dp[0 ] = FROM_S32(d0);
339
dp[dll] = FROM_S32(d1);
340
341
pbuff[j] = 0;
342
pbuff[j + 1] = 0;
343
344
sp += 2*sll;
345
dp += 2*dll;
346
}
347
348
if (j < hsize) {
349
p0 = p2; p1 = p3;
350
p2 = sp[0];
351
352
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
353
354
pbuff[j] = 0;
355
356
dp[0] = FROM_S32(d0);
357
}
358
359
} else if (kh == 2) {
360
sp += sll;
361
362
for (j = 0; j <= (hsize - 2); j += 2) {
363
p0 = p2;
364
p1 = sp[0];
365
p2 = sp[sll];
366
367
d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
368
d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
369
370
dp[0 ] = FROM_S32(d0);
371
dp[dll] = FROM_S32(d1);
372
373
pbuff[j] = 0;
374
pbuff[j + 1] = 0;
375
376
sp += 2*sll;
377
dp += 2*dll;
378
}
379
380
if (j < hsize) {
381
p0 = p2;
382
p1 = sp[0];
383
384
d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
385
386
pbuff[j] = 0;
387
388
dp[0] = FROM_S32(d0);
389
}
390
391
} else /* if (kh == 1) */ {
392
for (j = 0; j < hsize; j++) {
393
p0 = sp[0];
394
395
d0 = D2I(p0*k0 + pbuff[j]);
396
397
dp[0] = FROM_S32(d0);
398
399
pbuff[j] = 0;
400
401
sp += sll;
402
dp += dll;
403
}
404
}
405
406
sl += chan1;
407
dl += chan1;
408
}
409
}
410
411
sl_c += max_hsize*sll;
412
dl_c += max_hsize*dll;
413
}
414
415
if (pbuff != buff) mlib_free(pbuff);
416
417
return MLIB_SUCCESS;
418
}
419
420
/***************************************************************/
421
mlib_status CONV_FUNC(MxN)(mlib_image *dst,
422
const mlib_image *src,
423
const mlib_s32 *kernel,
424
mlib_s32 m,
425
mlib_s32 n,
426
mlib_s32 dm,
427
mlib_s32 dn,
428
mlib_s32 scale,
429
mlib_s32 cmask)
430
{
431
FTYPE buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)];
432
FTYPE **buffs = buffs_arr, *buffd;
433
FTYPE akernel[256], *k = akernel, fscale = DSCALE;
434
mlib_s32 mn, l, off, kw, bsize, buff_ind;
435
mlib_s32 d0, d1;
436
FTYPE k0, k1, k2, k3, k4, k5, k6;
437
FTYPE p0, p1, p2, p3, p4, p5, p6, p7;
438
d64_2x32 dd;
439
DEF_VARS(DTYPE);
440
mlib_s32 chan2;
441
mlib_s32 *buffo, *buffi;
442
mlib_status status = MLIB_SUCCESS;
443
444
GET_SRC_DST_PARAMETERS(DTYPE);
445
446
if (scale > 30) {
447
fscale *= 1.0/(1 << 30);
448
scale -= 30;
449
}
450
451
fscale /= (1 << scale);
452
453
mn = m*n;
454
455
if (mn > 256) {
456
k = mlib_malloc(mn*sizeof(mlib_d64));
457
458
if (k == NULL) return MLIB_FAILURE;
459
}
460
461
for (i = 0; i < mn; i++) {
462
k[i] = kernel[i]*fscale;
463
}
464
465
if (m == 1) {
466
status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
467
FREE_AND_RETURN_STATUS;
468
}
469
470
bsize = (n + 3)*wid;
471
472
if ((bsize > BUFF_SIZE) || (n > MAX_N)) {
473
pbuff = mlib_malloc(sizeof(FTYPE)*bsize + sizeof(FTYPE *)*2*(n + 1));
474
475
if (pbuff == NULL) {
476
status = MLIB_FAILURE;
477
FREE_AND_RETURN_STATUS;
478
}
479
buffs = (FTYPE **)(pbuff + bsize);
480
}
481
482
for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
483
for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
484
buffd = buffs[n] + wid;
485
buffo = (mlib_s32*)(buffd + wid);
486
buffi = buffo + (wid &~ 1);
487
488
chan1 = nchannel;
489
chan2 = chan1 + chan1;
490
491
wid -= (m - 1);
492
hgt -= (n - 1);
493
adr_dst += dn*dll + dm*nchannel;
494
495
for (c = 0; c < nchannel; c++) {
496
if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
497
498
sl = adr_src + c;
499
dl = adr_dst + c;
500
501
for (l = 0; l < n; l++) {
502
FTYPE *buff = buffs[l];
503
504
for (i = 0; i < wid + (m - 1); i++) {
505
buff[i] = (FTYPE)sl[i*chan1];
506
}
507
508
sl += sll;
509
}
510
511
buff_ind = 0;
512
513
for (i = 0; i < wid; i++) buffd[i] = 0.0;
514
515
for (j = 0; j < hgt; j++) {
516
FTYPE **buffc = buffs + buff_ind;
517
FTYPE *buffn = buffc[n];
518
FTYPE *pk = k;
519
520
for (l = 0; l < n; l++) {
521
FTYPE *buff_l = buffc[l];
522
523
for (off = 0; off < m;) {
524
FTYPE *buff = buff_l + off;
525
526
kw = m - off;
527
528
if (kw > 2*MAX_KER) kw = MAX_KER; else
529
if (kw > MAX_KER) kw = kw/2;
530
off += kw;
531
532
sp = sl;
533
dp = dl;
534
535
p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
536
p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
537
538
k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
539
k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
540
pk += kw;
541
542
if (kw == 7) {
543
544
if (l < (n - 1) || off < m) {
545
for (i = 0; i <= (wid - 2); i += 2) {
546
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
547
548
p6 = buff[i + 6]; p7 = buff[i + 7];
549
550
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
551
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
552
}
553
554
} else {
555
for (i = 0; i <= (wid - 2); i += 2) {
556
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
557
558
p6 = buff[i + 6]; p7 = buff[i + 7];
559
560
LOAD_BUFF(buffi);
561
562
dd.d64 = *(FTYPE *)(buffi + i);
563
buffn[i ] = (FTYPE)dd.i32s.i0;
564
buffn[i + 1] = (FTYPE)dd.i32s.i1;
565
566
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]);
567
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
568
569
dp[0 ] = FROM_S32(d0);
570
dp[chan1] = FROM_S32(d1);
571
572
buffd[i ] = 0.0;
573
buffd[i + 1] = 0.0;
574
575
sp += chan2;
576
dp += chan2;
577
}
578
}
579
580
} else if (kw == 6) {
581
582
if (l < (n - 1) || off < m) {
583
for (i = 0; i <= (wid - 2); i += 2) {
584
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
585
586
p5 = buff[i + 5]; p6 = buff[i + 6];
587
588
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
589
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
590
}
591
592
} else {
593
for (i = 0; i <= (wid - 2); i += 2) {
594
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
595
596
p5 = buff[i + 5]; p6 = buff[i + 6];
597
598
buffn[i ] = (FTYPE)sp[0];
599
buffn[i + 1] = (FTYPE)sp[chan1];
600
601
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]);
602
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
603
604
dp[0 ] = FROM_S32(d0);
605
dp[chan1] = FROM_S32(d1);
606
607
buffd[i ] = 0.0;
608
buffd[i + 1] = 0.0;
609
610
sp += chan2;
611
dp += chan2;
612
}
613
}
614
615
} else if (kw == 5) {
616
617
if (l < (n - 1) || off < m) {
618
for (i = 0; i <= (wid - 2); i += 2) {
619
p0 = p2; p1 = p3; p2 = p4; p3 = p5;
620
621
p4 = buff[i + 4]; p5 = buff[i + 5];
622
623
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
624
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
625
}
626
627
} else {
628
for (i = 0; i <= (wid - 2); i += 2) {
629
p0 = p2; p1 = p3; p2 = p4; p3 = p5;
630
631
p4 = buff[i + 4]; p5 = buff[i + 5];
632
633
buffn[i ] = (FTYPE)sp[0];
634
buffn[i + 1] = (FTYPE)sp[chan1];
635
636
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]);
637
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
638
639
dp[0 ] = FROM_S32(d0);
640
dp[chan1] = FROM_S32(d1);
641
642
buffd[i ] = 0.0;
643
buffd[i + 1] = 0.0;
644
645
sp += chan2;
646
dp += chan2;
647
}
648
}
649
650
} else if (kw == 4) {
651
652
if (l < (n - 1) || off < m) {
653
for (i = 0; i <= (wid - 2); i += 2) {
654
p0 = p2; p1 = p3; p2 = p4;
655
656
p3 = buff[i + 3]; p4 = buff[i + 4];
657
658
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
659
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
660
}
661
662
} else {
663
for (i = 0; i <= (wid - 2); i += 2) {
664
p0 = p2; p1 = p3; p2 = p4;
665
666
p3 = buff[i + 3]; p4 = buff[i + 4];
667
668
buffn[i ] = (FTYPE)sp[0];
669
buffn[i + 1] = (FTYPE)sp[chan1];
670
671
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]);
672
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
673
674
dp[0 ] = FROM_S32(d0);
675
dp[chan1] = FROM_S32(d1);
676
677
buffd[i ] = 0.0;
678
buffd[i + 1] = 0.0;
679
680
sp += chan2;
681
dp += chan2;
682
}
683
}
684
685
} else if (kw == 3) {
686
687
if (l < (n - 1) || off < m) {
688
for (i = 0; i <= (wid - 2); i += 2) {
689
p0 = p2; p1 = p3;
690
691
p2 = buff[i + 2]; p3 = buff[i + 3];
692
693
buffd[i ] += p0*k0 + p1*k1 + p2*k2;
694
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
695
}
696
697
} else {
698
for (i = 0; i <= (wid - 2); i += 2) {
699
p0 = p2; p1 = p3;
700
701
p2 = buff[i + 2]; p3 = buff[i + 3];
702
703
buffn[i ] = (FTYPE)sp[0];
704
buffn[i + 1] = (FTYPE)sp[chan1];
705
706
d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ]);
707
d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
708
709
dp[0 ] = FROM_S32(d0);
710
dp[chan1] = FROM_S32(d1);
711
712
buffd[i ] = 0.0;
713
buffd[i + 1] = 0.0;
714
715
sp += chan2;
716
dp += chan2;
717
}
718
}
719
720
} else /*if (kw == 2)*/ {
721
722
if (l < (n - 1) || off < m) {
723
for (i = 0; i <= (wid - 2); i += 2) {
724
p0 = p2;
725
726
p1 = buff[i + 1]; p2 = buff[i + 2];
727
728
buffd[i ] += p0*k0 + p1*k1;
729
buffd[i + 1] += p1*k0 + p2*k1;
730
}
731
732
} else {
733
for (i = 0; i <= (wid - 2); i += 2) {
734
p0 = p2;
735
736
p1 = buff[i + 1]; p2 = buff[i + 2];
737
738
buffn[i ] = (FTYPE)sp[0];
739
buffn[i + 1] = (FTYPE)sp[chan1];
740
741
d0 = D2I(p0*k0 + p1*k1 + buffd[i ]);
742
d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);
743
744
dp[0 ] = FROM_S32(d0);
745
dp[chan1] = FROM_S32(d1);
746
747
buffd[i ] = 0.0;
748
buffd[i + 1] = 0.0;
749
750
sp += chan2;
751
dp += chan2;
752
}
753
}
754
}
755
}
756
}
757
758
/* last pixels */
759
for (; i < wid; i++) {
760
FTYPE *pk = k, s = 0;
761
mlib_s32 x, d0;
762
763
for (l = 0; l < n; l++) {
764
FTYPE *buff = buffc[l] + i;
765
766
for (x = 0; x < m; x++) s += buff[x] * (*pk++);
767
}
768
769
d0 = D2I(s);
770
dp[0] = FROM_S32(d0);
771
772
buffn[i] = (FTYPE)sp[0];
773
774
sp += chan1;
775
dp += chan1;
776
}
777
778
for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
779
780
/* next line */
781
sl += sll;
782
dl += dll;
783
784
buff_ind++;
785
786
if (buff_ind >= n + 1) buff_ind = 0;
787
}
788
}
789
790
FREE_AND_RETURN_STATUS;
791
}
792
793
/***************************************************************/
794
/* for x86, using integer multiplies is faster */
795
796
#define STORE_RES(res, x) \
797
x >>= shift2; \
798
CLAMP_STORE(res, x)
799
800
mlib_status CONV_FUNC_I(MxN)(mlib_image *dst,
801
const mlib_image *src,
802
const mlib_s32 *kernel,
803
mlib_s32 m,
804
mlib_s32 n,
805
mlib_s32 dm,
806
mlib_s32 dn,
807
mlib_s32 scale,
808
mlib_s32 cmask)
809
{
810
mlib_s32 buff[BUFF_SIZE], *buffd = buff;
811
mlib_s32 l, off, kw;
812
mlib_s32 d0, d1, shift1, shift2;
813
mlib_s32 k0, k1, k2, k3, k4, k5, k6;
814
mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
815
DTYPE *adr_src, *sl, *sp = NULL;
816
DTYPE *adr_dst, *dl, *dp = NULL;
817
mlib_s32 wid, hgt, sll, dll;
818
mlib_s32 nchannel, chan1;
819
mlib_s32 i, j, c;
820
mlib_s32 chan2;
821
mlib_s32 k_locl[MAX_N*MAX_N], *k = k_locl;
822
GET_SRC_DST_PARAMETERS(DTYPE);
823
824
#if IMG_TYPE != 1
825
shift1 = 16;
826
#else
827
shift1 = 8;
828
#endif /* IMG_TYPE != 1 */
829
shift2 = scale - shift1;
830
831
chan1 = nchannel;
832
chan2 = chan1 + chan1;
833
834
wid -= (m - 1);
835
hgt -= (n - 1);
836
adr_dst += dn*dll + dm*nchannel;
837
838
if (wid > BUFF_SIZE) {
839
buffd = mlib_malloc(sizeof(mlib_s32)*wid);
840
841
if (buffd == NULL) return MLIB_FAILURE;
842
}
843
844
if (m*n > MAX_N*MAX_N) {
845
k = mlib_malloc(sizeof(mlib_s32)*(m*n));
846
847
if (k == NULL) {
848
if (buffd != buff) mlib_free(buffd);
849
return MLIB_FAILURE;
850
}
851
}
852
853
for (i = 0; i < m*n; i++) {
854
k[i] = kernel[i] >> shift1;
855
}
856
857
for (c = 0; c < nchannel; c++) {
858
if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
859
860
sl = adr_src + c;
861
dl = adr_dst + c;
862
863
for (i = 0; i < wid; i++) buffd[i] = 0;
864
865
for (j = 0; j < hgt; j++) {
866
mlib_s32 *pk = k;
867
868
for (l = 0; l < n; l++) {
869
DTYPE *sp0 = sl + l*sll;
870
871
for (off = 0; off < m;) {
872
sp = sp0 + off*chan1;
873
dp = dl;
874
875
kw = m - off;
876
877
if (kw > 2*MAX_KER) kw = MAX_KER; else
878
if (kw > MAX_KER) kw = kw/2;
879
off += kw;
880
881
p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
882
p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
883
884
k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
885
k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
886
pk += kw;
887
888
sp += (kw - 1)*chan1;
889
890
if (kw == 7) {
891
892
if (l < (n - 1) || off < m) {
893
for (i = 0; i <= (wid - 2); i += 2) {
894
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
895
p6 = sp[0];
896
p7 = sp[chan1];
897
898
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
899
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
900
901
sp += chan2;
902
}
903
904
} else {
905
for (i = 0; i <= (wid - 2); i += 2) {
906
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
907
p6 = sp[0];
908
p7 = sp[chan1];
909
910
d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]);
911
d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
912
913
STORE_RES(dp[0 ], d0);
914
STORE_RES(dp[chan1], d1);
915
916
buffd[i ] = 0;
917
buffd[i + 1] = 0;
918
919
sp += chan2;
920
dp += chan2;
921
}
922
}
923
924
} else if (kw == 6) {
925
926
if (l < (n - 1) || off < m) {
927
for (i = 0; i <= (wid - 2); i += 2) {
928
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
929
p5 = sp[0];
930
p6 = sp[chan1];
931
932
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
933
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
934
935
sp += chan2;
936
}
937
938
} else {
939
for (i = 0; i <= (wid - 2); i += 2) {
940
p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
941
p5 = sp[0];
942
p6 = sp[chan1];
943
944
d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]);
945
d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
946
947
STORE_RES(dp[0 ], d0);
948
STORE_RES(dp[chan1], d1);
949
950
buffd[i ] = 0;
951
buffd[i + 1] = 0;
952
953
sp += chan2;
954
dp += chan2;
955
}
956
}
957
958
} else if (kw == 5) {
959
960
if (l < (n - 1) || off < m) {
961
for (i = 0; i <= (wid - 2); i += 2) {
962
p0 = p2; p1 = p3; p2 = p4; p3 = p5;
963
p4 = sp[0];
964
p5 = sp[chan1];
965
966
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
967
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
968
969
sp += chan2;
970
}
971
972
} else {
973
for (i = 0; i <= (wid - 2); i += 2) {
974
p0 = p2; p1 = p3; p2 = p4; p3 = p5;
975
p4 = sp[0];
976
p5 = sp[chan1];
977
978
d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]);
979
d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
980
981
STORE_RES(dp[0 ], d0);
982
STORE_RES(dp[chan1], d1);
983
984
buffd[i ] = 0;
985
buffd[i + 1] = 0;
986
987
sp += chan2;
988
dp += chan2;
989
}
990
}
991
992
} else if (kw == 4) {
993
994
if (l < (n - 1) || off < m) {
995
for (i = 0; i <= (wid - 2); i += 2) {
996
p0 = p2; p1 = p3; p2 = p4;
997
p3 = sp[0];
998
p4 = sp[chan1];
999
1000
buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1001
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1002
1003
sp += chan2;
1004
}
1005
1006
} else {
1007
for (i = 0; i <= (wid - 2); i += 2) {
1008
p0 = p2; p1 = p3; p2 = p4;
1009
p3 = sp[0];
1010
p4 = sp[chan1];
1011
1012
d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]);
1013
d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1014
1015
STORE_RES(dp[0 ], d0);
1016
STORE_RES(dp[chan1], d1);
1017
1018
buffd[i ] = 0;
1019
buffd[i + 1] = 0;
1020
1021
sp += chan2;
1022
dp += chan2;
1023
}
1024
}
1025
1026
} else if (kw == 3) {
1027
1028
if (l < (n - 1) || off < m) {
1029
for (i = 0; i <= (wid - 2); i += 2) {
1030
p0 = p2; p1 = p3;
1031
p2 = sp[0];
1032
p3 = sp[chan1];
1033
1034
buffd[i ] += p0*k0 + p1*k1 + p2*k2;
1035
buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1036
1037
sp += chan2;
1038
}
1039
1040
} else {
1041
for (i = 0; i <= (wid - 2); i += 2) {
1042
p0 = p2; p1 = p3;
1043
p2 = sp[0];
1044
p3 = sp[chan1];
1045
1046
d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i ]);
1047
d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1048
1049
STORE_RES(dp[0 ], d0);
1050
STORE_RES(dp[chan1], d1);
1051
1052
buffd[i ] = 0;
1053
buffd[i + 1] = 0;
1054
1055
sp += chan2;
1056
dp += chan2;
1057
}
1058
}
1059
1060
} else if (kw == 2) {
1061
1062
if (l < (n - 1) || off < m) {
1063
for (i = 0; i <= (wid - 2); i += 2) {
1064
p0 = p2;
1065
p1 = sp[0];
1066
p2 = sp[chan1];
1067
1068
buffd[i ] += p0*k0 + p1*k1;
1069
buffd[i + 1] += p1*k0 + p2*k1;
1070
1071
sp += chan2;
1072
}
1073
1074
} else {
1075
for (i = 0; i <= (wid - 2); i += 2) {
1076
p0 = p2;
1077
p1 = sp[0];
1078
p2 = sp[chan1];
1079
1080
d0 = (p0*k0 + p1*k1 + buffd[i ]);
1081
d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1082
1083
STORE_RES(dp[0 ], d0);
1084
STORE_RES(dp[chan1], d1);
1085
1086
buffd[i ] = 0;
1087
buffd[i + 1] = 0;
1088
1089
sp += chan2;
1090
dp += chan2;
1091
}
1092
}
1093
1094
} else /*if (kw == 1)*/ {
1095
1096
if (l < (n - 1) || off < m) {
1097
for (i = 0; i <= (wid - 2); i += 2) {
1098
p0 = sp[0];
1099
p1 = sp[chan1];
1100
1101
buffd[i ] += p0*k0;
1102
buffd[i + 1] += p1*k0;
1103
1104
sp += chan2;
1105
}
1106
1107
} else {
1108
for (i = 0; i <= (wid - 2); i += 2) {
1109
p0 = sp[0];
1110
p1 = sp[chan1];
1111
1112
d0 = (p0*k0 + buffd[i ]);
1113
d1 = (p1*k0 + buffd[i + 1]);
1114
1115
STORE_RES(dp[0 ], d0);
1116
STORE_RES(dp[chan1], d1);
1117
1118
buffd[i ] = 0;
1119
buffd[i + 1] = 0;
1120
1121
sp += chan2;
1122
dp += chan2;
1123
}
1124
}
1125
}
1126
}
1127
}
1128
1129
/* last pixels */
1130
for (; i < wid; i++) {
1131
mlib_s32 *pk = k, s = 0;
1132
mlib_s32 x;
1133
1134
for (l = 0; l < n; l++) {
1135
sp = sl + l*sll + i*chan1;
1136
1137
for (x = 0; x < m; x++) {
1138
s += sp[0] * pk[0];
1139
sp += chan1;
1140
pk ++;
1141
}
1142
}
1143
1144
STORE_RES(dp[0], s);
1145
1146
sp += chan1;
1147
dp += chan1;
1148
}
1149
1150
sl += sll;
1151
dl += dll;
1152
}
1153
}
1154
1155
if (buffd != buff) mlib_free(buffd);
1156
if (k != k_locl) mlib_free(k);
1157
1158
return MLIB_SUCCESS;
1159
}
1160
1161
/***************************************************************/
1162
1163