1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
|
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef SWSCALE_SWSCALE_INTERNAL_H
#define SWSCALE_SWSCALE_INTERNAL_H
#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include "libavutil/avutil.h"
#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
#define MAX_FILTER_SIZE 256
#if ARCH_X86
#define VOFW 5120
#else
#define VOFW 2048 // faster on PPC and not tested on others
#endif
#define VOF (VOFW*2)
#if HAVE_BIGENDIAN
#define ALT32_CORR (-1)
#else
#define ALT32_CORR 1
#endif
#if ARCH_X86_64
# define APCK_PTR2 8
# define APCK_COEF 16
# define APCK_SIZE 24
#else
# define APCK_PTR2 4
# define APCK_COEF 8
# define APCK_SIZE 16
#endif
struct SwsContext;
typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t* dst[], int dstStride[]);
/* This struct should be aligned on at least a 32-byte boundary. */
typedef struct SwsContext {
/**
* info on struct for av_log
*/
const AVClass *av_class;
/**
* Note that src, dst, srcStride, dstStride will be copied in the
* sws_scale() wrapper so they can be freely modified here.
*/
SwsFunc swScale;
int srcW; ///< Width of source luma/alpha planes.
int srcH; ///< Height of source luma/alpha planes.
int dstH; ///< Height of destination luma/alpha planes.
int chrSrcW; ///< Width of source chroma planes.
int chrSrcH; ///< Height of source chroma planes.
int chrDstW; ///< Width of destination chroma planes.
int chrDstH; ///< Height of destination chroma planes.
int lumXInc, chrXInc;
int lumYInc, chrYInc;
enum PixelFormat dstFormat; ///< Destination pixel format.
enum PixelFormat srcFormat; ///< Source pixel format.
int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
double param[2]; ///< Input parameters for scaling algorithms that need them.
uint32_t pal_yuv[256];
uint32_t pal_rgb[256];
/**
* @name Scaled horizontal lines ring buffer.
* The horizontal scaler keeps just enough scaled lines in a ring buffer
* so they may be passed to the vertical scaler. The pointers to the
* allocated buffers for each line are duplicated in sequence in the ring
* buffer to simplify indexing and avoid wrapping around between lines
* inside the vertical scaler code. The wrapping is done before the
* vertical scaler is called.
*/
//@{
int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
int16_t **chrPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
//@}
uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
/**
* @name Horizontal and vertical filters.
* To better understand the following fields, here is a pseudo-code of
* their usage in filtering a horizontal line:
* @code
* for (i = 0; i < width; i++) {
* dst[i] = 0;
* for (j = 0; j < filterSize; j++)
* dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
* dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
* }
* @endcode
*/
//@{
int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
int vChrFilterSize; ///< Vertical filter size for chroma pixels.
//@}
int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
int canMMX2BeUsed;
int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void * yuvTable; // pointer to the yuv->rgb table start so it can be freed()
uint8_t * table_rV[256];
uint8_t * table_gU[256];
int table_gV[256];
uint8_t * table_bU[256];
//Colorspace stuff
int contrast, brightness, saturation; // for sws_getColorspaceDetails
int srcColorspaceTable[4];
int dstColorspaceTable[4];
int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
int yuv2rgb_y_offset;
int yuv2rgb_y_coeff;
int yuv2rgb_v2r_coeff;
int yuv2rgb_v2g_coeff;
int yuv2rgb_u2g_coeff;
int yuv2rgb_u2b_coeff;
#define RED_DITHER "0*8"
#define GREEN_DITHER "1*8"
#define BLUE_DITHER "2*8"
#define Y_COEFF "3*8"
#define VR_COEFF "4*8"
#define UB_COEFF "5*8"
#define VG_COEFF "6*8"
#define UG_COEFF "7*8"
#define Y_OFFSET "8*8"
#define U_OFFSET "9*8"
#define V_OFFSET "10*8"
#define LUM_MMX_FILTER_OFFSET "11*8"
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
#define ESP_OFFSET "11*8+4*4*256*2+8"
#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
#define U_TEMP "11*8+4*4*256*2+24"
#define V_TEMP "11*8+4*4*256*2+32"
#define Y_TEMP "11*8+4*4*256*2+40"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
DECLARE_ALIGNED(8, uint64_t, redDither);
DECLARE_ALIGNED(8, uint64_t, greenDither);
DECLARE_ALIGNED(8, uint64_t, blueDither);
DECLARE_ALIGNED(8, uint64_t, yCoeff);
DECLARE_ALIGNED(8, uint64_t, vrCoeff);
DECLARE_ALIGNED(8, uint64_t, ubCoeff);
DECLARE_ALIGNED(8, uint64_t, vgCoeff);
DECLARE_ALIGNED(8, uint64_t, ugCoeff);
DECLARE_ALIGNED(8, uint64_t, yOffset);
DECLARE_ALIGNED(8, uint64_t, uOffset);
DECLARE_ALIGNED(8, uint64_t, vOffset);
int32_t lumMmxFilter[4*MAX_FILTER_SIZE];
int32_t chrMmxFilter[4*MAX_FILTER_SIZE];
int dstW; ///< Width of destination luma/alpha planes.
DECLARE_ALIGNED(8, uint64_t, esp);
DECLARE_ALIGNED(8, uint64_t, vRounder);
DECLARE_ALIGNED(8, uint64_t, u_temp);
DECLARE_ALIGNED(8, uint64_t, v_temp);
DECLARE_ALIGNED(8, uint64_t, y_temp);
int32_t alpMmxFilter[4*MAX_FILTER_SIZE];
#if HAVE_ALTIVEC
vector signed short CY;
vector signed short CRV;
vector signed short CBU;
vector signed short CGU;
vector signed short CGV;
vector signed short OY;
vector unsigned short CSHIFT;
vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
#if ARCH_BFIN
DECLARE_ALIGNED(4, uint32_t, oy);
DECLARE_ALIGNED(4, uint32_t, oc);
DECLARE_ALIGNED(4, uint32_t, zero);
DECLARE_ALIGNED(4, uint32_t, cy);
DECLARE_ALIGNED(4, uint32_t, crv);
DECLARE_ALIGNED(4, uint32_t, rmask);
DECLARE_ALIGNED(4, uint32_t, cbu);
DECLARE_ALIGNED(4, uint32_t, bmask);
DECLARE_ALIGNED(4, uint32_t, cgu);
DECLARE_ALIGNED(4, uint32_t, cgv);
DECLARE_ALIGNED(4, uint32_t, gmask);
#endif
#if HAVE_VIS
DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
#endif
/* function pointers for swScale() */
void (*yuv2nv12X )(struct SwsContext *c,
const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest,
int dstW, int chrDstW, int dstFormat);
void (*yuv2yuv1 )(struct SwsContext *c,
const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
uint8_t *dest,
uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
int dstW, int chrDstW);
void (*yuv2yuvX )(struct SwsContext *c,
const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
const int16_t **alpSrc,
uint8_t *dest,
uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
int dstW, int chrDstW);
void (*yuv2packed1)(struct SwsContext *c,
const uint16_t *buf0,
const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0,
uint8_t *dest,
int dstW, int uvalpha, int dstFormat, int flags, int y);
void (*yuv2packed2)(struct SwsContext *c,
const uint16_t *buf0, const uint16_t *buf1,
const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, const uint16_t *abuf1,
uint8_t *dest,
int dstW, int yalpha, int uvalpha, int y);
void (*yuv2packedX)(struct SwsContext *c,
const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest,
int dstW, int dstY);
void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void (*hyscale_fast)(struct SwsContext *c,
int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc);
void (*hcscale_fast)(struct SwsContext *c,
int16_t *dst, int dstWidth,
const uint8_t *src1, const uint8_t *src2,
int srcW, int xInc);
void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW,
int xInc, const int16_t *filter, const int16_t *filterPos,
int filterSize);
void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
void (*chrConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for chroma planes if needed.
int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions.
int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions.
int alpSrcOffset; ///< Offset given to alpha src pointers passed to horizontal input functions.
int needs_hcscale; ///< Set if there are chroma planes to be converted.
} SwsContext;
//FIXME check init (where 0)
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c);
int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
int fullRange, int brightness,
int contrast, int saturation);
void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4],
int brightness, int contrast, int saturation);
SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c);
SwsFunc ff_yuv2rgb_init_vis(SwsContext *c);
SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
void ff_bfin_get_unscaled_swscale(SwsContext *c);
void ff_yuv2packedX_altivec(SwsContext *c,
const int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, int dstW, int dstY);
const char *sws_format_name(enum PixelFormat format);
//FIXME replace this with something faster
#define is16BPS(x) ( \
(x)==PIX_FMT_GRAY16BE \
|| (x)==PIX_FMT_GRAY16LE \
|| (x)==PIX_FMT_RGB48BE \
|| (x)==PIX_FMT_RGB48LE \
|| (x)==PIX_FMT_YUV420P16LE \
|| (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \
|| (x)==PIX_FMT_YUV420P16BE \
|| (x)==PIX_FMT_YUV422P16BE \
|| (x)==PIX_FMT_YUV444P16BE \
)
#define isBE(x) ((x)&1)
#define isPlanar8YUV(x) ( \
(x)==PIX_FMT_YUV410P \
|| (x)==PIX_FMT_YUV420P \
|| (x)==PIX_FMT_YUVA420P \
|| (x)==PIX_FMT_YUV411P \
|| (x)==PIX_FMT_YUV422P \
|| (x)==PIX_FMT_YUV444P \
|| (x)==PIX_FMT_YUV440P \
|| (x)==PIX_FMT_NV12 \
|| (x)==PIX_FMT_NV21 \
)
#define isPlanarYUV(x) ( \
isPlanar8YUV(x) \
|| (x)==PIX_FMT_YUV420P16LE \
|| (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \
|| (x)==PIX_FMT_YUV420P16BE \
|| (x)==PIX_FMT_YUV422P16BE \
|| (x)==PIX_FMT_YUV444P16BE \
)
#define isYUV(x) ( \
(x)==PIX_FMT_UYVY422 \
|| (x)==PIX_FMT_YUYV422 \
|| isPlanarYUV(x) \
)
#define isGray(x) ( \
(x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_GRAY16BE \
|| (x)==PIX_FMT_GRAY16LE \
)
#define isGray16(x) ( \
(x)==PIX_FMT_GRAY16BE \
|| (x)==PIX_FMT_GRAY16LE \
)
#define isRGBinInt(x) ( \
(x)==PIX_FMT_RGB48BE \
|| (x)==PIX_FMT_RGB48LE \
|| (x)==PIX_FMT_RGB32 \
|| (x)==PIX_FMT_RGB32_1 \
|| (x)==PIX_FMT_RGB24 \
|| (x)==PIX_FMT_RGB565BE \
|| (x)==PIX_FMT_RGB565LE \
|| (x)==PIX_FMT_RGB555BE \
|| (x)==PIX_FMT_RGB555LE \
|| (x)==PIX_FMT_RGB444BE \
|| (x)==PIX_FMT_RGB444LE \
|| (x)==PIX_FMT_RGB8 \
|| (x)==PIX_FMT_RGB4 \
|| (x)==PIX_FMT_RGB4_BYTE \
|| (x)==PIX_FMT_MONOBLACK \
|| (x)==PIX_FMT_MONOWHITE \
)
#define isBGRinInt(x) ( \
(x)==PIX_FMT_BGR32 \
|| (x)==PIX_FMT_BGR32_1 \
|| (x)==PIX_FMT_BGR24 \
|| (x)==PIX_FMT_BGR565BE \
|| (x)==PIX_FMT_BGR565LE \
|| (x)==PIX_FMT_BGR555BE \
|| (x)==PIX_FMT_BGR555LE \
|| (x)==PIX_FMT_BGR444BE \
|| (x)==PIX_FMT_BGR444LE \
|| (x)==PIX_FMT_BGR8 \
|| (x)==PIX_FMT_BGR4 \
|| (x)==PIX_FMT_BGR4_BYTE \
|| (x)==PIX_FMT_MONOBLACK \
|| (x)==PIX_FMT_MONOWHITE \
)
#define isRGBinBytes(x) ( \
(x)==PIX_FMT_RGB48BE \
|| (x)==PIX_FMT_RGB48LE \
|| (x)==PIX_FMT_RGBA \
|| (x)==PIX_FMT_ARGB \
|| (x)==PIX_FMT_RGB24 \
)
#define isBGRinBytes(x) ( \
(x)==PIX_FMT_BGRA \
|| (x)==PIX_FMT_ABGR \
|| (x)==PIX_FMT_BGR24 \
)
#define isAnyRGB(x) ( \
isRGBinInt(x) \
|| isBGRinInt(x) \
)
#define isALPHA(x) ( \
(x)==PIX_FMT_BGR32 \
|| (x)==PIX_FMT_BGR32_1 \
|| (x)==PIX_FMT_RGB32 \
|| (x)==PIX_FMT_RGB32_1 \
|| (x)==PIX_FMT_YUVA420P \
)
#define usePal(x) (av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL)
extern const uint64_t ff_dither4[2];
extern const uint64_t ff_dither8[2];
extern const AVClass sws_context_class;
/**
* Sets c->swScale to an unscaled converter if one exists for the specific
* source and destination formats, bit depths, flags, etc.
*/
void ff_get_unscaled_swscale(SwsContext *c);
/**
* Returns the SWS_CPU_CAPS for the optimized code compiled into swscale.
*/
int ff_hardcodedcpuflags(void);
/**
* Returns function pointer to fastest main scaler path function depending
* on architecture and available optimizations.
*/
SwsFunc ff_getSwsFunc(SwsContext *c);
#endif /* SWSCALE_SWSCALE_INTERNAL_H */
|