SDL  2.0
yuv_rgb.c
Go to the documentation of this file.
1 // Copyright 2016 Adrien Descamps
2 // Distributed under BSD 3-Clause License
3 #include "../../SDL_internal.h"
4 
5 #if SDL_HAVE_YUV
6 
7 #include "yuv_rgb.h"
8 
9 #include "SDL_cpuinfo.h"
10 /*#include <x86intrin.h>*/
11 
12 #define PRECISION 6
13 #define PRECISION_FACTOR (1<<PRECISION)
14 
/* Fixed-point parameters of one RGB -> YUV conversion standard.
 * The coefficients are pre-multiplied by PRECISION_FACTOR (see the V() macro),
 * so the result of the matrix product has to be divided by PRECISION_FACTOR. */
typedef struct
{
    uint8_t y_shift;      /* offset added to the luma component */
    int16_t matrix[3][3]; /* row 0 -> Y, row 1 -> U, row 2 -> V; columns are R, G, B */
} RGB2YUVParam;
// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
23 
/* Fixed-point parameters of one YUV -> RGB conversion standard.
 * Member order matches the positional initializers of the YUV2RGB table.
 * The factors are pre-multiplied by PRECISION_FACTOR (see the V() macro). */
typedef struct
{
    uint8_t y_shift;    /* value subtracted from Y before scaling */
    int16_t y_factor;   /* Y contribution to R, G and B */
    int16_t v_r_factor; /* V contribution to R */
    int16_t u_g_factor; /* U contribution to G */
    int16_t v_g_factor; /* V contribution to G */
    int16_t u_b_factor; /* U contribution to B */
} YUV2RGBParam;
// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
36 
/* Convert a real coefficient to fixed point: scale by PRECISION_FACTOR and round
 * (the +0.5 rounds positive values to nearest; negative coefficients are written -V(x)).
 * The argument and the whole expansion are parenthesized so that expressions such as
 * V(a+b) are scaled as a whole. */
#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5))
38 
39 // for ITU-T T.871, values can be found in section 7
40 // for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
41 // for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
42 // all values are rounded to the fourth decimal
43 
44 static const YUV2RGBParam YUV2RGB[3] = {
45  // ITU-T T.871 (JPEG)
46  {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
47  // ITU-R BT.601-7
48  {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
49  // ITU-R BT.709-6
50  {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
51 };
52 
53 static const RGB2YUVParam RGB2YUV[3] = {
54  // ITU-T T.871 (JPEG)
55  {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
56  // ITU-R BT.601-7
57  {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
58  // ITU-R BT.709-6
59  {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
60 };
61 
62 /* The various layouts of YUV data we support */
63 #define YUV_FORMAT_420 1
64 #define YUV_FORMAT_422 2
65 #define YUV_FORMAT_NV12 3
66 
67 /* The various formats of RGB pixel that we support */
68 #define RGB_FORMAT_RGB565 1
69 #define RGB_FORMAT_RGB24 2
70 #define RGB_FORMAT_RGBA 3
71 #define RGB_FORMAT_BGRA 4
72 #define RGB_FORMAT_ARGB 5
73 #define RGB_FORMAT_ABGR 6
74 
// divide by PRECISION_FACTOR and clamp to [0:255] interval
// The lookup table covers inputs in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR[ range;
// anything outside that range is saturated explicitly (0 below, 255 above) instead of
// indexing outside the table.
static uint8_t clampU8(int32_t v)
{
    /* 128 entries of 0, then the identity ramp 0..255, then 128 entries of 255 */
    static const uint8_t lut[512] =
    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
    47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
    91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
    126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
    159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
    192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
    225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
    };

    /* Saturate before touching the table: this keeps the index inside [0, 511],
       avoids overflow in the addition below, and avoids right-shifting a negative value. */
    if (v <= -128*PRECISION_FACTOR) {
        return 0;
    }
    if (v >= 384*PRECISION_FACTOR) {
        return 255;
    }
    return lut[(v+128*PRECISION_FACTOR)>>PRECISION];
}
96 
97 
98 #define STD_FUNCTION_NAME yuv420_rgb565_std
99 #define YUV_FORMAT YUV_FORMAT_420
100 #define RGB_FORMAT RGB_FORMAT_RGB565
101 #include "yuv_rgb_std_func.h"
102 
103 #define STD_FUNCTION_NAME yuv420_rgb24_std
104 #define YUV_FORMAT YUV_FORMAT_420
105 #define RGB_FORMAT RGB_FORMAT_RGB24
106 #include "yuv_rgb_std_func.h"
107 
108 #define STD_FUNCTION_NAME yuv420_rgba_std
109 #define YUV_FORMAT YUV_FORMAT_420
110 #define RGB_FORMAT RGB_FORMAT_RGBA
111 #include "yuv_rgb_std_func.h"
112 
113 #define STD_FUNCTION_NAME yuv420_bgra_std
114 #define YUV_FORMAT YUV_FORMAT_420
115 #define RGB_FORMAT RGB_FORMAT_BGRA
116 #include "yuv_rgb_std_func.h"
117 
118 #define STD_FUNCTION_NAME yuv420_argb_std
119 #define YUV_FORMAT YUV_FORMAT_420
120 #define RGB_FORMAT RGB_FORMAT_ARGB
121 #include "yuv_rgb_std_func.h"
122 
123 #define STD_FUNCTION_NAME yuv420_abgr_std
124 #define YUV_FORMAT YUV_FORMAT_420
125 #define RGB_FORMAT RGB_FORMAT_ABGR
126 #include "yuv_rgb_std_func.h"
127 
128 #define STD_FUNCTION_NAME yuv422_rgb565_std
129 #define YUV_FORMAT YUV_FORMAT_422
130 #define RGB_FORMAT RGB_FORMAT_RGB565
131 #include "yuv_rgb_std_func.h"
132 
133 #define STD_FUNCTION_NAME yuv422_rgb24_std
134 #define YUV_FORMAT YUV_FORMAT_422
135 #define RGB_FORMAT RGB_FORMAT_RGB24
136 #include "yuv_rgb_std_func.h"
137 
138 #define STD_FUNCTION_NAME yuv422_rgba_std
139 #define YUV_FORMAT YUV_FORMAT_422
140 #define RGB_FORMAT RGB_FORMAT_RGBA
141 #include "yuv_rgb_std_func.h"
142 
143 #define STD_FUNCTION_NAME yuv422_bgra_std
144 #define YUV_FORMAT YUV_FORMAT_422
145 #define RGB_FORMAT RGB_FORMAT_BGRA
146 #include "yuv_rgb_std_func.h"
147 
148 #define STD_FUNCTION_NAME yuv422_argb_std
149 #define YUV_FORMAT YUV_FORMAT_422
150 #define RGB_FORMAT RGB_FORMAT_ARGB
151 #include "yuv_rgb_std_func.h"
152 
153 #define STD_FUNCTION_NAME yuv422_abgr_std
154 #define YUV_FORMAT YUV_FORMAT_422
155 #define RGB_FORMAT RGB_FORMAT_ABGR
156 #include "yuv_rgb_std_func.h"
157 
158 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
159 #define YUV_FORMAT YUV_FORMAT_NV12
160 #define RGB_FORMAT RGB_FORMAT_RGB565
161 #include "yuv_rgb_std_func.h"
162 
163 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
164 #define YUV_FORMAT YUV_FORMAT_NV12
165 #define RGB_FORMAT RGB_FORMAT_RGB24
166 #include "yuv_rgb_std_func.h"
167 
168 #define STD_FUNCTION_NAME yuvnv12_rgba_std
169 #define YUV_FORMAT YUV_FORMAT_NV12
170 #define RGB_FORMAT RGB_FORMAT_RGBA
171 #include "yuv_rgb_std_func.h"
172 
173 #define STD_FUNCTION_NAME yuvnv12_bgra_std
174 #define YUV_FORMAT YUV_FORMAT_NV12
175 #define RGB_FORMAT RGB_FORMAT_BGRA
176 #include "yuv_rgb_std_func.h"
177 
178 #define STD_FUNCTION_NAME yuvnv12_argb_std
179 #define YUV_FORMAT YUV_FORMAT_NV12
180 #define RGB_FORMAT RGB_FORMAT_ARGB
181 #include "yuv_rgb_std_func.h"
182 
183 #define STD_FUNCTION_NAME yuvnv12_abgr_std
184 #define YUV_FORMAT YUV_FORMAT_NV12
185 #define RGB_FORMAT RGB_FORMAT_ABGR
187 
190  const uint8_t *RGB, uint32_t RGB_stride,
191  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
192  YCbCrType yuv_type)
193 {
194  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
195 
196  uint32_t x, y;
197  for(y=0; y<(height-1); y+=2)
198  {
199  const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
200  *rgb_ptr2=RGB+(y+1)*RGB_stride;
201 
202  uint8_t *y_ptr1=Y+y*Y_stride,
203  *y_ptr2=Y+(y+1)*Y_stride,
204  *u_ptr=U+(y/2)*UV_stride,
205  *v_ptr=V+(y/2)*UV_stride;
206 
207  for(x=0; x<(width-1); x+=2)
208  {
209  // compute yuv for the four pixels, u and v values are summed
210  int32_t y_tmp, u_tmp, v_tmp;
211 
212  y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
213  u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
214  v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
215  y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
216 
217  y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
218  u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
219  v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
220  y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
221 
222  y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
223  u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
224  v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
225  y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
226 
227  y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
228  u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
229  v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
230  y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
231 
232  u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
233  v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
234 
235  rgb_ptr1 += 6;
236  rgb_ptr2 += 6;
237  y_ptr1 += 2;
238  y_ptr2 += 2;
239  u_ptr += 1;
240  v_ptr += 1;
241  }
242  }
243 }
244 
245 #ifdef __SSE2__
246 
247 #define SSE_FUNCTION_NAME yuv420_rgb565_sse
248 #define STD_FUNCTION_NAME yuv420_rgb565_std
249 #define YUV_FORMAT YUV_FORMAT_420
250 #define RGB_FORMAT RGB_FORMAT_RGB565
251 #define SSE_ALIGNED
252 #include "yuv_rgb_sse_func.h"
253 
254 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu
255 #define STD_FUNCTION_NAME yuv420_rgb565_std
256 #define YUV_FORMAT YUV_FORMAT_420
257 #define RGB_FORMAT RGB_FORMAT_RGB565
258 #include "yuv_rgb_sse_func.h"
259 
260 #define SSE_FUNCTION_NAME yuv420_rgb24_sse
261 #define STD_FUNCTION_NAME yuv420_rgb24_std
262 #define YUV_FORMAT YUV_FORMAT_420
263 #define RGB_FORMAT RGB_FORMAT_RGB24
264 #define SSE_ALIGNED
265 #include "yuv_rgb_sse_func.h"
266 
267 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu
268 #define STD_FUNCTION_NAME yuv420_rgb24_std
269 #define YUV_FORMAT YUV_FORMAT_420
270 #define RGB_FORMAT RGB_FORMAT_RGB24
271 #include "yuv_rgb_sse_func.h"
272 
273 #define SSE_FUNCTION_NAME yuv420_rgba_sse
274 #define STD_FUNCTION_NAME yuv420_rgba_std
275 #define YUV_FORMAT YUV_FORMAT_420
276 #define RGB_FORMAT RGB_FORMAT_RGBA
277 #define SSE_ALIGNED
278 #include "yuv_rgb_sse_func.h"
279 
280 #define SSE_FUNCTION_NAME yuv420_rgba_sseu
281 #define STD_FUNCTION_NAME yuv420_rgba_std
282 #define YUV_FORMAT YUV_FORMAT_420
283 #define RGB_FORMAT RGB_FORMAT_RGBA
284 #include "yuv_rgb_sse_func.h"
285 
286 #define SSE_FUNCTION_NAME yuv420_bgra_sse
287 #define STD_FUNCTION_NAME yuv420_bgra_std
288 #define YUV_FORMAT YUV_FORMAT_420
289 #define RGB_FORMAT RGB_FORMAT_BGRA
290 #define SSE_ALIGNED
291 #include "yuv_rgb_sse_func.h"
292 
293 #define SSE_FUNCTION_NAME yuv420_bgra_sseu
294 #define STD_FUNCTION_NAME yuv420_bgra_std
295 #define YUV_FORMAT YUV_FORMAT_420
296 #define RGB_FORMAT RGB_FORMAT_BGRA
297 #include "yuv_rgb_sse_func.h"
298 
299 #define SSE_FUNCTION_NAME yuv420_argb_sse
300 #define STD_FUNCTION_NAME yuv420_argb_std
301 #define YUV_FORMAT YUV_FORMAT_420
302 #define RGB_FORMAT RGB_FORMAT_ARGB
303 #define SSE_ALIGNED
304 #include "yuv_rgb_sse_func.h"
305 
306 #define SSE_FUNCTION_NAME yuv420_argb_sseu
307 #define STD_FUNCTION_NAME yuv420_argb_std
308 #define YUV_FORMAT YUV_FORMAT_420
309 #define RGB_FORMAT RGB_FORMAT_ARGB
310 #include "yuv_rgb_sse_func.h"
311 
312 #define SSE_FUNCTION_NAME yuv420_abgr_sse
313 #define STD_FUNCTION_NAME yuv420_abgr_std
314 #define YUV_FORMAT YUV_FORMAT_420
315 #define RGB_FORMAT RGB_FORMAT_ABGR
316 #define SSE_ALIGNED
317 #include "yuv_rgb_sse_func.h"
318 
319 #define SSE_FUNCTION_NAME yuv420_abgr_sseu
320 #define STD_FUNCTION_NAME yuv420_abgr_std
321 #define YUV_FORMAT YUV_FORMAT_420
322 #define RGB_FORMAT RGB_FORMAT_ABGR
323 #include "yuv_rgb_sse_func.h"
324 
325 #define SSE_FUNCTION_NAME yuv422_rgb565_sse
326 #define STD_FUNCTION_NAME yuv422_rgb565_std
327 #define YUV_FORMAT YUV_FORMAT_422
328 #define RGB_FORMAT RGB_FORMAT_RGB565
329 #define SSE_ALIGNED
330 #include "yuv_rgb_sse_func.h"
331 
332 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu
333 #define STD_FUNCTION_NAME yuv422_rgb565_std
334 #define YUV_FORMAT YUV_FORMAT_422
335 #define RGB_FORMAT RGB_FORMAT_RGB565
336 #include "yuv_rgb_sse_func.h"
337 
338 #define SSE_FUNCTION_NAME yuv422_rgb24_sse
339 #define STD_FUNCTION_NAME yuv422_rgb24_std
340 #define YUV_FORMAT YUV_FORMAT_422
341 #define RGB_FORMAT RGB_FORMAT_RGB24
342 #define SSE_ALIGNED
343 #include "yuv_rgb_sse_func.h"
344 
345 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu
346 #define STD_FUNCTION_NAME yuv422_rgb24_std
347 #define YUV_FORMAT YUV_FORMAT_422
348 #define RGB_FORMAT RGB_FORMAT_RGB24
349 #include "yuv_rgb_sse_func.h"
350 
351 #define SSE_FUNCTION_NAME yuv422_rgba_sse
352 #define STD_FUNCTION_NAME yuv422_rgba_std
353 #define YUV_FORMAT YUV_FORMAT_422
354 #define RGB_FORMAT RGB_FORMAT_RGBA
355 #define SSE_ALIGNED
356 #include "yuv_rgb_sse_func.h"
357 
358 #define SSE_FUNCTION_NAME yuv422_rgba_sseu
359 #define STD_FUNCTION_NAME yuv422_rgba_std
360 #define YUV_FORMAT YUV_FORMAT_422
361 #define RGB_FORMAT RGB_FORMAT_RGBA
362 #include "yuv_rgb_sse_func.h"
363 
364 #define SSE_FUNCTION_NAME yuv422_bgra_sse
365 #define STD_FUNCTION_NAME yuv422_bgra_std
366 #define YUV_FORMAT YUV_FORMAT_422
367 #define RGB_FORMAT RGB_FORMAT_BGRA
368 #define SSE_ALIGNED
369 #include "yuv_rgb_sse_func.h"
370 
371 #define SSE_FUNCTION_NAME yuv422_bgra_sseu
372 #define STD_FUNCTION_NAME yuv422_bgra_std
373 #define YUV_FORMAT YUV_FORMAT_422
374 #define RGB_FORMAT RGB_FORMAT_BGRA
375 #include "yuv_rgb_sse_func.h"
376 
377 #define SSE_FUNCTION_NAME yuv422_argb_sse
378 #define STD_FUNCTION_NAME yuv422_argb_std
379 #define YUV_FORMAT YUV_FORMAT_422
380 #define RGB_FORMAT RGB_FORMAT_ARGB
381 #define SSE_ALIGNED
382 #include "yuv_rgb_sse_func.h"
383 
384 #define SSE_FUNCTION_NAME yuv422_argb_sseu
385 #define STD_FUNCTION_NAME yuv422_argb_std
386 #define YUV_FORMAT YUV_FORMAT_422
387 #define RGB_FORMAT RGB_FORMAT_ARGB
388 #include "yuv_rgb_sse_func.h"
389 
390 #define SSE_FUNCTION_NAME yuv422_abgr_sse
391 #define STD_FUNCTION_NAME yuv422_abgr_std
392 #define YUV_FORMAT YUV_FORMAT_422
393 #define RGB_FORMAT RGB_FORMAT_ABGR
394 #define SSE_ALIGNED
395 #include "yuv_rgb_sse_func.h"
396 
397 #define SSE_FUNCTION_NAME yuv422_abgr_sseu
398 #define STD_FUNCTION_NAME yuv422_abgr_std
399 #define YUV_FORMAT YUV_FORMAT_422
400 #define RGB_FORMAT RGB_FORMAT_ABGR
401 #include "yuv_rgb_sse_func.h"
402 
403 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
404 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
405 #define YUV_FORMAT YUV_FORMAT_NV12
406 #define RGB_FORMAT RGB_FORMAT_RGB565
407 #define SSE_ALIGNED
408 #include "yuv_rgb_sse_func.h"
409 
410 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
411 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
412 #define YUV_FORMAT YUV_FORMAT_NV12
413 #define RGB_FORMAT RGB_FORMAT_RGB565
414 #include "yuv_rgb_sse_func.h"
415 
416 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
417 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
418 #define YUV_FORMAT YUV_FORMAT_NV12
419 #define RGB_FORMAT RGB_FORMAT_RGB24
420 #define SSE_ALIGNED
421 #include "yuv_rgb_sse_func.h"
422 
423 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
424 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
425 #define YUV_FORMAT YUV_FORMAT_NV12
426 #define RGB_FORMAT RGB_FORMAT_RGB24
427 #include "yuv_rgb_sse_func.h"
428 
429 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse
430 #define STD_FUNCTION_NAME yuvnv12_rgba_std
431 #define YUV_FORMAT YUV_FORMAT_NV12
432 #define RGB_FORMAT RGB_FORMAT_RGBA
433 #define SSE_ALIGNED
434 #include "yuv_rgb_sse_func.h"
435 
436 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
437 #define STD_FUNCTION_NAME yuvnv12_rgba_std
438 #define YUV_FORMAT YUV_FORMAT_NV12
439 #define RGB_FORMAT RGB_FORMAT_RGBA
440 #include "yuv_rgb_sse_func.h"
441 
442 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse
443 #define STD_FUNCTION_NAME yuvnv12_bgra_std
444 #define YUV_FORMAT YUV_FORMAT_NV12
445 #define RGB_FORMAT RGB_FORMAT_BGRA
446 #define SSE_ALIGNED
447 #include "yuv_rgb_sse_func.h"
448 
449 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
450 #define STD_FUNCTION_NAME yuvnv12_bgra_std
451 #define YUV_FORMAT YUV_FORMAT_NV12
452 #define RGB_FORMAT RGB_FORMAT_BGRA
453 #include "yuv_rgb_sse_func.h"
454 
455 #define SSE_FUNCTION_NAME yuvnv12_argb_sse
456 #define STD_FUNCTION_NAME yuvnv12_argb_std
457 #define YUV_FORMAT YUV_FORMAT_NV12
458 #define RGB_FORMAT RGB_FORMAT_ARGB
459 #define SSE_ALIGNED
460 #include "yuv_rgb_sse_func.h"
461 
462 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu
463 #define STD_FUNCTION_NAME yuvnv12_argb_std
464 #define YUV_FORMAT YUV_FORMAT_NV12
465 #define RGB_FORMAT RGB_FORMAT_ARGB
466 #include "yuv_rgb_sse_func.h"
467 
468 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse
469 #define STD_FUNCTION_NAME yuvnv12_abgr_std
470 #define YUV_FORMAT YUV_FORMAT_NV12
471 #define RGB_FORMAT RGB_FORMAT_ABGR
472 #define SSE_ALIGNED
473 #include "yuv_rgb_sse_func.h"
474 
475 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
476 #define STD_FUNCTION_NAME yuvnv12_abgr_std
477 #define YUV_FORMAT YUV_FORMAT_NV12
478 #define RGB_FORMAT RGB_FORMAT_ABGR
479 #include "yuv_rgb_sse_func.h"
480 
481 
482 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
483 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
484 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
485 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
486 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
487 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
488 B2 = _mm_unpackhi_epi8(RGB3, RGB6);
489 
490 #define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
491 RGB1 = _mm_unpacklo_epi8(R1, G2); \
492 RGB2 = _mm_unpackhi_epi8(R1, G2); \
493 RGB3 = _mm_unpacklo_epi8(R2, B1); \
494 RGB4 = _mm_unpackhi_epi8(R2, B1); \
495 RGB5 = _mm_unpacklo_epi8(G1, B2); \
496 RGB6 = _mm_unpackhi_epi8(G1, B2); \
497 
498 #define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
499 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
500 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
501 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
502 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
503 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
504 
505 #define RGB2YUV_16(R, G, B, Y, U, V) \
506 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \
507  _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
508 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
509 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \
510 Y = _mm_srai_epi16(Y, PRECISION); \
511 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \
512  _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
513 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
514 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \
515 U = _mm_srai_epi16(U, PRECISION); \
516 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \
517  _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
518 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
519 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \
520 V = _mm_srai_epi16(V, PRECISION);
521 
522 #define RGB2YUV_32 \
523  __m128i r1, r2, b1, b2, g1, g2; \
524  __m128i r_16, g_16, b_16; \
525  __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
526  __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
527  rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
528  rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
529  rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
530  rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
531  rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
532  /* unpack rgb24 data to r, g and b data in separate channels*/ \
533  UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
534  /* process pixels of first line */ \
535  r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
536  g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
537  b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
538  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
539  r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
540  g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
541  b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
542  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
543  y = _mm_packus_epi16(y1_16, y2_16); \
544  u1 = _mm_packus_epi16(u1_16, u2_16); \
545  v1 = _mm_packus_epi16(v1_16, v2_16); \
546  /* save Y values */ \
547  SAVE_SI128((__m128i*)(y_ptr1), y); \
548  /* process pixels of second line */ \
549  r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
550  g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
551  b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
552  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
553  r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
554  g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
555  b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
556  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
557  y = _mm_packus_epi16(y1_16, y2_16); \
558  u2 = _mm_packus_epi16(u1_16, u2_16); \
559  v2 = _mm_packus_epi16(v1_16, v2_16); \
560  /* save Y values */ \
561  SAVE_SI128((__m128i*)(y_ptr2), y); \
562  /* vertical subsampling of u/v values */ \
563  u1_tmp = _mm_avg_epu8(u1, u2); \
564  v1_tmp = _mm_avg_epu8(v1, v2); \
565  /* do the same again with next data */ \
566  rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
567  rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
568  rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
569  rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
570  rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
571  rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
572  /* unpack rgb24 data to r, g and b data in separate channels*/ \
573  UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
574  /* process pixels of first line */ \
575  r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
576  g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
577  b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
578  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
579  r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
580  g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
581  b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
582  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
583  y = _mm_packus_epi16(y1_16, y2_16); \
584  u1 = _mm_packus_epi16(u1_16, u2_16); \
585  v1 = _mm_packus_epi16(v1_16, v2_16); \
586  /* save Y values */ \
587  SAVE_SI128((__m128i*)(y_ptr1+16), y); \
588  /* process pixels of second line */ \
589  r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
590  g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
591  b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
592  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
593  r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
594  g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
595  b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
596  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
597  y = _mm_packus_epi16(y1_16, y2_16); \
598  u2 = _mm_packus_epi16(u1_16, u2_16); \
599  v2 = _mm_packus_epi16(v1_16, v2_16); \
600  /* save Y values */ \
601  SAVE_SI128((__m128i*)(y_ptr2+16), y); \
602  /* vertical subsampling of u/v values */ \
603  u2_tmp = _mm_avg_epu8(u1, u2); \
604  v2_tmp = _mm_avg_epu8(v1, v2); \
605  /* horizontal subsampling of u/v values */ \
606  u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
607  v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
608  u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
609  v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
610  u1 = _mm_avg_epu8(u1, u2); \
611  v1 = _mm_avg_epu8(v1, v2); \
612  SAVE_SI128((__m128i*)(u_ptr), u1); \
613  SAVE_SI128((__m128i*)(v_ptr), v1);
614 
616  const uint8_t *RGB, uint32_t RGB_stride,
617  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
618  YCbCrType yuv_type)
619 {
620  #define LOAD_SI128 _mm_load_si128
621  #define SAVE_SI128 _mm_stream_si128
622  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
623 
624  uint32_t xpos, ypos;
625  for(ypos=0; ypos<(height-1); ypos+=2)
626  {
627  const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
628  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
629 
630  uint8_t *y_ptr1=Y+ypos*Y_stride,
631  *y_ptr2=Y+(ypos+1)*Y_stride,
632  *u_ptr=U+(ypos/2)*UV_stride,
633  *v_ptr=V+(ypos/2)*UV_stride;
634 
635  for(xpos=0; xpos<(width-31); xpos+=32)
636  {
637  RGB2YUV_32
638 
639  rgb_ptr1+=96;
640  rgb_ptr2+=96;
641  y_ptr1+=32;
642  y_ptr2+=32;
643  u_ptr+=16;
644  v_ptr+=16;
645  }
646  }
647  #undef LOAD_SI128
648  #undef SAVE_SI128
649 }
650 
652  const uint8_t *RGB, uint32_t RGB_stride,
653  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
654  YCbCrType yuv_type)
655 {
656  #define LOAD_SI128 _mm_loadu_si128
657  #define SAVE_SI128 _mm_storeu_si128
658  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
659 
660  uint32_t xpos, ypos;
661  for(ypos=0; ypos<(height-1); ypos+=2)
662  {
663  const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
664  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
665 
666  uint8_t *y_ptr1=Y+ypos*Y_stride,
667  *y_ptr2=Y+(ypos+1)*Y_stride,
668  *u_ptr=U+(ypos/2)*UV_stride,
669  *v_ptr=V+(ypos/2)*UV_stride;
670 
671  for(xpos=0; xpos<(width-31); xpos+=32)
672  {
673  RGB2YUV_32
674 
675  rgb_ptr1+=96;
676  rgb_ptr2+=96;
677  y_ptr1+=32;
678  y_ptr2+=32;
679  u_ptr+=16;
680  v_ptr+=16;
681  }
682  }
683  #undef LOAD_SI128
684  #undef SAVE_SI128
685 }
686 
687 
688 #endif //__SSE2__
689 
690 #endif /* SDL_HAVE_YUV */
YUV2RGBParam::u_g_factor
int16_t u_g_factor
Definition: yuv_rgb.c:29
YUV2RGB
static const YUV2RGBParam YUV2RGB[3]
Definition: yuv_rgb.c:44
YUV2RGBParam
Definition: yuv_rgb.c:25
clampU8
static uint8_t clampU8(int32_t v)
Definition: yuv_rgb.c:77
YUV2RGBParam::v_g_factor
int16_t v_g_factor
Definition: yuv_rgb.c:30
width
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
rgb24_yuv420_std
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
Definition: yuv_rgb.c:188
yuv_rgb_std_func.h
yuv_rgb.h
YUV2RGBParam::y_shift
uint8_t y_shift
Definition: yuv_rgb.c:26
RGB
@ RGB
Definition: edid.h:20
v
const GLdouble * v
Definition: SDL_opengl.h:2064
RGB2YUV
static const RGB2YUVParam RGB2YUV[3]
Definition: yuv_rgb.c:53
RGB2YUVParam
Definition: yuv_rgb.c:16
V
#define V(value)
Definition: yuv_rgb.c:37
YUV2RGBParam::v_r_factor
int16_t v_r_factor
Definition: yuv_rgb.c:28
x
GLint GLint GLint GLint GLint x
Definition: SDL_opengl.h:1574
RGB2YUVParam::y_shift
uint8_t y_shift
Definition: yuv_rgb.c:17
int32_t
signed int int32_t
Definition: SDL_config_windows.h:62
height
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
SDL_cpuinfo.h
param
GLfloat param
Definition: SDL_opengl_glext.h:373
YUV2RGBParam::y_factor
int16_t y_factor
Definition: yuv_rgb.c:27
int16_t
signed short int16_t
Definition: SDL_config_windows.h:60
yuv_rgb_sse_func.h
y
GLint GLint GLint GLint GLint GLint y
Definition: SDL_opengl.h:1574
PRECISION
#define PRECISION
Definition: yuv_rgb.c:12
rgb24_yuv420_sse
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
uint32_t
unsigned int uint32_t
Definition: SDL_config_windows.h:63
uint8_t
unsigned char uint8_t
Definition: SDL_config_windows.h:59
Y
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp if else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else 
conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) 
&FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask 
endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if SINGLE_SCANLINE ifc b endif else if vars_spilled word LINE_SAVED_REGS endif subs Y
Definition: pixman-arm-simd-asm.h:554
YCbCrType
YCbCrType
Definition: yuv_rgb.h:23
matrix
GLuint GLenum matrix
Definition: SDL_opengl_glext.h:9999
rgb24_yuv420_sseu
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
YUV2RGBParam::u_b_factor
int16_t u_b_factor
Definition: yuv_rgb.c:31
PRECISION_FACTOR
#define PRECISION_FACTOR
Definition: yuv_rgb.c:13