/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include <stdint.h>

#include "avcodec.h"
#include "swscale.h"
#include "dsputil.h"

/* Number of picture components (planes). */
#define NB_COMPONENTS 3

/* Polyphase filter geometry: NB_PHASES filters of NB_TAPS coefficients each. */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

/* Source positions are fixed point with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

/* Height, in lines, of the ring buffer holding horizontally filtered lines. */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)

46
struct SwsContext {
47
    AVClass *av_class;
48 49 50 51
    struct ImgReSampleContext *resampling_ctx;
    enum PixelFormat src_pix_fmt, dst_pix_fmt;
};

glantau's avatar
glantau committed
52
struct ImgReSampleContext {
53 54 55 56
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
glantau's avatar
glantau committed
57
    int h_incr, v_incr;
58 59
    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
kabi's avatar
kabi committed
60
    uint8_t *line_buf;
glantau's avatar
glantau committed
61 62
};

63 64
/* Defined outside this file. img_resample_full_init() calls it to fill the
   h_filters / v_filters tables, passing the output/input size ratio as
   factor, NB_TAPS, NB_PHASES, 1 << FILTER_BITS as scale and 0 as type. */
void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);

glantau's avatar
glantau committed
65 66 67 68 69 70
/* Return the filter phase of a fixed-point source position, i.e. the
   PHASE_BITS most significant bits of its fractional part. */
static inline int get_phase(int pos)
{
    const int shift = POS_FRAC_BITS - PHASE_BITS;
    const int mask  = (1 << PHASE_BITS) - 1;

    return (pos >> shift) & mask;
}

/* This function must be optimized */
71
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
72 73
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
glantau's avatar
glantau committed
74 75
{
    int src_pos, phase, sum, i;
76
    const uint8_t *s;
kabi's avatar
kabi committed
77
    int16_t *filter;
glantau's avatar
glantau committed
78 79 80 81 82 83 84

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
85
            av_abort();
glantau's avatar
glantau committed
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
            s[1] * filter[1] +
            s[2] * filter[2] +
            s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
115
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
116
                       int wrap, int16_t *filter)
glantau's avatar
glantau committed
117 118
{
    int sum, i;
119
    const uint8_t *s;
glantau's avatar
glantau committed
120 121 122 123 124 125 126 127 128 129 130

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
#else
        {
            int j;
kabi's avatar
kabi committed
131
            uint8_t *s1 = s;
glantau's avatar
glantau committed
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

151
#ifdef HAVE_MMX
glantau's avatar
glantau committed
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170

#include "i386/mmx.h"

/*
 * Compute one horizontally filtered sample into the low 32 bits of the MMX
 * register reg and advance src_pos by src_incr.
 *
 * The macro uses the locals s, src, src_pos, src_incr, phase, filter and
 * filters of the enclosing function, clobbers mm6 and expects mm7 to be
 * zero (the callers clear it with pxor before the first use).
 *
 * Steps: load the source bytes at s and widen the low four to 16 bit words,
 * multiply-accumulate them with the four coefficients of the current phase
 * (pmaddwd leaves two 32 bit partial sums), add the two partial sums and
 * shift the result right by FILTER_BITS.
 */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

171
/* Debugging aid: store MMX register reg into the local variable tmp and
   print its 64 bit value in hexadecimal. Expands to two statements, so it
   must not be used as the unbraced body of an if/else. */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
glantau's avatar
glantau committed
172 173

/* XXX: do four pixels at a time */
174
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
175
                                 const uint8_t *src, int src_width,
kabi's avatar
kabi committed
176
                                 int src_start, int src_incr, int16_t *filters)
glantau's avatar
glantau committed
177 178
{
    int src_pos, phase;
179
    const uint8_t *s;
kabi's avatar
kabi committed
180
    int16_t *filter;
glantau's avatar
glantau committed
181
    mmx_t tmp;
182

glantau's avatar
glantau committed
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

219
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
220
                            int wrap, int16_t *filter)
glantau's avatar
glantau committed
221 222
{
    int sum, i, v;
223
    const uint8_t *s;
glantau's avatar
glantau committed
224 225
    mmx_t tmp;
    mmx_t coefs[4];
226

glantau's avatar
glantau committed
227 228 229 230 231 232 233
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }
234

glantau's avatar
glantau committed
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);
256

glantau's avatar
glantau committed
257 258 259
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

kabi's avatar
kabi committed
260
        *(uint32_t *)dst = tmp.ud[0];
glantau's avatar
glantau committed
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
282
#endif /* HAVE_MMX */
glantau's avatar
glantau committed
283

284
#ifdef HAVE_ALTIVEC
285
typedef         union {
286 287 288 289
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

290
typedef         union {
291 292 293 294
    vector signed short v;
    signed short s[8];
} vec_ss_t;

295
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
296
                          int wrap, int16_t *filter)
297 298
{
    int sum, i;
299
    const uint8_t *s;
300 301
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
302
    vector signed short zeros, sumhv, sumlv;
303 304 305 306 307 308 309 310 311 312 313 314 315
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }
316

317 318 319 320 321 322 323 324 325 326 327
    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
328
    i = (-(int)dst) & 0xf;
329 330 331 332 333 334 335 336 337 338 339 340 341
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
        s[1 * wrap] * filter[1] +
        s[2 * wrap] * filter[2] +
        s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }
342

343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multipy/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
379

380 381 382 383 384 385
        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv) ;
        vec_st(dstv, 0, (vector unsigned char *) dst);
386

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
        s[1 * wrap] * filter[1] +
        s[2 * wrap] * filter[2] +
        s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
409
#endif /* HAVE_ALTIVEC */
410

glantau's avatar
glantau committed
411
/* slow version to handle limit cases. Does not need optimisation */
412
static void h_resample_slow(uint8_t *dst, int dst_width,
413
                            const uint8_t *src, int src_width,
kabi's avatar
kabi committed
414
                            int src_start, int src_incr, int16_t *filters)
glantau's avatar
glantau committed
415 416
{
    int src_pos, phase, sum, j, v, i;
417
    const uint8_t *s, *src_end;
kabi's avatar
kabi committed
418
    int16_t *filter;
glantau's avatar
glantau committed
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

448
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
449 450
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
glantau's avatar
glantau committed
451 452 453 454 455 456 457 458 459 460 461 462
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
463
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
glantau's avatar
glantau committed
464 465 466 467
            src_incr;
    } else {
        n = dst_width;
    }
468
#ifdef HAVE_MMX
mru's avatar
mru committed
469
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
470
        h_resample_fast4_mmx(dst, n,
glantau's avatar
glantau committed
471 472 473
                             src, src_width, src_start, src_incr, filters);
    else
#endif
474
        h_resample_fast(dst, n,
glantau's avatar
glantau committed
475 476 477 478 479
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
480
        h_resample_slow(dst, dst_width,
glantau's avatar
glantau committed
481 482 483 484
                        src, src_width, src_start, src_incr, filters);
    }
}

485
/*
 * Resample one picture plane.
 *
 * output/owrap/owidth/oheight  destination plane, line stride and size
 * input/iwrap/iwidth/iheight   source plane, line stride and size
 *
 * Source lines are filtered horizontally into s->line_buf on demand; each
 * output line is then the vertical filter of NB_TAPS consecutive buffered
 * lines. Lines are stored at ring positions NB_TAPS .. LINE_BUF_HEIGHT +
 * NB_TAPS - 1, and a line stored at or above LINE_BUF_HEIGHT is also copied
 * LINE_BUF_HEIGHT lines lower, so the NB_TAPS lines ending at ring_y are
 * always contiguous in memory.
 */
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* deactivated MMX because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}

/*
 * Create a resampler from iwidth x iheight to owidth x oheight without
 * cropping or padding. Returns NULL when img_resample_full_init() does.
 */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
            0, 0, 0, 0, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                      int iwidth, int iheight,
                                      int topBand, int bottomBand,
560 561 562
        int leftBand, int rightBand,
        int padtop, int padbottom,
        int padleft, int padright)
glantau's avatar
glantau committed
563 564 565
{
    ImgReSampleContext *s;

566
    if (!owidth || !oheight || !iwidth || !iheight)
567
        return NULL;
568

glantau's avatar
glantau committed
569 570 571
    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
572 573
    if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
        return NULL;
glantau's avatar
glantau committed
574
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
575
    if (!s->line_buf)
glantau's avatar
glantau committed
576
        goto fail;
577

glantau's avatar
glantau committed
578 579 580 581
    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;
582

583 584 585 586
    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;
587

588 589 590 591 592 593 594 595 596
    s->padtop = padtop;
    s->padbottom = padbottom;
    s->padleft = padleft;
    s->padright = padright;

    s->pad_owidth = owidth - (padleft + padright);
    s->pad_oheight = oheight - (padtop + padbottom);

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
597
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
598

599
    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  /
600
            (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
601
    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
602
            (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
glantau's avatar
glantau committed
603 604

    return s;
605
fail:
606
    av_free(s);
glantau's avatar
glantau committed
607 608 609
    return NULL;
}

610
/*
 * Resample the NB_COMPONENTS planes of input into output using context s.
 *
 * Plane 0 is processed at full size; for the other planes every size and
 * offset is shifted right by one, i.e. they are treated as subsampled by
 * two in both directions.
 */
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<NB_COMPONENTS;i++) {
        shift = (i == 0) ? 0 : 1;

        /* skip the top/left padding of the output plane */
        optr = output->data[i] + (((output->linesize[i] *
                        s->padtop) + s->padleft) >> shift);

        /* the input pointer and size skip the cropped bands */
        component_resample(s, optr, output->linesize[i],
                s->pad_owidth >> shift, s->pad_oheight >> shift,
                input->data[i] + (input->linesize[i] *
                    (s->topBand >> shift)) + (s->leftBand >> shift),
                input->linesize[i], ((s->iwidth - s->leftBand -
                        s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

/*
 * Release a context created by img_resample_init() or
 * img_resample_full_init(). A NULL context is ignored, so the function is
 * safe to call on the result of a failed init.
 */
void img_resample_close(ImgReSampleContext *s)
{
    if (!s)
        return;
    av_free(s->line_buf);
    av_free(s);
}

638 639 640 641 642 643 644 645
/*
 * Allocate a scaler context for converting srcW x srcH pictures in
 * srcFormat to dstW x dstH pictures in dstFormat.
 *
 * flags, srcFilter, dstFilter and param are not used by this
 * implementation. When the sizes differ a real resampler is created;
 * otherwise only the dimensions are recorded in a zeroed context.
 *
 * Returns the context, to be released with sws_freeContext(), or NULL if
 * any allocation fails (nothing is leaked in that case).
 */
struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
                                  int dstW, int dstH, int dstFormat,
                                  int flags, SwsFilter *srcFilter,
                                  SwsFilter *dstFilter, double *param)
{
    struct SwsContext *ctx;

    ctx = av_mallocz(sizeof(struct SwsContext));
    /* only touch ctx once we know it was allocated */
    if (ctx)
        ctx->av_class = av_mallocz(sizeof(AVClass));
    if (!ctx || !ctx->av_class) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
        av_free(ctx);

        return NULL;
    }

    if ((srcH != dstH) || (srcW != dstW)) {
        if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
        }
        ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
    } else {
        /* no rescaling: the context only carries the dimensions */
        ctx->resampling_ctx = av_mallocz(sizeof(*ctx->resampling_ctx));
        if (ctx->resampling_ctx) {
            ctx->resampling_ctx->iheight = srcH;
            ctx->resampling_ctx->iwidth = srcW;
            ctx->resampling_ctx->oheight = dstH;
            ctx->resampling_ctx->owidth = dstW;
        }
    }
    if (!ctx->resampling_ctx) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
        av_free(ctx->av_class);
        av_free(ctx);

        return NULL;
    }
    ctx->src_pix_fmt = srcFormat;
    ctx->dst_pix_fmt = dstFormat;

    return ctx;
}

void sws_freeContext(struct SwsContext *ctx)
{
673 674
    if (!ctx)
        return;
675 676 677 678 679 680
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        img_resample_close(ctx->resampling_ctx);
    } else {
        av_free(ctx->resampling_ctx);
    }
681
    av_free(ctx->av_class);
682 683 684
    av_free(ctx);
}

685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720

/**
 * Returns a context matching the requested parameters, reusing ctx when
 * possible.
 *
 * With a NULL ctx this is the same as calling sws_getContext(). Otherwise
 * the source and destination sizes and pixel formats stored in ctx are
 * compared with the requested ones: if they all match, ctx is returned
 * unchanged; if any differs, ctx is freed and a new context is created.
 *
 * Be warned that flags, srcFilter, dstFilter and param are not compared;
 * they are assumed to remain valid.
 */
struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
                        int srcW, int srcH, int srcFormat,
                        int dstW, int dstH, int dstFormat, int flags,
                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
{
    int reusable;

    if (ctx == NULL)
        return sws_getContext(srcW, srcH, srcFormat,
                              dstW, dstH, dstFormat, flags,
                              srcFilter, dstFilter, param);

    reusable = ctx->resampling_ctx->iwidth  == srcW      &&
               ctx->resampling_ctx->iheight == srcH      &&
               ctx->src_pix_fmt             == srcFormat &&
               ctx->resampling_ctx->owidth  == dstW      &&
               ctx->resampling_ctx->oheight == dstH      &&
               ctx->dst_pix_fmt             == dstFormat;
    if (reusable)
        return ctx;

    /* parameters changed: replace the cached context */
    sws_freeContext(ctx);
    return sws_getContext(srcW, srcH, srcFormat,
                          dstW, dstH, dstFormat, flags,
                          srcFilter, dstFilter, param);
}

721 722 723 724 725 726 727 728 729 730
/*
 * Convert and/or rescale a picture.
 *
 * src/srcStride and dst/dstStride give the four plane pointers and line
 * sizes of the source and destination pictures. srcSliceY and srcSliceH
 * are not used by this implementation.
 *
 * Rescaling is only implemented for PIX_FMT_YUV420P: other source formats
 * are first converted into a temporary YUV420P picture, and other
 * destination formats are produced by converting a temporary YUV420P
 * result. Without a size change the source is converted, or copied,
 * straight into the destination.
 *
 * Returns 0 on success, -1 if a temporary buffer cannot be allocated or a
 * pixel format conversion is not supported.
 */
int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
    AVPicture src_pict, dst_pict;
    int i, res = 0;
    AVPicture picture_format_temp;
    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    enum PixelFormat current_pix_fmt;

    for (i = 0; i < 4; i++) {
        src_pict.data[i] = src[i];
        src_pict.linesize[i] = srcStride[i];
        dst_pict.data[i] = dst[i];
        dst_pict.linesize[i] = dstStride[i];
    }
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        /* We have to rescale the picture, but only YUV420P rescaling is supported... */

        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling input*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
            buf1 = av_malloc(size);
            if (!buf1) {
                res = -1;
                goto the_end;
            }
            formatted_picture = &picture_format_temp;
            avpicture_fill(formatted_picture, buf1,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);

            if (img_convert(formatted_picture, PIX_FMT_YUV420P,
                            &src_pict, ctx->src_pix_fmt,
                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {

                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
                res = -1;
                goto the_end;
            }
        } else {
            formatted_picture = &src_pict;
        }

        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling output*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
            buf2 = av_malloc(size);
            if (!buf2) {
                res = -1;
                goto the_end;
            }
            resampled_picture = &picture_resample_temp;
            avpicture_fill(resampled_picture, buf2,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);

        } else {
            resampled_picture = &dst_pict;
        }

        /* ...and finally rescale!!! */
        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
        current_pix_fmt = PIX_FMT_YUV420P;
    } else {
        /* same size: work directly on the source picture */
        resampled_picture = &src_pict;
        current_pix_fmt = ctx->src_pix_fmt;
    }

    if (current_pix_fmt != ctx->dst_pix_fmt) {
        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
                        resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {

            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");

            res = -1;
            goto the_end;
        }
    } else if (resampled_picture != &dst_pict) {
        /* formats already match but the result is not in dst yet */
        av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
    }

the_end:
    av_free(buf1);
    av_free(buf2);
    return res;
}


glantau's avatar
glantau committed
815
#ifdef TEST
816
#include <stdio.h>
diego's avatar
diego committed
817
#undef exit
818

glantau's avatar
glantau committed
819 820 821
/* input: width and height, in pixels, of the source test image (img[]) */
#define XSIZE 256
#define YSIZE 256
kabi's avatar
kabi committed
822
/* source test pattern filled in by main(): one byte per pixel, XSIZE bytes per row */
uint8_t img[XSIZE * YSIZE];
glantau's avatar
glantau committed
823 824 825 826

/* output: capacity, in pixels, of the destination buffers (img1[], img2[]);
 * large enough for the biggest scale factor main() uses (2.0 * XSIZE) */
#define XSIZE1 512
#define YSIZE1 512
kabi's avatar
kabi committed
827 828
/* img1 receives every resampled result that main() saves to disk;
 * img2 is only written by the second (mm_flags = 0) run of the MMX test
 * and is compared against img1 */
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
glantau's avatar
glantau committed
829

kabi's avatar
kabi committed
830
/**
 * Write an 8-bit grayscale image to a binary PGM ("P5") file.
 *
 * The header announces a maximum sample value of 255 and is followed by
 * xsize * ysize raw bytes taken contiguously from img.
 *
 * @param filename path of the file to create (an existing file is truncated)
 * @param img      pixel data, xsize * ysize bytes
 * @param xsize    image width written to the header
 * @param ysize    image height written to the header
 *
 * The function has no return value; failures to open, write or close the
 * file are reported on stderr and the function simply returns.
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    size_t count = (size_t)xsize * ysize;

    /* "wb": the payload is raw bytes and must not go through text-mode
     * newline translation on platforms that distinguish the two modes */
    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "save_pgm: cannot open '%s' for writing\n", filename);
        return;
    }

    if (fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255) < 0 ||
        fwrite(img, 1, count, f) != count)
        fprintf(stderr, "save_pgm: error writing '%s'\n", filename);

    /* fclose() flushes buffered data, so its result matters for output files */
    if (fclose(f) != 0)
        fprintf(stderr, "save_pgm: error closing '%s'\n", filename);
#define fprintf please_use_av_log
}

kabi's avatar
kabi committed
841
static void dump_filter(int16_t *filter)
glantau's avatar
glantau committed
842 843 844 845
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
846
        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
glantau's avatar
glantau committed
847
        for(i=0;i<NB_TAPS;i++) {
848
            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
glantau's avatar
glantau committed
849
        }
850
        av_log(NULL, AV_LOG_INFO, "\n");
glantau's avatar
glantau committed
851 852 853
    }
}

854
#ifdef HAVE_MMX
michaelni's avatar
michaelni committed
855
/* main() sets this to MM_MMX and then to 0 before the two resampling runs it
 * compares. NOTE(review): presumably read by the resampler to choose between
 * the MMX and the plain C code path - confirm against the code that reads it. */
int mm_flags;
glantau's avatar
glantau committed
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
876
                    if (x & 1)
glantau's avatar
glantau committed
877
                        v = 0xff;
878
                    else
glantau's avatar
glantau committed
879 880
                        v = 0;
                } else if (y < XSIZE/4) {
881
                    if (y & 1)
glantau's avatar
glantau committed
882
                        v = 0xff;
883
                    else
glantau's avatar
glantau committed
884 885 886
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
887
                        if ((y+x) & 1)
glantau's avatar
glantau committed
888
                            v = 0xff;
889
                        else
glantau's avatar
glantau committed
890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
906
            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
glantau's avatar
glantau committed
907 908 909 910 911 912
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
913
        ysize = (int)((YSIZE - 100) * fact);
914 915
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
        av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
glantau's avatar
glantau committed
916 917
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
918
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
glantau's avatar
glantau committed
919 920
        img_resample_close(s);

michael's avatar
michael committed
921
        snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
glantau's avatar
glantau committed
922 923 924 925
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
926
#ifdef HAVE_MMX
927
    av_log(NULL, AV_LOG_INFO, "MMX test\n");
glantau's avatar
glantau committed
928 929 930 931 932 933 934 935 936 937 938 939 940
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
941
        av_log(NULL, AV_LOG_ERROR, "mmx error\n");
glantau's avatar
glantau committed
942 943
        exit(1);
    }
944
    av_log(NULL, AV_LOG_INFO, "MMX OK\n");
945
#endif /* HAVE_MMX */
glantau's avatar
glantau committed
946 947 948
    return 0;
}

949
#endif /* TEST */