diff options
Diffstat (limited to 'src/plugins/ffmpeg/libavcodec/ppc/int_altivec.c')
-rw-r--r-- | src/plugins/ffmpeg/libavcodec/ppc/int_altivec.c | 143 |
1 files changed, 0 insertions, 143 deletions
diff --git a/src/plugins/ffmpeg/libavcodec/ppc/int_altivec.c b/src/plugins/ffmpeg/libavcodec/ppc/int_altivec.c deleted file mode 100644 index 7a155a2..0000000 --- a/src/plugins/ffmpeg/libavcodec/ppc/int_altivec.c +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org> | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | ** @file int_altivec.c | ||
23 | ** integer misc ops. | ||
24 | **/ | ||
25 | |||
26 | #include "libavcodec/dsputil.h" | ||
27 | |||
28 | #include "gcc_fixes.h" | ||
29 | |||
30 | #include "dsputil_altivec.h" | ||
31 | |||
32 | #include "types_altivec.h" | ||
33 | |||
34 | static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, | ||
35 | int size) { | ||
36 | int i, size16; | ||
37 | vector signed char vpix1; | ||
38 | vector signed short vpix2, vdiff, vpix1l,vpix1h; | ||
39 | union { vector signed int vscore; | ||
40 | int32_t score[4]; | ||
41 | } u; | ||
42 | u.vscore = vec_splat_s32(0); | ||
43 | // | ||
44 | //XXX lazy way, fix it later | ||
45 | |||
46 | #define vec_unaligned_load(b) \ | ||
47 | vec_perm(vec_ld(0,b),vec_ld(15,b),vec_lvsl(0, b)); | ||
48 | |||
49 | size16 = size >> 4; | ||
50 | while(size16) { | ||
51 | // score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); | ||
52 | //load pix1 and the first batch of pix2 | ||
53 | |||
54 | vpix1 = vec_unaligned_load(pix1); | ||
55 | vpix2 = vec_unaligned_load(pix2); | ||
56 | pix2 += 8; | ||
57 | //unpack | ||
58 | vpix1h = vec_unpackh(vpix1); | ||
59 | vdiff = vec_sub(vpix1h, vpix2); | ||
60 | vpix1l = vec_unpackl(vpix1); | ||
61 | // load another batch from pix2 | ||
62 | vpix2 = vec_unaligned_load(pix2); | ||
63 | u.vscore = vec_msum(vdiff, vdiff, u.vscore); | ||
64 | vdiff = vec_sub(vpix1l, vpix2); | ||
65 | u.vscore = vec_msum(vdiff, vdiff, u.vscore); | ||
66 | pix1 += 16; | ||
67 | pix2 += 8; | ||
68 | size16--; | ||
69 | } | ||
70 | u.vscore = vec_sums(u.vscore, vec_splat_s32(0)); | ||
71 | |||
72 | size %= 16; | ||
73 | for (i = 0; i < size; i++) { | ||
74 | u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); | ||
75 | } | ||
76 | return u.score[3]; | ||
77 | } | ||
78 | |||
79 | static void add_int16_altivec(int16_t * v1, int16_t * v2, int order) | ||
80 | { | ||
81 | int i; | ||
82 | register vec_s16_t vec, *pv; | ||
83 | |||
84 | for(i = 0; i < order; i += 8){ | ||
85 | pv = (vec_s16_t*)v2; | ||
86 | vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2)); | ||
87 | vec_st(vec_add(vec_ld(0, v1), vec), 0, v1); | ||
88 | v1 += 8; | ||
89 | v2 += 8; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order) | ||
94 | { | ||
95 | int i; | ||
96 | register vec_s16_t vec, *pv; | ||
97 | |||
98 | for(i = 0; i < order; i += 8){ | ||
99 | pv = (vec_s16_t*)v2; | ||
100 | vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2)); | ||
101 | vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1); | ||
102 | v1 += 8; | ||
103 | v2 += 8; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order, const int shift) | ||
108 | { | ||
109 | int i; | ||
110 | LOAD_ZERO; | ||
111 | register vec_s16_t vec1, *pv; | ||
112 | register vec_s32_t res = vec_splat_s32(0), t; | ||
113 | register vec_u32_t shifts; | ||
114 | DECLARE_ALIGNED_16(int32_t, ires); | ||
115 | |||
116 | shifts = zero_u32v; | ||
117 | if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1))); | ||
118 | if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08)); | ||
119 | if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04)); | ||
120 | if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02)); | ||
121 | if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01)); | ||
122 | |||
123 | for(i = 0; i < order; i += 8){ | ||
124 | pv = (vec_s16_t*)v1; | ||
125 | vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1)); | ||
126 | t = vec_msum(vec1, vec_ld(0, v2), zero_s32v); | ||
127 | t = vec_sr(t, shifts); | ||
128 | res = vec_sums(t, res); | ||
129 | v1 += 8; | ||
130 | v2 += 8; | ||
131 | } | ||
132 | res = vec_splat(res, 3); | ||
133 | vec_ste(res, 0, &ires); | ||
134 | return ires; | ||
135 | } | ||
136 | |||
137 | void int_init_altivec(DSPContext* c, AVCodecContext *avctx) | ||
138 | { | ||
139 | c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; | ||
140 | c->add_int16 = add_int16_altivec; | ||
141 | c->sub_int16 = sub_int16_altivec; | ||
142 | c->scalarproduct_int16 = scalarproduct_int16_altivec; | ||
143 | } | ||