aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Melanson <mike@multimedia.cx>2005-05-21 04:43:36 +0000
committerMike Melanson <mike@multimedia.cx>2005-05-21 04:43:36 +0000
commitdc4b78d9ddcbb9715f22c53f8c2200560d4ac558 (patch)
treeca55ea900aaec976e47e81a94178b765dcb28b47
parentd57b7316c50c92b94f6e642dd3b54b0ad869265e (diff)
downloadffmpeg-dc4b78d9ddcbb9715f22c53f8c2200560d4ac558.tar.gz
render by the slice rather than the plane
Originally committed as revision 4290 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/vp3.c260
1 files changed, 255 insertions, 5 deletions
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 34c780cf23..2a07fb2acc 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -2144,6 +2144,248 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
}
/*
+ * Perform the final rendering for a particular slice of data.
+ * The slice number ranges from 0..(macroblock_height - 1).
+ */
+static void render_slice(Vp3DecodeContext *s, int slice)
+{
+ int x, y;
+ int m, n;
+ int i; /* indicates current fragment */
+ int16_t *dequantizer;
+ DCTELEM __align16 block[64];
+ unsigned char *output_plane;
+ unsigned char *last_plane;
+ unsigned char *golden_plane;
+ int stride;
+ int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
+ int upper_motion_limit, lower_motion_limit;
+ int motion_halfpel_index;
+ uint8_t *motion_source;
+ int plane;
+ int plane_width;
+ int plane_height;
+ int slice_height;
+ int current_macroblock_entry = slice * s->macroblock_width * 6;
+
+ if (slice >= s->macroblock_height)
+ return;
+
+ for (plane = 0; plane < 3; plane++) {
+
+ /* set up plane-specific parameters */
+ if (plane == 0) {
+ output_plane = s->current_frame.data[0];
+ last_plane = s->last_frame.data[0];
+ golden_plane = s->golden_frame.data[0];
+ stride = s->current_frame.linesize[0];
+ if (!s->flipped_image) stride = -stride;
+ upper_motion_limit = 7 * s->current_frame.linesize[0];
+ lower_motion_limit = s->height * s->current_frame.linesize[0] + s->width - 8;
+ y = slice * FRAGMENT_PIXELS * 2;
+ plane_width = s->width;
+ plane_height = s->height;
+ slice_height = y + FRAGMENT_PIXELS * 2;
+ i = s->macroblock_fragments[current_macroblock_entry + 0];
+ } else if (plane == 1) {
+ output_plane = s->current_frame.data[1];
+ last_plane = s->last_frame.data[1];
+ golden_plane = s->golden_frame.data[1];
+ stride = s->current_frame.linesize[1];
+ if (!s->flipped_image) stride = -stride;
+ upper_motion_limit = 7 * s->current_frame.linesize[1];
+ lower_motion_limit = (s->height / 2) * s->current_frame.linesize[1] + (s->width / 2) - 8;
+ y = slice * FRAGMENT_PIXELS;
+ plane_width = s->width / 2;
+ plane_height = s->height / 2;
+ slice_height = y + FRAGMENT_PIXELS;
+ i = s->macroblock_fragments[current_macroblock_entry + 4];
+ } else {
+ output_plane = s->current_frame.data[2];
+ last_plane = s->last_frame.data[2];
+ golden_plane = s->golden_frame.data[2];
+ stride = s->current_frame.linesize[2];
+ if (!s->flipped_image) stride = -stride;
+ upper_motion_limit = 7 * s->current_frame.linesize[2];
+ lower_motion_limit = (s->height / 2) * s->current_frame.linesize[2] + (s->width / 2) - 8;
+ y = slice * FRAGMENT_PIXELS;
+ plane_width = s->width / 2;
+ plane_height = s->height / 2;
+ slice_height = y + FRAGMENT_PIXELS;
+ i = s->macroblock_fragments[current_macroblock_entry + 5];
+ }
+
+ if(ABS(stride) > 2048)
+ return; //various tables are fixed size
+
+ /* for each fragment row in the slice (both of them)... */
+ for (; y < slice_height; y += 8) {
+
+ /* for each fragment in a row... */
+ for (x = 0; x < plane_width; x += 8, i++) {
+
+ if ((i < 0) || (i >= s->fragment_count)) {
+ av_log(s->avctx, AV_LOG_ERROR, " vp3:render_slice(): bad fragment number (%d)\n", i);
+ return;
+ }
+
+ /* transform if this block was coded */
+ if ((s->all_fragments[i].coding_method != MODE_COPY) &&
+ !((s->avctx->flags & CODEC_FLAG_GRAY) && plane)) {
+
+ if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) ||
+ (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
+ motion_source= golden_plane;
+ else
+ motion_source= last_plane;
+
+ motion_source += s->all_fragments[i].first_pixel;
+ motion_halfpel_index = 0;
+
+ /* sort out the motion vector if this fragment is coded
+ * using a motion vector method */
+ if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
+ (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
+ int src_x, src_y;
+ motion_x = s->all_fragments[i].motion_x;
+ motion_y = s->all_fragments[i].motion_y;
+ if(plane){
+ motion_x= (motion_x>>1) | (motion_x&1);
+ motion_y= (motion_y>>1) | (motion_y&1);
+ }
+
+ src_x= (motion_x>>1) + x;
+ src_y= (motion_y>>1) + y;
+ if ((motion_x == 127) || (motion_y == 127))
+ av_log(s->avctx, AV_LOG_ERROR, " help! got invalid motion vector! (%X, %X)\n", motion_x, motion_y);
+
+ motion_halfpel_index = motion_x & 0x01;
+ motion_source += (motion_x >> 1);
+
+ motion_halfpel_index |= (motion_y & 0x01) << 1;
+ motion_source += ((motion_y >> 1) * stride);
+
+ if(src_x<0 || src_y<0 || src_x + 9 >= plane_width || src_y + 9 >= plane_height){
+ uint8_t *temp= s->edge_emu_buffer;
+ if(stride<0) temp -= 9*stride;
+ else temp += 9*stride;
+
+ ff_emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height);
+ motion_source= temp;
+ }
+ }
+
+
+ /* first, take care of copying a block from either the
+ * previous or the golden frame */
+ if (s->all_fragments[i].coding_method != MODE_INTRA) {
+ /* Note, it is possible to implement all MC cases with
+ put_no_rnd_pixels_l2 which would look more like the
+ VP3 source but this would be slower as
+ put_no_rnd_pixels_tab is better optimzed */
+ if(motion_halfpel_index != 3){
+ s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
+ output_plane + s->all_fragments[i].first_pixel,
+ motion_source, stride, 8);
+ }else{
+ int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
+ s->dsp.put_no_rnd_pixels_l2[1](
+ output_plane + s->all_fragments[i].first_pixel,
+ motion_source - d,
+ motion_source + stride + 1 + d,
+ stride, 8);
+ }
+ dequantizer = s->inter_dequant;
+ }else{
+ if (plane == 0)
+ dequantizer = s->intra_y_dequant;
+ else
+ dequantizer = s->intra_c_dequant;
+ }
+
+ /* dequantize the DCT coefficients */
+ debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n",
+ i, s->all_fragments[i].coding_method,
+ DC_COEFF(i), dequantizer[0]);
+
+ if(s->avctx->idct_algo==FF_IDCT_VP3){
+ Coeff *coeff= s->coeffs + i;
+ memset(block, 0, sizeof(block));
+ while(coeff->next){
+ block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
+ coeff= coeff->next;
+ }
+ }else{
+ Coeff *coeff= s->coeffs + i;
+ memset(block, 0, sizeof(block));
+ while(coeff->next){
+ block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
+ coeff= coeff->next;
+ }
+ }
+
+ /* invert DCT and place (or add) in final output */
+
+ if (s->all_fragments[i].coding_method == MODE_INTRA) {
+ if(s->avctx->idct_algo!=FF_IDCT_VP3)
+ block[0] += 128<<3;
+ s->dsp.idct_put(
+ output_plane + s->all_fragments[i].first_pixel,
+ stride,
+ block);
+ } else {
+ s->dsp.idct_add(
+ output_plane + s->all_fragments[i].first_pixel,
+ stride,
+ block);
+ }
+
+ debug_idct("block after idct_%s():\n",
+ (s->all_fragments[i].coding_method == MODE_INTRA)?
+ "put" : "add");
+ for (m = 0; m < 8; m++) {
+ for (n = 0; n < 8; n++) {
+ debug_idct(" %3d", *(output_plane +
+ s->all_fragments[i].first_pixel + (m * stride + n)));
+ }
+ debug_idct("\n");
+ }
+ debug_idct("\n");
+
+ } else {
+
+ /* copy directly from the previous frame */
+ s->dsp.put_pixels_tab[1][0](
+ output_plane + s->all_fragments[i].first_pixel,
+ last_plane + s->all_fragments[i].first_pixel,
+ stride, 8);
+
+ }
+ }
+ }
+ }
+
+ /* future loop filter logic goes here... */
+ /* algorithm:
+ * if (slice != 0)
+ * run filter on 1st row of Y slice
+ * run filter on U slice
+ * run filter on V slice
+ * run filter on 2nd row of Y slice
+ */
+
+ /* this looks like a good place for slice dispatch... */
+ /* algorithm:
+ * if (slice > 0)
+ * dispatch (slice - 1);
+ * if (slice == s->macroblock_height - 1)
+ * dispatch (slice); // handle last slice
+ */
+
+ emms_c();
+}
+
+/*
* This function performs the final rendering of each fragment's data
* onto the output frame.
*/
@@ -2774,6 +3016,7 @@ static int vp3_decode_frame(AVCodecContext *avctx,
Vp3DecodeContext *s = avctx->priv_data;
GetBitContext gb;
static int counter = 0;
+ int i;
init_get_bits(&gb, buf, buf_size * 8);
@@ -2917,22 +3160,29 @@ if (!s->keyframe) {
{START_TIMER
reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
- STOP_TIMER("reverse_dc_prediction")}
- {START_TIMER
- render_fragments(s, 0, s->width, s->height, 0);
- STOP_TIMER("render_fragments")}
-
if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
reverse_dc_prediction(s, s->u_fragment_start,
s->fragment_width / 2, s->fragment_height / 2);
reverse_dc_prediction(s, s->v_fragment_start,
s->fragment_width / 2, s->fragment_height / 2);
+ }
+ STOP_TIMER("reverse_dc_prediction")}
+ {START_TIMER
+
+#if 1
+ for (i = 0; i < s->macroblock_height; i++)
+ render_slice(s, i);
+#else
+ render_fragments(s, 0, s->width, s->height, 0);
+ if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1);
render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2);
} else {
memset(s->current_frame.data[1], 0x80, s->width * s->height / 4);
memset(s->current_frame.data[2], 0x80, s->width * s->height / 4);
}
+#endif
+ STOP_TIMER("render_fragments")}
{START_TIMER
apply_loop_filter(s);