aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2012-05-31 20:19:56 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2012-06-07 00:55:24 +0200
commit cc0a6844973da4c855c399322bc6c6ee8ca8c204 (patch)
tree 7540e0fbe18b44eb0830ecc5b05a79c716dedf49
parent 7f0f6602cb3219c9d0e6bd5df33d2b891ac0089b (diff)
download ffmpeg-cc0a6844973da4c855c399322bc6c6ee8ca8c204.tar.gz
h264: move q0 scan tables into context
This fixes out of global array reads. The alternative solutions of checking the index or modifying the VLC tables to prevent the index going outside are each about 1-2 CPU cycles slower per coded 4x4 block. The alternative of padding the global tables directly is more ugly and moving them to the context should benefit cache locality. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind Signed-off-by: Michael Niedermayer <michaelni@gmx.at> (cherry picked from commit b7d14883939e756cbda376c66552be9d843910a0) Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/h264.c 24
-rw-r--r-- libavcodec/h264.h 12
-rw-r--r-- libavcodec/h264data.h 5
3 files changed, 18 insertions, 23 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 85787f40d1..7ebe3521b0 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2719,19 +2719,19 @@ static void init_scan_tables(H264Context *h)
#undef T
}
if (h->sps.transform_bypass) { // FIXME same ugly
- h->zigzag_scan_q0 = zigzag_scan;
- h->zigzag_scan8x8_q0 = ff_zigzag_direct;
- h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
- h->field_scan_q0 = field_scan;
- h->field_scan8x8_q0 = field_scan8x8;
- h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
+ memcpy(h->zigzag_scan_q0 , zigzag_scan , sizeof(h->zigzag_scan_q0 ));
+ memcpy(h->zigzag_scan8x8_q0 , ff_zigzag_direct , sizeof(h->zigzag_scan8x8_q0 ));
+ memcpy(h->zigzag_scan8x8_cavlc_q0 , zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0));
+ memcpy(h->field_scan_q0 , field_scan , sizeof(h->field_scan_q0 ));
+ memcpy(h->field_scan8x8_q0 , field_scan8x8 , sizeof(h->field_scan8x8_q0 ));
+ memcpy(h->field_scan8x8_cavlc_q0 , field_scan8x8_cavlc , sizeof(h->field_scan8x8_cavlc_q0 ));
} else {
- h->zigzag_scan_q0 = h->zigzag_scan;
- h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
- h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
- h->field_scan_q0 = h->field_scan;
- h->field_scan8x8_q0 = h->field_scan8x8;
- h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
+ memcpy(h->zigzag_scan_q0 , h->zigzag_scan , sizeof(h->zigzag_scan_q0 ));
+ memcpy(h->zigzag_scan8x8_q0 , h->zigzag_scan8x8 , sizeof(h->zigzag_scan8x8_q0 ));
+ memcpy(h->zigzag_scan8x8_cavlc_q0 , h->zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0));
+ memcpy(h->field_scan_q0 , h->field_scan , sizeof(h->field_scan_q0 ));
+ memcpy(h->field_scan8x8_q0 , h->field_scan8x8 , sizeof(h->field_scan8x8_q0 ));
+ memcpy(h->field_scan8x8_cavlc_q0 , h->field_scan8x8_cavlc , sizeof(h->field_scan8x8_cavlc_q0 ));
}
}
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 0e11e304ee..a27f82f494 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -421,12 +421,12 @@ typedef struct H264Context {
uint8_t field_scan[16];
uint8_t field_scan8x8[64];
uint8_t field_scan8x8_cavlc[64];
- const uint8_t *zigzag_scan_q0;
- const uint8_t *zigzag_scan8x8_q0;
- const uint8_t *zigzag_scan8x8_cavlc_q0;
- const uint8_t *field_scan_q0;
- const uint8_t *field_scan8x8_q0;
- const uint8_t *field_scan8x8_cavlc_q0;
+ uint8_t zigzag_scan_q0[16];
+ uint8_t zigzag_scan8x8_q0[64];
+ uint8_t zigzag_scan8x8_cavlc_q0[64];
+ uint8_t field_scan_q0[16];
+ uint8_t field_scan8x8_q0[64];
+ uint8_t field_scan8x8_cavlc_q0[64];
int x264_build;
diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h
index b665bca8c5..f3fc7f9425 100644
--- a/libavcodec/h264data.h
+++ b/libavcodec/h264data.h
@@ -57,7 +57,6 @@ static const uint8_t zigzag_scan[16+1] = {
1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
- 0,
};
static const uint8_t field_scan[16+1] = {
@@ -65,7 +64,6 @@ static const uint8_t field_scan[16+1] = {
0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
- 0,
};
static const uint8_t luma_dc_zigzag_scan[16] = {
@@ -112,7 +110,6 @@ static const uint8_t zigzag_scan8x8_cavlc[64+1] = {
1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
- 0,
};
static const uint8_t field_scan8x8[64+1] = {
@@ -132,7 +129,6 @@ static const uint8_t field_scan8x8[64+1] = {
7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
- 0,
};
static const uint8_t field_scan8x8_cavlc[64+1] = {
@@ -152,7 +148,6 @@ static const uint8_t field_scan8x8_cavlc[64+1] = {
1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
- 0,
};
typedef struct IMbInfo {