diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-07-12 16:51:09 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-19 15:27:08 +0200 |
commit | 7a4a5515b0ce97f1c4e18ed012cc3f86328dd220 (patch) | |
tree | 222f6cfbd9ee88af6daa4841d9110b42e2669d48 /libavcodec/hevc_mvs.c | |
parent | 8da1defe6986aab8506203c6e12f44d4df62672e (diff) | |
download | ffmpeg-7a4a5515b0ce97f1c4e18ed012cc3f86328dd220.tar.gz |
hevc: use intreadwrite
When dealing with motion vectors (MVs), both components (x and y) can be processed at once using a single 32-bit access.
On Win64, derive_spatial_merge_candidates goes from 560 to 539 cycles.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/hevc_mvs.c')
-rw-r--r-- | libavcodec/hevc_mvs.c | 26 |
1 files changed, 11 insertions, 15 deletions
diff --git a/libavcodec/hevc_mvs.c b/libavcodec/hevc_mvs.c index 023fb55fb6..b1a1ffc243 100644 --- a/libavcodec/hevc_mvs.c +++ b/libavcodec/hevc_mvs.c @@ -125,6 +125,7 @@ static int isDiffMER(HEVCContext *s, int xN, int yN, int xP, int yP) yN >> plevel == yP >> plevel; } +#define MATCH_MV(x) (AV_RN32A(&A.x) == AV_RN32A(&B.x)) #define MATCH(x) (A.x == B.x) // check if the mv's and refidx are the same between A and B @@ -134,12 +135,12 @@ static int compareMVrefidx(struct MvField A, struct MvField B) int b_pf = B.pred_flag; if (a_pf == b_pf) { if (a_pf == PF_BI) { - return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y) && - MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); + return MATCH(ref_idx[0]) && MATCH_MV(mv[0]) && + MATCH(ref_idx[1]) && MATCH_MV(mv[1]); } else if (a_pf == PF_L0) { - return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y); + return MATCH(ref_idx[0]) && MATCH_MV(mv[0]); } else if (a_pf == PF_L1) { - return MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); + return MATCH(ref_idx[1]) && MATCH_MV(mv[1]); } } return 0; @@ -505,15 +506,12 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, if ((l0_cand.pred_flag & PF_L0) && (l1_cand.pred_flag & PF_L1) && (refPicList[0].list[l0_cand.ref_idx[0]] != refPicList[1].list[l1_cand.ref_idx[1]] || - l0_cand.mv[0].x != l1_cand.mv[1].x || - l0_cand.mv[0].y != l1_cand.mv[1].y)) { + AV_RN32A(&l0_cand.mv[0]) != AV_RN32A(&l1_cand.mv[1]))) { mergecandlist[nb_merge_cand].ref_idx[0] = l0_cand.ref_idx[0]; mergecandlist[nb_merge_cand].ref_idx[1] = l1_cand.ref_idx[1]; mergecandlist[nb_merge_cand].pred_flag = PF_BI; - mergecandlist[nb_merge_cand].mv[0].x = l0_cand.mv[0].x; - mergecandlist[nb_merge_cand].mv[0].y = l0_cand.mv[0].y; - mergecandlist[nb_merge_cand].mv[1].x = l1_cand.mv[1].x; - mergecandlist[nb_merge_cand].mv[1].y = l1_cand.mv[1].y; + AV_COPY32(&mergecandlist[nb_merge_cand].mv[0], &l0_cand.mv[0]); + AV_COPY32(&mergecandlist[nb_merge_cand].mv[1], &l1_cand.mv[1]); 
if (merge_idx == nb_merge_cand) return; nb_merge_cand++; } @@ -523,10 +521,8 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, // append Zero motion vector candidates while (nb_merge_cand < s->sh.max_num_merge_cand) { mergecandlist[nb_merge_cand].pred_flag = PF_L0 + ((s->sh.slice_type == B_SLICE) << 1); - mergecandlist[nb_merge_cand].mv[0].x = 0; - mergecandlist[nb_merge_cand].mv[0].y = 0; - mergecandlist[nb_merge_cand].mv[1].x = 0; - mergecandlist[nb_merge_cand].mv[1].y = 0; + AV_ZERO32(mergecandlist[nb_merge_cand].mv+0); + AV_ZERO32(mergecandlist[nb_merge_cand].mv+1); mergecandlist[nb_merge_cand].ref_idx[0] = zero_idx < nb_refs ? zero_idx : 0; mergecandlist[nb_merge_cand].ref_idx[1] = zero_idx < nb_refs ? zero_idx : 0; @@ -545,7 +541,7 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW, { int singleMCLFlag = 0; int nCS = 1 << log2_cb_size; - struct MvField mergecand_list[MRG_MAX_NUM_CANDS] = { { { { 0 } } } }; + LOCAL_ALIGNED(4, MvField, mergecand_list, [MRG_MAX_NUM_CANDS]); int nPbW2 = nPbW; int nPbH2 = nPbH; HEVCLocalContext *lc = s->HEVClc; |