aboutsummaryrefslogtreecommitdiffstats
path: root/libavformat
diff options
context:
space:
mode:
authorAndreas Rheinhardt <andreas.rheinhardt@gmail.com>2019-05-17 00:30:14 +0200
committerJames Almer <jamrial@gmail.com>2019-07-16 16:17:00 -0300
commit5120305137436b556a23208f25b549c5d64fb38e (patch)
tree0571bc07b86cce4c70ca7c945288e64122959ffa /libavformat
parent60f75c9976368d9921ea5cf2b9193ff8ace1602d (diff)
downloadffmpeg-5120305137436b556a23208f25b549c5d64fb38e.tar.gz
avformat/matroskadec: Don't skip too much when unseekable
The Matroska (and WebM) file format achieves forward-compability by insisting that demuxers ignore and skip elements they don't know about. Unfortunately, this complicates the detection of errors as errors resulting from loosing sync can't be reliably distinguished from unknown elements that are part of a future version of the standard. Up until now, the strategy to deal with this situation was to skip all unknown elements that are not obviously erroneous; if an error happened, it was tried to seek to the last known good position to resync from (and resync to level 1 elements). This is working fine if the input is seekable, but if it is not, then the skipped data can usually not be rechecked lateron. This is particularly acute if unknown-length clusters are in use, as the check for whether a child element exceeds the containing master element is ineffective in this situation. To remedy this, a new heuristic has been introduced: If an unknown element is encountered in non-seekable mode, an error is presumed to have happened based upon a combination of the length of the row of the already encountered unknown elements and of how far away skipping this element would take us. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
Diffstat (limited to 'libavformat')
-rw-r--r--libavformat/matroskadec.c54
1 files changed, 52 insertions, 2 deletions
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 2a1ad485d2..2485d8edf4 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -73,6 +73,12 @@
* still need to be performed */
#define LEVEL_ENDED 3 /* return value of ebml_parse when the
* syntax level used for parsing ended. */
+#define SKIP_THRESHOLD 1024 * 1024 /* In non-seekable mode, if more than SKIP_THRESHOLD
+ * of unkown, potentially damaged data is encountered,
+ * it is considered an error. */
+#define UNKNOWN_EQUIV 50 * 1024 /* An unknown element is considered equivalent
+ * to this many bytes of unknown data for the
+ * SKIP_THRESHOLD check. */
typedef enum {
EBML_NONE,
@@ -338,6 +344,7 @@ typedef struct MatroskaDemuxContext {
int num_levels;
uint32_t current_id;
int64_t resync_pos;
+ int unknown_count;
uint64_t time_scale;
double duration;
@@ -763,8 +770,9 @@ static int matroska_reset_status(MatroskaDemuxContext *matroska,
return err;
}
- matroska->current_id = id;
- matroska->num_levels = 1;
+ matroska->current_id = id;
+ matroska->num_levels = 1;
+ matroska->unknown_count = 0;
matroska->resync_pos = avio_tell(matroska->ctx->pb);
if (id)
matroska->resync_pos -= (av_log2(id) + 7) / 8;
@@ -1289,6 +1297,48 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
} else
level_check = 0;
+ if (!(pb->seekable & AVIO_SEEKABLE_NORMAL)) {
+ // Loosing sync will likely manifest itself as encountering unknown
+ // elements which are not reliably distinguishable from elements
+ // belonging to future extensions of the format.
+ // We use a heuristic to detect such situations: If the current
+ // element is not expected at the current syntax level and there
+ // were only a few unknown elements in a row, then the element is
+ // skipped or considered defective based upon the length of the
+ // current element (i.e. how much would be skipped); if there were
+ // more than a few skipped elements in a row and skipping the current
+ // element would lead us more than SKIP_THRESHOLD away from the last
+ // known good position, then it is inferred that an error occured.
+ // The dependency on the number of unknown elements in a row exists
+ // because the distance to the last known good position is
+ // automatically big if the last parsed element was big.
+ // In both cases, each unknown element is considered equivalent to
+ // UNKNOWN_EQUIV of skipped bytes for the check.
+ // The whole check is only done for non-seekable output, because
+ // in this situation skipped data can't simply be rechecked later.
+ // This is especially important when using unkown length elements
+ // as the check for whether a child exceeds its containing master
+ // element is not effective in this situation.
+ if (update_pos) {
+ matroska->unknown_count = 0;
+ } else {
+ int64_t dist = length + UNKNOWN_EQUIV * matroska->unknown_count++;
+
+ if (matroska->unknown_count > 3)
+ dist += pos_alt - matroska->resync_pos;
+
+ if (dist > SKIP_THRESHOLD) {
+ av_log(matroska->ctx, AV_LOG_ERROR,
+ "Unknown element %"PRIX32" at pos. 0x%"PRIx64" with "
+ "length 0x%"PRIx64" considered as invalid data. Last "
+ "known good position 0x%"PRIx64", %d unknown elements"
+ " in a row\n", id, pos, length, matroska->resync_pos,
+ matroska->unknown_count);
+ return AVERROR_INVALIDDATA;
+ }
+ }
+ }
+
if (update_pos) {
// We have found an element that is allowed at this place
// in the hierarchy and it passed all checks, so treat the beginning