1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
|
/*
* Depacketization for RTP Payload Format For AV1 (v1.0)
* https://aomediacodec.github.io/av1-rtp-spec/
* Copyright (c) 2024 Axis Communications
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* @brief AV1 / RTP depacketization code (RTP Payload Format For AV1 (v1.0))
* @author Chris Hodges <chris.hodges@axis.com>
* @note The process will restore TDs and put back size fields into headers.
* It will also try to keep complete OBUs and remove partial OBUs
* caused by packet drops and thus keep the stream syntactically intact.
*/
#include "libavutil/avstring.h"
#include "libavutil/mem.h"
#include "avformat.h"
#include "rtpdec.h"
#include "libavcodec/av1.h"
#include "rtp_av1.h"
// enable tracing of packet data
//#define RTPDEC_AV1_VERBOSE_TRACE
/**
* RTP/AV1 specific private data.
*
* Holds the SDP-signalled stream parameters and the state needed to
* reassemble OBUs that are fragmented across several RTP packets.
*/
struct PayloadContext {
uint32_t timestamp; ///< last received timestamp for frame
uint8_t profile; ///< profile (main/high/professional), from SDP "profile"
uint8_t level_idx; ///< level (0-31), from SDP "level-idx"
uint8_t tier; ///< main tier or high tier, from SDP "tier"
uint16_t prev_seq; ///< sequence number of previous packet
unsigned int frag_obu_size; ///< current total size of fragmented OBU (0 when no fragment is pending)
unsigned int frag_pkt_leb_pos; ///< offset in buffer where OBU LEB starts
unsigned int frag_lebs_res; ///< number of bytes reserved for LEB
unsigned int frag_header_size; ///< size of OBU header (1 or 2)
int needs_td; ///< indicates that a TD should be output
int drop_fragment; ///< drop all fragments until next frame
int keyframe_seen; ///< keyframe was seen
int wait_for_keyframe; ///< message about waiting for keyframe has been issued
};
/**
 * Store one AV1 "fmtp" attribute/value pair in the payload context.
 *
 * Recognized attributes are "profile", "level-idx" and "tier"; any other
 * attribute is silently ignored.
 *
 * @param s        context used for logging
 * @param stream   stream the fmtp line belongs to (unused here)
 * @param av1_data payload context receiving the parsed values
 * @param attr     attribute name
 * @param value    attribute value as a decimal string
 * @return always 0
 */
static int sdp_parse_fmtp_config_av1(AVFormatContext *s,
                                     AVStream *stream,
                                     PayloadContext *av1_data,
                                     const char *attr, const char *value) {
    if (!strcmp(attr, "profile")) {
        av1_data->profile = atoi(value);
        av_log(s, AV_LOG_DEBUG, "RTP AV1 profile: %u\n", av1_data->profile);
    } else if (!strcmp(attr, "level-idx")) {
        av1_data->level_idx = atoi(value);
        // log the level that was just parsed (not the profile)
        av_log(s, AV_LOG_DEBUG, "RTP AV1 level: %u\n", av1_data->level_idx);
    } else if (!strcmp(attr, "tier")) {
        av1_data->tier = atoi(value);
        av_log(s, AV_LOG_DEBUG, "RTP AV1 tier: %u\n", av1_data->tier);
    }
    return 0;
}
/**
 * Depacketize one AV1 RTP payload and append its OBU elements to @p pkt.
 *
 * Temporal delimiters are restored and size fields are put back into OBU
 * headers that were sent without one. OBUs fragmented over several RTP
 * packets are accumulated in @p pkt; on a sequence gap the data collected so
 * far is dropped.
 *
 * @param ctx       context used for logging
 * @param data      depacketizer state carried between calls
 * @param st        stream whose index is stored in the output packet
 * @param pkt       output packet that is created/grown by this call
 * @param timestamp RTP timestamp of this packet (only read here)
 * @param buf       RTP payload
 * @param len       payload length in bytes
 * @param seq       RTP sequence number of this packet
 * @param flags     RTP flags; RTP_FLAG_MARKER marks the end of a temporal unit
 * @return 0 on complete packet, -1 on partial packet, another negative
 *         AVERROR code on invalid data or allocation failure
 */
static int av1_handle_packet(AVFormatContext *ctx, PayloadContext *data,
                             AVStream *st, AVPacket *pkt, uint32_t *timestamp,
                             const uint8_t *buf, int len, uint16_t seq,
                             int flags) {
    uint8_t aggr_hdr;
    int result = 0;
    int is_frag_cont;
    int is_last_fragmented;
    int is_first_pkt;
    unsigned int num_obus;
    unsigned int obu_cnt = 1;
    unsigned int rem_pkt_size = len;
    unsigned int pktpos;
    const uint8_t *buf_ptr = buf;
    uint16_t expected_seq = data->prev_seq + 1;
    int16_t seq_diff = seq - expected_seq;

    data->prev_seq = seq;

    if (!len) {
        av_log(ctx, AV_LOG_ERROR, "Empty AV1 RTP packet\n");
        return AVERROR_INVALIDDATA;
    }
    if (len < 2) {
        av_log(ctx, AV_LOG_ERROR, "AV1 RTP packet too short\n");
        return AVERROR_INVALIDDATA;
    }

    /* The payload structure is supposed to be straight-forward, but there are a
     * couple of edge cases which need to be tackled and make things a bit more
     * complex.
     * These are mainly due to:
     * - To reconstruct the OBU size for fragmented packets and place it in the
     *   OBU header, the final size will not be known until the last fragment
     *   has been parsed. However, the number of LEBs in the header is variable
     *   depending on the length of the payload.
     * - We are increasing the out-packet size while we are getting fragmented
     *   OBUs. If an RTP packet gets dropped, we would create corrupted OBUs.
     *   In this case we decide to drop the whole frame.
     */

#ifdef RTPDEC_AV1_VERBOSE_TRACE
    av_log(ctx, AV_LOG_TRACE, "RTP Packet %d in (%x), len=%d:\n",
           seq, flags, len);
    av_hex_dump_log(ctx, AV_LOG_TRACE, buf, FFMIN(len, 64));
    av_log(ctx, AV_LOG_TRACE, "... end at offset %x:\n", FFMAX(len - 64, 0));
    av_hex_dump_log(ctx, AV_LOG_TRACE, buf + FFMAX(len - 64, 0), FFMIN(len - 64, 64));
#endif

    /* 8 bit aggregate header: Z Y W W N - - - */
    aggr_hdr = *buf_ptr++;
    rem_pkt_size--;

    /* Z: MUST be set to 1 if the first OBU element is an OBU fragment that is a
     * continuation of an OBU fragment from the previous packet, and MUST be set
     * to 0 otherwise */
    is_frag_cont = (aggr_hdr >> AV1B_AGGR_HDR_FRAG_CONT) & 1;

    /* Y: MUST be set to 1 if the last OBU element is an OBU fragment that will
     * continue in the next packet, and MUST be set to 0 otherwise */
    is_last_fragmented = (aggr_hdr >> AV1B_AGGR_HDR_LAST_FRAG) & 1;

    /* W: two bit field that describes the number of OBU elements in the packet.
     * This field MUST be set equal to 0 or equal to the number of OBU elements
     * contained in the packet.
     * If set to 0, each OBU element MUST be preceded by a length field.
     * If not set to 0 (i.e., W = 1, 2 or 3) the last OBU element MUST NOT be
     * preceded by a length field (it's derived from RTP packet size minus other
     * known lengths). */
    num_obus = (aggr_hdr >> AV1S_AGGR_HDR_NUM_OBUS) & AV1M_AGGR_HDR_NUM_OBUS;

    /* N: MUST be set to 1 if the packet is the first packet of a coded video
     * sequence, and MUST be set to 0 otherwise.*/
    is_first_pkt = (aggr_hdr >> AV1B_AGGR_HDR_FIRST_PKT) & 1;

    if (is_frag_cont) {
        if (data->drop_fragment) {
            return AVERROR_INVALIDDATA;
        }
        if (is_first_pkt) {
            av_log(ctx, AV_LOG_ERROR, "Illegal aggregation header in first AV1 RTP packet\n");
            return AVERROR_INVALIDDATA;
        }
        if (seq_diff) {
            av_log(ctx, AV_LOG_WARNING, "AV1 RTP frag packet sequence mismatch (%d != %d), dropping temporal unit\n",
                   seq, expected_seq);
            goto drop_fragment;
        }
        if (!pkt->size || !data->frag_obu_size) {
            av_log(ctx, AV_LOG_WARNING, "Unexpected fragment continuation in AV1 RTP packet\n");
            goto drop_fragment; // avoid repeated output for the same fragment
        }
    } else {
        if (!is_first_pkt && !data->keyframe_seen) {
            if (!data->wait_for_keyframe) {
                data->wait_for_keyframe = 1;
                av_log(ctx, AV_LOG_WARNING, "AV1 RTP packet before keyframe, dropping and waiting for next keyframe\n");
            }
            goto drop_fragment;
        }
        if (seq_diff && !is_first_pkt) {
            av_log(ctx, AV_LOG_WARNING, "AV1 RTP unfrag packet sequence mismatch (%d != %d), dropping temporal unit\n",
                   seq, expected_seq);
            goto drop_fragment;
        }
        data->drop_fragment = 0;
        if (!data->needs_td && ((data->timestamp != *timestamp) || is_first_pkt)) {
            av_log(ctx, AV_LOG_TRACE, "Timestamp changed to %u (or first pkt %d), forcing TD\n", *timestamp, is_first_pkt);
            data->needs_td = 1;
            data->frag_obu_size = 0; // new temporal unit might have been caused by dropped packets
        }
        if (data->frag_obu_size) {
            data->frag_obu_size = 0; // make sure we recover
            av_log(ctx, AV_LOG_ERROR, "Missing fragment continuation in AV1 RTP packet\n");
            return AVERROR_INVALIDDATA;
        }
        // update the timestamp in the frame packet with the one from the RTP packet
        data->timestamp = *timestamp;
    }

    // output continues at the end of whatever has been collected so far
    pktpos = pkt->size;

#ifdef RTPDEC_AV1_VERBOSE_TRACE
    av_log(ctx, AV_LOG_TRACE, "Input buffer size %d, aggr head 0x%02x fc %d, lf %d, no %d, fp %d\n",
           len, aggr_hdr, is_frag_cont, is_last_fragmented, num_obus, is_first_pkt);
#endif

    if (is_first_pkt) {
        pkt->flags |= AV_PKT_FLAG_KEY;
        data->keyframe_seen = 1;
        data->wait_for_keyframe = 0;
    }

    // loop over OBU elements
    while (rem_pkt_size) {
        uint32_t obu_size;
        int num_lebs;
        int needs_size_field;
        int output_size;
        unsigned int obu_payload_size;
        uint8_t obu_hdr;

        // without an explicit length the element spans the rest of the payload
        obu_size = rem_pkt_size;
        if (!num_obus || obu_cnt < num_obus) {
            // read out explicit OBU element size (which almost corresponds to the original OBU size)
            num_lebs = parse_leb(ctx, buf_ptr, rem_pkt_size, &obu_size);
            if (!num_lebs) {
                return AVERROR_INVALIDDATA;
            }
            rem_pkt_size -= num_lebs;
            buf_ptr += num_lebs;
        }

        if (obu_size > rem_pkt_size) {
            av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %u larger than remaining pkt size %u\n", obu_size, rem_pkt_size);
            return AVERROR_INVALIDDATA;
        }
        if (!obu_size) {
            av_log(ctx, AV_LOG_ERROR, "Unreasonable AV1 OBU size %u\n", obu_size);
            return AVERROR_INVALIDDATA;
        }

        // read first byte (which is the header byte only for non-fragmented
        // elements); done after the size checks so that at least one payload
        // byte is known to be available
        obu_hdr = *buf_ptr;

        if (!is_frag_cont) {
            uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE;
            if (obu_hdr & AV1F_OBU_FORBIDDEN) {
                av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU header (0x%02x)\n", obu_hdr);
                return AVERROR_INVALIDDATA;
            }
            // ignore and remove OBUs according to spec
            if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) ||
                (obu_type == AV1_OBU_TILE_LIST)) {
                // skip the element in the input; nothing is written, so the
                // output position must stay where it is
                buf_ptr += obu_size;
                rem_pkt_size -= obu_size;
                // TODO: This probably breaks if the OBU_TILE_LIST is fragmented
                // into the next RTP packet, so at least check and fail here
                if (rem_pkt_size == 0 && is_last_fragmented) {
                    avpriv_report_missing_feature(ctx, "AV1 OBU_TILE_LIST (should not be there!) to be ignored but is fragmented\n");
                    return AVERROR_PATCHWELCOME;
                }
                // NOTE(review): when the skipped element was the first one,
                // the TD restoration below (tied to obu_cnt == 1) is not
                // reached for this packet - confirm whether that is intended
                obu_cnt++;
                continue;
            }
        }

        // If we need to add a size field, out size will be different
        output_size = obu_size;
        // Spec says the OBUs should have their size fields removed,
        // but this is not mandatory
        if (is_frag_cont || (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD)) {
            needs_size_field = 0;
        } else {
            needs_size_field = 1;
            // (re)calculate number of LEB bytes needed (if it was implicit, there were no LEBs)
            output_size += calc_leb_size(obu_size - (1 + ((obu_hdr & AV1F_OBU_EXTENSION_FLAG) ? 1 : 0)));
        }

        if (!is_frag_cont && (obu_cnt == 1)) {
            if (data->needs_td) {
                output_size += 2; // for Temporal Delimiter (TD)
            }
            if (pkt->data) {
                if ((result = av_grow_packet(pkt, output_size)) < 0)
                    return result;
            } else {
                // parenthesize the assignment so that result holds the error
                // code rather than the outcome of the comparison
                if ((result = av_new_packet(pkt, output_size)) < 0)
                    return result;
            }
            if (data->needs_td) {
                // restore TD
                pkt->data[pktpos++] = 0x12;
                pkt->data[pktpos++] = 0x00;
            }
            data->needs_td = 0;
        } else {
            if ((result = av_grow_packet(pkt, output_size)) < 0)
                return result;
        }

        obu_payload_size = obu_size;
        // do we need to restore the OBU size field?
        if (needs_size_field) {
            // set obu_has_size_field in header byte
            pkt->data[pktpos++] = *buf_ptr++ | AV1F_OBU_HAS_SIZE_FIELD;
            data->frag_header_size = 1;
            obu_payload_size--;

            // copy extension byte, if available
            if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) {
                /* TODO we cannot handle the edge case where last element is a
                 * fragment of exactly one byte AND the header has the extension
                 * flag set. Note that it would be more efficient to not send a
                 * fragment of one byte and instead drop the size field of the
                 * prior element */
                if (!obu_payload_size) {
                    av_log(ctx, AV_LOG_ERROR, "AV1 OBU too short for extension byte (0x%02x)\n",
                           obu_hdr);
                    return AVERROR_INVALIDDATA;
                }
                pkt->data[pktpos++] = *buf_ptr++;
                data->frag_header_size = 2;
                obu_payload_size--;
            }

            // remember start position of LEB for possibly fragmented packet to
            // fixup OBU size later
            data->frag_pkt_leb_pos = pktpos;
            // write intermediate OBU size field
            num_lebs = write_leb(pkt->data + pktpos, obu_payload_size);
            data->frag_lebs_res = num_lebs;
            pktpos += num_lebs;
        }

        // copy verbatim or without above header size patch
        memcpy(pkt->data + pktpos, buf_ptr, obu_payload_size);
        pktpos += obu_payload_size;
        buf_ptr += obu_payload_size;
        rem_pkt_size -= obu_size;

        // if we were handling a fragmented packet and this was the last
        // fragment, correct OBU size field
        if (data->frag_obu_size && (rem_pkt_size || !is_last_fragmented)) {
            uint32_t final_obu_size = data->frag_obu_size + obu_size - data->frag_header_size;
            uint8_t *lebptr = pkt->data + data->frag_pkt_leb_pos;
            num_lebs = calc_leb_size(final_obu_size);

            // check if we had allocated enough LEB bytes in header,
            // otherwise make some extra space
            if (num_lebs > data->frag_lebs_res) {
                int extra_bytes = num_lebs - data->frag_lebs_res;
                if ((result = av_grow_packet(pkt, extra_bytes)) < 0)
                    return result;
                // update pointer in case buffer address changed
                lebptr = pkt->data + data->frag_pkt_leb_pos;
                // move existing data for OBU back a bit
                memmove(lebptr + extra_bytes, lebptr,
                        pkt->size - extra_bytes - data->frag_pkt_leb_pos);
                // move pktpos further down for following OBUs in same packet.
                pktpos += extra_bytes;
            }

            // update OBU size field
            write_leb(lebptr, final_obu_size);

            data->frag_obu_size = 0; // signal end of fragment
        } else if (is_last_fragmented && !rem_pkt_size) {
            // add to total OBU size, so we can fix that in OBU header
            // (but only if the OBU size was missing!)
            if (needs_size_field || data->frag_obu_size) {
                data->frag_obu_size += obu_size;
            }
            // fragment not yet finished!
            result = -1;
        }

        // only the first element of a packet can be a continuation
        is_frag_cont = 0;

        if (!rem_pkt_size && num_obus && (num_obus != obu_cnt)) {
            av_log(ctx, AV_LOG_WARNING, "AV1 aggregation header indicated %u OBU elements, was %u\n",
                   num_obus, obu_cnt);
        }
        obu_cnt++;
    }

    if (flags & RTP_FLAG_MARKER) {
        av_log(ctx, AV_LOG_TRACE, "TD on next packet due to marker\n");
        data->needs_td = 1;
    } else {
        // fragment may be complete, but temporal unit is not yet finished
        result = -1;
    }

    if (!is_last_fragmented) {
        data->frag_obu_size = 0;
        data->frag_pkt_leb_pos = 0;
    }

#ifdef RTPDEC_AV1_VERBOSE_TRACE
    if (!result) {
        av_log(ctx, AV_LOG_TRACE, "AV1 out pkt-size: %d\n", pkt->size);
        av_hex_dump_log(ctx, AV_LOG_TRACE, pkt->data, FFMIN(pkt->size, 64));
        av_log(ctx, AV_LOG_TRACE, "... end at offset %x:\n", FFMAX(pkt->size - 64, 0));
        av_hex_dump_log(ctx, AV_LOG_TRACE, pkt->data + FFMAX(pkt->size - 64, 0), FFMIN(pkt->size, 64));
    }
#endif

    pkt->stream_index = st->index;

    return result;

drop_fragment:
    // discard everything collected so far and wait for a fresh start
    data->keyframe_seen = 0;
    data->drop_fragment = 1;
    data->frag_obu_size = 0;
    data->needs_td = 1;
    if (pkt->size) {
        av_log(ctx, AV_LOG_TRACE, "Dumping current AV1 frame packet\n");
        // we can't seem to deallocate the fragmented packet, but we can shrink it to 0
        av_shrink_packet(pkt, 0);
    }
    return AVERROR_INVALIDDATA;
}
/**
 * Close callback of the AV1 payload handler.
 *
 * Intentionally a no-op: nothing is released here.
 *
 * @param data payload context (unused)
 */
static void av1_close_context(PayloadContext *data)
{
    (void)data;
}
/**
 * Report whether the depacketizer is still waiting for a keyframe.
 *
 * @param data payload context
 * @return 1 if no keyframe has been seen, 0 otherwise
 */
static int av1_need_keyframe(PayloadContext *data)
{
    if (data->keyframe_seen)
        return 0;
    return 1;
}
/**
 * Handle one SDP attribute line for an AV1 stream.
 *
 * Only lines starting with "fmtp:" are processed; their parameters are
 * passed on to sdp_parse_fmtp_config_av1() through ff_parse_fmtp().
 *
 * @param s        format context
 * @param st_index index of the stream the line refers to; negative values
 *                 make the call a no-op
 * @param av1_data payload context receiving the parsed parameters
 * @param line     attribute line
 * @return 0 if the line was not handled, otherwise the result of
 *         ff_parse_fmtp()
 */
static int parse_av1_sdp_line(AVFormatContext *s, int st_index,
                              PayloadContext *av1_data, const char *line)
{
    const char *fmtp_args = line;
    AVStream *target;
    int ret;

    if (st_index < 0)
        return 0;

    target = s->streams[st_index];

    /* Optional parameters are profile, level-idx, and tier.
     * See Section 7.2.1 of https://aomediacodec.github.io/av1-rtp-spec/ */
    if (!av_strstart(fmtp_args, "fmtp:", &fmtp_args))
        return 0;

    ret = ff_parse_fmtp(s, target, av1_data, fmtp_args, sdp_parse_fmtp_config_av1);
    av_log(s, AV_LOG_DEBUG, "RTP AV1 Profile: %u, Level: %u, Tier: %u\n",
           av1_data->profile, av1_data->level_idx, av1_data->tier);

    return ret;
}
/**
* Dynamic payload handler description for the "AV1" RTP encoding name.
*
* Declares the stream as AV1 video with full parsing requested and hooks up
* the SDP line parser, the packet depacketizer, the close callback and the
* keyframe query defined in this file.
*/
const RTPDynamicProtocolHandler ff_av1_dynamic_handler = {
.enc_name = "AV1",
.codec_type = AVMEDIA_TYPE_VIDEO,
.codec_id = AV_CODEC_ID_AV1,
.need_parsing = AVSTREAM_PARSE_FULL,
.priv_data_size = sizeof(PayloadContext),
.parse_sdp_a_line = parse_av1_sdp_line,
.close = av1_close_context,
.parse_packet = av1_handle_packet,
.need_keyframe = av1_need_keyframe,
};
|