Skip to content

Commit a82c2a7

Browse files
committed
video: merge the tiles of grid heif and avif images
Fixes #13585. Fixes #16486. Switching between different grid images added with --external-files is also supported. The priority of independent tracks is reverted for images, because we now want to select a dependent track to trigger the merging, rather than small preview tracks.
1 parent ec4d50f commit a82c2a7

4 files changed

Lines changed: 313 additions & 2 deletions

File tree

demux/demux_lavf.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,97 @@ static void add_new_streams(demuxer_t *demuxer)
900900
handle_new_stream(demuxer, priv->num_streams);
901901
}
902902

903+
// Import libavformat tile-grid stream groups (grid HEIF/AVIF images).
//
// For every stream group of type AV_STREAM_GROUP_PARAMS_TILE_GRID that has at
// least one tile, a struct mp_tile_grid is allocated as a talloc child of the
// demuxer, filled from the AVStreamGroupTileGrid, and attached to the
// sh_stream of every video stream a tile refers to. Tiles are resolved through
// priv->streams, so the streams must already have been added.
//
// Tiles whose group stream index is out of range keep ff_index == -1, which
// never matches a real stream index.
//
// Compiles to an empty function with libavformat older than 61.1.100.
static void handle_tile_grid_groups(demuxer_t *demuxer)
{
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(61, 1, 100)
    lavf_priv_t *priv = demuxer->priv;
    AVFormatContext *avfc = priv->avfc;

    for (int g = 0; g < avfc->nb_stream_groups; g++) {
        AVStreamGroup *stream_group = avfc->stream_groups[g];
        if (stream_group->type != AV_STREAM_GROUP_PARAMS_TILE_GRID)
            continue;

        const AVStreamGroupTileGrid *av_grid = stream_group->params.tile_grid;
        if (!av_grid || av_grid->nb_tiles == 0)
            continue;

        struct mp_tile_grid *mp_grid = talloc_zero(demuxer, struct mp_tile_grid);
        mp_grid->nb_tiles = av_grid->nb_tiles;
        mp_grid->width = av_grid->width;
        mp_grid->height = av_grid->height;
        mp_grid->coded_width = av_grid->coded_width;
        mp_grid->coded_height = av_grid->coded_height;
        mp_grid->horizontal_offset = av_grid->horizontal_offset;
        mp_grid->vertical_offset = av_grid->vertical_offset;
        memcpy(mp_grid->background, av_grid->background,
               sizeof(mp_grid->background));

        mp_grid->tiles = talloc_array(mp_grid, struct mp_tile_grid_entry,
                                      av_grid->nb_tiles);

        for (int i = 0; i < av_grid->nb_tiles; i++) {
            // tiles[] is allocated with talloc_array() (not a zeroing
            // variant), so fully initialize the entry before any early
            // "continue"; otherwise later readers would see indeterminate
            // values for skipped tiles.
            mp_grid->tiles[i] = (struct mp_tile_grid_entry){
                .ff_index = -1,
                .horizontal = av_grid->offsets[i].horizontal,
                .vertical = av_grid->offsets[i].vertical,
            };

            unsigned int group_idx = av_grid->offsets[i].idx;
            if (group_idx >= stream_group->nb_streams) {
                MP_WARN(demuxer, "Tile %d references out-of-range group "
                        "stream index %u (group has %u streams) – skipping.\n",
                        i, group_idx, stream_group->nb_streams);
                continue;
            }

            int ff_idx = stream_group->streams[group_idx]->index;
            mp_grid->tiles[i].ff_index = ff_idx;

            if (ff_idx >= 0 && ff_idx < priv->num_streams &&
                priv->streams[ff_idx])
            {
                struct sh_stream *sh = priv->streams[ff_idx]->sh;
                if (sh && sh->type == STREAM_VIDEO) {
                    sh->tile_grid = mp_grid;
                } else {
                    MP_WARN(demuxer, "Tile %d stream %d is not a video "
                            "stream – ignoring tile grid for it.\n",
                            i, ff_idx);
                }
            }
        }

        // For a single-tile image the assembled canvas may be larger than
        // the display area due to codec alignment padding. Encode that as a
        // codec-level crop so mpv's normal pipeline trims it.
        if (mp_grid->nb_tiles == 1 &&
            (mp_grid->coded_width != mp_grid->width ||
             mp_grid->coded_height != mp_grid->height))
        {
            // Reuse the index validated in the loop above instead of
            // indexing stream_group->streams[] again without a bounds check;
            // it is -1 if the tile referenced an invalid group stream.
            int ff_idx = mp_grid->tiles[0].ff_index;
            if (ff_idx >= 0 && ff_idx < priv->num_streams &&
                priv->streams[ff_idx])
            {
                struct sh_stream *sh = priv->streams[ff_idx]->sh;
                if (sh && sh->codec) {
                    sh->codec->crop = (struct mp_rect){
                        .x0 = mp_grid->horizontal_offset,
                        .y0 = mp_grid->vertical_offset,
                        .x1 = mp_grid->horizontal_offset + mp_grid->width,
                        .y1 = mp_grid->vertical_offset + mp_grid->height,
                    };
                }
            }
        }

        MP_VERBOSE(demuxer,
                   "Stream group %d: tile grid %d tile(s), "
                   "display %dx%d, coded %dx%d, offset (%d,%d).\n",
                   g, mp_grid->nb_tiles,
                   mp_grid->width, mp_grid->height,
                   mp_grid->coded_width, mp_grid->coded_height,
                   mp_grid->horizontal_offset, mp_grid->vertical_offset);
    }
#endif
}
993+
903994
static void update_metadata(demuxer_t *demuxer)
904995
{
905996
lavf_priv_t *priv = demuxer->priv;
@@ -1140,6 +1231,8 @@ static int demux_open_lavf(demuxer_t *demuxer, enum demux_check check)
11401231

11411232
add_new_streams(demuxer);
11421233

1234+
handle_tile_grid_groups(demuxer);
1235+
11431236
mp_tags_move_from_av_dictionary(demuxer->metadata, &avfc->metadata);
11441237

11451238
demuxer->ts_resets_possible =

demux/stheader.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ struct sh_stream {
6565
// stream is a picture (such as album art)
6666
struct demux_packet *attached_picture;
6767

68+
// Metadata for tiled grid images.
69+
// All streams belonging to the same group share the same mp_tile_grid
70+
// object.
71+
struct mp_tile_grid *tile_grid;
72+
6873
// Internal to demux.c
6974
struct demux_stream *ds;
7075
};
@@ -142,4 +147,38 @@ struct mp_codec_params {
142147
double duration;
143148
};
144149

150+
// Layout description of a tiled ("grid") image: the size of the canvas the
// tiles are placed on, the visible rectangle inside it, and where each tile
// goes.
struct mp_tile_grid {
    // How many entries the tiles array holds.
    int nb_tiles;

    // Size of the visible picture, i.e. after cropping.
    int width;
    int height;

    // Size of the uncropped canvas (union of all tile areas plus any
    // alignment padding on the right/bottom edges).
    int coded_width;
    int coded_height;

    // Position of the visible rectangle's top-left corner on the canvas.
    // The remaining crop amounts follow from the fields above:
    //   crop_right  = coded_width  - width  - horizontal_offset
    //   crop_bottom = coded_height - height - vertical_offset
    int horizontal_offset;
    int vertical_offset;

    // Placement of every tile; nb_tiles elements.
    struct mp_tile_grid_entry *tiles;

    // Colour for canvas areas not covered by any tile (R,G,B,A bytes).
    uint8_t background[4];
};
173+
174+
// Placement of a single tile inside a tiled grid image.
struct mp_tile_grid_entry {
    // AVStream.index of the stream carrying this tile (global index within
    // the AVFormatContext). This is the key for locating the tile's track.
    int ff_index;
    // Where the tile's top-left pixel lands in the assembled image.
    int horizontal;
    int vertical;
};
183+
145184
#endif /* MPLAYER_STHEADER_H */

player/loadfile.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ static bool compare_track(struct track *t1, struct track *t2, char **langs, bool
504504
if (t1->image != t2->image)
505505
return !t1->image;
506506
if (t1->dependent_track != t2->dependent_track)
507-
return !t1->dependent_track;
507+
return t1->image ? t1->dependent_track : !t1->dependent_track;
508508
if (t1->stream && t2->stream && opts->hls_bitrate >= 0 &&
509509
t1->stream->hls_bitrate != t2->stream->hls_bitrate)
510510
{
@@ -706,7 +706,7 @@ void mp_switch_track_n(struct MPContext *mpctx, int order, enum stream_type type
706706
if (track == current)
707707
return;
708708

709-
if (current && current->sink) {
709+
if (current && current->sink && !current->stream->tile_grid) {
710710
MP_ERR(mpctx, "Can't disable input to complex filter.\n");
711711
goto error;
712712
}

player/video.c

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "sub/osd.h"
4040
#include "video/hwdec.h"
4141
#include "filters/f_decoder_wrapper.h"
42+
#include "filters/f_lavfi.h"
4243
#include "video/out/vo.h"
4344

4445
#include "core.h"
@@ -155,10 +156,34 @@ static void vo_chain_uninit(struct vo_chain *vo_c)
155156
// this does not free the VO
156157
}
157158

159+
static void uninit_grid(struct MPContext *mpctx)
160+
{
161+
struct track *primary = mpctx->vo_chain->track;
162+
if (!primary || !primary->stream || !primary->stream->tile_grid ||
163+
primary->stream->tile_grid->nb_tiles == 1)
164+
return;
165+
166+
struct mp_tile_grid *grid = primary->stream->tile_grid;
167+
for (int n = 0; n < mpctx->num_tracks; n++) {
168+
struct track *track = mpctx->tracks[n];
169+
if (!track->stream || track->stream->tile_grid != grid)
170+
continue;
171+
if (track->sink) {
172+
mp_pin_disconnect(track->sink);
173+
track->sink = NULL;
174+
}
175+
if (track != primary)
176+
track->dec = NULL;
177+
track->selected = false;
178+
reselect_demux_stream(mpctx, track, false);
179+
}
180+
}
181+
158182
void uninit_video_chain(struct MPContext *mpctx)
159183
{
160184
if (mpctx->vo_chain) {
161185
reset_video_state(mpctx);
186+
uninit_grid(mpctx);
162187
vo_chain_uninit(mpctx->vo_chain);
163188
mpctx->vo_chain = NULL;
164189

@@ -201,13 +226,167 @@ int init_video_decoder(struct MPContext *mpctx, struct track *track)
201226
return 0;
202227
}
203228

229+
static char *tile_grid_graph(void *ctx, const struct mp_tile_grid *grid)
230+
{
231+
struct bstr buf = {0};
232+
233+
for (int i = 0; i < grid->nb_tiles; i++)
234+
bstr_xappend_asprintf(ctx, &buf, "[in%d]", i);
235+
236+
bstr_xappend_asprintf(ctx, &buf, "xstack=inputs=%d:layout=", grid->nb_tiles);
237+
for (int i = 0; i < grid->nb_tiles; i++) {
238+
if (i > 0)
239+
bstr_xappend(ctx, &buf, bstr0("|"));
240+
bstr_xappend_asprintf(ctx, &buf, "%d_%d", grid->tiles[i].horizontal,
241+
grid->tiles[i].vertical);
242+
}
243+
bstr_xappend_asprintf(ctx, &buf,
244+
":fill=0x%02X%02X%02X@0x%02X",
245+
grid->background[0], grid->background[1],
246+
grid->background[2], grid->background[3]);
247+
248+
if (grid->coded_width != grid->width || grid->coded_height != grid->height) {
249+
bstr_xappend_asprintf(ctx, &buf, ",crop=w=%d:h=%d:x=%d:y=%d", grid->width,
250+
grid->height, grid->horizontal_offset, grid->vertical_offset);
251+
}
252+
253+
bstr_xappend(ctx, &buf, bstr0("[vo]"));
254+
return buf.start;
255+
}
256+
257+
static struct track *find_tile_track(struct MPContext *mpctx,
258+
const struct mp_tile_grid *tg, int tile_idx)
259+
{
260+
261+
int wanted_ff = tg->tiles[tile_idx].ff_index;
262+
for (int n = 0; n < mpctx->num_tracks; n++) {
263+
struct track *t = mpctx->tracks[n];
264+
if (t->ff_index == wanted_ff && t->stream && t->stream->tile_grid == tg)
265+
return t;
266+
}
267+
return NULL;
268+
}
269+
270+
static void reinit_video_chain_tiled(struct MPContext *mpctx, struct track *track)
271+
{
272+
struct mp_tile_grid *grid = track->stream->tile_grid;
273+
mp_assert(grid && grid->nb_tiles > 1);
274+
275+
for (int i = 0; i < grid->nb_tiles; i++) {
276+
struct track *t = find_tile_track(mpctx, grid, i);
277+
if (t) {
278+
t->selected = true;
279+
reselect_demux_stream(mpctx, t, false);
280+
}
281+
}
282+
283+
reinit_video_chain_src(mpctx, NULL);
284+
if (!mpctx->vo_chain)
285+
return;
286+
287+
struct vo_chain *vo_c = mpctx->vo_chain;
288+
289+
void *tmp = talloc_new(NULL);
290+
char *graph_str = tile_grid_graph(tmp, grid);
291+
MP_VERBOSE(mpctx, "Tile grid xstack graph: %s\n", graph_str);
292+
293+
struct mp_lavfi *lavfi =
294+
mp_lavfi_create_graph(vo_c->filter->f, 0, false, NULL, NULL, graph_str);
295+
talloc_free(tmp);
296+
297+
if (!lavfi) {
298+
MP_ERR(mpctx, "Failed to create tile grid filtergraph.\n");
299+
goto err_out;
300+
}
301+
302+
struct mp_filter *lavfi_f = lavfi->f;
303+
304+
struct mp_pin *out_pad = mp_filter_get_named_pin(lavfi_f, "vo");
305+
if (!out_pad || mp_pin_get_dir(out_pad) != MP_PIN_OUT) {
306+
MP_ERR(mpctx, "Tile grid filtergraph missing output pin 'vo'.\n");
307+
goto err_out;
308+
}
309+
vo_c->filter_src = out_pad;
310+
mp_pin_connect(vo_c->filter->f->pins[0], vo_c->filter_src);
311+
312+
for (int i = 0; i < grid->nb_tiles; i++) {
313+
struct track *tile_track = find_tile_track(mpctx, grid, i);
314+
if (!tile_track) {
315+
MP_ERR(mpctx, "No track found for tile %d (ff_index %d).\n",
316+
i, grid->tiles[i].ff_index);
317+
goto err_out;
318+
}
319+
320+
tile_track->vo_c = vo_c;
321+
bool result = init_video_decoder(mpctx, tile_track);
322+
// vo_chain_uninit() only unsets vo_c on the primary track
323+
// (vo_c->track).
324+
tile_track->vo_c = NULL;
325+
if (!result)
326+
goto err_out;
327+
328+
char label[16];
329+
snprintf(label, sizeof(label), "in%d", i);
330+
struct mp_pin *in_pad = mp_filter_get_named_pin(lavfi_f, label);
331+
if (!in_pad || mp_pin_get_dir(in_pad) != MP_PIN_IN) {
332+
MP_ERR(mpctx, "Tile grid filtergraph missing input pin '%s'.\n",
333+
label);
334+
goto err_out;
335+
}
336+
tile_track->sink = in_pad;
337+
mp_pin_connect(tile_track->sink, tile_track->dec->f->pins[0]);
338+
}
339+
340+
struct track *primary = find_tile_track(mpctx, grid, 0);
341+
vo_c->track = primary;
342+
primary->vo_c = vo_c;
343+
vo_c->filter->container_fps =
344+
mp_decoder_wrapper_get_container_fps(primary->dec);
345+
vo_c->is_coverart = !!primary->attached_picture;
346+
vo_c->is_sparse = primary->stream->still_image || vo_c->is_coverart;
347+
348+
if (vo_c->is_coverart)
349+
mp_decoder_wrapper_set_coverart_flag(track->dec, true);
350+
351+
MP_VERBOSE(mpctx, "Tile grid: assembling %d tile(s) into %dx%d image.\n",
352+
grid->nb_tiles, grid->width, grid->height);
353+
return;
354+
355+
err_out:
356+
uninit_video_chain(mpctx);
357+
error_on_track(mpctx, track);
358+
handle_force_window(mpctx, true);
359+
}
360+
204361
void reinit_video_chain(struct MPContext *mpctx)
205362
{
206363
struct track *track = mpctx->current_track[0][STREAM_VIDEO];
207364
if (!track || !track->stream) {
208365
error_on_track(mpctx, track);
209366
return;
210367
}
368+
369+
struct mp_tile_grid *grid = track->stream->tile_grid;
370+
if (grid && grid->nb_tiles > 1) {
371+
// Prevent crashing with heif-conformance/C021.heic in FATE.
372+
bool valid = true;
373+
for (int i = 0; i < grid->nb_tiles; i++) {
374+
if (grid->tiles[i].horizontal >= grid->coded_width ||
375+
grid->tiles[i].vertical >= grid->coded_height)
376+
{
377+
valid = false;
378+
break;
379+
}
380+
}
381+
if (valid) {
382+
reinit_video_chain_tiled(mpctx, track);
383+
return;
384+
}
385+
MP_WARN(mpctx, "Tile grid offsets exceed coded canvas (%dx%d) — "
386+
"ignoring tile grid and displaying primary stream only.\n",
387+
grid->coded_width, grid->coded_height);
388+
}
389+
211390
reinit_video_chain_src(mpctx, track);
212391
}
213392

0 commit comments

Comments
 (0)