Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 71 additions & 58 deletions src/FFmpegReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,14 @@ FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
: FFmpegReader(path, DurationStrategy::VideoPreferred, inspect_reader) {}

FFmpegReader::FFmpegReader(const std::string &path, DurationStrategy duration_strategy, bool inspect_reader)
: last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
seek_audio_frame_found(0), seek_video_frame_found(0),
last_seek_max_frame(-1), seek_stagnant_count(0),
is_duration_known(false), largest_frame_processed(0),
current_video_frame(0), packet(NULL), duration_strategy(duration_strategy),
audio_pts(0), video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
hold_packet(false) {
: path(path), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
pStream(NULL), aStream(NULL), packet(NULL), pFrame(NULL), is_open(false), is_duration_known(false),
check_interlace(false), check_fps(false), duration_strategy(duration_strategy), previous_packet_location{-1, 0},
is_seeking(false), seeking_pts(0), seeking_frame(0), is_video_seek(true), seek_count(0),
seek_audio_frame_found(0), seek_video_frame_found(0), last_seek_max_frame(-1), seek_stagnant_count(0),
last_frame(0), largest_frame_processed(0), current_video_frame(0), audio_pts(0), video_pts(0),
hold_packet(false), pts_offset_seconds(0.0), audio_pts_seconds(0.0), video_pts_seconds(0.0),
NO_PTS_OFFSET(-99999), enable_seek(true) {

// Initialize FFMpeg, and register all formats and codecs
AV_REGISTER_ALL
Expand Down Expand Up @@ -681,43 +680,35 @@ void FFmpegReader::Open() {
for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
AVStream* st = pFormatCtx->streams[i];
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
// Only inspect the first video stream
for (int j = 0; j < st->nb_side_data; j++) {
AVPacketSideData *sd = &st->side_data[j];

// Handle rotation metadata (unchanged)
if (sd->type == AV_PKT_DATA_DISPLAYMATRIX &&
sd->size >= 9 * sizeof(int32_t) &&
!info.metadata.count("rotate"))
{
double rotation = -av_display_rotation_get(
reinterpret_cast<int32_t *>(sd->data));
if (std::isnan(rotation)) rotation = 0;
info.metadata["rotate"] = std::to_string(rotation);
}
// Handle spherical video metadata
else if (sd->type == AV_PKT_DATA_SPHERICAL) {
// Always mark as spherical
info.metadata["spherical"] = "1";

// Cast the raw bytes to an AVSphericalMapping
const AVSphericalMapping* map =
reinterpret_cast<const AVSphericalMapping*>(sd->data);

// Projection enum → string
const char* proj_name = av_spherical_projection_name(map->projection);
info.metadata["spherical_projection"] = proj_name
? proj_name
: "unknown";

// Convert 16.16 fixed-point to float degrees
auto to_deg = [](int32_t v){
return (double)v / 65536.0;
};
info.metadata["spherical_yaw"] = std::to_string(to_deg(map->yaw));
info.metadata["spherical_pitch"] = std::to_string(to_deg(map->pitch));
info.metadata["spherical_roll"] = std::to_string(to_deg(map->roll));
}
size_t side_data_size = 0;
const uint8_t *displaymatrix = ffmpeg_stream_get_side_data(
st, AV_PKT_DATA_DISPLAYMATRIX, &side_data_size);
if (displaymatrix &&
side_data_size >= 9 * sizeof(int32_t) &&
!info.metadata.count("rotate")) {
double rotation = -av_display_rotation_get(
reinterpret_cast<const int32_t *>(displaymatrix));
if (std::isnan(rotation))
rotation = 0;
info.metadata["rotate"] = std::to_string(rotation);
}

const uint8_t *spherical = ffmpeg_stream_get_side_data(
st, AV_PKT_DATA_SPHERICAL, &side_data_size);
if (spherical && side_data_size >= sizeof(AVSphericalMapping)) {
info.metadata["spherical"] = "1";

const AVSphericalMapping *map =
reinterpret_cast<const AVSphericalMapping *>(spherical);
const char *proj_name = av_spherical_projection_name(map->projection);
info.metadata["spherical_projection"] = proj_name ? proj_name : "unknown";

auto to_deg = [](int32_t v) {
return static_cast<double>(v) / 65536.0;
};
info.metadata["spherical_yaw"] = std::to_string(to_deg(map->yaw));
info.metadata["spherical_pitch"] = std::to_string(to_deg(map->pitch));
info.metadata["spherical_roll"] = std::to_string(to_deg(map->roll));
}
break;
}
Expand Down Expand Up @@ -924,7 +915,7 @@ void FFmpegReader::ApplyDurationStrategy() {
}

void FFmpegReader::UpdateAudioInfo() {
const int codec_channels =
int codec_channels =
#if HAVE_CH_LAYOUT
AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
#else
Expand All @@ -933,9 +924,13 @@ void FFmpegReader::UpdateAudioInfo() {

// Set default audio channel layout (if needed)
#if HAVE_CH_LAYOUT
if (codec_channels > 0 &&
!av_channel_layout_check(&(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout)))
AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout = (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
AVChannelLayout audio_ch_layout = ffmpeg_get_valid_channel_layout(
AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, codec_channels);
if (audio_ch_layout.nb_channels > 0) {
av_channel_layout_uninit(&(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout));
av_channel_layout_copy(&(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout), &audio_ch_layout);
codec_channels = audio_ch_layout.nb_channels;
}
#else
if (codec_channels > 0 && AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout == 0)
AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout = av_get_default_channel_layout(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels);
Expand All @@ -956,8 +951,8 @@ void FFmpegReader::UpdateAudioInfo() {
info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
info.acodec = aCodecCtx->codec->name;
#if HAVE_CH_LAYOUT
info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.u.mask;
info.channels = audio_ch_layout.nb_channels;
info.channel_layout = static_cast<ChannelLayout>(ffmpeg_channel_layout_mask(audio_ch_layout));
#else
info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout;
Expand Down Expand Up @@ -1028,6 +1023,9 @@ void FFmpegReader::UpdateAudioInfo() {
QString str_value = tag->value;
info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
}
#if HAVE_CH_LAYOUT
av_channel_layout_uninit(&audio_ch_layout);
#endif
}

void FFmpegReader::UpdateVideoInfo() {
Expand Down Expand Up @@ -1874,8 +1872,6 @@ void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
int src_height = (pFrame && pFrame->height > 0) ? pFrame->height : info.height;
int height = src_height;
int width = src_width;
int64_t video_length = info.video_length;

// Create or reuse a RGB Frame (since most videos are not in RGB, we must convert it)
AVFrame *pFrameRGB = pFrameRGB_cached;
if (!pFrameRGB) {
Expand Down Expand Up @@ -2204,8 +2200,12 @@ void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
if (!avr) {
avr = SWR_ALLOC();
#if HAVE_CH_LAYOUT
av_opt_set_chlayout(avr, "in_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
av_opt_set_chlayout(avr, "out_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
AVChannelLayout input_layout = ffmpeg_get_valid_channel_layout(
AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, info.channels);
AVChannelLayout output_layout = ffmpeg_get_valid_channel_layout(
input_layout, info.channels);
int in_layout_err = av_opt_set_chlayout(avr, "in_chlayout", &input_layout, 0);
int out_layout_err = av_opt_set_chlayout(avr, "out_chlayout", &output_layout, 0);
#else
av_opt_set_int(avr, "in_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
av_opt_set_int(avr, "out_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
Expand All @@ -2216,8 +2216,21 @@ void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
av_opt_set_int(avr, "in_sample_rate", info.sample_rate, 0);
av_opt_set_int(avr, "out_sample_rate", info.sample_rate, 0);
SWR_INIT(avr);
avr_ctx = avr;
int swr_init_err = SWR_INIT(avr);
#if HAVE_CH_LAYOUT
av_channel_layout_uninit(&input_layout);
av_channel_layout_uninit(&output_layout);
if (in_layout_err < 0 || out_layout_err < 0 || swr_init_err < 0) {
SWR_FREE(&avr);
throw InvalidChannels("Could not initialize FFmpeg audio channel layout or resampler.", path);
}
#else
if (swr_init_err < 0) {
SWR_FREE(&avr);
throw InvalidChannels("Could not initialize FFmpeg audio resampler.", path);
}
#endif
avr_ctx = avr;
}

// Convert audio samples
Expand Down
119 changes: 111 additions & 8 deletions src/FFmpegUtilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,122 @@ inline static const std::string av_err2string(int errnum)
#ifndef PIX_FMT_YUV444P
#define PIX_FMT_YUV444P AV_PIX_FMT_YUV444P
#endif
// Compatibility shim: FFmpeg 6.1 renamed the FF_PROFILE_* codec-profile
// constants to AV_PROFILE_* (the FF_ names are deprecated and later removed).
// Map the new names onto the old ones when building against older FFmpeg.
#ifndef AV_PROFILE_H264_BASELINE
#ifdef FF_PROFILE_H264_BASELINE
#define AV_PROFILE_H264_BASELINE FF_PROFILE_H264_BASELINE
#endif
#endif
#ifndef AV_PROFILE_H264_CONSTRAINED
#ifdef FF_PROFILE_H264_CONSTRAINED
#define AV_PROFILE_H264_CONSTRAINED FF_PROFILE_H264_CONSTRAINED
#endif
#endif
#ifndef AV_PROFILE_H264_CONSTRAINED_BASELINE
// Prefer composing from the two flags (this is exactly how FFmpeg itself
// defines CONSTRAINED_BASELINE); fall back to the legacy FF_ constant.
#if defined(AV_PROFILE_H264_BASELINE) && defined(AV_PROFILE_H264_CONSTRAINED)
#define AV_PROFILE_H264_CONSTRAINED_BASELINE (AV_PROFILE_H264_BASELINE | AV_PROFILE_H264_CONSTRAINED)
#elif defined(FF_PROFILE_H264_CONSTRAINED_BASELINE)
#define AV_PROFILE_H264_CONSTRAINED_BASELINE FF_PROFILE_H264_CONSTRAINED_BASELINE
#endif
#endif

// Does the given FFmpeg pixel format contain an alpha channel?
// Returns false for unknown/invalid pixel formats: av_pix_fmt_desc_get()
// returns NULL for those, and the original code dereferenced it unchecked.
inline static bool ffmpeg_has_alpha(PixelFormat pix_fmt) {
	const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(pix_fmt);
	return fmt_desc && (fmt_desc->flags & AV_PIX_FMT_FLAG_ALPHA);
}

// Allocate an AVCodecContext and populate it from the stream's codec
// parameters.
// Returns NULL on allocation failure OR when the parameters cannot be
// applied (avcodec_parameters_to_context() < 0); in the latter case the
// partially-initialized context is freed, so callers never receive a
// context whose parameters were silently dropped.
inline static AVCodecContext* ffmpeg_get_codec_context(AVStream *av_stream, const AVCodec *av_codec) {
	AVCodecContext *context = avcodec_alloc_context3(av_codec);
	if (context && avcodec_parameters_to_context(context, av_stream->codecpar) < 0)
		avcodec_free_context(&context);  // frees and resets context to NULL
	return context;
}

#if HAVE_CH_LAYOUT
// Build the default (native-order) channel layout for a channel count.
// Returns a zeroed/empty layout when channels <= 0.
inline static AVChannelLayout ffmpeg_default_channel_layout(int channels) {
	AVChannelLayout layout = {};
	if (channels > 0)
		av_channel_layout_default(&layout, channels);
	return layout;
}

// Return a usable copy of 'layout', or the default layout for
// 'fallback_channels' when 'layout' is empty/invalid — or when copying
// fails (av_channel_layout_copy() can return ENOMEM for custom-order
// layouts and leaves the destination uninitialized; the original code
// ignored that and could return an uninitialized layout).
// Caller owns the result and must av_channel_layout_uninit() it.
inline static AVChannelLayout ffmpeg_get_valid_channel_layout(
		const AVChannelLayout &layout, int fallback_channels) {
	AVChannelLayout normalized = {};
	if (layout.nb_channels > 0 && av_channel_layout_check(&layout) == 1) {
		if (av_channel_layout_copy(&normalized, &layout) == 0)
			return normalized;
		// Copy failed: fall through to the default layout below.
	}
	return ffmpeg_default_channel_layout(fallback_channels);
}

// Native-order channel bitmask for a layout (0 for non-native orders,
// e.g. custom/ambisonic, which have no single mask representation).
inline static uint64_t ffmpeg_channel_layout_mask(const AVChannelLayout &layout) {
	return layout.order == AV_CHANNEL_ORDER_NATIVE ? layout.u.mask : 0;
}
#endif

// Access stream side data across FFmpeg API changes.
// Looks up one side-data entry of 'type' on 'stream' and returns a pointer
// to its bytes (owned by the stream — do not free), or nullptr if absent.
// If 'size' is non-null it receives the entry's byte size (0 when absent).
inline static const uint8_t* ffmpeg_stream_get_side_data(
		const AVStream *stream, enum AVPacketSideDataType type, size_t *size) {
#if (LIBAVFORMAT_VERSION_MAJOR >= 61)
	// FFmpeg 7+: per-stream side data moved to codecpar->coded_side_data;
	// query it with av_packet_side_data_get().
	const AVPacketSideData *side_data = av_packet_side_data_get(
		stream->codecpar->coded_side_data,
		stream->codecpar->nb_coded_side_data,
		type);
	if (!side_data) {
		if (size)
			*size = 0;
		return nullptr;
	}
	if (size)
		*size = side_data->size;
	return side_data->data;
#else
	// Older FFmpeg: av_stream_get_side_data() still exists but is marked
	// deprecated in some versions — silence that warning locally.
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#if (LIBAVFORMAT_VERSION_MAJOR >= 59)
	// libavformat 59/60: the size out-parameter is already size_t.
	const uint8_t *data = av_stream_get_side_data(stream, type, size);
#else
	// Pre-59: the size out-parameter was int; bridge it to size_t,
	// clamping a negative (invalid) size to 0.
	int old_size = 0;
	const uint8_t *data = av_stream_get_side_data(stream, type, &old_size);
	if (size)
		*size = old_size >= 0 ? static_cast<size_t>(old_size) : 0;
#endif
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
	return data;
#endif
}

// Attach a side-data buffer to a stream, across FFmpeg API changes.
// Returns 0 on success or a negative AVERROR code on failure; on success
// ownership of 'data' passes to the stream.
inline static int ffmpeg_stream_add_side_data(
		AVStream *stream, enum AVPacketSideDataType type, uint8_t *data, size_t size) {
#if (LIBAVFORMAT_VERSION_MAJOR >= 61)
	// FFmpeg 7+: side data is stored on codecpar->coded_side_data.
	const AVPacketSideData *added = av_packet_side_data_add(
		&stream->codecpar->coded_side_data,
		&stream->codecpar->nb_coded_side_data,
		type, data, size, 0);
	if (added)
		return 0;
	return AVERROR(ENOMEM);
#else
	// Older FFmpeg: use the per-stream helper, muting its deprecation
	// warning on GCC/Clang.
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
	const int ret = av_stream_add_side_data(stream, type, data, size);
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
	return ret;
#endif
}

// FFmpeg's libavutil/common.h defines an RSHIFT incompatible with Ruby's
// definition in ruby/config.h, so we move it to FF_RSHIFT
#ifdef RSHIFT
Expand Down Expand Up @@ -171,10 +280,7 @@ inline static bool ffmpeg_has_alpha(PixelFormat pix_fmt) {
#define AV_FREE_CONTEXT(av_context) avcodec_free_context(&av_context)
#define AV_GET_CODEC_TYPE(av_stream) av_stream->codecpar->codec_type
#define AV_FIND_DECODER_CODEC_ID(av_stream) av_stream->codecpar->codec_id
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec) \
({ AVCodecContext *context = avcodec_alloc_context3(av_codec); \
avcodec_parameters_to_context(context, av_stream->codecpar); \
context; })
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec) ffmpeg_get_codec_context(av_stream, av_codec)
#define AV_GET_CODEC_PAR_CONTEXT(av_stream, av_codec) av_codec;
#define AV_GET_CODEC_FROM_STREAM(av_stream,codec_in)
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context) av_stream->codecpar
Expand Down Expand Up @@ -209,10 +315,7 @@ inline static bool ffmpeg_has_alpha(PixelFormat pix_fmt) {
#define AV_FREE_CONTEXT(av_context) avcodec_free_context(&av_context)
#define AV_GET_CODEC_TYPE(av_stream) av_stream->codecpar->codec_type
#define AV_FIND_DECODER_CODEC_ID(av_stream) av_stream->codecpar->codec_id
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec) \
({ AVCodecContext *context = avcodec_alloc_context3(av_codec); \
avcodec_parameters_to_context(context, av_stream->codecpar); \
context; })
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec) ffmpeg_get_codec_context(av_stream, av_codec)
#define AV_GET_CODEC_PAR_CONTEXT(av_stream, av_codec) av_codec;
#define AV_GET_CODEC_FROM_STREAM(av_stream,codec_in)
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context) av_stream->codecpar
Expand Down
5 changes: 3 additions & 2 deletions src/FFmpegWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ void FFmpegWriter::open_video(AVFormatContext *oc, AVStream *st) {
switch (video_codec_ctx->codec_id) {
case AV_CODEC_ID_H264:
video_codec_ctx->max_b_frames = 0; // At least this GPU doesn't support b-frames
video_codec_ctx->profile = FF_PROFILE_H264_BASELINE | FF_PROFILE_H264_CONSTRAINED;
video_codec_ctx->profile = AV_PROFILE_H264_CONSTRAINED_BASELINE;
av_opt_set(video_codec_ctx->priv_data, "preset", "slow", 0);
av_opt_set(video_codec_ctx->priv_data, "tune", "zerolatency", 0);
av_opt_set(video_codec_ctx->priv_data, "vprofile", "baseline", AV_OPT_SEARCH_CHILDREN);
Expand Down Expand Up @@ -2400,6 +2400,7 @@ void FFmpegWriter::AddSphericalMetadata(const std::string& projection, float yaw
map->pitch = static_cast<int32_t>(pitch_deg * (1 << 16));
map->roll = static_cast<int32_t>(roll_deg * (1 << 16));

av_stream_add_side_data(video_st, AV_PKT_DATA_SPHERICAL, reinterpret_cast<uint8_t*>(map), sd_size);
ffmpeg_stream_add_side_data(video_st, AV_PKT_DATA_SPHERICAL,
reinterpret_cast<uint8_t*>(map), sd_size);
#endif
}
Loading
Loading