From 2988f31dc83bfa16a59c6015fb1c761166322c7b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 15 May 2025 11:17:25 +0100 Subject: [PATCH] WIP --- src/torchcodec/_core/_metadata.py | 14 +++++++++++++- test/test_decoders.py | 25 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/torchcodec/_core/_metadata.py b/src/torchcodec/_core/_metadata.py index 93c4448c..f2cb6f14 100644 --- a/src/torchcodec/_core/_metadata.py +++ b/src/torchcodec/_core/_metadata.py @@ -46,7 +46,16 @@ def __repr__(self): @dataclass class VideoStreamMetadata(StreamMetadata): - """Metadata of a single video stream.""" + """Metadata of a single video stream. + + .. note:: + In general the ``*from_content`` metadata are more accurate than their + ``*from_header`` counterparts, since headers may sometimes be incorrect. + In some rare cases however, e.g. when a video is incorrectly encoded + with wrong packet :term:`pts` values, it's possible for the + ``*from_content`` metadata to be misleading. + + """ begin_stream_seconds_from_content: Optional[float] """Beginning of the stream, in seconds (float or None). 
@@ -142,6 +151,9 @@ def average_fps(self) -> Optional[float]: self.end_stream_seconds_from_content is None or self.begin_stream_seconds_from_content is None or self.num_frames is None + # Avoid ZeroDivisionError + or self.end_stream_seconds_from_content + == self.begin_stream_seconds_from_content ): return self.average_fps_from_header return self.num_frames / ( diff --git a/test/test_decoders.py b/test/test_decoders.py index ddd35ff3..0f643e38 100644 --- a/test/test_decoders.py +++ b/test/test_decoders.py @@ -986,6 +986,31 @@ def get_some_frames(decoder): assert_frames_equal(ref_frame3, frames[1].data) assert_frames_equal(ref_frame5, frames[2].data) + def test_video_with_bad_packet_pts_metadata(self): + path = "/home/nicolashug/Downloads/sample_video_2.avi" + + approx_metadata = VideoDecoder(path, seek_mode="approximate").metadata + exact_metadata = VideoDecoder(path, seek_mode="exact").metadata + + INT64_MIN_SECONDS = -3.0830212936561926e17 + + assert approx_metadata.duration_seconds_from_header == 9.02507 + assert approx_metadata.duration_seconds == 9.02507 + assert exact_metadata.duration_seconds_from_header == 9.02507 + assert exact_metadata.duration_seconds == 0 + + assert approx_metadata.begin_stream_seconds_from_header == 0 + assert approx_metadata.begin_stream_seconds_from_content is None + assert approx_metadata.begin_stream_seconds == 0 + assert exact_metadata.begin_stream_seconds_from_header == 0 + assert exact_metadata.begin_stream_seconds_from_content == INT64_MIN_SECONDS + assert exact_metadata.begin_stream_seconds == INT64_MIN_SECONDS + + assert approx_metadata.end_stream_seconds_from_content is None + assert approx_metadata.end_stream_seconds == 9.02507 + assert exact_metadata.end_stream_seconds_from_content == INT64_MIN_SECONDS + assert exact_metadata.end_stream_seconds == INT64_MIN_SECONDS + class TestAudioDecoder: @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3, SINE_MONO_S32))