Final doc touches before release (#650)

NicolasHug · NicolasHug · commit 484e2c8b3e58 · 2025-04-23T14:32:53.000+01:00
diff --git a/README.md b/README.md
@@ -2,10 +2,10 @@
 
 # TorchCodec
 
-TorchCodec is a Python library for decoding videos into PyTorch tensors, on CPU
-and CUDA GPU. It aims to be fast, easy to use, and well integrated into the
-PyTorch ecosystem. If you want to use PyTorch to train ML models on videos,
-TorchCodec is how you turn those videos into data.
+TorchCodec is a Python library for decoding video and audio data into PyTorch
+tensors, on CPU and CUDA GPU. It aims to be fast, easy to use, and well
+integrated into the PyTorch ecosystem. If you want to use PyTorch to train ML
+models on videos and audio, TorchCodec is how you turn these into data.
 
 We achieve these capabilities through:
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -57,13 +57,44 @@
     "sphinx_copybutton",
 ]
 
+
+class CustomGalleryExampleSortKey:
+    # This class defines the order in which our examples appear in
+    # https://pytorch.org/torchcodec/stable/generated_examples/index.html
+    # They would otherwise be sorted alphabetically.
+    #
+    # See https://sphinx-gallery.github.io/stable/configuration.html#sorting-gallery-examples
+    # and https://github.com/sphinx-gallery/sphinx-gallery/blob/master/sphinx_gallery/sorting.py
+    def __init__(self, src_dir):
+        self.src_dir = src_dir
+
+    order = [  # every example file must be listed here, else __call__ raises
+        "basic_example.py",
+        "audio_decoding.py",
+        "basic_cuda_example.py",
+        "file_like.py",
+        "approximate_mode.py",
+        "sampling.py",
+    ]
+
+    def __call__(self, filename):
+        try:
+            return self.order.index(filename)  # position in `order` is the sort key
+        except ValueError as e:
+            raise ValueError(
+                "Looks like you added an example in the examples/ folder? "
+                "You need to specify its order in docs/source/conf.py. Look for CustomGalleryExampleSortKey."
+            ) from e
+
+
 sphinx_gallery_conf = {
     "examples_dirs": "../../examples/",  # path to your example scripts
     "gallery_dirs": "generated_examples",  # path to where to save gallery generated output
     "filename_pattern": ".py",
     "backreferences_dir": "gen_modules/backreferences",
     "doc_module": ("torchcodec",),
     "remove_config_comments": True,
+    "within_subsection_order": CustomGalleryExampleSortKey,
 }
 
 # We override sphinx-gallery's example header to prevent sphinx-gallery from
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -1,10 +1,10 @@
 Welcome to the TorchCodec documentation!
 ========================================
 
-TorchCodec is a Python library for decoding videos into PyTorch tensors, on CPU
-and CUDA GPU. It aims to be fast, easy to use, and well integrated into the
-PyTorch ecosystem. If you want to use PyTorch to train ML models on videos,
-TorchCodec is how you turn those videos into data.
+TorchCodec is a Python library for decoding video and audio data into PyTorch
+tensors, on CPU and CUDA GPU. It aims to be fast, easy to use, and well
+integrated into the PyTorch ecosystem. If you want to use PyTorch to train ML
+models on videos and audio, TorchCodec is how you turn these into data.
 
 We achieve these capabilities through:
 
@@ -36,12 +36,12 @@ We achieve these capabilities through:
         A simple video decoding example
 
      .. grid-item-card:: :octicon:`file-code;1em`
-        Clip sampling
+        Audio decoding
         :img-top: _static/img/card-background.svg
-        :link: generated_examples/sampling.html
+        :link: generated_examples/audio_decoding.html
         :link-type: url
 
-        How to sample regular and random clips from a video
+        A simple audio decoding example
 
      .. grid-item-card:: :octicon:`file-code;1em`
         GPU decoding
@@ -51,6 +51,22 @@ We achieve these capabilities through:
 
         A simple example demonstrating CUDA GPU decoding
 
+     .. grid-item-card:: :octicon:`file-code;1em`
+        Streaming video
+        :img-top: _static/img/card-background.svg
+        :link: generated_examples/file_like.html
+        :link-type: url
+
+        How to efficiently decode videos from the cloud
+
+     .. grid-item-card:: :octicon:`file-code;1em`
+        Clip sampling
+        :img-top: _static/img/card-background.svg
+        :link: generated_examples/sampling.html
+        :link-type: url
+
+        How to sample regular and random clips from a video
+
 .. note::
 
    TorchCodec is still in development stage and we are actively seeking
diff --git a/examples/basic_example.py b/examples/basic_example.py
@@ -93,6 +93,16 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # :class:`~torchcodec.decoders.VideoDecoder`. Frames are always of
 # ``torch.uint8`` dtype.
 #
+# .. note::
+#
+#     If you need to decode multiple frames, we recommend using the batch
+#     methods instead, since they are faster:
+#     :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
+#     :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
+#     :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`, and
+#     :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`. They
+#     are described below.
+
 
 plot(first_frame, "First frame")
 
diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py
@@ -150,6 +150,15 @@ def _getitem_slice(self, key: slice) -> Tensor:
     def __getitem__(self, key: Union[numbers.Integral, slice]) -> Tensor:
         """Return frame or frames as tensors, at the given index or range.
 
+        .. note::
+
+            If you need to decode multiple frames, we recommend using the batch
+            methods instead, since they are faster:
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`, and
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`.
+
         Args:
             key(int or slice): The index or range of frame(s) to retrieve.
 
@@ -171,6 +180,15 @@ def _get_key_frame_indices(self) -> list[int]:
     def get_frame_at(self, index: int) -> Frame:
         """Return a single frame at the given index.
 
+        .. note::
+
+            If you need to decode multiple frames, we recommend using the batch
+            methods instead, since they are faster:
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`, and
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`.
+
         Args:
             index (int): The index of the frame to retrieve.
 
@@ -194,13 +212,6 @@ def get_frame_at(self, index: int) -> Frame:
     def get_frames_at(self, indices: list[int]) -> FrameBatch:
         """Return frames at the given indices.
 
-        .. note::
-
-            Calling this method is more efficient that repeated individual calls
-            to :meth:`~torchcodec.decoders.VideoDecoder.get_frame_at`. This
-            method makes sure not to decode the same frame twice, and also
-            avoids "backwards seek" operations, which are slow.
-
         Args:
             indices (list of int): The indices of the frames to retrieve.
 
@@ -252,6 +263,15 @@ def get_frames_in_range(self, start: int, stop: int, step: int = 1) -> FrameBatc
     def get_frame_played_at(self, seconds: float) -> Frame:
         """Return a single frame played at the given timestamp in seconds.
 
+        .. note::
+
+            If you need to decode multiple frames, we recommend using the batch
+            methods instead, since they are faster:
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at`,
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range`,
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at`, and
+            :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`.
+
         Args:
             seconds (float): The time stamp in seconds when the frame is played.
 
@@ -276,13 +296,6 @@ def get_frame_played_at(self, seconds: float) -> Frame:
     def get_frames_played_at(self, seconds: list[float]) -> FrameBatch:
         """Return frames played at the given timestamps in seconds.
 
-        .. note::
-
-            Calling this method is more efficient that repeated individual calls
-            to :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at`.
-            This method makes sure not to decode the same frame twice, and also
-            avoids "backwards seek" operations, which are slow.
-
         Args:
             seconds (list of float): The timestamps in seconds when the frames are played.