
Refactor and add benchmarks #330


Merged 3 commits on Nov 4, 2024
34 changes: 19 additions & 15 deletions benchmarks/decoders/benchmark_decoders.py
@@ -14,10 +14,11 @@
plot_data,
run_benchmarks,
TorchAudioDecoder,
TorchcodecCompiled,
TorchCodecNonCompiledBatch,
TorchcodecNonCompiledWithOptions,
TVNewAPIDecoderWithBackend,
TorchCodecCoreCompiled,
TorchCodecCoreBatch,
TorchCodecCore,
TorchCodecPublic,
TorchVision,
)


@@ -70,7 +71,7 @@ def main() -> None:
"For torchcodec, you can specify options with tcoptions:<plus-separated-options>. "
),
type=str,
default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,tcoptions:num_threads=1",
default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,torchcodec_public,tcoptions:num_threads=1,tcbatchoptions:",
)
parser.add_argument(
"--bm_video_dir",
@@ -98,14 +99,16 @@ def main() -> None:
DecordNonBatchDecoderAccurateSeek()
)
elif decoder == "torchcodec":
decoder_dict["TorchCodecNonCompiled"] = TorchcodecNonCompiledWithOptions()
decoder_dict["TorchCodecCore:"] = TorchCodecCore()
elif decoder == "torchcodec_compiled":
decoder_dict["TorchcodecCompiled"] = TorchcodecCompiled()
decoder_dict["TorchCodecCoreCompiled"] = TorchCodecCoreCompiled()
elif decoder == "torchcodec_public":
decoder_dict["TorchCodecPublic"] = TorchCodecPublic()
elif decoder == "torchvision":
decoder_dict["TVNewAPIDecoderWithBackendVideoReader"] = (
decoder_dict["TorchVision[backend=video_reader]"] = (
Contributor
@NicolasHug can tell you about torchvision's APIs and what the name should be for this. This is a specific API -- I have heard him refer to it as the new API.

Member
The naming in torchvision is awful. Basically, the VideoReader class and the "video_reader" backend are 2 orthogonal concepts with the same name.

There are 2 main Python decoding APIs:

  • read_video(), in io/video.py.
  • VideoReader(). It is considered the more "fine-grained" and the "new" API (although it's not that new at this point, just more recent).

Both APIs support the "pyav" and "video_reader" backends, where the "video_reader" backend just means the torchvision CPU backend. VideoReader() also supposedly supports the "cuda" backend.
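
To make the distinction concrete, here is a minimal sketch of the two decoding paths described above (not part of the PR; the file name and seek target are placeholders, and only CPU decoding is shown):

    import torchvision
    from torchvision.io import read_video, VideoReader

    # "Old" API: decode a whole clip (or a pts range) in one call.
    frames, _audio, info = read_video("video.mp4", pts_unit="sec")

    # "New", fine-grained API: pick the backend, then seek and iterate.
    torchvision.set_video_backend("video_reader")  # i.e. the torchvision CPU backend
    reader = VideoReader("video.mp4", "video")
    reader.seek(2.0)
    frame = next(reader)  # dict with "data" and "pts" entries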

Contributor Author
In the benchmark, we're calling the VideoReader class and providing it with the "video_reader" backend. I think we should name the TorchVision decoder after the config option that makes the most difference for performance, and the backend seems to be the more important one here.

Member
Yes, I agree.

# We don't compare TorchVision's "pyav" backend because it doesn't support
# accurate seeks.
TVNewAPIDecoderWithBackend("video_reader")
TorchVision("video_reader")
)
elif decoder == "torchaudio":
decoder_dict["TorchAudioDecoder"] = TorchAudioDecoder()
@@ -117,8 +120,8 @@ def main() -> None:
continue
k, v = item.split("=")
kwargs_dict[k] = v
decoder_dict["TorchCodecNonCompiledBatch:" + options] = (
TorchCodecNonCompiledBatch(**kwargs_dict)
decoder_dict["TorchCodecCoreBatch" + options] = (
TorchCodecCoreBatch(**kwargs_dict)
)
elif decoder.startswith("tcoptions:"):
options = decoder[len("tcoptions:") :]
@@ -128,8 +131,8 @@ def main() -> None:
continue
k, v = item.split("=")
kwargs_dict[k] = v
decoder_dict["TorchcodecNonCompiled:" + options] = (
TorchcodecNonCompiledWithOptions(**kwargs_dict)
decoder_dict["TorchCodecCore:" + options] = (
TorchCodecCore(**kwargs_dict)
)
video_paths = args.bm_video_paths.split(",")
if args.bm_video_dir:
@@ -142,8 +145,9 @@ def main() -> None:
decoder_dict,
video_paths,
num_uniform_samples,
args.bm_video_speed_min_run_seconds,
args.bm_video_creation,
num_sequential_frames_from_start=[1, 10, 100],
min_runtime_seconds=args.bm_video_speed_min_run_seconds,
benchmark_video_creation=args.bm_video_creation,
)
plot_data(df_data, args.plot_path)

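As an aside on how these decoder strings are consumed: each comma-separated entry selects a decoder, and "tcoptions:" / "tcbatchoptions:" entries carry plus-separated key=value pairs that become constructor kwargs. Below is a standalone sketch of that parsing, assumed from the help text and the hunks above rather than copied from the PR (the option value is illustrative):

    # Hypothetical decoder spec; "num_threads" is a kwarg accepted by TorchCodecCore.
    spec = "tcoptions:num_threads=1"
    options = spec[len("tcoptions:"):]
    kwargs_dict = {}
    for item in options.split("+"):  # "plus-separated-options" per the help text
        if not item:
            continue
        k, v = item.split("=")
        kwargs_dict[k] = v
    # kwargs_dict == {"num_threads": "1"}; main() then passes it as TorchCodecCore(**kwargs_dict).
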
148 changes: 89 additions & 59 deletions benchmarks/decoders/benchmark_decoders_library.py
@@ -12,7 +12,7 @@

import torch
import torch.utils.benchmark as benchmark
from torchcodec.decoders import VideoDecoder
from torchcodec.decoders import VideoDecoder, VideoStreamMetadata

from torchcodec.decoders._core import (
_add_video_stream,
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
return frames


class TVNewAPIDecoderWithBackend(AbstractDecoder):
class TorchVision(AbstractDecoder):
def __init__(self, backend):
self._backend = backend
self._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
return frames


class TorchcodecNonCompiledWithOptions(AbstractDecoder):
class TorchCodecCore(AbstractDecoder):
def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
self._print_each_iteration_time = False
self._num_threads = int(num_threads) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
return frames


class TorchCodecNonCompiledBatch(AbstractDecoder):
class TorchCodecCoreBatch(AbstractDecoder):
def __init__(self, num_threads=None, color_conversion_library=None):
self._print_each_iteration_time = False
self._num_threads = int(num_threads) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
)
return frames

class TorchCodecPublic(AbstractDecoder):
def __init__(self, num_ffmpeg_threads=None):
self._num_ffmpeg_threads = int(num_ffmpeg_threads) if num_ffmpeg_threads else None

def get_frames_from_video(self, video_file, pts_list):
decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
return decoder.get_frames_played_at(pts_list)

def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
frames = []
count = 0
for frame in decoder:
frames.append(frame)
count += 1
if count == numFramesToDecode:
break
return frames

@torch.compile(fullgraph=True, backend="eager")
def compiled_seek_and_next(decoder, pts):
@@ -239,7 +257,7 @@ def compiled_next(decoder):
return get_next_frame(decoder)


class TorchcodecCompiled(AbstractDecoder):
class TorchCodecCoreCompiled(AbstractDecoder):
def __init__(self):
pass

@@ -450,70 +468,83 @@ def plot_data(df_data, plot_path):
plot_path,
)

def get_metadata(video_file_path: str) -> VideoStreamMetadata:
return VideoDecoder(video_file_path).metadata

def run_benchmarks(
decoder_dict,
video_files_paths,
num_uniform_samples,
min_runtime_seconds,
benchmark_video_creation,
decoder_dict: dict[str, AbstractDecoder],
video_files_paths: list[str],
num_samples: int,
num_sequential_frames_from_start: list[int],
min_runtime_seconds: float,
benchmark_video_creation: bool,
) -> list[dict[str, str | float | int]]:
# Ensure that we have the same seed across benchmark runs.
torch.manual_seed(0)

print(f"video_files_paths={video_files_paths}")

results = []
df_data = []
print(f"video_files_paths={video_files_paths}")
verbose = False
for decoder_name, decoder in decoder_dict.items():
for video_file_path in video_files_paths:
for video_file_path in video_files_paths:
metadata = get_metadata(video_file_path)
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"

duration = metadata.duration_seconds
uniform_pts_list = [
i * duration / num_samples for i in range(num_samples)
]

# Note that we are using the same random pts values for all decoders for the same
# video. However, because we use the duration as part of this calculation, we
# are using different random pts values across videos.
random_pts_list = (torch.rand(num_samples) * duration).tolist()

for decoder_name, decoder in decoder_dict.items():
print(f"video={video_file_path}, decoder={decoder_name}")
# We only use the VideoDecoder to get the metadata and get
# the list of PTS values to seek to.
simple_decoder = VideoDecoder(video_file_path)
duration = simple_decoder.metadata.duration_seconds
pts_list = [
i * duration / num_uniform_samples for i in range(num_uniform_samples)
]
metadata = simple_decoder.metadata
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
if verbose:
print(
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"

for kind, pts_list in [("uniform", uniform_pts_list), ("random", random_pts_list)]:
if verbose:
print(
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
)
seeked_result = benchmark.Timer(
stmt="decoder.get_frames_from_video(video_file, pts_list)",
globals={
"video_file": video_file_path,
"pts_list": pts_list,
"decoder": decoder,
},
label=f"video={video_file_path} {metadata_label}",
sub_label=decoder_name,
description=f"{kind} {num_samples} seek()+next()",
)
seeked_result = benchmark.Timer(
stmt="decoder.get_frames_from_video(video_file, pts_list)",
globals={
"video_file": video_file_path,
"pts_list": pts_list,
"decoder": decoder,
},
label=f"video={video_file_path} {metadata_string}",
sub_label=decoder_name,
description=f"{num_uniform_samples} seek()+next()",
)
results.append(
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
)
df_item = {}
df_item["decoder"] = decoder_name
df_item["video"] = video_file_path
df_item["description"] = results[-1].description
df_item["frame_count"] = num_uniform_samples
df_item["median"] = results[-1].median
df_item["iqr"] = results[-1].iqr
df_item["type"] = "seek()+next()"
df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median
df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75
df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25
df_data.append(df_item)

for num_consecutive_nexts in [1, 10]:
results.append(
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
)
df_item = {}
df_item["decoder"] = decoder_name
df_item["video"] = video_file_path
df_item["description"] = results[-1].description
df_item["frame_count"] = num_samples
df_item["median"] = results[-1].median
df_item["iqr"] = results[-1].iqr
df_item["type"] = f"{kind}:seek()+next()"
df_item["fps"] = 1.0 * num_samples / results[-1].median
df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
df_data.append(df_item)

for num_consecutive_nexts in num_sequential_frames_from_start:
consecutive_frames_result = benchmark.Timer(
stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
globals={
"video_file": video_file_path,
"consecutive_frames_to_extract": num_consecutive_nexts,
"decoder": decoder,
},
label=f"video={video_file_path} {metadata_string}",
label=f"video={video_file_path} {metadata_label}",
sub_label=decoder_name,
description=f"{num_consecutive_nexts} next()",
)
@@ -537,17 +568,16 @@ def run_benchmarks(

first_video_file_path = video_files_paths[0]
if benchmark_video_creation:
simple_decoder = VideoDecoder(first_video_file_path)
metadata = simple_decoder.metadata
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
metadata = get_metadata(video_file_path)
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
creation_result = benchmark.Timer(
stmt="create_torchcodec_decoder_from_file(video_file)",
globals={
"video_file": first_video_file_path,
"create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
},
label=f"video={first_video_file_path} {metadata_string}",
sub_label="TorchcodecNonCompiled",
label=f"video={first_video_file_path} {metadata_label}",
sub_label="TorchCodecCore:",
description="create()+next()",
)
results.append(
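
For reference, the public API that the new TorchCodecPublic wrapper and get_metadata() exercise looks roughly like this; a sketch based on the calls visible in the diff, with a placeholder file path and pts values:

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("video.mp4", num_ffmpeg_threads=1)
    meta = decoder.metadata  # codec, width, height, duration_seconds, average_fps, ...

    # Random access: decode the frames played at the given timestamps (seconds).
    batch = decoder.get_frames_played_at([0.0, meta.duration_seconds / 2])

    # Sequential access: iterate frames from the start of the video.
    first_frames = [frame for _, frame in zip(range(10), decoder)]
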
Binary file modified benchmarks/decoders/benchmark_readme_chart.png