Skip to content

Commit 6028420

Browse files
authored
Refactor and add benchmarks (pytorch#330)
1 parent b979201 commit 6028420

5 files changed

+211
-176
lines changed

benchmarks/decoders/benchmark_decoders.py

+19-15
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
plot_data,
1515
run_benchmarks,
1616
TorchAudioDecoder,
17-
TorchcodecCompiled,
18-
TorchCodecNonCompiledBatch,
19-
TorchcodecNonCompiledWithOptions,
20-
TVNewAPIDecoderWithBackend,
17+
TorchCodecCoreCompiled,
18+
TorchCodecCoreBatch,
19+
TorchCodecCore,
20+
TorchCodecPublic,
21+
TorchVision,
2122
)
2223

2324

@@ -70,7 +71,7 @@ def main() -> None:
7071
"For torchcodec, you can specify options with tcoptions:<plus-separated-options>. "
7172
),
7273
type=str,
73-
default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,tcoptions:num_threads=1",
74+
default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,torchcodec_public,tcoptions:num_threads=1,tcbatchoptions:",
7475
)
7576
parser.add_argument(
7677
"--bm_video_dir",
@@ -98,14 +99,16 @@ def main() -> None:
9899
DecordNonBatchDecoderAccurateSeek()
99100
)
100101
elif decoder == "torchcodec":
101-
decoder_dict["TorchCodecNonCompiled"] = TorchcodecNonCompiledWithOptions()
102+
decoder_dict["TorchCodecCore:"] = TorchCodecCore()
102103
elif decoder == "torchcodec_compiled":
103-
decoder_dict["TorchcodecCompiled"] = TorchcodecCompiled()
104+
decoder_dict["TorchCodecCoreCompiled"] = TorchCodecCoreCompiled()
105+
elif decoder == "torchcodec_public":
106+
decoder_dict["TorchCodecPublic"] = TorchCodecPublic()
104107
elif decoder == "torchvision":
105-
decoder_dict["TVNewAPIDecoderWithBackendVideoReader"] = (
108+
decoder_dict["TorchVision[backend=video_reader]"] = (
106109
# We don't compare TorchVision's "pyav" backend because it doesn't support
107110
# accurate seeks.
108-
TVNewAPIDecoderWithBackend("video_reader")
111+
TorchVision("video_reader")
109112
)
110113
elif decoder == "torchaudio":
111114
decoder_dict["TorchAudioDecoder"] = TorchAudioDecoder()
@@ -117,8 +120,8 @@ def main() -> None:
117120
continue
118121
k, v = item.split("=")
119122
kwargs_dict[k] = v
120-
decoder_dict["TorchCodecNonCompiledBatch:" + options] = (
121-
TorchCodecNonCompiledBatch(**kwargs_dict)
123+
decoder_dict["TorchCodecCoreBatch" + options] = (
124+
TorchCodecCoreBatch(**kwargs_dict)
122125
)
123126
elif decoder.startswith("tcoptions:"):
124127
options = decoder[len("tcoptions:") :]
@@ -128,8 +131,8 @@ def main() -> None:
128131
continue
129132
k, v = item.split("=")
130133
kwargs_dict[k] = v
131-
decoder_dict["TorchcodecNonCompiled:" + options] = (
132-
TorchcodecNonCompiledWithOptions(**kwargs_dict)
134+
decoder_dict["TorchCodecCore:" + options] = (
135+
TorchCodecCore(**kwargs_dict)
133136
)
134137
video_paths = args.bm_video_paths.split(",")
135138
if args.bm_video_dir:
@@ -142,8 +145,9 @@ def main() -> None:
142145
decoder_dict,
143146
video_paths,
144147
num_uniform_samples,
145-
args.bm_video_speed_min_run_seconds,
146-
args.bm_video_creation,
148+
num_sequential_frames_from_start=[1, 10, 100],
149+
min_runtime_seconds=args.bm_video_speed_min_run_seconds,
150+
benchmark_video_creation=args.bm_video_creation,
147151
)
148152
plot_data(df_data, args.plot_path)
149153

benchmarks/decoders/benchmark_decoders_library.py

+89-59
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
import torch
1414
import torch.utils.benchmark as benchmark
15-
from torchcodec.decoders import VideoDecoder
15+
from torchcodec.decoders import VideoDecoder, VideoStreamMetadata
1616

1717
from torchcodec.decoders._core import (
1818
_add_video_stream,
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
7878
return frames
7979

8080

81-
class TVNewAPIDecoderWithBackend(AbstractDecoder):
81+
class TorchVision(AbstractDecoder):
8282
def __init__(self, backend):
8383
self._backend = backend
8484
self._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
125125
return frames
126126

127127

128-
class TorchcodecNonCompiledWithOptions(AbstractDecoder):
128+
class TorchCodecCore(AbstractDecoder):
129129
def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
130130
self._print_each_iteration_time = False
131131
self._num_threads = int(num_threads) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
186186
return frames
187187

188188

189-
class TorchCodecNonCompiledBatch(AbstractDecoder):
189+
class TorchCodecCoreBatch(AbstractDecoder):
190190
def __init__(self, num_threads=None, color_conversion_library=None):
191191
self._print_each_iteration_time = False
192192
self._num_threads = int(num_threads) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
227227
)
228228
return frames
229229

230+
class TorchCodecPublic(AbstractDecoder):
    """Benchmark adapter that decodes through the public ``VideoDecoder`` class.

    Each method builds a fresh ``VideoDecoder`` for the given file, so decoder
    construction is part of whatever the caller times.
    """

    def __init__(self, num_ffmpeg_threads=None):
        """Store the thread-count option that is forwarded to ``VideoDecoder``.

        Args:
            num_ffmpeg_threads: Value convertible with ``int()``. Any falsy
                value (``None``, ``""``, ``0``) is stored as ``None``.
                NOTE(review): presumably this may arrive as a string when
                parsed from command-line options -- confirm against callers.
        """
        self._num_ffmpeg_threads = (
            int(num_ffmpeg_threads) if num_ffmpeg_threads else None
        )

    def get_frames_from_video(self, video_file, pts_list):
        """Return ``decoder.get_frames_played_at(pts_list)`` for *video_file*.

        Args:
            video_file: Passed unchanged as the first ``VideoDecoder`` argument.
            pts_list: Passed unchanged to ``get_frames_played_at``.
        """
        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
        return decoder.get_frames_played_at(pts_list)

    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        """Return a list of the first *numFramesToDecode* frames of *video_file*.

        Fewer frames are returned if iteration over the decoder ends early.
        A count of zero or less yields an empty list; the previous manual
        counter never matched such a count and therefore decoded every frame.
        """
        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
        # ``range`` comes first so that zip stops as soon as the requested
        # count is reached, without pulling one extra frame from the decoder.
        return [frame for _, frame in zip(range(numFramesToDecode), decoder)]
230248

231249
@torch.compile(fullgraph=True, backend="eager")
232250
def compiled_seek_and_next(decoder, pts):
@@ -239,7 +257,7 @@ def compiled_next(decoder):
239257
return get_next_frame(decoder)
240258

241259

242-
class TorchcodecCompiled(AbstractDecoder):
260+
class TorchCodecCoreCompiled(AbstractDecoder):
243261
def __init__(self):
244262
pass
245263

@@ -450,70 +468,83 @@ def plot_data(df_data, plot_path):
450468
plot_path,
451469
)
452470

471+
def get_metadata(video_file_path: str) -> VideoStreamMetadata:
    """Open *video_file_path* with ``VideoDecoder`` and return its ``metadata``."""
    decoder = VideoDecoder(video_file_path)
    return decoder.metadata
453473

454474
def run_benchmarks(
455-
decoder_dict,
456-
video_files_paths,
457-
num_uniform_samples,
458-
min_runtime_seconds,
459-
benchmark_video_creation,
475+
decoder_dict: dict[str, AbstractDecoder],
476+
video_files_paths: list[str],
477+
num_samples: int,
478+
num_sequential_frames_from_start: list[int],
479+
min_runtime_seconds: float,
480+
benchmark_video_creation: bool,
460481
) -> list[dict[str, str | float | int]]:
482+
# Ensure that we have the same seed across benchmark runs.
483+
torch.manual_seed(0)
484+
485+
print(f"video_files_paths={video_files_paths}")
486+
461487
results = []
462488
df_data = []
463-
print(f"video_files_paths={video_files_paths}")
464489
verbose = False
465-
for decoder_name, decoder in decoder_dict.items():
466-
for video_file_path in video_files_paths:
490+
for video_file_path in video_files_paths:
491+
metadata = get_metadata(video_file_path)
492+
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
493+
494+
duration = metadata.duration_seconds
495+
uniform_pts_list = [
496+
i * duration / num_samples for i in range(num_samples)
497+
]
498+
499+
# Note that we are using the same random pts values for all decoders for the same
500+
# video. However, because we use the duration as part of this calculation, we
501+
# are using different random pts values across videos.
502+
random_pts_list = (torch.rand(num_samples) * duration).tolist()
503+
504+
for decoder_name, decoder in decoder_dict.items():
467505
print(f"video={video_file_path}, decoder={decoder_name}")
468-
# We only use the VideoDecoder to get the metadata and get
469-
# the list of PTS values to seek to.
470-
simple_decoder = VideoDecoder(video_file_path)
471-
duration = simple_decoder.metadata.duration_seconds
472-
pts_list = [
473-
i * duration / num_uniform_samples for i in range(num_uniform_samples)
474-
]
475-
metadata = simple_decoder.metadata
476-
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
477-
if verbose:
478-
print(
479-
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
506+
507+
for kind, pts_list in [("uniform", uniform_pts_list), ("random", random_pts_list)]:
508+
if verbose:
509+
print(
510+
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
511+
)
512+
seeked_result = benchmark.Timer(
513+
stmt="decoder.get_frames_from_video(video_file, pts_list)",
514+
globals={
515+
"video_file": video_file_path,
516+
"pts_list": pts_list,
517+
"decoder": decoder,
518+
},
519+
label=f"video={video_file_path} {metadata_label}",
520+
sub_label=decoder_name,
521+
description=f"{kind} {num_samples} seek()+next()",
480522
)
481-
seeked_result = benchmark.Timer(
482-
stmt="decoder.get_frames_from_video(video_file, pts_list)",
483-
globals={
484-
"video_file": video_file_path,
485-
"pts_list": pts_list,
486-
"decoder": decoder,
487-
},
488-
label=f"video={video_file_path} {metadata_string}",
489-
sub_label=decoder_name,
490-
description=f"{num_uniform_samples} seek()+next()",
491-
)
492-
results.append(
493-
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
494-
)
495-
df_item = {}
496-
df_item["decoder"] = decoder_name
497-
df_item["video"] = video_file_path
498-
df_item["description"] = results[-1].description
499-
df_item["frame_count"] = num_uniform_samples
500-
df_item["median"] = results[-1].median
501-
df_item["iqr"] = results[-1].iqr
502-
df_item["type"] = "seek()+next()"
503-
df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median
504-
df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75
505-
df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25
506-
df_data.append(df_item)
507-
508-
for num_consecutive_nexts in [1, 10]:
523+
results.append(
524+
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
525+
)
526+
df_item = {}
527+
df_item["decoder"] = decoder_name
528+
df_item["video"] = video_file_path
529+
df_item["description"] = results[-1].description
530+
df_item["frame_count"] = num_samples
531+
df_item["median"] = results[-1].median
532+
df_item["iqr"] = results[-1].iqr
533+
df_item["type"] = f"{kind}:seek()+next()"
534+
df_item["fps"] = 1.0 * num_samples / results[-1].median
535+
df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
536+
df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
537+
df_data.append(df_item)
538+
539+
for num_consecutive_nexts in num_sequential_frames_from_start:
509540
consecutive_frames_result = benchmark.Timer(
510541
stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
511542
globals={
512543
"video_file": video_file_path,
513544
"consecutive_frames_to_extract": num_consecutive_nexts,
514545
"decoder": decoder,
515546
},
516-
label=f"video={video_file_path} {metadata_string}",
547+
label=f"video={video_file_path} {metadata_label}",
517548
sub_label=decoder_name,
518549
description=f"{num_consecutive_nexts} next()",
519550
)
@@ -537,17 +568,16 @@ def run_benchmarks(
537568

538569
first_video_file_path = video_files_paths[0]
539570
if benchmark_video_creation:
540-
simple_decoder = VideoDecoder(first_video_file_path)
541-
metadata = simple_decoder.metadata
542-
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
571+
metadata = get_metadata(video_file_path)
572+
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
543573
creation_result = benchmark.Timer(
544574
stmt="create_torchcodec_decoder_from_file(video_file)",
545575
globals={
546576
"video_file": first_video_file_path,
547577
"create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
548578
},
549-
label=f"video={first_video_file_path} {metadata_string}",
550-
sub_label="TorchcodecNonCompiled",
579+
label=f"video={first_video_file_path} {metadata_label}",
580+
sub_label="TorchCodecCore:",
551581
description="create()+next()",
552582
)
553583
results.append(
1.97 KB
Loading

0 commit comments

Comments
 (0)