-
Notifications
You must be signed in to change notification settings - Fork 36
Refactor and add benchmarks #330
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ | |
|
||
import torch | ||
import torch.utils.benchmark as benchmark | ||
from torchcodec.decoders import VideoDecoder | ||
from torchcodec.decoders import VideoDecoder, VideoStreamMetadata | ||
|
||
from torchcodec.decoders._core import ( | ||
_add_video_stream, | ||
|
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): | |
return frames | ||
|
||
|
||
class TVNewAPIDecoderWithBackend(AbstractDecoder): | ||
class TorchVision(AbstractDecoder): | ||
def __init__(self, backend): | ||
self._backend = backend | ||
self._print_each_iteration_time = False | ||
|
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): | |
return frames | ||
|
||
|
||
class TorchcodecNonCompiledWithOptions(AbstractDecoder): | ||
class TorchCodecCore(AbstractDecoder): | ||
def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"): | ||
self._print_each_iteration_time = False | ||
self._num_threads = int(num_threads) if num_threads else None | ||
|
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): | |
return frames | ||
|
||
|
||
class TorchCodecNonCompiledBatch(AbstractDecoder): | ||
class TorchCodecCoreBatch(AbstractDecoder): | ||
def __init__(self, num_threads=None, color_conversion_library=None): | ||
self._print_each_iteration_time = False | ||
self._num_threads = int(num_threads) if num_threads else None | ||
|
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): | |
) | ||
return frames | ||
|
||
class TorchCodecPublic(AbstractDecoder): | ||
def __init__(self, num_ffmpeg_threads=None): | ||
self._num_ffmpeg_threads = int(num_ffmpeg_threads) if num_ffmpeg_threads else None | ||
|
||
def get_frames_from_video(self, video_file, pts_list): | ||
decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads) | ||
return decoder.get_frames_played_at(pts_list) | ||
|
||
def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): | ||
decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads) | ||
frames = [] | ||
count = 0 | ||
for frame in decoder: | ||
frames.append(frame) | ||
count += 1 | ||
if count == numFramesToDecode: | ||
break | ||
return frames | ||
|
||
@torch.compile(fullgraph=True, backend="eager") | ||
def compiled_seek_and_next(decoder, pts): | ||
|
@@ -239,7 +257,7 @@ def compiled_next(decoder): | |
return get_next_frame(decoder) | ||
|
||
|
||
class TorchcodecCompiled(AbstractDecoder): | ||
class TorchCodecCoreCompiled(AbstractDecoder): | ||
def __init__(self): | ||
pass | ||
|
||
|
@@ -444,11 +462,13 @@ def plot_data(df_data, plot_path): | |
plot_path, | ||
) | ||
|
||
def get_metadata(video_file_path: str) -> VideoStreamMetadata: | ||
return VideoDecoder(video_file_path).metadata | ||
|
||
def run_benchmarks( | ||
decoder_dict, | ||
video_files_paths, | ||
num_uniform_samples, | ||
num_samples, | ||
min_runtime_seconds, | ||
benchmark_video_creation, | ||
) -> list[dict[str, str | float | int]]: | ||
|
@@ -459,55 +479,57 @@ def run_benchmarks( | |
for decoder_name, decoder in decoder_dict.items(): | ||
for video_file_path in video_files_paths: | ||
print(f"video={video_file_path}, decoder={decoder_name}") | ||
# We only use the VideoDecoder to get the metadata and get | ||
# the list of PTS values to seek to. | ||
simple_decoder = VideoDecoder(video_file_path) | ||
duration = simple_decoder.metadata.duration_seconds | ||
pts_list = [ | ||
i * duration / num_uniform_samples for i in range(num_uniform_samples) | ||
metadata = get_metadata(video_file_path) | ||
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps" | ||
|
||
duration = metadata.duration_seconds | ||
uniform_pts_list = [ | ||
i * duration / num_samples for i in range(num_samples) | ||
] | ||
metadata = simple_decoder.metadata | ||
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps" | ||
if verbose: | ||
print( | ||
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}" | ||
|
||
random_pts_list = (torch.rand(num_samples) * duration).tolist() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the random seed here? Is this the source of the benchmark run time variation? Is it the same for all decoders at least? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, good point. We're doing a different random list for all decoders. We should fix that. I can also set the seed to be deterministic. Maybe in the future that should be a benchmark parameter. |
||
|
||
for kind, pts_list in [("uniform", uniform_pts_list), ("random", random_pts_list)]: | ||
if verbose: | ||
print( | ||
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}" | ||
) | ||
seeked_result = benchmark.Timer( | ||
stmt="decoder.get_frames_from_video(video_file, pts_list)", | ||
globals={ | ||
"video_file": video_file_path, | ||
"pts_list": pts_list, | ||
"decoder": decoder, | ||
}, | ||
label=f"video={video_file_path} {metadata_label}", | ||
sub_label=decoder_name, | ||
description=f"{kind} {num_samples} seek()+next()", | ||
) | ||
seeked_result = benchmark.Timer( | ||
stmt="decoder.get_frames_from_video(video_file, pts_list)", | ||
globals={ | ||
"video_file": video_file_path, | ||
"pts_list": pts_list, | ||
"decoder": decoder, | ||
}, | ||
label=f"video={video_file_path} {metadata_string}", | ||
sub_label=decoder_name, | ||
description=f"{num_uniform_samples} seek()+next()", | ||
) | ||
results.append( | ||
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds) | ||
) | ||
df_item = {} | ||
df_item["decoder"] = decoder_name | ||
df_item["video"] = video_file_path | ||
df_item["description"] = results[-1].description | ||
df_item["frame_count"] = num_uniform_samples | ||
df_item["median"] = results[-1].median | ||
df_item["iqr"] = results[-1].iqr | ||
df_item["type"] = "seek()+next()" | ||
df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median | ||
df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75 | ||
df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25 | ||
df_data.append(df_item) | ||
|
||
for num_consecutive_nexts in [1, 10]: | ||
results.append( | ||
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds) | ||
) | ||
df_item = {} | ||
df_item["decoder"] = decoder_name | ||
df_item["video"] = video_file_path | ||
df_item["description"] = results[-1].description | ||
df_item["frame_count"] = num_samples | ||
df_item["median"] = results[-1].median | ||
df_item["iqr"] = results[-1].iqr | ||
df_item["type"] = f"{kind}:seek()+next()" | ||
df_item["fps"] = 1.0 * num_samples / results[-1].median | ||
df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75 | ||
df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25 | ||
df_data.append(df_item) | ||
|
||
for num_consecutive_nexts in [100]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we do need 1 here, or at least the ability to pass in 1 here. (Maybe we don't need 1 for the readme, but this lib should be able to accept 1 here somehow.) Time to first frame is a useful metric for devs to track. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point. I think we'll want to make that configurable, at least as input to |
||
consecutive_frames_result = benchmark.Timer( | ||
stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)", | ||
globals={ | ||
"video_file": video_file_path, | ||
"consecutive_frames_to_extract": num_consecutive_nexts, | ||
"decoder": decoder, | ||
}, | ||
label=f"video={video_file_path} {metadata_string}", | ||
label=f"video={video_file_path} {metadata_label}", | ||
sub_label=decoder_name, | ||
description=f"{num_consecutive_nexts} next()", | ||
) | ||
|
@@ -531,17 +553,16 @@ def run_benchmarks( | |
|
||
first_video_file_path = video_files_paths[0] | ||
if benchmark_video_creation: | ||
simple_decoder = VideoDecoder(first_video_file_path) | ||
metadata = simple_decoder.metadata | ||
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps" | ||
metadata = get_metadata(video_file_path) | ||
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps" | ||
creation_result = benchmark.Timer( | ||
stmt="create_torchcodec_decoder_from_file(video_file)", | ||
globals={ | ||
"video_file": first_video_file_path, | ||
"create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file, | ||
}, | ||
label=f"video={first_video_file_path} {metadata_string}", | ||
sub_label="TorchcodecNonCompiled", | ||
label=f"video={first_video_file_path} {metadata_label}", | ||
sub_label="TorchCodecCore:", | ||
description="create()+next()", | ||
) | ||
results.append( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@NicolasHug can tell you about torchvision's APIs and what the name should be for this. This is a specific API — one that I have heard him describe as the new API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The naming in torchvision is awful. Basically, the `VideoReader` class and the `"video_reader"` backend are 2 orthogonal concepts with the same name. There are 2 main Python decoding APIs: `read_video()` in `io/video.py`, and `VideoReader()`. The latter is considered more "fine-grained" and the "new" API (although not that new at this point, it's just more recent). Both APIs support the pyav and `"video_reader"` backends, where the `"video_reader"` backend just means the "torchvision cpu" backend. The `VideoReader()` class also supposedly supports the `"cuda"` backend.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the benchmark, we're calling the `VideoReader` class, providing it with the `"video_reader"` backend. I think we should name the TorchVision decoder after the config option that makes the most difference for performance. I think that the backend would be more important here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, I agree.