import torch
import torch.utils.benchmark as benchmark
-from torchcodec.decoders import VideoDecoder
+from torchcodec.decoders import VideoDecoder, VideoStreamMetadata

from torchcodec.decoders._core import (
    _add_video_stream,
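
For orientation, the VideoStreamMetadata object pulled in by the new import is what VideoDecoder exposes as .metadata; the fields read later in this diff are codec, width, height, duration_seconds and average_fps. A minimal sketch, with a placeholder file path:

from torchcodec.decoders import VideoDecoder, VideoStreamMetadata

# "video.mp4" is a placeholder path used only for illustration.
metadata: VideoStreamMetadata = VideoDecoder("video.mp4").metadata
print(metadata.codec, metadata.width, metadata.height)
print(metadata.duration_seconds, metadata.average_fps)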
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        return frames


-class TVNewAPIDecoderWithBackend(AbstractDecoder):
+class TorchVision(AbstractDecoder):
    def __init__(self, backend):
        self._backend = backend
        self._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        return frames


-class TorchcodecNonCompiledWithOptions(AbstractDecoder):
+class TorchCodecCore(AbstractDecoder):
    def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
        self._print_each_iteration_time = False
        self._num_threads = int(num_threads) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        return frames


-class TorchCodecNonCompiledBatch(AbstractDecoder):
+class TorchCodecCoreBatch(AbstractDecoder):
    def __init__(self, num_threads=None, color_conversion_library=None):
        self._print_each_iteration_time = False
        self._num_threads = int(num_threads) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        )
        return frames

+class TorchCodecPublic(AbstractDecoder):
+    def __init__(self, num_ffmpeg_threads=None):
+        self._num_ffmpeg_threads = int(num_ffmpeg_threads) if num_ffmpeg_threads else None
+
+    def get_frames_from_video(self, video_file, pts_list):
+        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
+        return decoder.get_frames_played_at(pts_list)
+
+    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
+        frames = []
+        count = 0
+        for frame in decoder:
+            frames.append(frame)
+            count += 1
+            if count == numFramesToDecode:
+                break
+        return frames

@torch.compile(fullgraph=True, backend="eager")
def compiled_seek_and_next(decoder, pts):
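
A rough usage sketch for the TorchCodecPublic wrapper added above; the file path, thread count, timestamps and frame count are placeholders, not values taken from the benchmark:

# All values below are placeholders for illustration.
decoder = TorchCodecPublic(num_ffmpeg_threads=1)

# Seek-based decoding: frames at explicit presentation timestamps, in seconds.
frames_at_pts = decoder.get_frames_from_video("video.mp4", [0.0, 1.5, 3.0])

# Sequential decoding: the first 10 frames from the start of the video.
first_frames = decoder.get_consecutive_frames_from_video("video.mp4", 10)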
@@ -239,7 +257,7 @@ def compiled_next(decoder):
    return get_next_frame(decoder)


-class TorchcodecCompiled(AbstractDecoder):
+class TorchCodecCoreCompiled(AbstractDecoder):
    def __init__(self):
        pass

@@ -450,70 +468,83 @@ def plot_data(df_data, plot_path):
        plot_path,
    )

+def get_metadata(video_file_path: str) -> VideoStreamMetadata:
+    return VideoDecoder(video_file_path).metadata

def run_benchmarks(
-    decoder_dict,
-    video_files_paths,
-    num_uniform_samples,
-    min_runtime_seconds,
-    benchmark_video_creation,
+    decoder_dict: dict[str, AbstractDecoder],
+    video_files_paths: list[str],
+    num_samples: int,
+    num_sequential_frames_from_start: list[int],
+    min_runtime_seconds: float,
+    benchmark_video_creation: bool,
) -> list[dict[str, str | float | int]]:
+    # Ensure that we have the same seed across benchmark runs.
+    torch.manual_seed(0)
+
+    print(f"video_files_paths={video_files_paths}")
+
    results = []
    df_data = []
-    print(f"video_files_paths={video_files_paths}")
    verbose = False
-    for decoder_name, decoder in decoder_dict.items():
-        for video_file_path in video_files_paths:
+    for video_file_path in video_files_paths:
+        metadata = get_metadata(video_file_path)
+        metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
+
+        duration = metadata.duration_seconds
+        uniform_pts_list = [
+            i * duration / num_samples for i in range(num_samples)
+        ]
+
+        # Note that we are using the same random pts values for all decoders for the same
+        # video. However, because we use the duration as part of this calculation, we
+        # are using different random pts values across videos.
+        random_pts_list = (torch.rand(num_samples) * duration).tolist()
+
+        for decoder_name, decoder in decoder_dict.items():
            print(f"video={video_file_path}, decoder={decoder_name}")
-            # We only use the VideoDecoder to get the metadata and get
-            # the list of PTS values to seek to.
-            simple_decoder = VideoDecoder(video_file_path)
-            duration = simple_decoder.metadata.duration_seconds
-            pts_list = [
-                i * duration / num_uniform_samples for i in range(num_uniform_samples)
-            ]
-            metadata = simple_decoder.metadata
-            metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
-            if verbose:
-                print(
-                    f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
+
+            for kind, pts_list in [("uniform", uniform_pts_list), ("random", random_pts_list)]:
+                if verbose:
+                    print(
+                        f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
+                    )
+                seeked_result = benchmark.Timer(
+                    stmt="decoder.get_frames_from_video(video_file, pts_list)",
+                    globals={
+                        "video_file": video_file_path,
+                        "pts_list": pts_list,
+                        "decoder": decoder,
+                    },
+                    label=f"video={video_file_path} {metadata_label}",
+                    sub_label=decoder_name,
+                    description=f"{kind} {num_samples} seek()+next()",
                )
-            seeked_result = benchmark.Timer(
-                stmt="decoder.get_frames_from_video(video_file, pts_list)",
-                globals={
-                    "video_file": video_file_path,
-                    "pts_list": pts_list,
-                    "decoder": decoder,
-                },
-                label=f"video={video_file_path} {metadata_string}",
-                sub_label=decoder_name,
-                description=f"{num_uniform_samples} seek()+next()",
-            )
-            results.append(
-                seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
-            )
-            df_item = {}
-            df_item["decoder"] = decoder_name
-            df_item["video"] = video_file_path
-            df_item["description"] = results[-1].description
-            df_item["frame_count"] = num_uniform_samples
-            df_item["median"] = results[-1].median
-            df_item["iqr"] = results[-1].iqr
-            df_item["type"] = "seek()+next()"
-            df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median
-            df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75
-            df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25
-            df_data.append(df_item)
-
-            for num_consecutive_nexts in [1, 10]:
+                results.append(
+                    seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
+                )
+                df_item = {}
+                df_item["decoder"] = decoder_name
+                df_item["video"] = video_file_path
+                df_item["description"] = results[-1].description
+                df_item["frame_count"] = num_samples
+                df_item["median"] = results[-1].median
+                df_item["iqr"] = results[-1].iqr
+                df_item["type"] = f"{kind}:seek()+next()"
+                df_item["fps"] = 1.0 * num_samples / results[-1].median
+                df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
+                df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
+                df_data.append(df_item)
+
+            for num_consecutive_nexts in num_sequential_frames_from_start:
                consecutive_frames_result = benchmark.Timer(
                    stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
                    globals={
                        "video_file": video_file_path,
                        "consecutive_frames_to_extract": num_consecutive_nexts,
                        "decoder": decoder,
                    },
-                    label=f"video={video_file_path} {metadata_string}",
+                    label=f"video={video_file_path} {metadata_label}",
                    sub_label=decoder_name,
                    description=f"{num_consecutive_nexts} next()",
                )
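
As an aside, the uniform vs. random sampling introduced above reduces to the arithmetic below; the sample count and duration are made-up values, not taken from any benchmark video:

import torch

num_samples = 4
duration = 10.0  # seconds; in the benchmark this comes from the video metadata

# Uniformly spaced timestamps: 0.0, 2.5, 5.0, 7.5
uniform_pts_list = [i * duration / num_samples for i in range(num_samples)]

# Random timestamps in [0, duration). Because run_benchmarks calls
# torch.manual_seed(0) once up front, these are reproducible across runs and
# shared by all decoders for a given video, but differ across videos since
# the duration differs.
torch.manual_seed(0)
random_pts_list = (torch.rand(num_samples) * duration).tolist()

print(uniform_pts_list, random_pts_list)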
@@ -537,17 +568,16 @@ def run_benchmarks(

    first_video_file_path = video_files_paths[0]
    if benchmark_video_creation:
-        simple_decoder = VideoDecoder(first_video_file_path)
-        metadata = simple_decoder.metadata
-        metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
+        metadata = get_metadata(first_video_file_path)
+        metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
        creation_result = benchmark.Timer(
            stmt="create_torchcodec_decoder_from_file(video_file)",
            globals={
                "video_file": first_video_file_path,
                "create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
            },
-            label=f"video={first_video_file_path} {metadata_string}",
-            sub_label="TorchcodecNonCompiled",
+            label=f"video={first_video_file_path} {metadata_label}",
+            sub_label="TorchCodecCore:",
            description="create()+next()",
        )
        results.append(
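
For readers unfamiliar with torch.utils.benchmark, the Timer/Measurement pattern used throughout this file boils down to the sketch below; the statement, globals and min_run_time are illustrative only:

import torch.utils.benchmark as benchmark

num_samples = 10
timer = benchmark.Timer(
    stmt="sum(range(n))",  # placeholder workload standing in for a decode call
    globals={"n": 1_000},
    label="example",
    sub_label="sum",
    description=f"{num_samples} seek()+next()",
)
# blocked_autorange() repeats the statement until min_run_time is reached and
# returns a Measurement with .median and .iqr, which the benchmark converts
# into a frames-per-second figure.
measurement = timer.blocked_autorange(min_run_time=1.0)
fps = 1.0 * num_samples / measurement.median
print(measurement.median, measurement.iqr, fps)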