Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 45b8fe1

Browse files
committed Apr 30, 2025
Encode into file-like
1 parent 8a02d26 commit 45b8fe1

File tree

8 files changed

+79
-5
lines changed

8 files changed

+79
-5
lines changed
 

‎src/torchcodec/_core/AVIOContextHolder.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ void AVIOContextHolder::createAVIOContext(
2323
buffer != nullptr,
2424
"Failed to allocate buffer of size " + std::to_string(bufferSize));
2525

26-
TORCH_CHECK(
27-
(seek != nullptr) && ((write != nullptr) ^ (read != nullptr)),
28-
"seek method must be defined, and either write or read must be defined. "
29-
"But not both!")
26+
// TORCH_CHECK(
27+
// (seek != nullptr) && ((write != nullptr) ^ (read != nullptr)),
28+
// "seek method must be defined, and either write or read must be
29+
// defined. " "But not both!")
3030
avioContext_.reset(avioAllocContext(
3131
buffer,
3232
bufferSize,

‎src/torchcodec/_core/AVIOFileLikeContext.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike)
2323
py::hasattr(fileLike, "seek"),
2424
"File like object must implement a seek method.");
2525
}
26-
createAVIOContext(&read, nullptr, &seek, &fileLike_);
26+
createAVIOContext(&read, &write, &seek, &fileLike_);
2727
}
2828

2929
int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
@@ -77,4 +77,12 @@ int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) {
7777
return py::cast<int64_t>((*fileLike)->attr("seek")(offset, whence));
7878
}
7979

80+
int AVIOFileLikeContext::write(void* opaque, const uint8_t* buf, int buf_size) {
81+
auto fileLike = static_cast<UniquePyObject*>(opaque);
82+
py::gil_scoped_acquire gil;
83+
py::bytes bytes_obj(reinterpret_cast<const char*>(buf), buf_size);
84+
85+
return py::cast<int64_t>((*fileLike)->attr("write")(bytes_obj));
86+
}
87+
8088
} // namespace facebook::torchcodec

‎src/torchcodec/_core/AVIOFileLikeContext.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class AVIOFileLikeContext : public AVIOContextHolder {
2424
private:
2525
static int read(void* opaque, uint8_t* buf, int buf_size);
2626
static int64_t seek(void* opaque, int64_t offset, int whence);
27+
static int write(void* opaque, const uint8_t* buf, int buf_size);
2728

2829
// Note that we dynamically allocate the Python object because we need to
2930
// strictly control when its destructor is called. We must hold the GIL

‎src/torchcodec/_core/Encoder.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <sstream>
#include <string>

#include "src/torchcodec/_core/AVIOBytesContext.h"
#include "src/torchcodec/_core/AVIOContextHolder.h"
#include "src/torchcodec/_core/Encoder.h"
#include "torch/types.h"
67

@@ -148,6 +149,31 @@ AudioEncoder::AudioEncoder(
148149
initializeEncoder(sampleRate, bitRate);
149150
}
150151

152+
// TODO this sucks, shouldn't need 2 separate constructors for AVIOContextHolder
153+
AudioEncoder::AudioEncoder(
154+
const torch::Tensor wf,
155+
int sampleRate,
156+
std::string_view formatName,
157+
std::unique_ptr<AVIOContextHolder> avioContextHolder,
158+
std::optional<int64_t> bitRate)
159+
: wf_(validateWf(wf)), avioContextHolderrrr_(std::move(avioContextHolder)) {
160+
setFFmpegLogLevel();
161+
AVFormatContext* avFormatContext = nullptr;
162+
int status = avformat_alloc_output_context2(
163+
&avFormatContext, nullptr, formatName.data(), nullptr);
164+
165+
TORCH_CHECK(
166+
avFormatContext != nullptr,
167+
"Couldn't allocate AVFormatContext. ",
168+
"Check the desired extension? ",
169+
getFFMPEGErrorStringFromErrorCode(status));
170+
avFormatContext_.reset(avFormatContext);
171+
172+
avFormatContext_->pb = avioContextHolderrrr_->getAVIOContext();
173+
174+
initializeEncoder(sampleRate, bitRate);
175+
}
176+
151177
void AudioEncoder::initializeEncoder(
152178
int sampleRate,
153179
std::optional<int64_t> bitRate) {

‎src/torchcodec/_core/Encoder.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include <torch/types.h>
33
#include "src/torchcodec/_core/AVIOBytesContext.h"
4+
#include "src/torchcodec/_core/AVIOContextHolder.h"
45
#include "src/torchcodec/_core/FFMPEGCommon.h"
56

67
namespace facebook::torchcodec {
@@ -28,6 +29,12 @@ class AudioEncoder {
2829
std::string_view formatName,
2930
std::unique_ptr<AVIOToTensorContext> avioContextHolder,
3031
std::optional<int64_t> bitRate = std::nullopt);
32+
AudioEncoder(
33+
const torch::Tensor wf,
34+
int sampleRate,
35+
std::string_view formatName,
36+
std::unique_ptr<AVIOContextHolder> avioContextHolder,
37+
std::optional<int64_t> bitRate = std::nullopt);
3138
void encode();
3239
torch::Tensor encodeToTensor();
3340

@@ -49,6 +56,7 @@ class AudioEncoder {
4956

5057
// Stores the AVIOContext for the output tensor buffer.
5158
std::unique_ptr<AVIOToTensorContext> avioContextHolder_;
59+
std::unique_ptr<AVIOContextHolder> avioContextHolderrrr_; // EWWWWW
5260

5361
bool encodeWasCalled_ = false;
5462
};

‎src/torchcodec/_core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
create_from_file_like,
2424
create_from_tensor,
2525
encode_audio_to_file,
26+
encode_audio_to_file_like,
2627
encode_audio_to_tensor,
2728
get_ffmpeg_library_versions,
2829
get_frame_at_index,

‎src/torchcodec/_core/ops.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,17 @@ def create_from_file_like(
153153
return _convert_to_tensor(_pybind_ops.create_from_file_like(file_like, seek_mode))
154154

155155

156+
def encode_audio_to_file_like(
    file_like: Union[io.RawIOBase, io.BufferedIOBase],
    wf: torch.Tensor,
    sample_rate: int,
    format: str,
    bit_rate: Optional[int] = None,
) -> None:
    """Forward an audio-encoding request to the pybind op of the same name.

    Args:
        file_like: Destination stream. It is annotated with the generic
            buffered base class rather than ``io.BufferedReader`` because the
            stream is an encoding target, not something to read from.
        wf: Waveform tensor, passed through unchanged.
        sample_rate: Sample rate, passed through unchanged.
        format: Output format name, passed through unchanged.
        bit_rate: Optional bit rate, passed through unchanged.

    Raises:
        AssertionError: If the pybind ops module has not been loaded.
    """
    assert _pybind_ops is not None
    _pybind_ops.encode_audio_to_file_like(file_like, wf, sample_rate, format, bit_rate)
165+
166+
156167
# ==============================
157168
# Abstract impl for the operators. Needed by torch.compile.
158169
# ==============================

‎src/torchcodec/_core/pybind_ops.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <string>
1111

1212
#include "src/torchcodec/_core/AVIOFileLikeContext.h"
13+
#include "src/torchcodec/_core/Encoder.h"
1314
#include "src/torchcodec/_core/SingleStreamDecoder.h"
1415

1516
namespace py = pybind11;
@@ -38,8 +39,26 @@ int64_t create_from_file_like(
3839
return reinterpret_cast<int64_t>(decoder);
3940
}
4041

42+
void encode_audio_to_file_like(
43+
py::object file_like,
44+
// const at::Tensor wf,
45+
[[maybe_unused]] int wf,
46+
int64_t sample_rate,
47+
std::string_view format,
48+
std::optional<int64_t> bit_rate = std::nullopt) {
49+
auto avioContextHolder = std::make_unique<AVIOFileLikeContext>(file_like);
50+
AudioEncoder(
51+
torch::empty({2, 1000}, torch::kFloat32),
52+
sample_rate, // TODO need validateSampleRate
53+
format,
54+
std::move(avioContextHolder),
55+
bit_rate)
56+
.encode();
57+
}
58+
4159
// Python bindings exposed by the decoder_core_pybind_ops extension module.
PYBIND11_MODULE(decoder_core_pybind_ops, m) {
  m.def("create_from_file_like", &create_from_file_like)
      .def("encode_audio_to_file_like", &encode_audio_to_file_like);
}
4463

4564
} // namespace facebook::torchcodec

0 commit comments

Comments
 (0)
Please sign in to comment.