Skip to content

Add ability to build cuda wheels #272

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 85 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
0eedfe6
Add cuda wheel
ahmadsharif1 Oct 16, 2024
9996cc1
.
ahmadsharif1 Oct 16, 2024
40dcc4c
.
ahmadsharif1 Oct 16, 2024
1392d97
.
ahmadsharif1 Oct 16, 2024
b6e991d
.
ahmadsharif1 Oct 16, 2024
4939eab
.
ahmadsharif1 Oct 16, 2024
da42e6b
.
ahmadsharif1 Oct 16, 2024
6be7b76
.
ahmadsharif1 Oct 16, 2024
4bdc851
Merge branch 'main' of https://github.com/pytorch/torchcodec into cuda8
ahmadsharif1 Oct 16, 2024
a2d1156
.
ahmadsharif1 Oct 16, 2024
f4e0879
.
ahmadsharif1 Oct 16, 2024
3358fa8
.
ahmadsharif1 Oct 17, 2024
bb655ed
.
ahmadsharif1 Oct 17, 2024
cb95067
.
ahmadsharif1 Oct 17, 2024
5ca956c
.
ahmadsharif1 Oct 17, 2024
2592372
.
ahmadsharif1 Oct 17, 2024
9dfea81
.
ahmadsharif1 Oct 17, 2024
c6605c2
.
ahmadsharif1 Oct 17, 2024
4bcab19
.
ahmadsharif1 Oct 17, 2024
c94d5a1
.
ahmadsharif1 Oct 17, 2024
a400d0f
.
ahmadsharif1 Oct 17, 2024
f3945b9
.
ahmadsharif1 Oct 17, 2024
26d70a4
.
ahmadsharif1 Oct 17, 2024
c98878b
.
ahmadsharif1 Oct 17, 2024
318c950
.
ahmadsharif1 Oct 17, 2024
4ef67fd
.
ahmadsharif1 Oct 17, 2024
c29219b
.
ahmadsharif1 Oct 17, 2024
68dc6e6
.
ahmadsharif1 Oct 18, 2024
9d1a505
.
ahmadsharif1 Oct 18, 2024
df7c4e5
.
ahmadsharif1 Oct 18, 2024
0ed2cd9
.
ahmadsharif1 Oct 18, 2024
83b34b4
.
ahmadsharif1 Oct 18, 2024
1c0b022
.
ahmadsharif1 Oct 18, 2024
9e8e9cd
.
ahmadsharif1 Oct 18, 2024
e78986a
.
ahmadsharif1 Oct 18, 2024
08cf4fa
.
ahmadsharif1 Oct 18, 2024
411e077
.
ahmadsharif1 Oct 18, 2024
da3458f
.
ahmadsharif1 Oct 18, 2024
f2728c8
.
ahmadsharif1 Oct 18, 2024
0b22a39
.
ahmadsharif1 Oct 18, 2024
605152b
.
ahmadsharif1 Oct 18, 2024
0194995
.
ahmadsharif1 Oct 18, 2024
297e0b2
.
ahmadsharif1 Oct 18, 2024
45ce5a3
.
ahmadsharif1 Oct 21, 2024
272f1d9
.
ahmadsharif1 Oct 21, 2024
de07c6c
.
ahmadsharif1 Oct 21, 2024
899c915
.
ahmadsharif1 Oct 21, 2024
68793fc
.
ahmadsharif1 Oct 21, 2024
f1cf7c3
.
ahmadsharif1 Oct 21, 2024
ecb5cb9
.
ahmadsharif1 Oct 21, 2024
8886a0f
.
ahmadsharif1 Oct 21, 2024
d2d8467
.
ahmadsharif1 Oct 21, 2024
75e9edc
.
ahmadsharif1 Oct 21, 2024
a4b85d4
.
ahmadsharif1 Oct 21, 2024
483d147
.
ahmadsharif1 Oct 21, 2024
1f5ed25
.
ahmadsharif1 Oct 21, 2024
738cdf3
.
ahmadsharif1 Oct 21, 2024
399c307
.
ahmadsharif1 Oct 21, 2024
425336b
.
ahmadsharif1 Oct 21, 2024
c1a7f68
Merge remote-tracking branch 'origin/main' into cuda8
ahmadsharif1 Oct 21, 2024
b4f5262
.
ahmadsharif1 Oct 21, 2024
4fad5c2
.
ahmadsharif1 Oct 21, 2024
c92d64c
.
ahmadsharif1 Oct 21, 2024
7730c4b
.
ahmadsharif1 Oct 21, 2024
796e637
.
ahmadsharif1 Oct 21, 2024
52f49dd
.
ahmadsharif1 Oct 21, 2024
768641a
.
ahmadsharif1 Oct 21, 2024
d9ae6d0
.
ahmadsharif1 Oct 22, 2024
abe3ed0
.
ahmadsharif1 Oct 22, 2024
fb619ba
.
ahmadsharif1 Oct 22, 2024
6097f32
.
ahmadsharif1 Oct 22, 2024
6add20f
.
ahmadsharif1 Oct 22, 2024
f6e6187
.
ahmadsharif1 Oct 22, 2024
3456b1a
.
ahmadsharif1 Oct 22, 2024
fad4e2b
.
ahmadsharif1 Oct 22, 2024
e6e74d1
.
ahmadsharif1 Oct 22, 2024
7fa4d6d
.
ahmadsharif1 Oct 22, 2024
aae6aaf
.
ahmadsharif1 Oct 22, 2024
8e0ef0e
.
ahmadsharif1 Oct 23, 2024
164882c
.
ahmadsharif1 Oct 23, 2024
878153d
.
ahmadsharif1 Oct 23, 2024
e123c65
.
ahmadsharif1 Oct 23, 2024
370f3ee
.
ahmadsharif1 Oct 23, 2024
250c3a8
.
ahmadsharif1 Oct 23, 2024
3a51218
.
ahmadsharif1 Oct 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions .github/workflows/linux_cuda_wheel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Workflow: build torchcodec CUDA wheels on Linux, then install and test them
# on a GPU runner across several CUDA and FFmpeg versions.
name: Build and test Linux CUDA wheels

on:
  pull_request:
  push:
    branches:
      - nightly
      - main
      - release/*
    tags:
      # Release-candidate tags, e.g. v0.5.0-rc3.
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
  workflow_dispatch:

# Cancel superseded in-flight runs for the same PR/branch to save CI capacity.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: write

defaults:
  run:
    # Login shell (-l) so conda init hooks run; -eo pipefail for fail-fast.
    shell: bash -l -eo pipefail {0}

jobs:
  # Delegates matrix generation to test-infra; CUDA-only (CPU/XPU/ROCm disabled).
  generate-matrix:
    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
    with:
      package-type: wheel
      os: linux
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      with-cpu: disable
      with-xpu: disable
      with-rocm: disable
      with-cuda: enable
      build-python-only: "disable"
  # Builds the wheels via the shared test-infra reusable workflow and uploads
  # them as artifacts (one per python/cuda combination from the matrix).
  build:
    needs: generate-matrix
    strategy:
      fail-fast: false
    name: Build and Upload wheel
    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
    with:
      repository: pytorch/torchcodec
      ref: ""
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
      post-script: packaging/post_build_script.sh
      smoke-test-script: packaging/fake_smoke_test.py
      package-name: torchcodec
      trigger-event: ${{ github.event_name }}
      build-platform: "python-build-package"
      build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 ENABLE_CUDA=1 python -m build --wheel -vvv --no-isolation"

  # Downloads the built wheel, installs it in a CUDA container on a GPU
  # runner, and runs the smoke test, pytest suite, and a benchmark.
  install-and-test:
    runs-on: linux.4xlarge.nvidia.gpu
    strategy:
      fail-fast: false
      matrix:
        # 3.9 corresponds to the minimum python version for which we build
        # the wheel unless the label cliflow/binaries/all is present in the
        # PR.
        # For the actual release we should add that label and change this to
        # include more python versions.
        python-version: ['3.9']
        cuda-version: ['11.8', '12.1', '12.4']
        ffmpeg-version-for-tests: ['5', '6', '7']
    container:
      image: "pytorch/manylinux-builder:cuda${{ matrix.cuda-version }}"
      # NVIDIA_DRIVER_CAPABILITIES=video is needed for NVDEC/NVENC access.
      options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
    if: ${{ always() }}
    needs: build
    steps:
- name: Setup env vars
run: |
cuda_version_without_periods=$(echo "${{ matrix.cuda-version }}" | sed 's/\.//g')
echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
- uses: actions/download-artifact@v3
with:
name: pytorch_torchcodec__3.9_cu${{ env.cuda_version_without_periods }}_x86_64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what the comment above means, can you provide more details? It doesn't seem like we're testing any more Python versions than 3.9 here (which is fine, this is the same as the CPU job, but that seems contradictory with the comment).

Should "3.9" be ${{ matrix.python-version }}, like in our linux CPU one?

name: pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry the comment was unclear

We set

build-python-only: "disable"

That should build for all python versions but it doesn't unless cliflow/binaries/all label is present.

If that label is not present, we only build for the lowest version which happens to be 3.9. Note that it's related to the build step, not test-and-install step.

We can make test-and-install test for all versions, conditional on that label. But we have to make it aware of that label -- right now it's hard-coded.

Maybe I can just control it from the matrix as you suggested

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think we need to control it from the matrix. This way, when we push a release, we set the tag as you mentioned which will trigger the build for all Python versions, and then we just need to update the matrix for the tests. That's what we do on the CPU job.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we should orthogonally test installing and testing the 3.9 wheel on python versions other than 3.9 (like 3.10, etc.)?

path: pytorch/torchcodec/dist/
- name: Setup miniconda using test-infra
uses: ahmadsharif1/test-infra/.github/actions/setup-miniconda@14bc3c29f88d13b0237ab4ddf00aa409e45ade40
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intended to be your fork? Should we upstream the changes you made locally, if any? The problem with keeping that ref to your fork is that the fork might end up outdated compared to the main official one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to upstream that now. Hopefully will happen soon:

pytorch/test-infra#5789

with:
python-version: ${{ matrix.python-version }}
default-packages: "conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
- name: Check env
run: |
${CONDA_RUN} env
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember this was mentioned last week, but it looks like we don't really need CONDA_RUN in the macos wheels anymore. Do we still need CONDA_RUN for this job?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe I do -- because I do install conda packages like libnpp

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, we do install conda packages in the other jobs as well (ffmpeg), and we don't use CONDA_RUN. But that's OK, we can merge as-is and quickly try to remove them in a follow-up PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe for those we are running commands on the base conda env not the one that test-infra creates for us? WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mac wheels also switched back to using conda-incubator/setup-miniconda@v3 rather than from test-infra. I think that if we use conda from test-infra, we should do the CONDA_RUN thing everywhere.

${CONDA_RUN} conda info
${CONDA_RUN} nvidia-smi
- name: Update pip
run: ${CONDA_RUN} python -m pip install --upgrade pip
- name: Install PyTorch
run: |
${CONDA_RUN} python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
- name: Install torchcodec from the wheel
run: |
wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
echo Installing $wheel_path
${CONDA_RUN} python -m pip install $wheel_path -vvv

- name: Check out repo
uses: actions/checkout@v3

- name: Install cuda runtime dependencies
run: |
# For some reason nvidia::libnpp=12.4 doesn't install but nvidia/label/cuda-12.4.0::libnpp does.
# So we use the latter convention for libnpp.
${CONDA_RUN} conda install --yes nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }}
- name: Install test dependencies
run: |
${CONDA_RUN} python -m pip install --pre torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit surprised that this doesn't force a re-installation of the torch CPU wheel... But looking at the logs, this seems OK. (It might come back and bite us in the future, let's just keep that in mind as a possible source of future problem)

# Ideally we would find a way to get those dependencies from pyproject.toml
${CONDA_RUN} python -m pip install numpy pytest pillow

- name: Delete the src/ folder just for fun
run: |
# The only reason we checked-out the repo is to get access to the
# tests. We don't care about the rest. Out of precaution, we delete
# the src/ folder to be extra sure that we're running the code from
# the installed wheel rather than from the source.
# This is just to be extra cautious and very overkill because a)
# there's no way the `torchcodec` package from src/ can be found from
# the PythonPath: the main point of `src/` is precisely to protect
# against that and b) if we ever were to execute code from
# `src/torchcodec`, it would fail loudly because the built .so files
# aren't present there.
rm -r src/
ls
- name: Smoke test
run: |
${CONDA_RUN} python test/decoders/manual_smoke_test.py
- name: Run Python tests
run: |
# We skip test_get_ffmpeg_version because it may not have a micro version.
${CONDA_RUN} FAIL_WITHOUT_CUDA=1 pytest test -k "not test_get_ffmpeg_version" -vvv
- name: Run Python benchmark
run: |
${CONDA_RUN} time python benchmarks/decoders/gpu_benchmark.py --devices=cuda:0,cpu --resize_devices=none
9 changes: 8 additions & 1 deletion packaging/post_build_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ wheel_path=$(pwd)/$(find dist -type f -name "*.whl")
echo "Wheel content:"
unzip -l $wheel_path

for ffmpeg_major_version in 4 5 6 7; do
# FFmpeg major versions whose libtorchcodec<N>.so must be present in the wheel.
ffmpeg_versions=(4 5 6 7)

# TODO: Make ffmpeg4 work with nvcc.
# Default ENABLE_CUDA to 0 so the test doesn't error out ("integer
# expression expected") when the env var is unset on CPU-only builds.
if [ "${ENABLE_CUDA:-0}" -eq 1 ]; then
    ffmpeg_versions=(5 6 7)
fi

# Bug fix: this previously iterated over ${ffmepg_versions[@]} (typo), which
# expanded to nothing and silently skipped every per-version .so check.
for ffmpeg_major_version in "${ffmpeg_versions[@]}"; do
    assert_in_wheel "$wheel_path" "torchcodec/libtorchcodec${ffmpeg_major_version}.so"
done
assert_not_in_wheel $wheel_path libtorchcodec.so
Expand Down
15 changes: 10 additions & 5 deletions src/torchcodec/decoders/_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ function(make_torchcodec_library library_name ffmpeg_target)
set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES}
${Python3_LIBRARIES})
if(ENABLE_CUDA)
list(APPEND NEEDED_LIBRARIES ${CUDA_CUDA_LIBRARY}
list(APPEND NEEDED_LIBRARIES
${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
endif()
target_link_libraries(
Expand Down Expand Up @@ -76,10 +76,15 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
)

make_torchcodec_library(libtorchcodec4 ffmpeg4)
make_torchcodec_library(libtorchcodec5 ffmpeg5)
make_torchcodec_library(libtorchcodec6 ffmpeg6)
make_torchcodec_library(libtorchcodec7 ffmpeg7)

# Build one libtorchcodec flavor per supported FFmpeg major version.
# FFmpeg 4 is CPU-only for now.
# TODO: Enable more ffmpeg versions for cuda.
if(NOT ENABLE_CUDA)
    make_torchcodec_library(libtorchcodec4 ffmpeg4)
endif()
foreach(ffmpeg_major_version IN ITEMS 7 6 5)
    make_torchcodec_library(
        libtorchcodec${ffmpeg_major_version}
        ffmpeg${ffmpeg_major_version}
    )
endforeach()

else()
message(
STATUS
Expand Down
2 changes: 2 additions & 0 deletions test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# Decorator for skipping CUDA tests when CUDA isn't available
def needs_cuda(test_item):
    """Decorator: skip ``test_item`` when CUDA is unavailable.

    If the ``FAIL_WITHOUT_CUDA`` environment variable is set to ``"1"``,
    a missing CUDA setup is treated as a hard error instead of a skip
    (used on CI runners that are expected to have a working GPU).
    """
    if torch.cuda.is_available():
        return test_item
    if os.environ.get("FAIL_WITHOUT_CUDA") == "1":
        raise RuntimeError("CUDA is required for this test")
    return pytest.mark.skip(reason="CUDA not available")(test_item)

Expand Down
Loading