Merge branch 'main' into zhiwei/xpu_quant

svekars · web-flow · commit 79d56de3cd49 · 2025-03-21T09:15:03.000-07:00
diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt
@@ -28,8 +28,8 @@ tensorboard
 jinja2==3.1.3
 pytorch-lightning
 torchx
-torchrl==0.6.0
-tensordict==0.6.0
+torchrl==0.7.2
+tensordict==0.7.2
 ax-platform>=0.4.0
 nbformat>=5.9.2
 datasets
diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py
@@ -12,7 +12,7 @@
 
 
 # Pattern to search ``` {.python .jupyter-code-cell}
-pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL)
+pattern = re.compile(r'(.*?)``` {\.python \.jupyter-code-cell}\n(.*?from IPython\.display import display, HTML.*?display\(HTML\(html_code\)\))\n```(.*)', re.DOTALL)
 
 
 def process_video_cell(notebook_path):
diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
@@ -50,6 +50,7 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
+    "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed.
 ]
 
 def tutorial_source_dirs() -> List[Path]:
diff --git a/advanced_source/coding_ddpg.py b/advanced_source/coding_ddpg.py
@@ -1040,7 +1040,7 @@ def ceil_div(x, y):
 
 ###############################################################################
 # let's use the TD(lambda) estimator!
-loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=gamma, lmbda=lmbda)
+loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=gamma, lmbda=lmbda, device=device)
 
 ###############################################################################
 # .. note::
diff --git a/advanced_source/cpp_extension.rst b/advanced_source/cpp_extension.rst
@@ -1207,3 +1207,5 @@ examples displayed in this note `here
 <https://github.com/pytorch/extension-cpp>`_. If you have questions, please use
 `the forums <https://discuss.pytorch.org>`_. Also be sure to check our `FAQ
 <https://pytorch.org/cppdocs/notes/faq.html>`_ in case you run into any issues.
+A blog post on writing extensions for AMD ROCm can be found `here
+<https://rocm.blogs.amd.com/artificial-intelligence/cpp-extn/readme.html>`_.
diff --git a/advanced_source/python_custom_ops.py b/advanced_source/python_custom_ops.py
@@ -112,7 +112,10 @@ def crop(pic: torch.Tensor, box: Sequence[int]) -> torch.Tensor:
 def _(pic, box):
     channels = pic.shape[0]
     x0, y0, x1, y1 = box
-    return pic.new_empty(channels, y1 - y0, x1 - x0)
+    result = pic.new_empty(y1 - y0, x1 - x0, channels).permute(2, 0, 1)
+    # The result should have the same metadata (shape/strides/``dtype``/device)
+    # as the output of the ``crop`` function above.
+    return result
 
 ######################################################################
 # After this, ``crop`` now works without graph breaks:
diff --git a/advanced_source/semi_structured_sparse.py b/advanced_source/semi_structured_sparse.py
@@ -210,6 +210,8 @@
 SparseSemiStructuredTensor._FORCE_CUTLASS = True
 torch.manual_seed(100)
 
+# Set the default device to "cuda:0" when CUDA is available; otherwise fall back to "cpu"
+torch.set_default_device(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
 
 ######################################################################
 # We’ll also need to define some helper functions that are specific to the
diff --git a/intermediate_source/pinmem_nonblock.py b/intermediate_source/pinmem_nonblock.py
@@ -547,7 +547,7 @@ def pin_copy_to_device_nonblocking(*tensors):
 
 i = -1
 for i in range(100):
-    # Create a tensor in pin-memory
+    # Create a tensor in pageable memory
     cpu_tensor = torch.ones(1024, 1024)
     torch.cuda.synchronize()
     # Send the tensor to CUDA
diff --git a/intermediate_source/reinforcement_ppo.py b/intermediate_source/reinforcement_ppo.py
@@ -551,7 +551,7 @@
 #
 
 advantage_module = GAE(
-    gamma=gamma, lmbda=lmbda, value_network=value_module, average_gae=True
+    gamma=gamma, lmbda=lmbda, value_network=value_module, average_gae=True, device=device,
 )
 
 loss_module = ClipPPOLoss(

Original file line number	Diff line number	Diff line change
`@@ -50,6 +50,7 @@`
`50`	`50`	`"intermediate_source/flask_rest_api_tutorial",`
`51`	`51`	`"intermediate_source/text_to_speech_with_torchaudio",`
`52`	`52`	`"intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.`
	`53`	`+ "advanced_source/semi_structured_sparse" # reenable after 3303 is fixed.`
`53`	`54`	`]`
`54`	`55`
`55`	`56`	`def tutorial_source_dirs() -> List[Path]:`
Original file line number	Diff line number	Diff line change
`@@ -551,7 +551,7 @@`
`551`	`551`	`#`
`552`	`552`
`553`	`553`	`advantage_module = GAE(`
`554`		`- gamma=gamma, lmbda=lmbda, value_network=value_module, average_gae=True`
	`554`	`+ gamma=gamma, lmbda=lmbda, value_network=value_module, average_gae=True, device=device,`
`555`	`555`	`)`
`556`	`556`
`557`	`557`	`loss_module = ClipPPOLoss(`