From 6af9c032a2661871616fb5305d4e417f871f044e Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Thu, 15 May 2025 08:38:04 -0700 Subject: [PATCH] tc --- .gitignore | 3 ++ .jenkins/build.sh | 5 ++- .jenkins/get_files_to_run.py | 2 ++ .jenkins/sphinx_files.py | 53 ++++++++++++++++++++++++++++ .jenkins/validate_tutorials_built.py | 1 - Makefile | 13 ++++--- conf.py | 15 ++------ 7 files changed, 72 insertions(+), 20 deletions(-) create mode 100644 .jenkins/sphinx_files.py diff --git a/.gitignore b/.gitignore index 1d9d572e565..b77f2d677ce 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ cleanup.sh # pyspelling dictionary.dic + +# CI related +tutorials-review-data.json diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 58483c168b5..16110ddb075 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -58,7 +58,9 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then # Step 3: Run `make docs` to generate HTML files and static files for these tutorialis pip3 install -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme - make docs + make download-last-reviewed-json + python .jenkins/sphinx_files.py + make postprocess # Step 3.1: Run the post-processing script: python .jenkins/post_process_notebooks.py @@ -118,6 +120,7 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then 7z a worker_${WORKER_ID}.7z docs awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${COMMIT_ID}/worker_${WORKER_ID}.7z elif [[ "${JOB_TYPE}" == "manager" ]]; then + export RUNTHIS="" # Step 1: Generate no-plot HTML pages for all tutorials pip3 install -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme make html-noplot diff --git a/.jenkins/get_files_to_run.py b/.jenkins/get_files_to_run.py index bdf4562a827..38a5d964c89 100644 --- a/.jenkins/get_files_to_run.py +++ b/.jenkins/get_files_to_run.py @@ -3,6 +3,7 @@ import os from pathlib import Path from remove_runnable_code import remove_runnable_code +from validate_tutorials_built import 
NOT_RUN # Calculate repo base dir @@ -96,6 +97,7 @@ def main() -> None: all_files = get_all_files() files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num - 1] + files_to_run = [x for x in files_to_run if x not in [f"{f}.py" for f in NOT_RUN]] if not args.dry_run: remove_other_files(all_files, compute_files_to_keep(files_to_run)) stripped_file_names = [Path(x).stem for x in files_to_run]
diff --git a/.jenkins/sphinx_files.py b/.jenkins/sphinx_files.py
new file mode 100644
index 00000000000..f4168e162fc
--- /dev/null
+++ b/.jenkins/sphinx_files.py
@@ -0,0 +1,53 @@
+import glob
+from pathlib import Path
+import shutil
+import subprocess
+import os
+import time
+from get_files_to_run import remove_other_files, compute_files_to_keep, calculate_shards, get_all_files
+from validate_tutorials_built import NOT_RUN
+
+def print_files(files):
+    """Print how many files will run, then one "- <file>" line per entry."""
+    print(f"Files to run ({len(files)}):")
+    for file in files:
+        print(f"- {file}")
+
+
+def main() -> None:
+    """Build this worker's shard one tutorial at a time and gather the output in _build/html."""
+    all_files = get_all_files()
+    files_to_run = calculate_shards(all_files, num_shards=15)[int(os.environ.get("WORKER_ID", "1")) - 1]
+    skipped = {f"{f}.py" for f in NOT_RUN}  # built once instead of once per candidate file
+    files_to_run = [x for x in files_to_run if x not in skipped]
+    os.makedirs("/tmp/docs_to_zip", exist_ok=True)
+
+    env = os.environ.copy()
+    for file in files_to_run:
+        print(f"Running {file}")
+        start = time.time()
+        remove_other_files(all_files, compute_files_to_keep([file]))
+        stem = Path(file).stem
+        env["RUNTHIS"] = env["FILES_TO_RUN"] = stem  # restrict the build to this one tutorial
+        subprocess.check_output(["make", "download"], env=env)
+        result = subprocess.check_output(["make", "html"], env=env)
+        print(result.decode("utf-8"))
+        subprocess.check_output(["make", "postprocess"], env=env)
+        print("Done running")
+        for built in glob.glob("docs/**/*", recursive=True):  # not `file`: the timing message reports the tutorial
+            if stem in built and os.path.isfile(built):  # the glob also yields directories; shutil.copy needs files
+                relative_path = Path(os.path.relpath(built, "docs"))
+                print(relative_path)
+                print(relative_path.parent)
+                os.makedirs(os.path.dirname(f"/tmp/docs_to_zip/{relative_path}"), exist_ok=True)
+                shutil.copy(built, f"/tmp/docs_to_zip/{relative_path}")
+        subprocess.check_output(["git", "reset", "--hard", "HEAD"])
+        subprocess.check_output(["git", "clean", "-f", "-d"])
+        print(f"Done with {file} in {time.time() - start:.2f} seconds")
+
+    shutil.rmtree("_build", ignore_errors=True)  # tolerate a missing _build, e.g. when the shard was empty
+    os.makedirs("_build", exist_ok=True)
+    shutil.move("/tmp/docs_to_zip", "_build/html")
+
+if __name__ == "__main__":
+    main()
diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index f5cd187dbc6..5c9e60e90bd 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -53,7 +53,6 @@ "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release. "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed. "intermediate_source/torchrec_intro_tutorial", # reenable after 3302 is fixe - "intermediate_source/memory_format_tutorial", # causes other tutorials like torch_logs fail. "state" issue, reseting dynamo didn't help ] def tutorial_source_dirs() -> List[Path]: diff --git a/Makefile b/Makefile index 9068d32b2ab..7721e81a19c 100644 --- a/Makefile +++ b/Makefile @@ -90,15 +90,18 @@ download-last-reviewed-json: @echo "Downloading tutorials-review-data.json..." curl -o tutorials-review-data.json https://raw.githubusercontent.com/pytorch/tutorials/refs/heads/last-reviewed-data-json/tutorials-review-data.json @echo "Finished downloading tutorials-review-data.json."
-docs: - make download - make download-last-reviewed-json - make html + +postprocess: @python .jenkins/insert_last_verified.py $(BUILDDIR)/html rm -rf docs cp -r $(BUILDDIR)/html docs touch docs/.nojekyll - rm -rf tutorials-review-data.json + +docs: + make download + make download-last-reviewed-json + make html + make postprocess html-noplot: $(SPHINXBUILD) -D plot_gallery=0 -b html $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html" diff --git a/conf.py b/conf.py index 05cfa11ca1b..f89ee56d77f 100644 --- a/conf.py +++ b/conf.py @@ -98,24 +98,13 @@ # -- Sphinx-gallery configuration -------------------------------------------- def reset_seeds(gallery_conf, fname): - torch.cuda.empty_cache() - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - torch._dynamo.reset() - torch._inductor.config.force_disable_caches = True - torch.manual_seed(42) - torch.set_default_device(None) - random.seed(10) - numpy.random.seed(10) - torch.set_grad_enabled(True) - - gc.collect() + pass sphinx_gallery_conf = { 'examples_dirs': ['beginner_source', 'intermediate_source', 'advanced_source', 'recipes_source', 'prototype_source'], 'gallery_dirs': ['beginner', 'intermediate', 'advanced', 'recipes', 'prototype'], - 'filename_pattern': re.compile(SPHINX_SHOULD_RUN), + 'filename_pattern': os.getenv("RUNTHIS"), 'promote_jupyter_magic': True, 'backreferences_dir': None, 'first_notebook_cell': ("# For tips on running notebooks in Google Colab, see\n"