diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a6fe8df..2d19244 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,16 +19,19 @@ jobs: with: fetch-depth: 2 + - name: Install uv + uses: astral-sh/setup-uv@v4 + - name: Set up Python - uses: actions/setup-python@v5.3.0 - with: - python-version: "3.10" + run: uv python install - - name: Install uv - uses: astral-sh/setup-uv@v5 + - name: Install dependencies + run: uv sync --all-extras --dev + + - name: Install Quarto + uses: quarto-dev/quarto-actions/setup@v2 with: - # Install a specific version of uv. - version: "0.5.15" + version: "1.5.57" - name: Check if there is a parent commit id: check-parent-commit @@ -63,16 +66,16 @@ jobs: run: | sudo apt-get -y install openssl graphviz nano texlive graphviz-dev unzip build-essential - - name: Install dependencies + - name: build the book run: | - uv sync - - - name: Build book - run: uv run jupyter-book build . --verbose + uv run quarto render --execute + env: + # This forces Quarto to use the Python inside your uv venv + QUARTO_PYTHON: ${{ github.workspace }}/.venv/bin/python - name: Publish if: steps.check-version.outputs.tag - run: uv run ghp-import -n -p -f _build/html + run: uv run quarto publish gh-pages --no-render --no-browser - name: Publish the release notes uses: release-drafter/release-drafter@v6.0.0 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 73d860e..cac3d2d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,8 +1,13 @@ name: tests on: - - pull_request - - push + push: + branches: [main] + pull_request: + +concurrency: + group: tests-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: pre-commit: @@ -22,16 +27,19 @@ jobs: with: fetch-depth: 2 - - name: Set up Python - uses: actions/setup-python@v5.3.0 - with: - python-version: "3.10" - - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v4 
+ + - name: Set up Python + run: uv python install + + - name: Install dependencies + run: uv sync --all-extras --dev + + - name: Install Quarto + uses: quarto-dev/quarto-actions/setup@v2 with: - # Install a specific version of uv. - version: "0.5.2" + version: "1.5.57" - name: set timezone run: | @@ -42,13 +50,9 @@ jobs: run: | sudo apt-get -y install openssl graphviz nano texlive graphviz-dev unzip build-essential - - name: Install dependencies - run: | - uv sync - - name: build the book run: | - uv run jupyter-book build . --verbose + uv run quarto render --execute - name: success run: | diff --git a/.gitignore b/.gitignore index 09507ca..af04659 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,12 @@ # Mac .DS_Store -# jupyterbook -_build/** +# quarto +_book/ +/.quarto/ +**/*.quarto_ipynb +_freeze/ +.panache* # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccf8257..7b71d2a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,3 +28,7 @@ repos: # Run the formatter. - id: ruff-format types_or: [python, pyi, jupyter] + - repo: https://github.com/jolars/panache + rev: v2.32.0 # Use the latest release version + hooks: + - id: panache-format diff --git a/README.md b/README.md index 989893d..2c7dc63 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ When you make a pull request, pre-commit and build will run automatically, and f ## Installing the development environment locally -You will need installations of Python 3.10 and [**uv**](https://docs.astral.sh/uv/). **uv** can be used to install certain distributions of Python through the `uv python install 3.10` command but you can use other Python installations. +You will need installations of Python 3.12 and [**uv**](https://docs.astral.sh/uv/). **uv** can be used to install certain distributions of Python through the `uv python install 3.12` command but you can use other Python installations. Clone this repository. 
@@ -26,15 +26,15 @@ To install the development environment, run `uv sync` from the project root. Thi ## Building the book -The book is compiled from source markdown and Jupyter notebook files [**jupyter-book**](https://jupyterbook.org/en/stable/) package. +The book is compiled from source markdown and Jupyter notebook files using [**Quarto**](https://quarto.org/). You will need Quarto installed on your system — see the [Quarto installation guide](https://quarto.org/docs/get-started/). To build the book, run ```bash -uv run jupyter-book build . +uv run quarto render --execute ``` -Once this command is run, you should be able to look at the HTML files for the book locally on your computer. They will be in `_build`. The project is configured to stop the build if any errors are encountered. This is a frequent occurrence! You'll need to look at the logs to work out what might have gone wrong. +Once this command is run, you should be able to look at the HTML files for the book locally on your computer. They will be in `_book`. The `freeze: auto` setting means only notebooks whose source has changed will be re-executed on subsequent builds. ## Uploading the book @@ -46,10 +46,10 @@ This repo is configured such that new versions automatically build and upload th You shouldn't need to upload the book if you are a regular contributor. There are times when you might need to as an admin, but normally the book will be updated automatically upon release of a new version. -See [here](https://jupyterbook.org/publish/gh-pages.html) for how to upload revised HTML files, but the key command is +First build the book, then publish with: ```bash -uv run ghp-import -n -p -f _build/html +uv run quarto publish gh-pages --no-render --no-browser ``` ## Code hygiene @@ -78,7 +78,7 @@ on your staged files. Ensure pre-commit reports all tests as having passed befor ## Running and developing in a Docker container -There is a Dockerfile associated with this project. 
Pre-reqs +There is a Dockerfile associated with this project. To use it: 1. Pre-reqs: docker installed, VS Code installed, VS Code docker and Remote Explorer extensions installed. @@ -90,5 +90,5 @@ There is a Dockerfile associated with this project. Pre-reqs If you wish to copy any files (eg the built HTML files) back to your local machine to check them, use ```bash -docker cp CONTAINER:app/_build/html/ temp_dir/ +docker cp CONTAINER:app/_book/ temp_dir/ ``` diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 824e3fc..0000000 --- a/_config.yml +++ /dev/null @@ -1,53 +0,0 @@ -# Book settings -# Learn more at https://jupyterbook.org/customize/config.html - -title: Python for Data Science -author: The Py4DS Community -logo: logo.png -exclude_patterns: [_build, Thumbs.db, .DS_Store, "**.ipynb_checkpoints", ".venv", "README.md"] -# Force re-execution of notebooks on each build. -# See https://jupyterbook.org/content/execute.html -execute: - execute_notebooks: force - timeout: 120 - exclude_patterns: ["README.md"] - allow_errors: false - nb_output_stderr: show - - -# Define the name of the latex output file for PDF builds -latex: - latex_documents: - targetname: book.tex - -# Add a bibtex file so that we can create citations -bibtex_bibfiles: - - references.bib - -# Information about where the book exists on the web -repository: - url: https://github.com/aeturrell/python4DS # Online location of your book - branch: main # Which branch of the repository should be used when creating links (optional) - -# Add GitHub buttons to your book -# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository -html: - use_issues_button: true - use_repository_button: true - favicon: "favicon.ico" # A path to a favicon image - analytics: - google_analytics_id: "G-LXJC37BJVX" # A GA id that can be used to track book views. 
- -launch_buttons: - colab_url: "https://colab.research.google.com" - notebook_interface: "jupyterlab" # or "classic" - binderhub_url: "https://mybinder.org" - -sphinx: - config: - html_js_files: - - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js - bibtex_reference_style: author_year - suppress_warnings: ["mystnb.unknown_mime_type"] - nb_execution_show_tb: true - nb_execution_raise_on_error: true # Make build fail any content errors (don't want to publish if errors) diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 0000000..51699c0 --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,94 @@ +project: + type: book + output-dir: _book + +book: + title: "Python for Data Science" + author: "The Py4DS Community" + favicon: favicon.ico + cover-image: logo.png + repo-url: https://github.com/aeturrell/python4DS + repo-branch: main + repo-actions: [edit, issue] + google-analytics: "G-LXJC37BJVX" + sidebar: + style: docked + collapse-level: 1 + chapters: + - index.md + - part: "Introduction" + chapters: + - introduction.ipynb + - prerequisites.ipynb + - part: "Quick Start" + chapters: + - whole-game.ipynb + - data-visualise.ipynb + - workflow-basics.ipynb + - data-transform.ipynb + - workflow-style.ipynb + - data-tidy.ipynb + - workflow-writing-code.ipynb + - data-import.ipynb + - workflow-packages-and-environments.qmd + - workflow-help.qmd + - part: "Visualise" + chapters: + - visualise.qmd + - vis-layers.ipynb + - exploratory-data-analysis.ipynb + - communicate-plots.ipynb + - part: "Transform" + chapters: + - boolean-data.ipynb + - numbers.ipynb + - strings.ipynb + - regex.ipynb + - categorical-data.ipynb + - dates-and-times.ipynb + - missing-values.ipynb + - joins.ipynb + - part: "Import" + chapters: + - spreadsheets.ipynb + - databases.ipynb + - rectangling.ipynb + - webscraping-and-apis.ipynb + - part: "Programme" + chapters: + - functions.ipynb + - iteration.ipynb + - command-line.md + - part: "Communicate" + chapters: + - markdown.md + - 
quarto.qmd + appendices: + - references.md + +bibliography: references.bib +search: true +twitter-card: true + +format: + html: + theme: + light: flatly + dark: superhero + toc: true + toc-depth: 3 + toc-expand: false + html-math-method: katex + code-copy: true + code-link: true + code-overflow: wrap + number-sections: false + include-in-header: + - text: | + + +execute: + freeze: auto + error: false + +jupyter: python3 diff --git a/_toc.yml b/_toc.yml deleted file mode 100644 index 99d5015..0000000 --- a/_toc.yml +++ /dev/null @@ -1,61 +0,0 @@ -# Table of contents - -format: jb-book -root: welcome -parts: -- caption: Introduction - chapters: - - file: introduction - - file: prerequisites -- caption: Quick Start - numbered: true - chapters: - - file: whole-game - - file: data-visualise - - file: workflow-basics - - file: data-transform - - file: workflow-style - - file: data-tidy - - file: workflow-writing-code - - file: data-import - - file: workflow-packages-and-environments - - file: workflow-help -- caption: Visualise - numbered: true - chapters: - - file: visualise - - file: vis-layers - - file: exploratory-data-analysis - - file: communicate-plots -- caption: Transform - numbered: true - chapters: - - file: boolean-data - - file: numbers - - file: strings - - file: regex - - file: categorical-data - - file: dates-and-times - - file: missing-values - - file: joins -- caption: Import - numbered: true - chapters: - - file: spreadsheets - - file: databases - - file: rectangling - - file: webscraping-and-apis -- caption: Programme - numbered: true - chapters: - - file: functions - - file: iteration - - file: command-line -- caption: Communicate - numbered: true - chapters: - - file: markdown - - file: quarto -- caption: Appendix - chapters: - - file: zreferences \ No newline at end of file diff --git a/boolean-data.ipynb b/boolean-data.ipynb index c8ba880..931bbb2 100644 --- a/boolean-data.ipynb +++ b/boolean-data.ipynb @@ -5,8 +5,7 @@ "id": "95f0a171", 
"metadata": {}, "source": [ - "(boolean-data)=\n", - "# Boolean Data\n", + "# Boolean Data {#sec-boolean-data}\n", "\n", "## Introduction\n", "\n", @@ -101,9 +100,9 @@ "id": "e4367753", "metadata": {}, "source": [ - "```{admonition} Exercise\n", + "::: {.callout-tip title=\"Exercise\"}\n", "What does `not (not True)` evaluate to?\n", - "```" + ":::" ] }, { @@ -194,9 +193,9 @@ "id": "e044d14a", "metadata": {}, "source": [ - "```{admonition} Exercise\n", + "::: {.callout-tip title=\"Exercise\"}\n", "Check if \"a\" is in the string \"Walloping weasels\" using `in`. Is \"a\" `in` \"Anodyne\"?\n", - "```" + ":::" ] }, { @@ -235,9 +234,9 @@ "source": [ "Note that this does nothing if the score is between 11 and 90, and prints a message otherwise.\n", "\n", - "```{admonition} Exercise\n", + "::: {.callout-tip title=\"Exercise\"}\n", "Create a new `if` ... `elif` ... `else` statement that prints \"well done\" if a score is over 90, \"good\" if between 40 and 90, and \"bad luck\" otherwise.\n", - "```\n", + ":::\n", "\n", "One nice feature of Python is that you can make multiple boolean comparisons in a single line." ] @@ -715,7 +714,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.12" }, "toc-showtags": true }, diff --git a/categorical-data.ipynb b/categorical-data.ipynb index 7a670c9..5796a26 100644 --- a/categorical-data.ipynb +++ b/categorical-data.ipynb @@ -5,8 +5,7 @@ "id": "95f0a171", "metadata": {}, "source": [ - "(categorical-data)=\n", - "# Categorical Data\n", + "# Categorical Data {#sec-categorical-data}\n", "\n", "## Introduction\n", "\n", @@ -150,7 +149,7 @@ "id": "76dc9dd6", "metadata": {}, "source": [ - "Note that NaNs appear for any value that *isn't* in the categories we specified—you can find more on this in {ref}`missing-values`." + "Note that NaNs appear for any value that *isn't* in the categories we specified—you can find more on this in @sec-missing-values." 
] }, { @@ -379,7 +378,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.12" }, "toc-showtags": true }, diff --git a/command-line.md b/command-line.md index 25fcc67..1079842 100644 --- a/command-line.md +++ b/command-line.md @@ -1,16 +1,4 @@ ---- -jupytext: - formats: md:myst - text_representation: - extension: .md - format_name: myst -kernelspec: - display_name: py4ds2e - language: python - name: python3 ---- -(command-line)= -# The Command Line +# The Command Line {#sec-command-line} In this chapter, you'll meet the *command line* and learn how to use it. Beyond a few key commands like `uv add ` you don't strictly need to know how to use the command line to follow the rest of this book. However, even a tiny bit of knowledge of the command line goes a long way in coding and will serve you well. @@ -79,9 +67,9 @@ here `head` is the command that looks at the start of the file, `-n` is an optio The flags or options, such as `-n` in the example above, typically begin with a dash (`-`) or, occasionally, a double dash (`--`). They can also be chained together, for example `ls -la` combines `ls -a` and `ls -l`. -```{warning} +::: {.callout-warning} Spaces take on a special role when using the command line. For this reason, it's good practice to avoid spaces in file names. If you need to refer to a filename with spaces in, you’ll need to use quotes or escape the spaces in the file names using a `\`, for example `this is my file.txt` becomes `this\ is\ my\ file.txt` -``` +::: To run programmes from the command line, all you need is the name of the programme as the command: in fact, commands *are* programmes. The `date` command refers to an actual programme on your computer that you can find. And this also explains a bit of what's going on when you *run a script from the command line* (more on that later). 
@@ -111,14 +99,14 @@ To find out "where" you are when you open a terminal, you can use the `pwd` comm The table below shows some useful commands for moving around your computer using the command line. Note that `cd` accepts a location *relative* to your current directory. -| Command | What it does | -| ----------- | ----------- | -| `pwd` | Shows current directory | -| `cd` | Change directory command | -| `cd ..` | Go up one level in the directory (`cd ../..` for two levels) | -| `cd ~` | Go to your home directory | -| `cd -` | Go to the previous directory | -| `cd documents/papers` | Go directly to a directory named 'papers' | + | Command | What it does | + | --------------------- | ------------------------------------------------------------ | + | `pwd` | Shows current directory | + | `cd` | Change directory command | + | `cd ..` | Go up one level in the directory (`cd ../..` for two levels) | + | `cd ~` | Go to your home directory | + | `cd -` | Go to the previous directory | + | `cd documents/papers` | Go directly to a directory named 'papers' | ## Using Python on the command line @@ -146,39 +134,39 @@ which python Now we'll see some useful commands for the terminal. -| Command                                          | What it does | -| ----------- | ----------- | -| `man ` | Shows a manual for the given command | -| `touch ` | Creates an empty file named `` | -| `code ` | Open a file in VS Code (creating it, if it does not exist) | -| `mkdir ` | creates a new folder called `foldername`| -| `echo ` | Prints `` | -| `cat ` | Print the full contents of `` | -| `head ` | Print the start of a file | -| `tail ` | Print the end of a file | -| `> ` | Redirects output from screen to ``. For example, `echo "Hello World" > hello.txt` | -| `>> ` | Redirects output from screen to the end of ``, ie appends output rather than overwrites it | -| `|` | The pipe symbol: uses output from one command as input into another. 
For example, `head -n 10 data.csv | > hello_world.txt` would write the first 10 lines of data.csv into a file called hello_world.txt -| `less ` | Print out the contents of a file in paginated form. Use `ctrl+v` and `Alt+v` (or `⌘+v` and `⌥+v` on Mac) to move up and down. Press `q` to quit.| -| `wc -l` | Returns number of lines in input, for example `cat | wc -l`. Use `wc` alone for word count. | -| `sort` | Arrange lines in a file in alphabetical order | -| `uniq` | Remove duplicate lines from input, for example `cat | uniq` or `uniq -d` to show duplicate files | -| `mv` | Move or rename a file; for example, `mv file1 file2` would rename `file1` to `file2` while `mv file1 ~` would move `file1` to the home directory| -| `cp` | Copy a file; for example, `cp file1 file2` would copy `file1` to `file2` while `cp file1 ~` would make a copy of `file1` in the home directory| -| `rm ` | Permanently remove a file | -| `rmdir ` | Permanently remove an empty directory | -| `rm -rf ` | ⚠ Permanently remove everything in a directory ⚠ | -| `grep ` | Search for a given term, for example `cat hello_world.txt | grep world` | -| `ls` | Basically, this means list stuff (files and folders) in the current directory | -| `ls -a` | List stuff in the current directory even if it's hidden | -| `ls -l` | List stuff in a more readable format and show permissions | -| `ls -S` | List stuff by size | -| `file ` | Give information on the file type of ``| -| `find` | Find specific files on your computer, can be piped into other commands for example `find *.md -size +5k -type f | xargs wc -l` will count the number of lines `wc -l` of all files, `-type f`, ending in `.md` that are greater than 5 kilobytes in size, `-size +5k`. | -| `diff -u ` | Show a single summary of the differences between two files. 
| + | Command                                          | What it does | + | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | + | `man ` | Shows a manual for the given command | + | `touch ` | Creates an empty file named `` | + | `code ` | Open a file in VS Code (creating it, if it does not exist) | + | `mkdir ` | creates a new folder called `foldername` | + | `echo ` | Prints `` | + | `cat ` | Print the full contents of `` | + | `head ` | Print the start of a file | + | `tail ` | Print the end of a file | + | `> ` | Redirects output from screen to ``. For example, `echo "Hello World" > hello.txt` | + | `>> ` | Redirects output from screen to the end of ``, ie appends output rather than overwrites it | + | ` | ` | The pipe symbol: uses output from one command as input into another. For example, `head -n 10 data.csv | > hello_world.txt` would write the first 10 lines of data.csv into a file called hello_world.txt | + | `less ` | Print out the contents of a file in paginated form. Use `ctrl+v` and `Alt+v` (or `⌘+v` and `⌥+v` on Mac) to move up and down. Press `q` to quit. | + | `wc -l` | Returns number of lines in input, for example `cat | wc -l`. Use `wc` alone for word count. 
| + | `sort` | Arrange lines in a file in alphabetical order | + | `uniq` | Remove duplicate lines from input, for example `cat | uniq` or `uniq -d` to show duplicate files | + | `mv` | Move or rename a file; for example, `mv file1 file2` would rename `file1` to `file2` while `mv file1 ~` would move `file1` to the home directory | + | `cp` | Copy a file; for example, `cp file1 file2` would copy `file1` to `file2` while `cp file1 ~` would make a copy of `file1` in the home directory | + | `rm ` | Permanently remove a file | + | `rmdir ` | Permanently remove an empty directory | + | `rm -rf ` | ⚠ Permanently remove everything in a directory ⚠ | + | `grep ` | Search for a given term, for example `cat hello_world.txt | grep world` | + | `ls` | Basically, this means list stuff (files and folders) in the current directory | + | `ls -a` | List stuff in the current directory even if it's hidden | + | `ls -l` | List stuff in a more readable format and show permissions | + | `ls -S` | List stuff by size | + | `file ` | Give information on the file type of `` | + | `find` | Find specific files on your computer, can be piped into other commands for example `find *.md -size +5k -type f | xargs wc -l` will count the number of lines `wc -l` of all files, `-type f`, ending in `.md` that are greater than 5 kilobytes in size, `-size +5k`. | + | `diff -u ` | Show a single summary of the differences between two files. | ![More details of the grep command](https://pbs.twimg.com/media/DcPeD_CW0AEkSar?format=jpg&name=small) -*More details of the grep command, by [@b0rk](https://twitter.com/b0rk).* +*More details of the grep command, by [\@b0rk](https://twitter.com/b0rk).* You can write for loops in bash (remember, it's a language). 
The general structure is diff --git a/communicate-plots.ipynb b/communicate-plots.ipynb index 01b7435..8531e79 100644 --- a/communicate-plots.ipynb +++ b/communicate-plots.ipynb @@ -5,14 +5,13 @@ "id": "95f0a171", "metadata": {}, "source": [ - "(communicate-plots)=\n", - "# Graphics for Communication\n", + "# Graphics for Communication {#sec-communicate-plots}\n", "\n", "## Introduction\n", "\n", "In this chapter, you'll learn about using visualisation to communicate.\n", "\n", - "In {ref}`exploratory-data-analysis`, you learned how to use plots as tools for *exploration*.\n", + "In @sec-exploratory-data-analysis, you learned how to use plots as tools for *exploration*.\n", "When you make exploratory plots, you know—even before looking—which variables the plot will display.\n", "You made each plot for a purpose, quickly looked at it, and then moved on to the next plot.\n", "In the course of most analyses, you'll produce tens or hundreds of plots, most of which are immediately thrown away.\n", @@ -771,7 +770,7 @@ }, "outputs": [], "source": [ - "# remove-input\n", + "# | echo: false\n", "cmaps = [\n", " (\n", " \"Perceptually Uniform Sequential\",\n", @@ -1332,7 +1331,7 @@ "main_language": "python" }, "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1346,7 +1345,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.12.12" }, "toc-showtags": true }, diff --git a/conversion_scripts/fix_kernels.py b/conversion_scripts/fix_kernels.py new file mode 100644 index 0000000..2c33ab8 --- /dev/null +++ b/conversion_scripts/fix_kernels.py @@ -0,0 +1,33 @@ +import glob + +import nbformat + +# Find all notebooks +notebooks = glob.glob("**/*.ipynb", recursive=True) + +for nb_path in notebooks: + if ".ipynb_checkpoints" in nb_path: + continue + + with open(nb_path, "r", encoding="utf-8") as f: + try: + nb = nbformat.read(f, 
as_version=4) + print(f"Fixing {nb_path}...") + + # Wipe and replace kernelspec + nb.metadata["kernelspec"] = { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3", + } + + # Wipe language_info version (this prevents the 3.10.17 mismatch) + if "language_info" in nb.metadata: + nb.metadata["language_info"]["version"] = "3" + + with open(nb_path, "w", encoding="utf-8") as f_out: + nbformat.write(nb, f_out) + except Exception as e: + print(f"Could not process {nb_path}: {e}") + +print("Successfully standardized all notebooks.") diff --git a/conversion_scripts/jb_to_quarto.py b/conversion_scripts/jb_to_quarto.py new file mode 100644 index 0000000..3eec10a --- /dev/null +++ b/conversion_scripts/jb_to_quarto.py @@ -0,0 +1,261 @@ +"""Convert MyST/Jupyter Book syntax to Quarto syntax. + +Handles both .qmd (markdown) files and .ipynb (notebook) files. +Idempotent: running twice on an already-converted file is a no-op. + +Usage: + python conversion_scripts/jb_to_quarto.py --dry-run *.qmd + python conversion_scripts/jb_to_quarto.py --apply *.qmd *.ipynb +""" + +import argparse +import difflib +import json +import re +import sys +from pathlib import Path + +# --- Title → callout-type mapping for {admonition} --- +TITLE_MAP = { + "exercise": "tip", + "exercises": "tip", + "tip": "tip", + "tips": "tip", + "important": "important", + "warning": "warning", + "caution": "caution", + "note": "note", + "hint": "tip", +} + + +def convert_text(text: str) -> str: + """Apply all MyST→Quarto transformations to a block of text.""" + lines = text.split("\n") + out_lines = [] + i = 0 + + # Track open callout fences for proper closing + fence_stack = [] # list of (backtick_count, is_callout) + + while i < len(lines): + line = lines[i] + + # --- Pass A: Label definitions --- + # Pattern: (label-name)= on its own line, followed by a heading + label_match = re.match(r"^\(([a-zA-Z0-9_-]+)\)=\s*$", line) + if label_match and i + 1 < len(lines): + label = label_match.group(1) + 
next_line = lines[i + 1] + heading_match = re.match(r"^(#{1,6})\s+(.+)$", next_line) + if heading_match: + hashes = heading_match.group(1) + title = heading_match.group(2) + # Remove any existing {#...} from title + title = re.sub(r"\s*\{#[^}]+\}\s*$", "", title) + out_lines.append(f"{hashes} {title} {{#sec-{label}}}") + i += 2 + continue + # Label not followed by heading — emit as-is (shouldn't happen often) + + # --- Pass B: Fence-aware callout conversion --- + # Match opening fences: ```{note}, ````{admonition} Title, etc. + fence_open = re.match( + r"^(`{3,})\{(note|tip|warning|caution|important|exercise|solution)\}\s*(.*)$", + line, + ) + admonition_open = re.match(r"^(`{3,})\{admonition\}\s*(.+)$", line) + + if fence_open: + backticks = fence_open.group(1) + directive = fence_open.group(2) + extra = fence_open.group(3).strip() + + if directive == "solution": + callout_type = "note" + title_attr = ' title="Solution" collapse="true"' + elif directive == "exercise": + callout_type = "tip" + title_attr = ' title="Exercise"' + else: + callout_type = directive + title_attr = "" + + if extra: + title_attr = f' title="{extra}"' + + out_lines.append(f"::: {{.callout-{callout_type}{title_attr}}}") + fence_stack.append((len(backticks), True)) + i += 1 + continue + + elif admonition_open: + backticks = admonition_open.group(1) + title = admonition_open.group(2).strip() + callout_type = TITLE_MAP.get(title.lower(), "note") + out_lines.append(f'::: {{.callout-{callout_type} title="{title}"}}') + fence_stack.append((len(backticks), True)) + i += 1 + continue + + # Match code-cell fences (MyST executable cells in .md files) + code_cell_match = re.match(r"^(`{3,})\{code-cell\}(?: ipython3)?\s*$", line) + if code_cell_match: + backticks = code_cell_match.group(1) + out_lines.append(f"{backticks}{{python}}") + fence_stack.append((len(backticks), False)) + i += 1 + continue + + # Match closing fences + close_fence = re.match(r"^(`{3,})\s*$", line) + if close_fence and 
fence_stack: + backtick_count = len(close_fence.group(1)) + # Check if this closes the most recent fence + top_count, is_callout = fence_stack[-1] + if backtick_count >= top_count: + fence_stack.pop() + if is_callout: + out_lines.append(":::") + i += 1 + continue + # For code-cell→python fences, keep the closing backticks + out_lines.append(line) + i += 1 + continue + + # --- Pass A continued: Inline substitutions --- + converted = line + + # Citations: {cite:t}`key` → @key + converted = re.sub(r"\{cite:t\}`([^`]+)`", r"@\1", converted) + # Citations: {cite:p}`key` → [@key] + converted = re.sub(r"\{cite:p\}`([^`]+)`", r"[@\1]", converted) + # Citations: {cite:ps}`key` → [@key] + converted = re.sub(r"\{cite:ps\}`([^`]+)`", r"[@\1]", converted) + + # Cross-references: {ref}`text