diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 138b7df..70b3673 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -73,6 +73,8 @@ jobs: with: name: khisto-python-docs path: ./docs/_build/html + - name: Checkout doc sources (to get the current environment) + uses: actions/checkout@v6 - name: Deploy web site run: | # Install tool for pushing to GH pages diff --git a/docs/api_comparison.rst b/docs/api_comparison.rst deleted file mode 100644 index 4dc73ab..0000000 --- a/docs/api_comparison.rst +++ /dev/null @@ -1,351 +0,0 @@ -API Comparison -============== - -This document compares the current Khisto APIs with NumPy and Matplotlib. - -NumPy Comparison ----------------- - -``numpy.histogram`` vs ``khisto.histogram`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Khisto's ``histogram`` function is designed as a drop-in replacement for ``numpy.histogram``. - -Signature Comparison -"""""""""""""""""""" - -.. code-block:: python - - # NumPy - numpy.histogram( - a, - bins=10, - range=None, - density=None, - weights=None, - ) - - # Khisto - khisto.histogram( - a, - range=None, - max_bins=None, - density=False, - ) - -Key Differences -""""""""""""""" - -.. list-table:: - :header-rows: 1 - :widths: 20 40 40 - - * - Feature - - NumPy - - Khisto - * - **Binning method** - - Fixed-width bins - - Optimal variable-width bins - * - **Bins parameter** - - ``bins`` (int or edges) - - ``max_bins`` (optional limit) - * - **Default bins** - - 10 fixed bins - - Auto-determined optimal - * - **Weights support** - - Yes - - No - * - **Returns** - - ``(hist, bin_edges)`` - - ``(hist, bin_edges)`` - -Usage Comparison -"""""""""""""""" - -.. code-block:: python - - import numpy as np - from khisto import histogram - - data = np.random.normal(0, 1, 1000) - - # NumPy - fixed 10 bins - np_hist, np_edges = np.histogram(data) - - # Khisto - optimal bins (automatic) - khisto_hist, khisto_edges = histogram(data) - - # NumPy - specified bin count - np_hist, np_edges = np.histogram(data, bins=20) - - # Khisto - maximum bin count - khisto_hist, khisto_edges = histogram(data, max_bins=20) - - # Both support density normalization - np_density, _ = np.histogram(data, density=True) - khisto_density, _ = histogram(data, density=True) - - # Both support range specification - np_hist, _ = np.histogram(data, range=(-2, 2)) - khisto_hist, _ = histogram(data, range=(-2, 2)) - -When to Use Each -"""""""""""""""" - -.. list-table:: - :header-rows: 1 - :widths: 50 50 - - * - Use NumPy - - Use Khisto - * - Need fixed-width bins - - Want optimal data representation - * - Need weighted histograms - - Want automatic bin selection - * - Need specific bin edges - - Want adaptive bin widths - * - Performance-critical loops - - Data visualization - ----- - -Matplotlib Comparison ---------------------- - -``matplotlib.pyplot.hist`` vs ``khisto.matplotlib.hist`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Khisto's ``hist`` function works similarly to matplotlib's ``hist``, but with optimal binning. - -Signature Comparison -"""""""""""""""""""" - -.. code-block:: python - - # Matplotlib - matplotlib.pyplot.hist( - x, - bins=10, - range=None, - density=False, - weights=None, - cumulative=False, - bottom=None, - histtype='bar', - align='mid', - orientation='vertical', - rwidth=None, - log=False, - color=None, - label=None, - stacked=False, - **kwargs, - ) - - # Khisto - khisto.matplotlib.hist( - x, - range=None, - max_bins=None, - density=False, - cumulative=False, - histtype='bar', - orientation='vertical', - log=False, - color=None, - label=None, - ax=None, - edgecolor=None, - linewidth=None, - alpha=None, - **kwargs, - ) - -Key Differences -""""""""""""""" - -.. list-table:: - :header-rows: 1 - :widths: 20 40 40 - - * - Feature - - Matplotlib - - Khisto - * - **Binning** - - Fixed-width - - Optimal variable-width - * - **Bins param** - - ``bins`` - - ``max_bins`` - * - **Axes param** - - Implicit (current) - - Optional ``ax`` parameter - * - **Cumulative** - - Supported - - Supported - * - **Reverse cumulative** - - Supported with negative ``cumulative`` - - Supported with negative ``cumulative`` - * - **Stacked** - - Supported - - Not supported - * - **Weights** - - Supported - - Not supported (not relevant to the Khiops approach) - * - **Unsupported histogram args** - - None - - ``bins``, ``stacked``, and ``weights`` raise a ``TypeError`` - * - **Multiple datasets** - - Supported - - Not supported. Only 1-D arrays are accepted. - -Usage Comparison -"""""""""""""""" - -.. code-block:: python - - import numpy as np - import matplotlib.pyplot as plt - from khisto.matplotlib import hist - - data = np.random.normal(0, 1, 1000) - - # Matplotlib - fixed bins - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) - - ax1.hist(data, bins=30) - ax1.set_title('Matplotlib (30 bins)') - - hist(data, ax=ax2) - ax2.set_title('Khisto (optimal bins)') - - plt.tight_layout() - plt.show() - -Common Parameters (Same Behavior) -"""""""""""""""""""""""""""""""""" - -.. code-block:: python - - # Both support these parameters identically: - - # density normalization - plt.hist(data, density=True) - hist(data, density=True) - - # cumulative view - plt.hist(data, density=True, cumulative=True) - hist(data, density=True, cumulative=True) - - # reverse cumulative view - plt.hist(data, cumulative=-1) - hist(data, cumulative=-1) - - # histogram type - plt.hist(data, histtype='step') - hist(data, histtype='step') - - # orientation - plt.hist(data, orientation='horizontal') - hist(data, orientation='horizontal') - - # log scale - plt.hist(data, log=True) - hist(data, log=True) - - # color and label - plt.hist(data, color='blue', label='Data') - hist(data, color='blue', label='Data') - ----- - -Migration Guide ---------------- - -From NumPy -^^^^^^^^^^ - -.. code-block:: python - - # Before (NumPy) - import numpy as np - hist, edges = np.histogram(data, bins=30) - - # After (Khisto) - from khisto import histogram - hist, edges = histogram(data, max_bins=30) # max_bins is optional - -From Matplotlib -^^^^^^^^^^^^^^^ - -.. code-block:: python - - # Before (Matplotlib) - import matplotlib.pyplot as plt - n, bins, patches = plt.hist(data, bins=30) - - # After (Khisto) - from khisto.matplotlib import hist - n, bins, patches = hist(data, max_bins=30) # max_bins is optional - ----- - -Feature Matrix --------------- - -.. list-table:: - :header-rows: 1 - :widths: 25 15 15 15 - - * - Feature - - NumPy - - Matplotlib - - Khisto - * - Fixed-width bins - - Yes - - Yes - - No - * - Optimal bins - - No - - No - - Yes - * - Variable-width bins - - Manual - - Manual - - Auto - * - Density - - Yes - - Yes - - Yes - * - Range - - Yes - - Yes - - Yes - * - Weights - - Yes - - Yes - - No - * - Cumulative - - No - - Yes - - Yes - * - Reverse cumulative - - No - - Yes - - Yes - * - Plotting - - No - - Yes - - Yes - * - Step histogram - - No - - Yes - - Yes - * - Horizontal - - No - - Yes - - Yes - * - Log scale - - No - - Yes - - Yes diff --git a/docs/index.rst b/docs/index.rst index 224ac1d..b1bb828 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -98,5 +98,4 @@ Get started :caption: Guides :hidden: - API Comparison Demo