From ffca731dc239ca90745e2b62ecc4c6209703b534 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 19:57:49 -0700
Subject: [PATCH 01/24] Makefile.am: adding radmeth_model.cpp to sources

---
 Makefile.am | 1 +
 1 file changed, 1 insertion(+)
diff --git a/Makefile.am b/Makefile.am
index 7ebf1d31..068ffad6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -236,6 +236,7 @@ dnmtools_SOURCES += src/amrfinder/amrtester.cpp
 dnmtools_SOURCES += src/radmeth/dmr.cpp
 dnmtools_SOURCES += src/radmeth/methdiff.cpp
 dnmtools_SOURCES += src/radmeth/radmeth_optimize.cpp
+dnmtools_SOURCES += src/radmeth/radmeth_model.cpp
 dnmtools_SOURCES += src/radmeth/radmeth.cpp
 dnmtools_SOURCES += src/radmeth/radmeth-adjust.cpp
 dnmtools_SOURCES += src/radmeth/radmeth-merge.cpp

From c5d8645bc93edc321201057f0880b70b48522fbd Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 19:58:09 -0700
Subject: [PATCH 02/24] src/analysis/nanopore.cpp: removing a debug variable

---
 src/analysis/nanopore.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/analysis/nanopore.cpp b/src/analysis/nanopore.cpp
index d8886c5b..6eb3ba53 100644
--- a/src/analysis/nanopore.cpp
+++ b/src/analysis/nanopore.cpp
@@ -760,7 +760,6 @@ consistent_existing_targets(
   return true;
 }
 
-int counter{};
 struct read_processor {
   static constexpr auto default_expected_basecall_model =
     "dna_r10.4.1_e8.2_400bps_hac@v4.3.0";

From 9fd69ad60f26fc1566f727b459090b54b8fc671e Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 19:59:17 -0700
Subject: [PATCH 03/24] src/radmeth/radmeth_optimize.cpp: major rewrite for
 speed within the existing approach via general optmizers

---
 src/radmeth/radmeth_optimize.cpp | 265 +++++++++++++++++--------------
 1 file changed, 146 insertions(+), 119 deletions(-)

diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp
index b1942328..e957cc6b 100644
--- a/src/radmeth/radmeth_optimize.cpp
+++ b/src/radmeth/radmeth_optimize.cpp
@@ -21,179 +21,206 @@
 #include <gsl/gsl_multimin.h>
 #include <gsl/gsl_vector.h>
 
+#include <iomanip>
+#include <iostream>
+#include <numeric>
 #include <stdexcept>
 #include <vector>
 
-using std::vector;
-using std::runtime_error;
-
-static double
-pi(Regression *reg, size_t sample, const gsl_vector *params) {
+[[nodiscard]] static double
+pi(const std::vector<double> &v, const gsl_vector *params) {
   // ADS: this function doesn't have a very helpful name
-  double dot_prod = 0;
-  for (size_t fact = 0; fact < reg->design.n_factors(); ++fact)
-    dot_prod += reg->design.matrix[sample][fact]*gsl_vector_get(params, fact);
-
-  double p = exp(dot_prod)/(1 + exp(dot_prod));
-
-  return p;
+  const auto a = v.data();
+  const double dot = std::inner_product(a, a + std::size(v), params->data, 0.0);
+  // const double p = -1.0 / std::expm1(-dot_prod); ?
+  return std::exp(dot) / (1.0 + std::exp(dot));
 }
 
-
-static double
+[[nodiscard]] static double
 neg_loglik(const gsl_vector *params, void *object) {
   Regression *reg = (Regression *)(object);
-  const size_t n_params = reg->design.factor_names.size() + 1;
-
-  double log_lik = 0;
 
   // ADS: the dispersion parameter phi is the last element of
   // parameter vector
-  const double disp_param = gsl_vector_get(params, n_params - 1);
-  const double phi = exp(disp_param)/(1.0 + exp(disp_param));
-
-  for(size_t s = 0; s < reg->design.n_samples(); ++s) {
-    const double n_s = reg->props.total[s];
-    const double y_s = reg->props.meth[s];
-    const double p_s = pi(reg, s, params);
-
-    for (int k = 0; k < y_s; ++k)
-      log_lik += log((1 - phi)*p_s + phi*k);
+  const double disp_param = gsl_vector_get(params, reg->design.n_factors());
+  const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param));
+  const double one_minus_phi = 1.0 - phi;
 
-    for (int k = 0; k < n_s - y_s; ++k)
-      log_lik += log((1 - phi)*(1 - p_s) + phi*k);
+  const auto &mc = reg->props.mc;
 
-    for (int k = 0; k < n_s; ++k)
-      log_lik -= log(1.0 + phi*(k - 1));
+  double log_lik = 0;
+  const std::size_t n_samples = reg->design.n_samples();
+  for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) {
+    const double n = mc[col_idx].n_reads;
+    const double y = mc[col_idx].n_meth;
+    const double p = pi(reg->design.matrix[col_idx], params);
+    const double one_minus_p = 1.0 - p;
+
+    const double term1 = one_minus_phi * p;
+    for (int k = 0; k < y; ++k)
+      log_lik += std::log(term1 + phi * k);
+
+    const double term2 = one_minus_phi * one_minus_p;
+    for (int k = 0; k < n - y; ++k)
+      log_lik += std::log(term2 + phi * k);
+
+    for (int k = 0; k < n; ++k)
+      log_lik -= std::log(1.0 + phi * (k - 1.0));
   }
-
   return -log_lik;
 }
 
-
 static void
-neg_gradient(const gsl_vector *params, void *object,
-             gsl_vector *output) {
-
+neg_gradient(const gsl_vector *params, void *object, gsl_vector *output) {
   Regression *reg = (Regression *)(object);
-  const size_t n_params = reg->design.n_factors() + 1;
+  const std::size_t n_samples = reg->design.n_samples();
+  const std::size_t n_factors = reg->design.n_factors();
 
-  const double disp_param = gsl_vector_get(params, n_params - 1);
+  std::vector<double> p_v(n_samples, 0.0);
+  for (auto col_idx = 0u; col_idx < n_samples; ++col_idx)
+    p_v[col_idx] = pi(reg->design.matrix[col_idx], params);
 
-  const double phi = exp(disp_param)/(1 + exp(disp_param));
+  const double disp_param = gsl_vector_get(params, n_factors);
 
-  for(size_t f = 0; f < n_params; ++f) {
+  const auto &tmatrix = reg->design.tmatrix;
+  const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param));
+  // const double phi = -1.0 / std::expm1(-disp_param);
+  const double one_minus_phi = 1.0 - phi;
 
-    double deriv = 0;
+  const auto &mc = reg->props.mc;
 
-    for(size_t s = 0; s < reg->design.n_samples(); ++s) {
-      int n_s = reg->props.total[s];
-      int y_s = reg->props.meth[s];
-      double p_s = pi(reg, s, params);
+  for (std::size_t f = 0; f < n_factors; ++f) {
+    double deriv = 0;
+    const auto &vec = tmatrix[f];
+    for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) {
+      const double n = mc[col_idx].n_reads;
+      const double y = mc[col_idx].n_meth;
+      const double p = p_v[col_idx];
+      const double one_minus_p = 1.0 - p;
+
+      const double denom_term1 = one_minus_phi * p;
+      const double factor = denom_term1 * one_minus_p * vec[col_idx];
+      if (factor == 0)
+        continue;
 
       double term = 0;
 
-      //a parameter linked to p
-      if(f < reg->design.factor_names.size()) {
-        double factor = (1 - phi)*p_s*(1 - p_s)*reg->design.matrix[s][f];
-        if (factor == 0) continue;
-
-        for(int k = 0; k < y_s; ++k)
-          term += 1/((1 - phi)*p_s + phi*k);
+      const auto accumulate_term = [&](int k, const int lim,
+                                       const double denom_base) {
+        for (; k < lim; ++k)
+          term += 1.0 / (denom_base + phi * k);
+      };
 
-        for(int k = 0; k < n_s - y_s; ++k)
-          term -= 1/((1 - phi)*(1 - p_s) + phi*k);
+      const auto accumulate_subtract_term = [&](int k, const int lim,
+                                                const double denom_base) {
+        for (; k < lim; ++k)
+          term -= 1.0 / (denom_base + phi * k);
+      };
 
-        deriv += term*factor;
-      } else { // the parameter linked to phi
-        for(int k = 0; k < y_s; ++k)
-          term += (k - p_s)/((1 - phi)*p_s + phi*k);
+      accumulate_term(0, y, denom_term1);
+      accumulate_subtract_term(0, n - y, one_minus_phi * one_minus_p);
 
-        for(int k = 0; k < n_s - y_s; ++k)
-          term += (k - (1 - p_s))/((1 - phi)*(1 - p_s) + phi*k);
-
-        for(int k = 0; k < n_s; ++k) {
-          term -= (k - 1)/(1 + phi*(k - 1));
-        }
-
-        deriv += term * phi * (1 - phi);
-      }
+      deriv += term * factor;
     }
-
     gsl_vector_set(output, f, deriv);
   }
 
+  double deriv = 0;
+  for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) {
+    const double n = mc[col_idx].n_reads;
+    const double y = mc[col_idx].n_meth;
+    const double p = p_v[col_idx];
+    const double one_minus_p = 1.0 - p;
+
+    double term = 0;
+    const auto accumulate_term = [&](int k, const int lim,
+                                     const double num_shift,
+                                     const double denom_base) {
+      for (; k < lim; ++k)
+        term += (k - num_shift) / (denom_base + phi * k);
+    };
+    const auto accumulate_subtract_term = [&](int k, const int lim) {
+      for (; k < lim; ++k)
+        term -= (k - 1.0) / (1.0 + phi * (k - 1.0));
+    };
+
+    accumulate_term(0, y, p, one_minus_phi * p);
+    accumulate_term(0, n - y, one_minus_p, one_minus_phi * one_minus_p);
+    accumulate_subtract_term(0, n);
+
+    deriv += term * phi * one_minus_phi;
+  }
+  gsl_vector_set(output, n_factors, deriv);
   gsl_vector_scale(output, -1.0);
 }
 
 static void
-neg_loglik_and_grad(const gsl_vector *params,
-                    void *object,
-                    double *loglik_val,
+neg_loglik_and_grad(const gsl_vector *params, void *object, double *loglik_val,
                     gsl_vector *d_loglik_val) {
-
   *loglik_val = neg_loglik(params, object);
   neg_gradient(params, object, d_loglik_val);
 }
 
-
 bool
-fit_regression_model(Regression &r, vector<double> &initial_params) {
+fit_regression_model(Regression &r, std::vector<double> &params_init) {
+  const auto tolerance = Regression::tolerance;
+  const auto stepsize = Regression::stepsize;
+  const auto max_iter = Regression::max_iter;
 
   // one more than the number of factors
-  const size_t n_params = r.n_factors() + 1;
-  if (initial_params.empty()) {
-    initial_params.resize(n_params, 0.0);
-    initial_params.back() = -2.5;
+  const std::size_t n_params = r.n_factors() + 1;
+  if (params_init.empty()) {
+    params_init.resize(n_params, 0.0);
+    params_init.back() = -2.5;
   }
 
-  if (initial_params.size() != n_params)
-    throw runtime_error("Wrong number of initial parameters.");
-
-  int status = 0;
+  if (params_init.size() != n_params)
+    throw std::runtime_error("Wrong number of initial parameters.");
 
-  size_t iter = 0;
+  // clang-format off
+  gsl_multimin_function_fdf loglik_bundle = {
+    &neg_loglik,              // objective function
+    &neg_gradient,            // gradient
+    &neg_loglik_and_grad,     // combined obj and grad
+    n_params,                 // number of model params
+    static_cast<void *>(&r)   // parameters for objective and gradient functions
+  };
+  // clang-format on
 
-  {
-    gsl_multimin_function_fdf loglik_bundle;
-    loglik_bundle.f = &neg_loglik;
-    loglik_bundle.df = &neg_gradient;
-    loglik_bundle.fdf = &neg_loglik_and_grad;
-    loglik_bundle.n = n_params;
-    loglik_bundle.params = (void *)&r;
+  // Alternatives:
+  // - gsl_multimin_fdfminimizer_conjugate_pr
+  // - gsl_multimin_fdfminimizer_conjugate_fr
+  // - gsl_multimin_fdfminimizer_vector_bfgs2
+  // - gsl_multimin_fdfminimizer_steepest_descent
+  const gsl_multimin_fdfminimizer_type *minimizer =
+    gsl_multimin_fdfminimizer_conjugate_fr;
 
-    gsl_vector *params = gsl_vector_alloc(n_params);
+  gsl_multimin_fdfminimizer *s =
+    gsl_multimin_fdfminimizer_alloc(minimizer, n_params);
 
-    for (size_t param = 0; param < initial_params.size(); ++param)
-      gsl_vector_set(params, param, initial_params[param]);
+  gsl_vector *params = gsl_vector_alloc(n_params);
+  for (std::size_t i = 0; i < n_params; ++i)
+    gsl_vector_set(params, i, params_init[i]);
 
-    //can also try gsl_multimin_fdfminimizer_conjugate_pr;
-    const gsl_multimin_fdfminimizer_type *T = gsl_multimin_fdfminimizer_conjugate_fr;
+  gsl_multimin_fdfminimizer_set(s, &loglik_bundle, params, stepsize, tolerance);
 
-    gsl_multimin_fdfminimizer *s = gsl_multimin_fdfminimizer_alloc (T, n_params);
-
-    // ADS: 0.001 is the step size and 1e-4 is the tolerance
-    // get the minimizer
-    gsl_multimin_fdfminimizer_set(s, &loglik_bundle, params, 0.001, 1e-4);
-
-    do {
-      iter++;
-      // do one iteration and get the status
-      status = gsl_multimin_fdfminimizer_iterate(s);
-      if (status)
-        break;
-      // check the status based on gradient
-      status = gsl_multimin_test_gradient (s->gradient, 1e-4);
-    }
-    while (status == GSL_CONTINUE && iter < 700);
-    // It it reasonable to reduce the number of iterations to 500?
-    // ADS: 700 vs. 500? what's the difference?
-
-    r.max_loglik = (-1)*neg_loglik(s->x, &r);
-
-    gsl_multimin_fdfminimizer_free(s);
-    gsl_vector_free(params);
-  }
+  int status = 0;
+  std::size_t iter = 0;
+
+  do {
+    status = gsl_multimin_fdfminimizer_iterate(s);  // one iter and get status
+    if (status)
+      break;
+    // check status from gradient
+    status = gsl_multimin_test_gradient(s->gradient, tolerance);
+  } while (status == GSL_CONTINUE && ++iter < max_iter);
+  // It it reasonable to reduce the number of iterations to 500?
+  // ADS: 700 vs. 500? what's the difference?
+
+  r.max_loglik = -1.0 * neg_loglik(s->x, &r);
+
+  gsl_multimin_fdfminimizer_free(s);
+  gsl_vector_free(params);
 
   return status == GSL_SUCCESS;
 }

From 85d9818dec6467e71a759e739fa2e2f446fa4d4a Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 19:59:47 -0700
Subject: [PATCH 04/24] src/radmeth/radmeth_model.cpp: adding the
 radmeth_model.cpp source file

---
 src/radmeth/radmeth_model.cpp | 103 ++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 src/radmeth/radmeth_model.cpp

diff --git a/src/radmeth/radmeth_model.cpp b/src/radmeth/radmeth_model.cpp
new file mode 100644
index 00000000..53778283
--- /dev/null
+++ b/src/radmeth/radmeth_model.cpp
@@ -0,0 +1,103 @@
+/* Copyright (C) 2025 Andrew D Smith
+ *
+ * Authors: Andrew D. Smith
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "radmeth_model.hpp"
+
+#include <algorithm>
+#include <charconv>
+#include <cstdint>
+#include <sstream>
+#include <string>
+
+double Regression::tolerance = 1e-4;
+double Regression::stepsize = 0.001;
+std::uint32_t Regression::max_iter = 700;
+
+void
+SiteProportions::parse(const std::string &line) {
+  const auto first_ws = line.find_first_of(" \t");
+
+  // Parse the row name (must be like: "chr:position:strand:context")
+  bool failed = false;
+
+  const auto label_end = line.data() + first_ws;
+  auto field_s = line.data();
+  auto field_e = std::find(field_s + 1, label_end, ':');
+  if (field_e == label_end)
+    failed = true;
+
+  {
+    const std::uint32_t d = std::distance(field_s, field_e);
+    chrom = std::string{field_s, d};
+  }
+
+  field_s = field_e + 1;
+  field_e = std::find(field_s + 1, label_end, ':');
+  failed = failed || (field_e == label_end);
+
+  {
+    const auto [ptr, ec] = std::from_chars(field_s, field_e, position);
+    failed = failed || (ptr == field_s);
+  }
+
+  field_s = field_e + 1;
+  field_e = std::find(field_s + 1, label_end, ':');
+  failed = failed || (field_e != field_s + 1 || field_e == label_end);
+
+  strand = *field_s;
+  failed = failed || (strand != '-' && strand != '+');
+
+  field_s = field_e + 1;
+  field_e = std::find_if(field_s + 1, label_end,
+                         [](const auto x) { return x == ' ' || x == '\t'; });
+  failed = failed || (field_e != label_end);
+
+  {
+    const std::uint32_t d = std::distance(field_s, field_e);
+    context = std::string{field_s, d};
+  }
+
+  if (failed)
+    throw std::runtime_error("failed to parse label from:\n" + line);
+
+  // Parse the counts of total reads and methylated reads
+  const auto is_sep = [](const auto x) { return std::isspace(x); };
+  const auto not_sep = [](const auto x) { return std::isdigit(x); };
+
+  const auto line_end = line.data() + std::size(line);
+  mcounts mc1{};
+  mc.clear();
+  while (field_e != line_end) {
+    // get the total count
+    field_s = std::find_if(field_e + 1, line_end, not_sep);
+    field_e = std::find_if(field_s + 1, line_end, is_sep);
+    {
+      const auto [ptr, ec] = std::from_chars(field_s, field_e, mc1.n_reads);
+      failed = failed || (ptr != field_e);
+    }
+
+    field_s = std::find_if(field_e + 1, line_end, not_sep);
+    field_e = std::find_if(field_s + 1, line_end, is_sep);
+    {
+      const auto [ptr, ec] = std::from_chars(field_s, field_e, mc1.n_meth);
+      failed = failed || (ptr != field_e);
+    }
+
+    mc.push_back(mc1);
+  }
+
+  if (failed)
+    throw std::runtime_error("failed to parse counts from:\n" + line);
+}

From ba4dc52359aa53f8f6a1c257ae2637d542486759 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 20:00:19 -0700
Subject: [PATCH 05/24] src/radmeth/radmeth_model.hpp: cleanup and adding some
 functions to help with organization and efficiency

---
 src/radmeth/radmeth_model.hpp | 58 +++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/src/radmeth/radmeth_model.hpp b/src/radmeth/radmeth_model.hpp
index 0402a269..5ff20fb0 100644
--- a/src/radmeth/radmeth_model.hpp
+++ b/src/radmeth/radmeth_model.hpp
@@ -19,32 +19,70 @@
 #ifndef RADMETH_MODEL_HPP
 #define RADMETH_MODEL_HPP
 
+#include <cstdint>
+#include <istream>
 #include <string>
 #include <vector>
 
 struct Design {
   std::vector<std::string> factor_names;
   std::vector<std::string> sample_names;
-  std::vector<std::vector<double> > matrix;
-  size_t n_factors() const {return factor_names.size();}
-  size_t n_samples() const {return sample_names.size();}
+  std::vector<std::vector<double>> matrix;
+  std::vector<std::vector<double>> tmatrix;
+  std::size_t
+  n_factors() const {
+    return factor_names.size();
+  }
+  std::size_t
+  n_samples() const {
+    return sample_names.size();
+  }
 };
 
+struct mcounts {
+  std::uint32_t n_reads{};
+  std::uint32_t n_meth{};
+};
+
+[[nodiscard]] inline std::istream &
+operator>>(std::istream &in, mcounts &rm) {
+  return in >> rm.n_reads >> rm.n_meth;
+}
+
 struct SiteProportions {
   std::string chrom;
-  size_t position;
-  std::string strand;
+  std::size_t position{};
+  char strand{};
   std::string context;
-  std::vector<size_t> total;
-  std::vector<size_t> meth;
+  std::vector<mcounts> mc;
+
+  void
+  parse(const std::string &line);
 };
 
 struct Regression {
+  static double tolerance;        // 1e-4;
+  static double stepsize;         // 0.001;
+  static std::uint32_t max_iter;  // 700;
+
   Design design;
   SiteProportions props;
-  double max_loglik;
-  size_t n_factors() const {return design.n_factors();}
-  size_t n_samples() const {return design.n_samples();}
+  double max_loglik{};
+
+  [[nodiscard]] std::size_t
+  n_factors() const {
+    return design.n_factors();
+  }
+
+  [[nodiscard]] std::size_t
+  props_size() const {
+    return std::size(props.mc);
+  }
+
+  [[nodiscard]] std::size_t
+  n_samples() const {
+    return design.n_samples();
+  }
 };
 
 #endif

From 99df6b727807c5d0ed32d46e68f76fb59ee1ecd3 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 20:01:00 -0700
Subject: [PATCH 06/24] src/radmeth/radmeth.cpp: using multi-threaded

---
 src/radmeth/radmeth.cpp | 611 ++++++++++++++++++++++------------------
 1 file changed, 330 insertions(+), 281 deletions(-)

diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index 815e599a..cb0a43e9 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -16,15 +16,17 @@
  * General Public License for more details.
  */
 
-#include <gsl/gsl_cdf.h> // GSL header for chisqrd distribution
+#include <gsl/gsl_cdf.h>  // GSL header for chisqrd distribution
 
 #include <algorithm>
 #include <cmath>
+#include <filesystem>
 #include <fstream>
 #include <iomanip>
 #include <iostream>
 #include <sstream>
 #include <string>
+#include <thread>
 #include <vector>
 
 // smithlab headers
@@ -37,175 +39,107 @@
 #include "radmeth_optimize.hpp"
 
 using std::begin;
-using std::cerr;
 using std::cout;
 using std::distance;
 using std::end;
-using std::endl;
-using std::istringstream;
-using std::runtime_error;
-using std::sort;
-using std::string;
-using std::vector;
+
+struct file_progress {
+  double one_hundred_over_filesize{};
+  std::size_t prev_offset{};
+  explicit file_progress(const std::string &filename) :
+    one_hundred_over_filesize{100.0 / std::filesystem::file_size(filename)} {}
+  void
+  operator()(std::ifstream &in) {
+    const std::size_t curr_offset = in.tellg() * one_hundred_over_filesize;
+    if (curr_offset <= prev_offset)
+      return;
+    std::cerr << "[progress: " << std::setw(3) << curr_offset
+              << (curr_offset == 100 ? "%]\n" : "%]\r");
+    prev_offset = (curr_offset == 100) ? std::numeric_limits<std::size_t>::max()
+                                       : curr_offset;
+  }
+};
 
 static std::istream &
 operator>>(std::istream &is, Design &design) {
-  string header_encoding;
-  getline(is, header_encoding);
+  std::string header_encoding;
+  std::getline(is, header_encoding);
 
-  istringstream header_is(header_encoding);
-  string header_name;
-  while (header_is >> header_name) design.factor_names.push_back(header_name);
+  std::istringstream header_is(header_encoding);
+  std::string header_name;
+  while (header_is >> header_name)
+    design.factor_names.push_back(header_name);
 
-  string row;
-  while (getline(is, row)) {
-    if (row.empty()) continue;
+  std::string row;
+  while (std::getline(is, row)) {
+    if (row.empty())
+      continue;
 
-    istringstream row_is(row);
-    string token;
+    std::istringstream row_is(row);
+    std::string token;
     row_is >> token;
     design.sample_names.push_back(token);
 
-    vector<double> matrix_row;
+    std::vector<double> matrix_row;
     while (row_is >> token) {
-      if (token.length() == 1 && (token == "0" || token == "1"))
-        matrix_row.push_back(token == "1");
-      else
-        throw runtime_error("only binary factor levels are allowed:\n" + row);
+      if (std::size(token) != 1 || (token != "0" && token != "1"))
+        throw std::runtime_error("Must use binary factor levels:\n" + row);
+      matrix_row.push_back(token == "1");
     }
 
-    if (matrix_row.size() != design.n_factors())
-      throw runtime_error(
-        "each row must have as many columns as factors:\n" +
-        row);
+    if (std::size(matrix_row) != design.n_factors())
+      throw std::runtime_error(
+        "each row must have as many columns as factors:\n" + row);
 
-    design.matrix.push_back(vector<double>());
+    design.matrix.push_back(std::vector<double>());
     swap(design.matrix.back(), matrix_row);
   }
+
+  const auto n_row = std::size(design.matrix);
+  const auto n_col = std::size(design.matrix.front());
+  design.tmatrix.resize(n_col, std::vector<double>(n_row, 0.0));
+  for (auto row_idx = 0u; row_idx < n_row; ++row_idx)
+    for (auto col_idx = 0u; col_idx < n_col; ++col_idx)
+      design.tmatrix[col_idx][row_idx] = design.matrix[row_idx][col_idx];
+
   return is;
 }
 
 static std::ostream &
 operator<<(std::ostream &out, const Design &design) {
-  for (size_t factor = 0; factor < design.factor_names.size(); ++factor) {
+  for (std::size_t factor = 0; factor < design.factor_names.size(); ++factor) {
+    if (factor != 0)
+      out << '\t';
     out << design.factor_names[factor];
-    if (factor + 1 != design.factor_names.size()) out << "\t";
   }
-  out << endl;
+  out << '\n';
 
-  for (size_t i = 0; i < design.n_samples(); ++i) {
+  for (std::size_t i = 0; i < design.n_samples(); ++i) {
     out << design.sample_names[i];
-    for (size_t factor = 0; factor < design.factor_names.size(); ++factor)
-      out << "\t" << design.matrix[i][factor];
+    for (std::size_t j = 0; j < design.n_factors(); ++j)
+      out << "\t" << design.matrix[i][j];
     out << "\n";
   }
   return out;
 }
 
 static void
-remove_factor(Design &design, const size_t factor) {
-  design.factor_names.erase(begin(design.factor_names) + factor);
-  for (size_t i = 0; i < design.n_samples(); ++i)
-    design.matrix[i].erase(begin(design.matrix[i]) + factor);
-}
-
-// Parses a natural number from its string representation. Throws exception if
-// the string does not encode one.
-static size_t
-parse_natural_number(string encoding) {
-  istringstream iss(encoding);
-  size_t number;
-  iss >> number;
-  if (!iss)
-    throw runtime_error("The token \"" + encoding +
-                        "\" does not encode a natural number");
-  return number;
-}
-
-static std::istream &
-operator>>(std::istream &table_encoding, SiteProportions &props) {
-  props.chrom.clear();
-  props.position = 0;
-  props.strand.clear();
-  props.context.clear();
-  props.meth.clear();
-  props.total.clear();
-
-  string row_encoding;
-  getline(table_encoding, row_encoding);
-
-  // Skip lines contining only the newline character (e.g. the last line of the
-  // proportion table).
-  if (row_encoding.empty()) return table_encoding;
-
-  // Get the row name (which must be specified like this: "chr:position") and
-  // parse it.
-  istringstream row_stream(row_encoding);
-  string row_name_encoding;
-  row_stream >> row_name_encoding;
-
-  // Every row must start an identifier consisiting of genomic loci of the
-  // corresponding site. Here we check this identifier has the correct number
-  // of colons.
-  const size_t num_colon =
-    count(row_name_encoding.begin(), row_name_encoding.end(), ':');
-
-  if (num_colon != 3)
-    throw runtime_error(
-      "Each row in the count table must start with "
-      "a line chromosome:position:strand:context. Got \"" +
-      row_name_encoding + "\" instead.");
-
-  // First parse the row identifier.
-  istringstream name_stream(row_name_encoding);
-  getline(name_stream, props.chrom, ':');
-
-  if (props.chrom.empty())
-    throw runtime_error("Error parsing " + row_name_encoding +
-                        ": chromosome name is missing.");
-
-  string position_encoding;
-
-  getline(name_stream, position_encoding, ':');
-  props.position = parse_natural_number(position_encoding);
-  getline(name_stream, props.strand, ':');
-  getline(name_stream, props.context, ':');
-
-  // After parsing the row identifier, parse count proportions.
-  size_t total_count, meth_count;
-
-  while (row_stream >> total_count >> meth_count) {
-    props.total.push_back(total_count);
-    props.meth.push_back(meth_count);
-  }
-
-  if (!row_stream.eof())
-    throw runtime_error("Some row entries are not natural numbers: " +
-                        row_stream.str());
-
-  if (props.total.size() != props.meth.size())
-    throw runtime_error(
-      "This row does not encode proportions"
-      "correctly:\n" +
-      row_encoding);
-  return table_encoding;
-}
-
-static bool
-fit_regression_model(Regression &r) {
-  vector<double> initial_params;
-  return fit_regression_model(r, initial_params);
+remove_factor(Design &design, const std::size_t factor_idx) {
+  design.factor_names.erase(std::begin(design.factor_names) + factor_idx);
+  for (std::size_t i = 0; i < design.n_samples(); ++i)
+    design.matrix[i].erase(std::begin(design.matrix[i]) + factor_idx);
 }
 
 /***************** RADMETH ALGORITHM *****************/
 static bool
-consistent_sample_names(const Regression &reg, const string &header) {
-  istringstream iss(header);
-  auto nm_itr(begin(reg.design.sample_names));
-  const auto nm_end(end(reg.design.sample_names));
-  string token;
+consistent_sample_names(const Regression &reg, const std::string &header) {
+  std::istringstream iss(header);
+  auto nm_itr(std::begin(reg.design.sample_names));
+  const auto nm_end(std::end(reg.design.sample_names));
+  std::string token;
   while (iss >> token && nm_itr != nm_end)
-    if (token != *nm_itr++) return false;
+    if (token != *nm_itr++)
+      return false;
   return true;
 }
 
@@ -213,13 +147,13 @@ consistent_sample_names(const Regression &reg, const string &header) {
 // function outputs the p-value of the log-likelihood ratio. *Note* that it is
 // assumed that the reduced model has one fewer factor than the reduced model.
 static double
-loglikratio_test(double null_loglik, double full_loglik) {
+llr_test(double null_loglik, double full_loglik) {
   // The log-likelihood ratio statistic.
   const double log_lik_stat = -2 * (null_loglik - full_loglik);
 
   // It is assumed that null model has one fewer factor than the full
   // model. Hence the number of degrees of freedom is 1.
-  const size_t degrees_of_freedom = 1;
+  const std::size_t degrees_of_freedom = 1;
 
   // Log-likelihood ratio statistic has a chi-sqare distribution.
   const double chisq_p = gsl_cdf_chisq_P(log_lik_stat, degrees_of_freedom);
@@ -229,46 +163,249 @@ loglikratio_test(double null_loglik, double full_loglik) {
 }
 
 static bool
-has_low_coverage(const Regression &reg, const size_t test_fac) {
+has_low_coverage(const Regression &reg, const std::size_t test_fac) {
   bool cvrd_in_test_fact_smpls = false;
-  for (size_t i = 0; i < reg.n_samples() && !cvrd_in_test_fact_smpls; ++i)
+  for (std::size_t i = 0; i < reg.n_samples() && !cvrd_in_test_fact_smpls; ++i)
     cvrd_in_test_fact_smpls =
-      (reg.design.matrix[i][test_fac] == 1 && reg.props.total[i] != 0);
+      (reg.design.matrix[i][test_fac] == 1 && reg.props.mc[i].n_reads != 0);
 
   bool cvrd_in_other_smpls = false;
-  for (size_t i = 0; i < reg.n_samples() && !cvrd_in_other_smpls; ++i)
+  for (std::size_t i = 0; i < reg.n_samples() && !cvrd_in_other_smpls; ++i)
     cvrd_in_other_smpls =
-      (reg.design.matrix[i][test_fac] != 1 && reg.props.total[i] != 0);
+      (reg.design.matrix[i][test_fac] != 1 && reg.props.mc[i].n_reads != 0);
 
   return !cvrd_in_test_fact_smpls || !cvrd_in_other_smpls;
 }
 
-static bool
+[[nodiscard]] static bool
 has_extreme_counts(const Regression &reg) {
-  bool is_maximally_methylated = true;
-  for (size_t i = 0; i < reg.n_samples() && is_maximally_methylated; ++i)
-    is_maximally_methylated = (reg.props.total[i] == reg.props.meth[i]);
+  const auto &mc = reg.props.mc;
+
+  bool full_meth = true;
+  for (auto i = std::cbegin(mc); i != std::cend(mc) && full_meth; ++i)
+    full_meth = (i->n_reads == i->n_meth);
+
+  bool no_meth = true;
+  for (auto i = std::cbegin(mc); i != std::cend(mc) && no_meth; ++i)
+    no_meth = (i->n_meth == 0.0);
 
-  bool is_unmethylated = true;
-  for (size_t i = 0; i < reg.n_samples() && is_unmethylated; ++i)
-    is_unmethylated = (reg.props.meth[i] == 0.0);
+  return full_meth || no_meth;
+}
 
-  return is_maximally_methylated || is_unmethylated;
+[[nodiscard]] static bool
+has_two_values(const Regression &reg, const std::size_t test_factor) {
+  const auto &v = reg.design.tmatrix[test_factor];
+  for (auto i = std::cbegin(v); i != std::cend(v); ++i)
+    if (*i != v[0])
+      return true;
+  return false;
 }
 
+[[nodiscard]] static std::uint32_t
+get_test_factor_idx(const Regression &model, const std::string &test_factor) {
+  const auto &factors = model.design.factor_names;
+  const auto itr =
+    std::find(std::cbegin(factors), std::cend(factors), test_factor);
 
-static bool
-verify_multiple_levels(const Regression &full_regression,
-                       const size_t test_factor) {
-  const size_t n_samples = full_regression.design.n_samples();
-  const auto first_level = full_regression.design.matrix[0][test_factor];
-  bool test_fact_mult_levels = false;
-  for (size_t i = 1; i < n_samples; ++i)
-    if (full_regression.design.matrix[i][test_factor] != first_level)
-      test_fact_mult_levels = true;
-  return test_fact_mult_levels;
+  if (itr == std::cend(factors))
+    throw std::runtime_error("Factor not part of design: " + test_factor);
+
+  return std::distance(std::cbegin(factors), itr);
 }
 
+static void
+read_design(const bool verbose, const std::string &design_filename,
+            Design &design) {
+  if (verbose)
+    std::cerr << "design table filename: " << design_filename << std::endl;
+  std::ifstream design_file(design_filename);
+  if (!design_file)
+    throw std::runtime_error("could not open file: " + design_filename);
+
+  // initialize full design matrix from file
+  design_file >> design;
+
+  if (verbose)
+    std::cerr << design << std::endl;
+}
+
+enum class row_status : std::uint8_t {
+  ok,
+  na,
+  na_low_cov,
+  na_extreme_cnt,
+};
+
+[[nodiscard]] static std::vector<double>
+drop_idx(const std::vector<double> &v, const std::size_t to_drop) {
+  std::vector<double> u;
+  u.reserve(std::size(v) - 1);
+  for (auto i = 0u; i < std::size(v); ++i)
+    if (i != to_drop)
+      u.push_back(v[i]);
+  return u;
+}
+
+static void
+radmeth(const bool show_progress, const bool more_na_info,
+        const std::uint32_t n_threads, const std::string &table_filename,
+        const std::string &outfile, const Regression &alt_model,
+        const Regression &null_model, const std::uint32_t test_factor_idx) {
+  static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t";
+  static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n";
+  static constexpr auto buf_size = 1024;
+  static constexpr auto n_lines_at_once = 1024;
+
+  // ADS: open the data table file
+  std::ifstream table_file(table_filename);
+  if (!table_file)
+    throw std::runtime_error("could not open file: " + table_filename);
+
+  // Make sure that the first line of the proportion table file contains
+  // names of the samples. Throw an exception if the names or their order
+  // in the proportion table does not match those in the full design matrix.
+  std::string sample_names_header;
+  std::getline(table_file, sample_names_header);
+
+  if (!consistent_sample_names(alt_model, sample_names_header))
+    throw std::runtime_error(
+      "header:\n" + sample_names_header + "\n" +
+      "does not match factor names or their order in the\n"
+      "design matrix. Check that the design matrix and\n"
+      "the proportion table are correctly formatted.");
+
+  const std::size_t n_samples = alt_model.design.n_samples();
+
+  std::ofstream out(outfile);
+  if (!out)
+    throw std::runtime_error("failed to open output file: " + outfile);
+
+  file_progress progress{table_filename};
+
+  std::vector<std::vector<char>> bufs(n_threads,
+                                      std::vector<char>(buf_size, 0));
+  std::vector<int> n_bytes(n_threads, 0);
+  std::vector<Regression> alt_models(n_threads, alt_model);
+  std::vector<Regression> null_models(n_threads, null_model);
+
+  // iterate over rows in the file and do llr test on proportions from each
+  std::vector<std::string> lines(n_threads);
+  while (true) {
+
+    std::uint32_t n_lines = 0;
+    while (n_lines < n_lines_at_once &&
+           std::getline(table_file, lines[n_lines]))
+      ++n_lines;
+
+    if (show_progress)
+      progress(table_file);
+
+    std::vector<std::thread> threads;
+    for (auto th_id = 0u; th_id < std::min(n_lines, n_threads); ++th_id)
+      threads.emplace_back([&, th_id] {
+        auto &t_alt_model = alt_models[th_id];
+        auto &t_null_model = null_models[th_id];
+        t_alt_model.props.parse(lines[th_id]);
+        if (t_alt_model.props_size() != n_samples)
+          throw std::runtime_error("found row with wrong number of columns");
+
+        const auto [p_val, status] = [&]() -> std::tuple<double, row_status> {
+          // Skip the test if (1) no coverage in all cases or in all controls,
+          // or (2) the site is completely methylated or completely
+          // unmethylated across all samples.
+          if (has_low_coverage(t_alt_model, test_factor_idx))
+            return std::tuple{1.0, row_status::na_low_cov};
+
+          if (has_extreme_counts(t_alt_model))
+            return std::tuple{1.0, row_status::na_extreme_cnt};
+
+          std::vector<double> alternate_params;
+          fit_regression_model(t_alt_model, alternate_params);
+          t_null_model.props = t_alt_model.props;
+
+          auto null_params = drop_idx(alternate_params, test_factor_idx);
+
+          fit_regression_model(t_null_model, null_params);
+          const double p_value =
+            llr_test(t_null_model.max_loglik, t_alt_model.max_loglik);
+
+          return (p_value != p_value) ? std::tuple{1.0, row_status::na}
+                                      : std::tuple{p_value, row_status::ok};
+        }();
+
+        std::size_t n_reads_factor = 0;
+        std::size_t n_reads_others = 0;
+        std::size_t n_meth_factor = 0;
+        std::size_t n_meth_others = 0;
+
+        const auto &mc = t_alt_model.props.mc;
+        const auto &vec = t_alt_model.design.tmatrix[test_factor_idx];
+        for (std::size_t s = 0; s < n_samples; ++s)
+          if (vec[s] != 0) {
+            n_reads_factor += mc[s].n_reads;
+            n_meth_factor += mc[s].n_meth;
+          }
+          else {
+            n_reads_others += mc[s].n_reads;
+            n_meth_others += mc[s].n_meth;
+          }
+
+        n_bytes[th_id] = [&] {
+          // clang-format off
+          const int n_prefix_bytes = std::sprintf(bufs[th_id].data(), prefix_fmt,
+                                                  t_alt_model.props.chrom.data(),
+                                                  t_alt_model.props.position,
+                                                  t_alt_model.props.strand,
+                                                  t_alt_model.props.context.data());
+          // clang-format on
+          if (n_prefix_bytes < 0)
+            return n_prefix_bytes;
+
+          auto cursor = bufs[th_id].data() + n_prefix_bytes;
+
+          const int n_pval_bytes = [&] {
+            if (status == row_status::ok)
+              return std::sprintf(cursor, "%.6g", p_val);
+            if (!more_na_info || status == row_status::na)
+              return std::sprintf(cursor, "NA");
+            if (status == row_status::na_extreme_cnt)
+              return std::sprintf(cursor, "NA_EXTREME_CNT");
+            // if (status == row_status::na_low_cov)
+            return std::sprintf(cursor, "NA_LOW_COV");
+          }();
+
+          if (n_pval_bytes < 0)
+            return n_pval_bytes;
+
+          cursor += n_pval_bytes;
+
+          // clang-format off
+          const int n_suffix_bytes = std::sprintf(cursor, suffix_fmt,
+                                                  n_reads_factor,
+                                                  n_meth_factor,
+                                                  n_reads_others,
+                                                  n_meth_others);
+          // clang-format on
+          if (n_suffix_bytes < 0)
+            return n_suffix_bytes;
+
+          return n_prefix_bytes + n_pval_bytes + n_suffix_bytes;
+        }();
+      });
+
+    for (auto &thread : threads)
+      thread.join();
+
+    for (auto i = 0u; i < std::min(n_lines, n_threads); ++i) {
+      if (n_bytes[i] < 0)
+        throw std::runtime_error("failed to write to output buffer");
+      out.write(bufs[i].data(), n_bytes[i]);
+    }
+
+    if (n_lines < n_lines_at_once)
+      break;
+  }
+}
 
 /***********************************************************************
  * Run beta-binoimial regression using the specified table with
@@ -277,171 +414,83 @@ verify_multiple_levels(const Regression &full_regression,
 int
 main_radmeth(int argc, char *argv[]) {
   try {
-    static const string description =
+    static const std::string description =
       "calculate differential methylation scores";
 
-    string outfile;
-    string test_factor_name;
-    bool VERBOSE = false;
-    bool more_na_info = false;
+    std::string outfile;
+    std::string test_factor;
+    bool verbose{false};
+    bool show_progress{false};
+    bool more_na_info{false};
+    std::uint32_t n_threads{1};
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(strip_path(argv[0]), description,
                            "<design-matrix> <data-matrix>");
     opt_parse.set_show_defaults();
-    opt_parse.add_opt("out", 'o', "output file (default: stdout)", false,
-                      outfile);
-    opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
+    opt_parse.add_opt("out", 'o', "output file", true, outfile);
+    opt_parse.add_opt("threads", 't', "number of threads to use", false,
+                      n_threads);
+    opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
+    opt_parse.add_opt("progress", '\0', "show progress", false, show_progress);
     opt_parse.add_opt(
       "na-info", 'n',
       "if a p-value is not calculated, print NAs in more detail: "
       "low count (NA_LOW_COV) extreme values (NA_EXTREME_CNT) or "
       "numerical errors in likelihood ratios (NA)",
       false, more_na_info);
-    opt_parse.add_opt("factor", 'f', "a factor to test", true,
-                      test_factor_name);
+    opt_parse.add_opt("factor", 'f', "a factor to test", true, test_factor);
 
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << std::endl
+                << opt_parse.about_message() << std::endl;
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.about_message() << std::endl;
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << endl;
+      std::cerr << opt_parse.option_missing_message() << std::endl;
       return EXIT_SUCCESS;
     }
     if (leftover_args.size() != 2) {
-      cerr << opt_parse.help_message() << endl;
+      std::cerr << opt_parse.help_message() << std::endl;
       return EXIT_SUCCESS;
     }
-    const string design_filename(leftover_args.front());
-    const string table_filename(leftover_args.back());
+    const std::string design_filename(leftover_args.front());
+    const std::string table_filename(leftover_args.back());
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    if (VERBOSE) cerr << "design table filename: " << design_filename << endl;
-    std::ifstream design_file(design_filename);
-    if (!design_file)
-      throw runtime_error("could not open file: " + design_filename);
-
     // initialize full design matrix from file
-    Regression full_regression;
-    design_file >> full_regression.design;
-
-    if (VERBOSE) cerr << full_regression.design << endl;
+    Regression orig_alt_model;
+    read_design(verbose, design_filename, orig_alt_model.design);
 
     // Check that provided test factor name exists and find its index.
-    // Identify test factors with their indexes to simplify naming
-    auto test_fact_it =
-      find(begin(full_regression.design.factor_names),
-           end(full_regression.design.factor_names), test_factor_name);
-
-    if (test_fact_it == end(full_regression.design.factor_names))
-      throw runtime_error(test_factor_name +
-                          " factor not part of design specification");
-
-    const size_t test_factor =
-      distance(begin(full_regression.design.factor_names), test_fact_it);
+    const auto test_factor_idx =
+      get_test_factor_idx(orig_alt_model, test_factor);
 
     // verify that the design includes more than one level for the
     // test factor
-    if (!verify_multiple_levels(full_regression, test_factor)) {
-      const auto first_level = full_regression.design.matrix[0][test_factor];
-      throw runtime_error("test factor only one level: " +
-                          test_factor_name + ", " +
-                          std::to_string(first_level));
+    if (!has_two_values(orig_alt_model, test_factor_idx)) {
+      const auto first_level = orig_alt_model.design.matrix[0][test_factor_idx];
+      throw std::runtime_error("test factor only one level: " + test_factor +
+                               ", " + std::to_string(first_level));
     }
 
-    Regression null_regression;
-    null_regression.design = full_regression.design;
-    remove_factor(null_regression.design, test_factor);
+    Regression orig_null_model;
+    orig_null_model.design = orig_alt_model.design;
+    remove_factor(orig_null_model.design, test_factor_idx);
     // ADS: done setup for the model
 
-    // ADS: open the data table file
-    std::ifstream table_file(table_filename);
-    if (!table_file)
-      throw runtime_error("could not open file: " + table_filename);
-
-    // Make sure that the first line of the proportion table file contains
-    // names of the samples. Throw an exception if the names or their order
-    // in the proportion table does not match those in the full design matrix.
-    string sample_names_header;
-    getline(table_file, sample_names_header);
-
-    if (!consistent_sample_names(full_regression, sample_names_header))
-      throw runtime_error("header:\n" + sample_names_header + "\n" +
-                          "does not match factor names or their order in the\n"
-                          "design matrix. Check that the design matrix and\n"
-                          "the proportion table are correctly formatted.");
-
-    const size_t n_samples = full_regression.design.n_samples();
-
-    // ADS: now open the output file because each row is processed
-    // sequentially
-    std::ofstream of;
-    if (!outfile.empty()) of.open(outfile);
-    std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
-
-    // Performing the log-likelihood ratio test on proportions from each row
-    // of the proportion table.
-    while (table_file >> full_regression.props) {
-      if (full_regression.props.total.size() != n_samples)
-        throw runtime_error("found row with wrong number of columns");
-
-      size_t coverage_factor = 0, coverage_rest = 0, meth_factor = 0,
-             meth_rest = 0;
-
-      for (size_t s = 0; s < n_samples; ++s) {
-        if (full_regression.design.matrix[s][test_factor] != 0) {
-          coverage_factor += full_regression.props.total[s];
-          meth_factor += full_regression.props.meth[s];
-        }
-        else {
-          coverage_rest += full_regression.props.total[s];
-          meth_rest += full_regression.props.meth[s];
-        }
-      }
-
-      out << full_regression.props.chrom << "\t"
-          << full_regression.props.position << "\t"
-          << full_regression.props.strand << "\t"
-          << full_regression.props.context << "\t";
-
-      // Do not perform the test if there's no coverage in either all
-      // case or all control samples. Also do not test if the site is
-      // completely methylated or completely unmethylated across all
-      // samples.
-      if (has_low_coverage(full_regression, test_factor)) {
-        out << ((more_na_info) ? "NA_LOW_COV" : "NA");
-      }
-      else if (has_extreme_counts(full_regression)) {
-        out << ((more_na_info) ? "NA_EXTREME_CNT" : "NA");
-      }
-      else {
-        fit_regression_model(full_regression);
-        null_regression.props = full_regression.props;
-        fit_regression_model(null_regression);
-        const double p_value = loglikratio_test(null_regression.max_loglik,
-                                                full_regression.max_loglik);
-
-        // If error occured in fitting (p-val = nan or -nan).
-        if (p_value != p_value)
-          out << "NA";
-        else
-          out << p_value;
-      }
-      out << "\t" << coverage_factor << "\t" << meth_factor << "\t"
-          << coverage_rest << "\t" << meth_rest << endl;
-    }
+    radmeth(show_progress, more_na_info, n_threads, table_filename, outfile,
+            orig_alt_model, orig_null_model, test_factor_idx);
   }
   catch (const std::exception &e) {
-    cerr << "ERROR: " << e.what() << endl;
-    exit(EXIT_FAILURE);
+    std::cerr << "ERROR: " << e.what() << std::endl;
+    return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }

From df6ec6a924a632c2bfbde2278098d1e52bec82f4 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 20 Jul 2025 20:23:44 -0700
Subject: [PATCH 07/24] src/radmeth/radmeth.cpp: removing some accidental
 code..

---
 src/radmeth/radmeth.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index cb0a43e9..26d1f06a 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -254,7 +254,6 @@ radmeth(const bool show_progress, const bool more_na_info,
   static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t";
   static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n";
   static constexpr auto buf_size = 1024;
-  static constexpr auto n_lines_at_once = 1024;
 
   // ADS: open the data table file
   std::ifstream table_file(table_filename);
@@ -293,8 +292,7 @@ radmeth(const bool show_progress, const bool more_na_info,
   while (true) {
 
     std::uint32_t n_lines = 0;
-    while (n_lines < n_lines_at_once &&
-           std::getline(table_file, lines[n_lines]))
+    while (n_lines < n_threads && std::getline(table_file, lines[n_lines]))
       ++n_lines;
 
     if (show_progress)
@@ -402,7 +400,7 @@ radmeth(const bool show_progress, const bool more_na_info,
       out.write(bufs[i].data(), n_bytes[i]);
     }
 
-    if (n_lines < n_lines_at_once)
+    if (n_lines < n_threads)
       break;
   }
 }

From d65e24dc7f9fd5c9b69aafdcac89fdee845c9d68 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 15:27:51 -0700
Subject: [PATCH 08/24] src/radmeth/radmeth_optimize.hcpp: fixing includes

---
 src/radmeth/radmeth_optimize.cpp | 2 ++
 src/radmeth/radmeth_optimize.hpp | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp
index e957cc6b..1e201100 100644
--- a/src/radmeth/radmeth_optimize.cpp
+++ b/src/radmeth/radmeth_optimize.cpp
@@ -16,6 +16,8 @@
  * General Public License for more details.
  */
 
+#include "radmeth_optimize.hpp"
+
 #include "radmeth_model.hpp"
 
 #include <gsl/gsl_multimin.h>
diff --git a/src/radmeth/radmeth_optimize.hpp b/src/radmeth/radmeth_optimize.hpp
index 2bf115de..16f4a061 100644
--- a/src/radmeth/radmeth_optimize.hpp
+++ b/src/radmeth/radmeth_optimize.hpp
@@ -19,10 +19,11 @@
 #ifndef RADMETH_OPTIMIZE_HPP
 #define RADMETH_OPTIMIZE_HPP
 
+#include <vector>
+
 struct Regression;
 
 bool
 fit_regression_model(Regression &r, std::vector<double> &initial_params);
 
-
 #endif

From 53d9ba9f93716c29cbe21a41b485b1be8875c7a8 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 16:22:39 -0700
Subject: [PATCH 09/24] src/radmeth/radmeth.cpp: changing strategies for
 parallelism to get chunks of data for each thread and avoid too much thread
 creation. Currently performance has problems.

---
 src/radmeth/radmeth.cpp | 204 ++++++++++++++++++++++------------------
 1 file changed, 115 insertions(+), 89 deletions(-)

diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index 26d1f06a..dd7b1c6f 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -246,6 +246,22 @@ drop_idx(const std::vector<double> &v, const std::size_t to_drop) {
   return u;
 }
 
+template <typename T>
+static inline std::vector<std::pair<T, T>>
+get_chunk_bounds(const T n_elements, const T n_chunks) {
+  std::vector<std::pair<T, T>> chunks;
+  const T q = n_elements / n_chunks;
+  const T r = n_elements - q * n_chunks;
+  T block_start{};
+  for (std::size_t i = 0; i < n_chunks; ++i) {
+    const auto sz = (i < r) ? q + 1 : q;
+    const auto block_end = block_start + sz;
+    chunks.emplace_back(block_start, block_end);
+    block_start = block_end;
+  }
+  return chunks;
+}
+
 static void
 radmeth(const bool show_progress, const bool more_na_info,
         const std::uint32_t n_threads, const std::string &table_filename,
@@ -254,6 +270,7 @@ radmeth(const bool show_progress, const bool more_na_info,
   static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t";
   static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n";
   static constexpr auto buf_size = 1024;
+  static constexpr auto max_lines = 256;
 
   // ADS: open the data table file
   std::ifstream table_file(table_filename);
@@ -281,120 +298,129 @@ radmeth(const bool show_progress, const bool more_na_info,
 
   file_progress progress{table_filename};
 
-  std::vector<std::vector<char>> bufs(n_threads,
+  std::vector<std::vector<char>> bufs(max_lines,
                                       std::vector<char>(buf_size, 0));
-  std::vector<int> n_bytes(n_threads, 0);
+  std::vector<int> n_bytes(max_lines, 0);
+  std::vector<std::string> lines(max_lines);
+
   std::vector<Regression> alt_models(n_threads, alt_model);
   std::vector<Regression> null_models(n_threads, null_model);
 
-  // iterate over rows in the file and do llr test on proportions from each
-  std::vector<std::string> lines(n_threads);
+  // Iterate over rows in the file and do llr test on proportions from
+  // each. Do this in sets of rows to avoid having to spawn too many threads.
   while (true) {
-
     std::uint32_t n_lines = 0;
-    while (n_lines < n_threads && std::getline(table_file, lines[n_lines]))
+    while (n_lines < max_lines && std::getline(table_file, lines[n_lines]))
       ++n_lines;
+    if (n_lines == 0)
+      break;
 
     if (show_progress)
       progress(table_file);
 
+    const auto chunks = get_chunk_bounds(n_lines, n_threads);
+
     std::vector<std::thread> threads;
-    for (auto th_id = 0u; th_id < std::min(n_lines, n_threads); ++th_id)
-      threads.emplace_back([&, th_id] {
-        auto &t_alt_model = alt_models[th_id];
-        auto &t_null_model = null_models[th_id];
-        t_alt_model.props.parse(lines[th_id]);
-        if (t_alt_model.props_size() != n_samples)
-          throw std::runtime_error("found row with wrong number of columns");
-
-        const auto [p_val, status] = [&]() -> std::tuple<double, row_status> {
-          // Skip the test if (1) no coverage in all cases or in all controls,
-          // or (2) the site is completely methylated or completely
-          // unmethylated across all samples.
-          if (has_low_coverage(t_alt_model, test_factor_idx))
-            return std::tuple{1.0, row_status::na_low_cov};
-
-          if (has_extreme_counts(t_alt_model))
-            return std::tuple{1.0, row_status::na_extreme_cnt};
-
-          std::vector<double> alternate_params;
-          fit_regression_model(t_alt_model, alternate_params);
-          t_null_model.props = t_alt_model.props;
-
-          auto null_params = drop_idx(alternate_params, test_factor_idx);
-
-          fit_regression_model(t_null_model, null_params);
-          const double p_value =
-            llr_test(t_null_model.max_loglik, t_alt_model.max_loglik);
-
-          return (p_value != p_value) ? std::tuple{1.0, row_status::na}
-                                      : std::tuple{p_value, row_status::ok};
-        }();
-
-        std::size_t n_reads_factor = 0;
-        std::size_t n_reads_others = 0;
-        std::size_t n_meth_factor = 0;
-        std::size_t n_meth_others = 0;
-
-        const auto &mc = t_alt_model.props.mc;
-        const auto &vec = t_alt_model.design.tmatrix[test_factor_idx];
-        for (std::size_t s = 0; s < n_samples; ++s)
-          if (vec[s] != 0) {
-            n_reads_factor += mc[s].n_reads;
-            n_meth_factor += mc[s].n_meth;
-          }
-          else {
-            n_reads_others += mc[s].n_reads;
-            n_meth_others += mc[s].n_meth;
-          }
-
-        n_bytes[th_id] = [&] {
-          // clang-format off
-          const int n_prefix_bytes = std::sprintf(bufs[th_id].data(), prefix_fmt,
-                                                  t_alt_model.props.chrom.data(),
-                                                  t_alt_model.props.position,
-                                                  t_alt_model.props.strand,
-                                                  t_alt_model.props.context.data());
-          // clang-format on
-          if (n_prefix_bytes < 0)
-            return n_prefix_bytes;
-
-          auto cursor = bufs[th_id].data() + n_prefix_bytes;
-
-          const int n_pval_bytes = [&] {
-            if (status == row_status::ok)
-              return std::sprintf(cursor, "%.6g", p_val);
-            if (!more_na_info || status == row_status::na)
-              return std::sprintf(cursor, "NA");
-            if (status == row_status::na_extreme_cnt)
-              return std::sprintf(cursor, "NA_EXTREME_CNT");
-            // if (status == row_status::na_low_cov)
-            return std::sprintf(cursor, "NA_LOW_COV");
+    for (auto thread_id = 0u; thread_id < n_threads; ++thread_id) {
+      threads.emplace_back([&, thread_id] {
+        const auto &[chunk_beg, chunk_end] = chunks[thread_id];
+        auto &t_alt_model = alt_models[thread_id];
+        auto &t_null_model = null_models[thread_id];
+        for (auto b = chunk_beg; b != chunk_end; ++b) {
+          t_alt_model.props.parse(lines[b]);
+          if (t_alt_model.props_size() != n_samples)
+            throw std::runtime_error("found row with wrong number of columns");
+
+          const auto [p_val, status] = [&]() -> std::tuple<double, row_status> {
+            // Skip the test if (1) no coverage in all cases or in all controls,
+            // or (2) the site is completely methylated or completely
+            // unmethylated across all samples.
+            if (has_low_coverage(t_alt_model, test_factor_idx))
+              return std::tuple{1.0, row_status::na_low_cov};
+
+            if (has_extreme_counts(t_alt_model))
+              return std::tuple{1.0, row_status::na_extreme_cnt};
+
+            std::vector<double> alternate_params;
+            fit_regression_model(t_alt_model, alternate_params);
+            t_null_model.props = t_alt_model.props;
+
+            auto null_params = drop_idx(alternate_params, test_factor_idx);
+
+            fit_regression_model(t_null_model, null_params);
+            const double p_value =
+              llr_test(t_null_model.max_loglik, t_alt_model.max_loglik);
+
+            return (p_value != p_value) ? std::tuple{1.0, row_status::na}
+                                        : std::tuple{p_value, row_status::ok};
           }();
 
-          if (n_pval_bytes < 0)
-            return n_pval_bytes;
-
-          cursor += n_pval_bytes;
-
-          // clang-format off
+          std::size_t n_reads_factor = 0;
+          std::size_t n_reads_others = 0;
+          std::size_t n_meth_factor = 0;
+          std::size_t n_meth_others = 0;
+
+          const auto &mc = t_alt_model.props.mc;
+          const auto &vec = t_alt_model.design.tmatrix[test_factor_idx];
+          for (std::size_t s = 0; s < n_samples; ++s)
+            if (vec[s] != 0) {
+              n_reads_factor += mc[s].n_reads;
+              n_meth_factor += mc[s].n_meth;
+            }
+            else {
+              n_reads_others += mc[s].n_reads;
+              n_meth_others += mc[s].n_meth;
+            }
+
+          n_bytes[b] = [&] {
+            // clang-format off
+            const int n_prefix_bytes = std::sprintf(bufs[b].data(), prefix_fmt,
+                                                    t_alt_model.props.chrom.data(),
+                                                    t_alt_model.props.position,
+                                                    t_alt_model.props.strand,
+                                                    t_alt_model.props.context.data());
+            // clang-format on
+            if (n_prefix_bytes < 0)
+              return n_prefix_bytes;
+
+            auto cursor = bufs[b].data() + n_prefix_bytes;
+
+            const int n_pval_bytes = [&] {
+              if (status == row_status::ok)
+                return std::sprintf(cursor, "%.6g", p_val);
+              if (!more_na_info || status == row_status::na)
+                return std::sprintf(cursor, "NA");
+              if (status == row_status::na_extreme_cnt)
+                return std::sprintf(cursor, "NA_EXTREME_CNT");
+              // if (status == row_status::na_low_cov)
+              return std::sprintf(cursor, "NA_LOW_COV");
+            }();
+
+            if (n_pval_bytes < 0)
+              return n_pval_bytes;
+
+            cursor += n_pval_bytes;
+
+            // clang-format off
           const int n_suffix_bytes = std::sprintf(cursor, suffix_fmt,
                                                   n_reads_factor,
                                                   n_meth_factor,
                                                   n_reads_others,
                                                   n_meth_others);
-          // clang-format on
-          if (n_suffix_bytes < 0)
-            return n_suffix_bytes;
+            // clang-format on
+            if (n_suffix_bytes < 0)
+              return n_suffix_bytes;
 
-          return n_prefix_bytes + n_pval_bytes + n_suffix_bytes;
-        }();
+            return n_prefix_bytes + n_pval_bytes + n_suffix_bytes;
+          }();
+        }
       });
+    }
 
     for (auto &thread : threads)
       thread.join();
 
-    for (auto i = 0u; i < std::min(n_lines, n_threads); ++i) {
+    for (auto i = 0u; i < n_lines; ++i) {
       if (n_bytes[i] < 0)
         throw std::runtime_error("failed to write to output buffer");
       out.write(bufs[i].data(), n_bytes[i]);

From 5b8c1d3e5201721785cc4932acac0b0fbdeb9d77 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 18:10:26 -0700
Subject: [PATCH 10/24] src/radmeth/radmeth_model.hcpp: changing the design
 matrix to uint8_t because it's really just binary. Moving the functions that
 do IO on design matrices here also because this is where they belong

---
 src/radmeth/radmeth_model.cpp | 66 +++++++++++++++++++++++++++++++++++
 src/radmeth/radmeth_model.hpp | 11 ++++--
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/radmeth/radmeth_model.cpp b/src/radmeth/radmeth_model.cpp
index 53778283..1f7dc6d7 100644
--- a/src/radmeth/radmeth_model.cpp
+++ b/src/radmeth/radmeth_model.cpp
@@ -18,8 +18,11 @@
 #include <algorithm>
 #include <charconv>
 #include <cstdint>
+#include <istream>
+#include <ostream>
 #include <sstream>
 #include <string>
+#include <vector>
 
 double Regression::tolerance = 1e-4;
 double Regression::stepsize = 0.001;
@@ -101,3 +104,66 @@ SiteProportions::parse(const std::string &line) {
   if (failed)
     throw std::runtime_error("failed to parse counts from:\n" + line);
 }
+
+std::istream &
+operator>>(std::istream &is, Design &design) {
+  std::string header_encoding;
+  std::getline(is, header_encoding);
+
+  std::istringstream header_is(header_encoding);
+  std::string header_name;
+  while (header_is >> header_name)
+    design.factor_names.push_back(header_name);
+
+  std::string row;
+  while (std::getline(is, row)) {
+    if (row.empty())
+      continue;
+
+    std::istringstream row_is(row);
+    std::string token;
+    row_is >> token;
+    design.sample_names.push_back(token);
+
+    std::vector<std::uint8_t> matrix_row;
+    while (row_is >> token) {
+      if (std::size(token) != 1 || (token != "0" && token != "1"))
+        throw std::runtime_error("Must use binary factor levels:\n" + row);
+      matrix_row.push_back(token == "1");
+    }
+
+    if (std::size(matrix_row) != design.n_factors())
+      throw std::runtime_error(
+        "each row must have as many columns as factors:\n" + row);
+
+    design.matrix.push_back(std::vector<std::uint8_t>());
+    swap(design.matrix.back(), matrix_row);
+  }
+
+  const auto n_row = std::size(design.matrix);
+  const auto n_col = std::size(design.matrix.front());
+  design.tmatrix.resize(n_col, std::vector<std::uint8_t>(n_row, 0.0));
+  for (auto row_idx = 0u; row_idx < n_row; ++row_idx)
+    for (auto col_idx = 0u; col_idx < n_col; ++col_idx)
+      design.tmatrix[col_idx][row_idx] = design.matrix[row_idx][col_idx];
+
+  return is;
+}
+
+std::ostream &
+operator<<(std::ostream &out, const Design &design) {
+  for (std::size_t factor = 0; factor < design.factor_names.size(); ++factor) {
+    if (factor != 0)
+      out << '\t';
+    out << design.factor_names[factor];
+  }
+  out << '\n';
+
+  for (std::size_t i = 0; i < design.n_samples(); ++i) {
+    out << design.sample_names[i];
+    for (std::size_t j = 0; j < design.n_factors(); ++j)
+      out << "\t" << design.matrix[i][j];
+    out << "\n";
+  }
+  return out;
+}
diff --git a/src/radmeth/radmeth_model.hpp b/src/radmeth/radmeth_model.hpp
index 5ff20fb0..3189ecd0 100644
--- a/src/radmeth/radmeth_model.hpp
+++ b/src/radmeth/radmeth_model.hpp
@@ -21,14 +21,15 @@
 
 #include <cstdint>
 #include <istream>
+#include <ostream>
 #include <string>
 #include <vector>
 
 struct Design {
   std::vector<std::string> factor_names;
   std::vector<std::string> sample_names;
-  std::vector<std::vector<double>> matrix;
-  std::vector<std::vector<double>> tmatrix;
+  std::vector<std::vector<std::uint8_t>> matrix;
+  std::vector<std::vector<std::uint8_t>> tmatrix;
   std::size_t
   n_factors() const {
     return factor_names.size();
@@ -39,6 +40,12 @@ struct Design {
   }
 };
 
+std::istream &
+operator>>(std::istream &is, Design &design);
+
+std::ostream &
+operator<<(std::ostream &os, const Design &design);
+
 struct mcounts {
   std::uint32_t n_reads{};
   std::uint32_t n_meth{};

From 6093554b51629e95fe518ef2e1c23b068cbcde3b Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 18:18:27 -0700
Subject: [PATCH 11/24] src/radmeth/radmeth_optimize.cpp: changes to allow the
 desig matrix to have uint8_t for its binary values rather than the full
 double

---
 src/radmeth/radmeth_optimize.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp
index 1e201100..380ddeaf 100644
--- a/src/radmeth/radmeth_optimize.cpp
+++ b/src/radmeth/radmeth_optimize.cpp
@@ -29,8 +29,9 @@
 #include <stdexcept>
 #include <vector>
 
+template <typename T>
 [[nodiscard]] static double
-pi(const std::vector<double> &v, const gsl_vector *params) {
+pi(const std::vector<T> &v, const gsl_vector *params) {
   // ADS: this function doesn't have a very helpful name
   const auto a = v.data();
   const double dot = std::inner_product(a, a + std::size(v), params->data, 0.0);

From 07b45d0ab164f0af34bf1d2a5e67624775168e94 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 18:27:11 -0700
Subject: [PATCH 12/24] src/radmeth/radmeth.cpp: moving code for IO for Design
 into radmeth_model sources and setting the number of lines to read/write at
 once to 4096

---
 src/radmeth/radmeth.cpp | 66 +----------------------------------------
 1 file changed, 1 insertion(+), 65 deletions(-)

diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index dd7b1c6f..342db53b 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -60,69 +60,6 @@ struct file_progress {
   }
 };
 
-static std::istream &
-operator>>(std::istream &is, Design &design) {
-  std::string header_encoding;
-  std::getline(is, header_encoding);
-
-  std::istringstream header_is(header_encoding);
-  std::string header_name;
-  while (header_is >> header_name)
-    design.factor_names.push_back(header_name);
-
-  std::string row;
-  while (std::getline(is, row)) {
-    if (row.empty())
-      continue;
-
-    std::istringstream row_is(row);
-    std::string token;
-    row_is >> token;
-    design.sample_names.push_back(token);
-
-    std::vector<double> matrix_row;
-    while (row_is >> token) {
-      if (std::size(token) != 1 || (token != "0" && token != "1"))
-        throw std::runtime_error("Must use binary factor levels:\n" + row);
-      matrix_row.push_back(token == "1");
-    }
-
-    if (std::size(matrix_row) != design.n_factors())
-      throw std::runtime_error(
-        "each row must have as many columns as factors:\n" + row);
-
-    design.matrix.push_back(std::vector<double>());
-    swap(design.matrix.back(), matrix_row);
-  }
-
-  const auto n_row = std::size(design.matrix);
-  const auto n_col = std::size(design.matrix.front());
-  design.tmatrix.resize(n_col, std::vector<double>(n_row, 0.0));
-  for (auto row_idx = 0u; row_idx < n_row; ++row_idx)
-    for (auto col_idx = 0u; col_idx < n_col; ++col_idx)
-      design.tmatrix[col_idx][row_idx] = design.matrix[row_idx][col_idx];
-
-  return is;
-}
-
-static std::ostream &
-operator<<(std::ostream &out, const Design &design) {
-  for (std::size_t factor = 0; factor < design.factor_names.size(); ++factor) {
-    if (factor != 0)
-      out << '\t';
-    out << design.factor_names[factor];
-  }
-  out << '\n';
-
-  for (std::size_t i = 0; i < design.n_samples(); ++i) {
-    out << design.sample_names[i];
-    for (std::size_t j = 0; j < design.n_factors(); ++j)
-      out << "\t" << design.matrix[i][j];
-    out << "\n";
-  }
-  return out;
-}
-
 static void
 remove_factor(Design &design, const std::size_t factor_idx) {
   design.factor_names.erase(std::begin(design.factor_names) + factor_idx);
@@ -270,7 +207,7 @@ radmeth(const bool show_progress, const bool more_na_info,
   static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t";
   static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n";
   static constexpr auto buf_size = 1024;
-  static constexpr auto max_lines = 256;
+  static constexpr auto max_lines = 4096;
 
   // ADS: open the data table file
   std::ifstream table_file(table_filename);
@@ -507,7 +444,6 @@ main_radmeth(int argc, char *argv[]) {
     Regression orig_null_model;
     orig_null_model.design = orig_alt_model.design;
     remove_factor(orig_null_model.design, test_factor_idx);
-    // ADS: done setup for the model
 
     radmeth(show_progress, more_na_info, n_threads, table_filename, outfile,
             orig_alt_model, orig_null_model, test_factor_idx);

From f063a4ff532e35a817c14c7877a09f09f1096c50 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 18:36:03 -0700
Subject: [PATCH 13/24] src/radmeth/radmeth.cpp: showing progress after it has
 been done, preallocating the chunks vector, and changing the default number
 of lines to read at once to 16k

---
 src/radmeth/radmeth.cpp | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index 342db53b..509ab76b 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -183,20 +183,18 @@ drop_idx(const std::vector<double> &v, const std::size_t to_drop) {
   return u;
 }
 
-template <typename T>
-static inline std::vector<std::pair<T, T>>
-get_chunk_bounds(const T n_elements, const T n_chunks) {
-  std::vector<std::pair<T, T>> chunks;
-  const T q = n_elements / n_chunks;
-  const T r = n_elements - q * n_chunks;
-  T block_start{};
+static inline void
+get_chunk_bounds(const std::uint32_t n_elements, const std::uint32_t n_chunks,
+                 std::vector<std::pair<std::uint32_t, std::uint32_t>> &chunks) {
+  const std::uint32_t q = n_elements / n_chunks;
+  const std::uint32_t r = n_elements - q * n_chunks;
+  std::uint32_t block_start{};
   for (std::size_t i = 0; i < n_chunks; ++i) {
     const auto sz = (i < r) ? q + 1 : q;
     const auto block_end = block_start + sz;
-    chunks.emplace_back(block_start, block_end);
+    chunks[i] = {block_start, block_end};
     block_start = block_end;
   }
-  return chunks;
 }
 
 static void
@@ -207,7 +205,7 @@ radmeth(const bool show_progress, const bool more_na_info,
   static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t";
   static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n";
   static constexpr auto buf_size = 1024;
-  static constexpr auto max_lines = 4096;
+  static constexpr auto max_lines = 16384;
 
   // ADS: open the data table file
   std::ifstream table_file(table_filename);
@@ -243,6 +241,8 @@ radmeth(const bool show_progress, const bool more_na_info,
   std::vector<Regression> alt_models(n_threads, alt_model);
   std::vector<Regression> null_models(n_threads, null_model);
 
+  std::vector<std::pair<std::uint32_t, std::uint32_t>> chunks(n_threads);
+
   // Iterate over rows in the file and do llr test on proportions from
   // each. Do this in sets of rows to avoid having to spawn too many threads.
   while (true) {
@@ -252,10 +252,7 @@ radmeth(const bool show_progress, const bool more_na_info,
     if (n_lines == 0)
       break;
 
-    if (show_progress)
-      progress(table_file);
-
-    const auto chunks = get_chunk_bounds(n_lines, n_threads);
+    get_chunk_bounds(n_lines, n_threads, chunks);
 
     std::vector<std::thread> threads;
     for (auto thread_id = 0u; thread_id < n_threads; ++thread_id) {
@@ -363,6 +360,9 @@ radmeth(const bool show_progress, const bool more_na_info,
       out.write(bufs[i].data(), n_bytes[i]);
     }
 
+    if (show_progress)
+      progress(table_file);
+
     if (n_lines < n_threads)
       break;
   }

From a2185642628a909ea24b601eb949ce8d007df078 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 18:56:20 -0700
Subject: [PATCH 14/24] docs/content/radmeth.md: updating docs for radmeth

---
 docs/content/radmeth.md | 380 ++++++++++++++++++++++------------------
 1 file changed, 210 insertions(+), 170 deletions(-)

diff --git a/docs/content/radmeth.md b/docs/content/radmeth.md
index 54d03292..96f595bd 100644
--- a/docs/content/radmeth.md
+++ b/docs/content/radmeth.md
@@ -1,62 +1,65 @@
 ## radmeth - Differential CpG methylation
 
 ## Synopsis
+
 ```console
-$ dnmtools radmeth -factor case design_matrix.txt proportion_table.txt >output.bed
+dnmtools radmeth -factor case -o output.txt design_matrix.txt proportion_table.txt
 ```
 
 ## Description
 
-The DM detection method described in this section is based on the
-beta-binomial regression. We recommend using this method when more
-than three replicates are available in each group. For rapid
-differential methylation analysis the regression-based method should
-be run on a computing cluster with a few hundred available nodes, in
-which case it takes approximately a few hours to process a dataset
-consisting of 30-50 WGBS samples. The analysis can be also performed
-on a personal workstation, but it will take substantially longer. Note
-that the actual processing time depends on the coverage of each
-methylome, the number of sites analyzed, and the number of methylomes
-in the dataset.
+The differential-methylation detection method described in this section is
+based on the beta-binomial regression. We recommend using this method when
+more than three replicates are available in each group. For rapid differential
+methylation analysis the regression-based method should be run on a large
+machine with many cores. Note that the actual processing time depends on the
+coverage of each methylome, the number of sites analyzed, and the number of
+methylomes in the dataset.
 
 ### Generating proportion tables
 
-The first step in the differential methylation analysis is to assemble
-a proportion table containing read proportions for all target
-methylomes. Dnmtools includes a program [merge](../merge) to generate
-a proportion table from the given collection of methylomes. Suppose
-that we want to compare methylomes named `control-a.meth`,
-`control-b.meth`, `control-c.meth` to the methylomes `case-a.meth`,
-`case-b.meth`, `case-c.meth`.  The proportion table can be created with
-the following command:
+The first step in the differential methylation analysis is to assemble a
+proportion table containing read proportions for all target
+methylomes. Dnmtools includes a program [merge](../merge) to generate a
+proportion table from the given collection of methylomes. Suppose that we want
+to compare methylomes named `control-a.meth`, `control-b.meth`,
+`control-c.meth` to the methylomes `case-a.meth`, `case-b.meth`,
+`case-c.meth`.  The proportion table can be created with the following
+command:
+
 ```console
 $ dnmtools merge -t -radmeth \
      control-a.meth control-b.meth control-c.meth \
      case-a.meth case-b.meth case-c.meth > proportion-table.txt
 ```
+
 This is what `proportion-table.txt` looks like:
+
 ```txt
-       control_a       control_b       control_c       case_a  case_b  case_c
+control_a       control_b       control_c       case_a  case_b  case_c
 chr1:108:+:CpG 9       6       10      8       1       1       2       2       2       1       14      1
 chr1:114:+:CpG 17      7       10      0       14      3       5       1       9       1       7       1
 chr1:160:+:CpG 12      8       10      5       17      4       15      14      13      6       4       4
 chr1:309:+:CpG 1       1       1       0       17      12      12      8       2       1       19      8
 ```
-As indicated in the header, this proportion table contains information
-about 6 methylomes: 3 controls and 3 cases.  Each row of the table
-contains information about a CpG site and a proportion of reads
-mapping over this site in each methylome. For example, the first row
-describes a cytosine within a CpG site located on chromosome 1 at
-position 108. This site is present in 9 reads in the methylome control
-a and is methylated in 6 of them. Note that the dnmtools `merge`
-command adds methylomes into the proportion table in the order in
-which they are listed on the command line.
+
+As indicated in the header, this proportion table contains information about 6
+methylomes: 3 controls and 3 cases.  Each row of the table contains
+information about a CpG site and a proportion of reads mapping over this site
+in each methylome. For example, the first row describes a cytosine within a
+CpG site located on chromosome 1 at position 108. This site is present in 9
+reads in the methylome control a and is methylated in 6 of them. Note that the
+dnmtools `merge` command adds methylomes into the proportion table in the
+order in which they are listed on the command line.
+
+Note: the header conforms to a traditional "data frame" from R, which does not
+include a column name for the row names, which are the first column.
 
 ### Design matrix
 
-The next step is to specify the design matrix, which describes the
-structure of the experiment. For our running example, the design
-matrix looks like this:
+The next step is to specify the design matrix, which describes the structure
+of the experiment. For our running example, the design matrix looks like this:
+
 ```txt
 base   case
 control_a      1       0
@@ -66,59 +69,59 @@ case_a 1       1
 case_b 1       1
 case_c 1       1
 ```
-The design matrix shows that samples in this dataset are associated
-with two factors: base and case. The first column corresponds to the
-base factor and will always be present in the design matrix. Think of
-it as stating that all samples have the same baseline mean methylation
-level. To distinguish cases from controls we add another factor case
-(second column). The `1`s in this column correspond to the samples
-which belong to the cases group. You can use this design matrix as a
-template to create design matrices for two-group comparisons involving
-arbitrary many methylomes.
-
-After creating the proportion table and the design matrix, we are now
-ready to start the differential methylation analysis.  It consists of
-(1) regression, (2) combining significance, and (3) multiple testing
-adjustment steps.
+
+The design matrix shows that samples in this dataset are associated with two
+factors: base and case. The first column corresponds to the base factor and
+will always be present in the design matrix. Think of it as stating that all
+samples have the same baseline mean methylation level. To distinguish cases
+from controls we add another factor case (second column). The `1`s in this
+column correspond to the samples which belong to the cases group. You can use
+this design matrix as a template to create design matrices for two-group
+comparisons involving arbitrary many methylomes.
+
+After creating the proportion table and the design matrix, we are now ready to
+start the differential methylation analysis.  It consists of (1) regression,
+(2) combining significance, and (3) multiple testing adjustment steps.
 
 ### Regression
 
 Suppose that the `proportion-table.txt` and `design-matrix.txt` are as
 described above. The regression step is run with the command
+
 ```console
-$ dnmtools radmeth -factor case design-matrix.txt proportion-table.txt > output.bed
+$ dnmtools radmeth -factor case -o output.txt design-matrix.txt proportion-table.txt
 ```
-The `-factor` parameter specifies the factor with respect to which we
-want to test for differential methylation. The test factor is case,
-meaning that we are testing for differential methylation between cases
-and controls. In the output file `output.bed`, the last four columns
-correspond to the total read counts and methylated read counts of the
-case group and control group, respectively. The p-value (5-th column)
-is given by the log-likelihood ratio test.
-
-Depending on the distribution of methylated and unmethylated CpGs,
-some p-values may not be calculated. If there are no reads that cover
-a given CpG, the value `NA_LOW_COV` will be printed instead of a
-p-value. If all CpGs are fully methylated and fully methylated in both
-case and control, we say the CpG has an "extreme distribution". In
-these cases, we cannot reject the null and output `NA_EXTREME_CNT`.
-Finally, if there are numerical errors in the log-likelihood ratio
-test, we simply print `NA`. You can opt out of these additional NA
-informations by not using the `-n` flag.
-
-We do not recommend using the individual p-values of CpGs from
-radmeth, as they often do not contain sufficient coverage for
-significant biological interpretations. Instead, we recommend merging
-p-values of neighboring CpGs using [radadjust](../radadjust).
+
+The `-factor` parameter specifies the factor with respect to which we want to
+test for differential methylation. The test factor is 'case', meaning that we
+are testing for differential methylation between cases and controls. In the
+output file `output.txt`, the last four columns correspond to the total read
+counts and methylated read counts of the case group and control group,
+respectively. The p-value (5-th column) is given by the log-likelihood ratio
+test.
+
+Depending on the distribution of methylated and unmethylated CpGs, some
+p-values may not be calculated. If there are no reads that cover a given CpG,
+the value `NA_LOW_COV` will be printed instead of a p-value. If all CpGs are
+fully methylated and fully methylated in both case and control, we say the CpG
+has an "extreme distribution". In these cases, we cannot reject the null and
+output `NA_EXTREME_CNT`.  Finally, if there are numerical errors in the
+log-likelihood ratio test, we simply print `NA`. You can opt out of these
+additional NA informations by not using the `-n` flag.
+
+We do not recommend relying too much on the individual p-values of CpGs from
+radmeth, as they often do not contain sufficient coverage for significant
+biological interpretations. Instead, we recommend merging p-values of
+neighboring CpGs using [radadjust](../radadjust).
 
 ## Tutorial
 
-This tutorial aims to answer questions users often have about the
-assumptions of radmeth and how to interepret the results. We will
-assume a factor of interest, with levels A and B. We will also assume
-a potentially confounding factor, sex, with levels M and F. Further,
-we have 8 total samples, 2 samples for each of the 4 combinations of
-{A,B}x{M,F}. The design matrix should look like this:
+This tutorial aims to answer questions users often have about the assumptions
+of radmeth and how to interepret the results. We will assume a factor of
+interest, with levels A and B. We will also assume a potentially confounding
+factor, sex, with levels M and F. Further, we have 8 total samples, 2 samples
+for each of the 4 combinations of {A,B}x{M,F}. The design matrix should look
+like this:
 
 ```txt
 base    sex factor
@@ -131,28 +134,28 @@ sample_MA2  1   0   1
 sample_MB1  1   0   0
 sample_MB2  1   0   0
 ```
-Note that the 3 headings correspond with the three binary columns, but
-need not be aligned over them. The first sample in the design matrix,
-`sample_FA1`, has an indicator in the other columns for both the "F"
-and the "A", both encoded with a "1" in their respective columns. But
-the replicate number 1 in the `_FA1` is just a replicate, and if it
-had meaning we would consider encoding it using some factor.
-Designating it as a replicate with no meaning is a choice here,
-aligning with the idea that the replicates are statistically
-identical. If all replicates identified with a "2" were done on "day
-2", for example, then we might reconsider.
-
-For our example we will use a genome file `genome.fa` that is very
-small and not a real genome. We will also use a file `features.bed`
-that will be the starting point for generating data that fit with our
-hypotheses. We will be generating data that *should* show us the
-differences we want to see -- assuming we use the tools properly. I
-will refer to each of the intervals in `features.bed` as a "feature"
-but just think of it as a genomic interval with a chromosome name, a
-start and an end.  We need to first obtain some features that are
-associated specifically with each of the levels of our factors: A, B,
+
+Note that the 3 headings correspond with the three binary columns, but need
+not be aligned over them. The first sample in the design matrix, `sample_FA1`,
+has an indicator in the other columns for both the "F" and the "A", both
+encoded with a "1" in their respective columns. But the replicate number 1 in
+the `_FA1` is just a replicate, and if it had meaning we would consider
+encoding it using some factor.  Designating it as a replicate with no meaning
+is a choice here, aligning with the idea that the replicates are statistically
+identical. If all replicates identified with a "2" were done on "day 2", for
+example, then we might reconsider.
+
+For our example we will use a genome file `genome.fa` that is very small and
+not a real genome. We will also use a file `features.bed` that will be the
+starting point for generating data that fit with our hypotheses. We will be
+generating data that *should* show us the differences we want to see --
+assuming we use the tools properly. I will refer to each of the intervals in
+`features.bed` as a "feature" but just think of it as a genomic interval with
+a chromosome name, a start and an end.  We need to first obtain some features
+that are associated specifically with each of the levels of our factors: A, B,
 M and F. We can do this starting with our features file and using the
 `bedtools` shuffle command:
+
 ```console
 $ bedtools shuffle -i features.bed -g genome.chrom.sizes > sim/features_M.bed;
 $ cat sim/features_*.bed | sort -k 1,1 -k 2,2g -o excl.bed;
@@ -162,10 +165,11 @@ $ bedtools shuffle -excl excl.bed -i features.bed -g genome.chrom.sizes > sim/fe
 $ cat sim/features_*.bed | sort -k 1,1 -k 2,2g -o excl.bed;
 $ bedtools shuffle -excl excl.bed -i features.bed -g genome.chrom.sizes > sim/features_B.bed;
 ```
-This ensures none of the "features" are overlapping between A, B, M or
-F, and all are similar in number and size distribution. Using each of
-these, I then made files of features corresponding to the
-*combinations* of factors:
+
+This ensures none of the "features" are overlapping between A, B, M or F, and
+all are similar in number and size distribution. Using each of these, I then
+made files of features corresponding to the *combinations* of factors:
+
 ```shell
 for i in M F; do
     for j in A B; do
@@ -174,28 +178,30 @@ for i in M F; do
     done;
 done;
 ```
+
 Now I have files named:
+
 ```txt
 sim/features_MA.bed
 sim/features_MB.bed
 sim/features_FA.bed
 sim/features_FB.bed
 ```
-The files named with an "M" include all intervals in
-`sim/features_M.bed`, and similarly the files named with a "B" include
-all intervals in `sim/features_B.bed`. So the file
-`sim/features_FA.bed` is all the features for "A" and all the features
-for "F". It will differ from `sim/features_FB.bed` less than it will
-from `sim/features_MB.bed`.
-
-Using these files, I simulated methylation levels at every CpG site in
-the fake genome `genome.fa` with CpGs outside the intervals receiving
-a methylation level of 0.8 and CpGs inside the intervals receiving a
-methylation level of 0.15. At each CpG site, a number of reads was
-sampled (Poisson with mean 10) and then the methylation levels were
-sampled with a coin flip weighed as either 0.8 or 0.15, depending on
-the CpG site. I did this twice for each combination of {M,F}x{A,B}.
-The result is 8 files named as follows:
+
+The files named with an "M" include all intervals in `sim/features_M.bed`, and
+similarly the files named with a "B" include all intervals in
+`sim/features_B.bed`. So the file `sim/features_FA.bed` is all the features
+for "A" and all the features for "F". It will differ from
+`sim/features_FB.bed` less than it will from `sim/features_MB.bed`.
+
+Using these files, I simulated methylation levels at every CpG site in the
+fake genome `genome.fa` with CpGs outside the intervals receiving a
+methylation level of 0.8 and CpGs inside the intervals receiving a methylation
+level of 0.15. At each CpG site, a number of reads was sampled (Poisson with
+mean 10) and then the methylation levels were sampled with a coin flip weighed
+as either 0.8 or 0.15, depending on the CpG site. I did this twice for each
+combination of {M,F}x{A,B}.  The result is 8 files named as follows:
+
 ```console
 $ ls -1 sim/*.counts
 sim/sample_FA1.counts
@@ -207,12 +213,14 @@ sim/sample_MA2.counts
 sim/sample_MB1.counts
 sim/sample_MB2.counts
 ```
-The program I used for the simulation above is not currently
-available. In this simulation, the only difference between replicates
-(e.g., `sim/sample_FA1.counts` vs. `sim/sample_FA2.counts`) is random
-noise due to sampling.
+
+The program I used for the simulation above is not currently available. In
+this simulation, the only difference between replicates (e.g.,
+`sim/sample_FA1.counts` vs. `sim/sample_FA2.counts`) is random noise due to
+sampling.
 
 Here is a look inside one of these files:
+
 ```console
 $ head sim/sample_FA1.counts
 chr1    163 +   CG  0.857143    7
@@ -226,12 +234,16 @@ chr1    324 +   CG  0.555556    9
 chr1    350 +   CG  0.933333    15
 chr1    356 +   CG  0.923077    13
 ```
+
 Then I used the `merge` command from dnmtools to make a data matrix as
 follows:
+
 ```console
 $ dnmtools merge -t -radmeth -v -remove .counts -o sim/table.txt sim/sample_*.counts
 ```
+
 And this is what it gave me:
+
 ```console
 $ head sim/table.txt
 sample_FA1  sample_FA2  sample_FB1  sample_FB2  sample_MA1  sample_MA2  sample_MB1  sample_MB2
@@ -245,18 +257,21 @@ chr1:322:+:CG   6   5   6   6   9   8   19  14  13  11  13  11  9   6   10  7
 chr1:324:+:CG   9   5   10  9   8   5   10  9   7   6   14  9   7   6   6   6
 chr1:350:+:CG   15  14  8   6   6   5   9   6   7   6   6   4   9   8   11  10
 ```
-With this simulated data, in which each column has the influence of
-two different factors, we can use `radmeth` with the design matrix
-shown above (in a file named `desgin.txt`) to get p-values for
-differential methylation between levels A and B of our factor of
-interest -- while controling for the effect of M/F:
+
+With this simulated data, in which each column has the influence of two
+different factors, we can use `radmeth` with the design matrix shown above (in
+a file named `desgin.txt`) to get p-values for differential methylation
+between levels A and B of our factor of interest -- while controling for the
+effect of M/F:
+
 ```console
 $ dnmtools radmeth -o sim/samples.radmeth -f factor design.txt sim/table.txt
 ```
-As explained already, the 5th column of the output gives the p-values
-for differential methylation between levels for our factor of interest
-(named "factor" in the design matrix).  Here is a look at part of the
-output:
+
+As explained already, the 5th column of the output gives the p-values for
+differential methylation between levels for our factor of interest (named
+"factor" in the design matrix).  Here is a look at part of the output:
+
 ```console
 $ head sim/samples.radmeth
 chr1    163 +   CG  0.0864953   35  22  45  36
@@ -270,19 +285,23 @@ chr1    324 +   CG  0.257851    40  29  31  26
 chr1    350 +   CG  0.893476    36  30  35  29
 chr1    356 +   CG  0.0527032   37  35  42  33
 ```
-Focusing on the 5th column, we see values between 0 and 1, but none of
-them are very extreme. This particular output file has 17903 rows.  We
-can use the dnmtools command `selectsites` to pull out the rows in
-this file that correspond to each of the original feature sets. Here
-is how we would do it for all 4 different original sets of features:
+
+Focusing on the 5th column, we see values between 0 and 1, but none of them
+are very extreme. This particular output file has 17903 rows.  We can use the
+dnmtools command `selectsites` to pull out the rows in this file that
+correspond to each of the original feature sets. Here is how we would do it
+for all 4 different original sets of features:
+
 ```shell
 for i in M F A B; do
     dnmtools selectsites -o sim/features_${i}.txt sim/features_${i}.bed sim/samples.radmeth;
 done
 ```
-The result allows us to verify that `radmeth` with the design matrix
-given above identifies sites as significant when they differ between
-levels A and B of the factor of interest:
+
+The result allows us to verify that `radmeth` with the design matrix given
+above identifies sites as significant when they differ between levels A and B
+of the factor of interest:
+
 ```console
 $ for i in sim/features_*.counts; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t
 sim/features_A.counts  0.255448  430  0.000594064
@@ -290,22 +309,23 @@ sim/features_B.counts  0.114256  372  0.00030714
 sim/features_F.counts  196.093   400  0.490232
 sim/features_M.counts  295.234   588  0.502099
 ```
-The p-values in features associated with either M or F are averaging
-around 0.5, which is what we want if we are trying to control this
-possible confounding variable. In the features associated either with
-A or B, the p-values are very small on average. Of course this is
-literally by design. Knowing that we simulated just as much difference
-between M and F as between A and B, if we had used `-f sex` then
-`radmeth` would have correctly assigned low p-values to sites in the
-intervals of `features_M.bed` and `features_F.bed`.
-
-We can use the same simulated data to verify that we do need the
-intercept in our design. In other words, the column "base" in
-`design.txt` needs to be present (you can give it any name you want;
-make sure it's all 1s).
-
-We change the design matrix to make the following, which has the
-intercept column removed:
+
+The p-values in features associated with either M or F are averaging around
+0.5, which is what we want if we are trying to control this possible
+confounding variable. In the features associated either with A or B, the
+p-values are very small on average. Of course this is literally by
+design. Knowing that we simulated just as much difference between M and F as
+between A and B, if we had used `-f sex` then `radmeth` would have correctly
+assigned low p-values to sites in the intervals of `features_M.bed` and
+`features_F.bed`.
+
+We can use the same simulated data to verify that we do need the intercept in
+our design. In other words, the column "base" in `design.txt` needs to be
+present (you can give it any name you want; make sure it's all 1s).
+
+We change the design matrix to make the following, which has the intercept
+column removed:
+
 ```console
 $ cat design_noint.txt
 sex factor
@@ -318,8 +338,10 @@ sample_MA2  0   1
 sample_MB1  0   0
 sample_MB2  0   0
 ```
-Then we re-run all the above commands (I won't exaplain each) to get
-results analogous but with this modified design matrix:
+
+Then we re-run all the above commands (I won't exaplain each) to get results
+analogous but with this modified design matrix:
+
 ```console
 $ dnmtools radmeth -o sim/samples_noint.radmeth -f factor design_noint.txt sim/table.txt
 $ for i in M F A B; do dnmtools selectsites -o sim/features_noint_${i}.txt sim/features_${i}.bed sim/samples_noint.radmeth; done
@@ -329,33 +351,34 @@ sim/features_noint_B.txt  15.2391  372  0.0409653
 sim/features_noint_F.txt  77.2438  400  0.193109
 sim/features_noint_M.txt  93.7181  588  0.159385
 ```
-Although the sites within features corresponding to A and B are much
-lower on average than those for M or F, the distinction has been
-reduced. More importantly, we know that the sites in
-`sim/features_M.bed` and `sim/features_F.bed` are not influenced by
-the level A/B of our factor of interest. The p-values for sites in the
-last two rows above should have mean of 0.5. But without the intercept
-included in the design matrix, the outcomes to not match our
-statistical assumptions. I cannot predict what will happen if you use
-the wrong design matrix. In analyzing bisulfite sequencing data for
-differential methylation, we become aware of uncertainty through the
-counts, and we leverage this to try and be more accurate. So unlike
-other regression settings, if you try to normalize the data first, in
-order to avoid having to deal with an intercept, you will not arrive
-at an equivalent procedure -- if you want to normalize the data
-yourself (i.e. subtract mu and divide by sigma) then you are probably
-better off using a general regression package.
+
+Although the sites within features corresponding to A and B are much lower on
+average than those for M or F, the distinction has been reduced. More
+importantly, we know that the sites in `sim/features_M.bed` and
+`sim/features_F.bed` are not influenced by the level A/B of our factor of
+interest. The p-values for sites in the last two rows above should have mean
+of 0.5. But without the intercept included in the design matrix, the outcomes
+to not match our statistical assumptions. I cannot predict what will happen if
+you use the wrong design matrix. In analyzing bisulfite sequencing data for
+differential methylation, we become aware of uncertainty through the counts,
+and we leverage this to try and be more accurate. So unlike other regression
+settings, if you try to normalize the data first, in order to avoid having to
+deal with an intercept, you will not arrive at an equivalent procedure -- if
+you want to normalize the data yourself (i.e. subtract mu and divide by sigma)
+then you are probably better off using a general regression package.
 
 ## Options
 
 ```txt
  -o, -out
 ```
+
 The name of the output file (default: stdout).
 
 ```txt
  -n -na-info
 ```
+
 If a p-value is not calculated, print NAs in more detail: low coverage
 (`NA_LOW_COV`), extreme values (`NA_EXTREME`) or numerical errors in
 likelihood ratios (`NA`).
@@ -363,10 +386,27 @@ likelihood ratios (`NA`).
 ```txt
  -v, -verbose
 ```
+
 Print more information while the command is running.
 
+```txt
+ -progress
+```
+
+Show progress while radmeth runs.
+
 ```txt
  -f, -factor
 ```
+
 The name of the factor on which to test differences. This must be the
 name of one of the columns in file design matrix. This is required.
+
+```txt
+ -t, -threads
+```
+
+Use multiple threads. This gives very good speedup as long as you do not
+exceed the number of available physical cores. Most CPUs support 2 hardware
+threads per physical core via SMT/Hyper-Threading, so check your number of
+cores if you are unsure.

From 006bdacb7f7ee3363352bd55d7ecc8e434df429e Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 19:58:33 -0700
Subject: [PATCH 15/24] src/radmeth/radmeth_model.cpp: changing default
 parameter values: tolerance from 1e-4 to 1e-4 and max_iter from 700 to 250

---
 src/radmeth/radmeth_model.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/radmeth/radmeth_model.cpp b/src/radmeth/radmeth_model.cpp
index 1f7dc6d7..89ed0943 100644
--- a/src/radmeth/radmeth_model.cpp
+++ b/src/radmeth/radmeth_model.cpp
@@ -24,9 +24,9 @@
 #include <string>
 #include <vector>
 
-double Regression::tolerance = 1e-4;
+double Regression::tolerance = 1e-3;
 double Regression::stepsize = 0.001;
-std::uint32_t Regression::max_iter = 700;
+std::uint32_t Regression::max_iter = 250;
 
 void
 SiteProportions::parse(const std::string &line) {

From a88951dc528b12c168216bfd5f2fb7b1f329ef68 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 19:59:10 -0700
Subject: [PATCH 16/24] src/radmeth/radmeth.cpp: adding cli args for tolerance
 and max-iter in the GSL iterative estimation of model parameters

---
 src/radmeth/radmeth.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index 509ab76b..0fa8fcbe 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -401,6 +401,12 @@ main_radmeth(int argc, char *argv[]) {
       "numerical errors in likelihood ratios (NA)",
       false, more_na_info);
     opt_parse.add_opt("factor", 'f', "a factor to test", true, test_factor);
+    opt_parse.add_opt("tolerance", '\0',
+                      "numerical tolerance to test convergence", false,
+                      Regression::tolerance);
+    opt_parse.add_opt("max-iter", '\0',
+                      "max iterations when estimating parameters", false,
+                      Regression::max_iter);
 
     std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);

From 439cdccc5c211c8955d622a50acae38b5b7ae79f Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 19:59:30 -0700
Subject: [PATCH 17/24] docs/content/radmeth.md: adding description of the cli
 args for tolerance and max-iter in the GSL iterative estimation of model
 parameters

---
 docs/content/radmeth.md | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/docs/content/radmeth.md b/docs/content/radmeth.md
index 96f595bd..ce1eec98 100644
--- a/docs/content/radmeth.md
+++ b/docs/content/radmeth.md
@@ -375,6 +375,15 @@ then you are probably better off using a general regression package.
 
 The name of the output file (default: stdout).
 
+```txt
+ -t, -threads
+```
+
+Use multiple threads. This gives very good speedup as long as you do not
+exceed the number of available physical cores. Most CPUs support 2 hardware
+threads per physical core via SMT/Hyper-Threading, so check your number of
+cores if you are unsure.
+
 ```txt
  -n -na-info
 ```
@@ -403,10 +412,15 @@ The name of the factor on which to test differences. This must be the
 name of one of the columns in file design matrix. This is required.
 
 ```txt
- -t, -threads
+ -tolerance
 ```
 
-Use multiple threads. This gives very good speedup as long as you do not
-exceed the number of available physical cores. Most CPUs support 2 hardware
-threads per physical core via SMT/Hyper-Threading, so check your number of
-cores if you are unsure.
+The numerical tolerance for convergence when estimating model parameters.
+This cutoff is applied to the norm of the gradient of the log-likelihood
+function for MLE parameter estimation.
+
+```txt
+ -max-iter
+```
+
+The maximum iterations to allow when estimating model parameters.

From 175a078eb2d312b1554e0c5cec961e3cec9b1ab1 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 20:23:35 -0700
Subject: [PATCH 18/24] src/radmeth/radmeth_optimize.cpp: using a tolerance
 that is a multiplied by the sqrt of the number of params and by the number of
 obs/samples

---
 src/radmeth/radmeth_optimize.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp
index 380ddeaf..b787b964 100644
--- a/src/radmeth/radmeth_optimize.cpp
+++ b/src/radmeth/radmeth_optimize.cpp
@@ -166,7 +166,6 @@ neg_loglik_and_grad(const gsl_vector *params, void *object, double *loglik_val,
 
 bool
 fit_regression_model(Regression &r, std::vector<double> &params_init) {
-  const auto tolerance = Regression::tolerance;
   const auto stepsize = Regression::stepsize;
   const auto max_iter = Regression::max_iter;
 
@@ -177,6 +176,9 @@ fit_regression_model(Regression &r, std::vector<double> &params_init) {
     params_init.back() = -2.5;
   }
 
+  const double tolerance =
+    std::sqrt(n_params) * r.n_samples() * Regression::tolerance;
+
   if (params_init.size() != n_params)
     throw std::runtime_error("Wrong number of initial parameters.");
 

From 8c38ac296444473bef6436f83f4337a96e9dcecc Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Mon, 21 Jul 2025 20:45:25 -0700
Subject: [PATCH 19/24] docs/content/radmeth.md: updating the explanation of
 the tolerance parameter

---
 docs/content/radmeth.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/content/radmeth.md b/docs/content/radmeth.md
index ce1eec98..4bbcdc37 100644
--- a/docs/content/radmeth.md
+++ b/docs/content/radmeth.md
@@ -417,7 +417,9 @@ name of one of the columns in file design matrix. This is required.
 
 The numerical tolerance for convergence when estimating model parameters.
 This cutoff is applied to the norm of the gradient of the log-likelihood
-function for MLE parameter estimation.
+function for MLE parameter estimation, and is multiplied by both the square
+root of the number of parameters and by the number of methylomes in the
+analysis.
 
 ```txt
  -max-iter

From 549f31a7d69667ba9eea3c1d7acd1ef5e3e0beab Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Tue, 22 Jul 2025 15:44:59 -0700
Subject: [PATCH 20/24] src/radmeth/radmeth_model.cpp: refactored transpose

---
 src/radmeth/radmeth_model.cpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/radmeth/radmeth_model.cpp b/src/radmeth/radmeth_model.cpp
index 89ed0943..faa4a8bb 100644
--- a/src/radmeth/radmeth_model.cpp
+++ b/src/radmeth/radmeth_model.cpp
@@ -105,6 +105,18 @@ SiteProportions::parse(const std::string &line) {
     throw std::runtime_error("failed to parse counts from:\n" + line);
 }
 
+template <typename T>
+static void
+transpose(const std::vector<std::vector<T>> &mat,
+          std::vector<std::vector<T>> &tmat) {
+  const auto n_row = std::size(mat);
+  const auto n_col = std::size(mat.front());
+  tmat.resize(n_col, std::vector<T>(n_row, 0.0));
+  for (auto row_idx = 0u; row_idx < n_row; ++row_idx)
+    for (auto col_idx = 0u; col_idx < n_col; ++col_idx)
+      tmat[col_idx][row_idx] = mat[row_idx][col_idx];
+}
+
 std::istream &
 operator>>(std::istream &is, Design &design) {
   std::string header_encoding;
@@ -136,16 +148,10 @@ operator>>(std::istream &is, Design &design) {
       throw std::runtime_error(
         "each row must have as many columns as factors:\n" + row);
 
-    design.matrix.push_back(std::vector<std::uint8_t>());
-    swap(design.matrix.back(), matrix_row);
+    design.matrix.push_back(matrix_row);
   }
 
-  const auto n_row = std::size(design.matrix);
-  const auto n_col = std::size(design.matrix.front());
-  design.tmatrix.resize(n_col, std::vector<std::uint8_t>(n_row, 0.0));
-  for (auto row_idx = 0u; row_idx < n_row; ++row_idx)
-    for (auto col_idx = 0u; col_idx < n_col; ++col_idx)
-      design.tmatrix[col_idx][row_idx] = design.matrix[row_idx][col_idx];
+  transpose(design.matrix, design.tmatrix);
 
   return is;
 }

From 15ca070221124820b6074cfb578638afe075c19d Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Tue, 22 Jul 2025 15:45:36 -0700
Subject: [PATCH 21/24] src/radmeth/radmeth_model.hpp: declaring scratch space
 for the p_v vector to avoid reallocation in the gradient

---
 src/radmeth/radmeth_model.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/radmeth/radmeth_model.hpp b/src/radmeth/radmeth_model.hpp
index 3189ecd0..d7a01d63 100644
--- a/src/radmeth/radmeth_model.hpp
+++ b/src/radmeth/radmeth_model.hpp
@@ -75,6 +75,7 @@ struct Regression {
   Design design;
   SiteProportions props;
   double max_loglik{};
+  std::vector<double> p_v;  // scratch space
 
   [[nodiscard]] std::size_t
   n_factors() const {

From dd2e264afa29b75008214c3e67b6d3925eead0db Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Tue, 22 Jul 2025 15:46:48 -0700
Subject: [PATCH 22/24] src/radmeth/radmeth_optimize.cpp: slight changes to
 some numerical calculations and modifying the p_v in neg_gradient to be held
 in Regression to avoid reallocating

---
 src/radmeth/radmeth_optimize.cpp | 77 +++++++++++++++++---------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp
index b787b964..8affe87e 100644
--- a/src/radmeth/radmeth_optimize.cpp
+++ b/src/radmeth/radmeth_optimize.cpp
@@ -35,8 +35,8 @@ pi(const std::vector<T> &v, const gsl_vector *params) {
   // ADS: this function doesn't have a very helpful name
   const auto a = v.data();
   const double dot = std::inner_product(a, a + std::size(v), params->data, 0.0);
-  // const double p = -1.0 / std::expm1(-dot_prod); ?
-  return std::exp(dot) / (1.0 + std::exp(dot));
+  // const double p = std::exp(dot) / (1.0 + std::exp(dot));
+  return 1.0 / (1.0 / std::exp(dot) + 1.0);
 }
 
 [[nodiscard]] static double
@@ -46,29 +46,32 @@ neg_loglik(const gsl_vector *params, void *object) {
   // ADS: the dispersion parameter phi is the last element of
   // parameter vector
   const double disp_param = gsl_vector_get(params, reg->design.n_factors());
-  const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param));
+  // const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param));
+  const double phi = 1.0 / (1.0 / std::exp(disp_param) + 1.0);
   const double one_minus_phi = 1.0 - phi;
 
   const auto &mc = reg->props.mc;
+  const auto &mat = reg->design.matrix;
 
   double log_lik = 0;
   const std::size_t n_samples = reg->design.n_samples();
   for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) {
-    const double n = mc[col_idx].n_reads;
-    const double y = mc[col_idx].n_meth;
-    const double p = pi(reg->design.matrix[col_idx], params);
+    const auto n = mc[col_idx].n_reads;
+    const auto y = mc[col_idx].n_meth;
+    const double p = pi(mat[col_idx], params);
     const double one_minus_p = 1.0 - p;
 
     const double term1 = one_minus_phi * p;
-    for (int k = 0; k < y; ++k)
+    for (auto k = 0u; k < y; ++k)
       log_lik += std::log(term1 + phi * k);
 
     const double term2 = one_minus_phi * one_minus_p;
-    for (int k = 0; k < n - y; ++k)
+    for (auto k = 0u; k < n - y; ++k)
       log_lik += std::log(term2 + phi * k);
 
-    for (int k = 0; k < n; ++k)
-      log_lik -= std::log(1.0 + phi * (k - 1.0));
+    for (auto k = 0u; k < n; ++k)
+      // log_lik -= std::log(1.0 + phi * (k - 1.0));
+      log_lik -= std::log1p(phi * (k - 1.0));
   }
   return -log_lik;
 }
@@ -79,25 +82,27 @@ neg_gradient(const gsl_vector *params, void *object, gsl_vector *output) {
   const std::size_t n_samples = reg->design.n_samples();
   const std::size_t n_factors = reg->design.n_factors();
 
-  std::vector<double> p_v(n_samples, 0.0);
+  /// ADS: using scratch space held in reg instead of allocating here
+  // std::vector<double> p_v(n_samples, 0.0);
+  auto &p_v = reg->p_v;
   for (auto col_idx = 0u; col_idx < n_samples; ++col_idx)
     p_v[col_idx] = pi(reg->design.matrix[col_idx], params);
 
   const double disp_param = gsl_vector_get(params, n_factors);
 
-  const auto &tmatrix = reg->design.tmatrix;
-  const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param));
-  // const double phi = -1.0 / std::expm1(-disp_param);
+  const auto &tmat = reg->design.tmatrix;
+  // const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param));
+  const double phi = 1.0 / (1.0 / std::exp(disp_param) + 1.0);
   const double one_minus_phi = 1.0 - phi;
 
   const auto &mc = reg->props.mc;
 
-  for (std::size_t f = 0; f < n_factors; ++f) {
+  for (std::size_t factor_idx = 0; factor_idx < n_factors; ++factor_idx) {
     double deriv = 0;
-    const auto &vec = tmatrix[f];
+    const auto &vec = tmat[factor_idx];
     for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) {
-      const double n = mc[col_idx].n_reads;
-      const double y = mc[col_idx].n_meth;
+      const auto n = mc[col_idx].n_reads;
+      const auto y = mc[col_idx].n_meth;
       const double p = p_v[col_idx];
       const double one_minus_p = 1.0 - p;
 
@@ -108,48 +113,47 @@ neg_gradient(const gsl_vector *params, void *object, gsl_vector *output) {
 
       double term = 0;
 
-      const auto accumulate_term = [&](int k, const int lim,
-                                       const double denom_base) {
+      const auto accum_term = [&](auto k, const auto lim,
+                                  const double denom_base) {
         for (; k < lim; ++k)
           term += 1.0 / (denom_base + phi * k);
       };
 
-      const auto accumulate_subtract_term = [&](int k, const int lim,
-                                                const double denom_base) {
+      const auto accum_subtract_term = [&](auto k, const auto lim,
+                                           const double denom_base) {
         for (; k < lim; ++k)
           term -= 1.0 / (denom_base + phi * k);
       };
 
-      accumulate_term(0, y, denom_term1);
-      accumulate_subtract_term(0, n - y, one_minus_phi * one_minus_p);
+      accum_term(0u, y, denom_term1);
+      accum_subtract_term(0u, n - y, one_minus_phi * one_minus_p);
 
       deriv += term * factor;
     }
-    gsl_vector_set(output, f, deriv);
+    gsl_vector_set(output, factor_idx, deriv);
   }
 
   double deriv = 0;
   for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) {
-    const double n = mc[col_idx].n_reads;
-    const double y = mc[col_idx].n_meth;
+    const auto n = mc[col_idx].n_reads;
+    const auto y = mc[col_idx].n_meth;
     const double p = p_v[col_idx];
     const double one_minus_p = 1.0 - p;
 
     double term = 0;
-    const auto accumulate_term = [&](int k, const int lim,
-                                     const double num_shift,
-                                     const double denom_base) {
+    const auto accum_term = [&](auto k, const auto lim, const double num_shift,
+                                const double denom_base) {
       for (; k < lim; ++k)
         term += (k - num_shift) / (denom_base + phi * k);
     };
-    const auto accumulate_subtract_term = [&](int k, const int lim) {
+    const auto accum_subtract_term = [&](auto k, const auto lim) {
       for (; k < lim; ++k)
         term -= (k - 1.0) / (1.0 + phi * (k - 1.0));
     };
 
-    accumulate_term(0, y, p, one_minus_phi * p);
-    accumulate_term(0, n - y, one_minus_p, one_minus_phi * one_minus_p);
-    accumulate_subtract_term(0, n);
+    accum_term(0u, y, p, one_minus_phi * p);
+    accum_term(0u, n - y, one_minus_p, one_minus_phi * one_minus_p);
+    accum_subtract_term(0u, n);
 
     deriv += term * phi * one_minus_phi;
   }
@@ -176,6 +180,8 @@ fit_regression_model(Regression &r, std::vector<double> &params_init) {
     params_init.back() = -2.5;
   }
 
+  r.p_v.resize(r.n_samples());
+
   const double tolerance =
     std::sqrt(n_params) * r.n_samples() * Regression::tolerance;
 
@@ -219,8 +225,7 @@ fit_regression_model(Regression &r, std::vector<double> &params_init) {
     // check status from gradient
     status = gsl_multimin_test_gradient(s->gradient, tolerance);
   } while (status == GSL_CONTINUE && ++iter < max_iter);
-  // It it reasonable to reduce the number of iterations to 500?
-  // ADS: 700 vs. 500? what's the difference?
+  // ADS: What is a reasonable number of iterations?
 
   r.max_loglik = -1.0 * neg_loglik(s->x, &r);
 

From a767a6ef97bd134699b7cbfd189a5300a8b6970a Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Tue, 22 Jul 2025 16:30:52 -0700
Subject: [PATCH 23/24] src/utils/selectsites.cpp: adding an option to read
 sites with additional fields -- noticed when analysing output from radmeth

---
 src/utils/selectsites.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp
index 43b288a7..b0389ac4 100644
--- a/src/utils/selectsites.cpp
+++ b/src/utils/selectsites.cpp
@@ -294,6 +294,7 @@ main_selectsites(int argc, char *argv[]) {
     bool VERBOSE = false;
     bool keep_file_on_disk = false;
     bool compress_output = false;
+    bool allow_extra_fields = false;
 
     std::string outfile("-");
     std::string summary_file;
@@ -315,6 +316,8 @@ main_selectsites(int argc, char *argv[]) {
     opt_parse.add_opt("summary", 'S', "write summary to this file", false,
                       summary_file);
     opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output);
+    opt_parse.add_opt("relaxed", '\0', "input has extra fields", false,
+                      allow_extra_fields);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
     opt_parse.set_show_defaults();
     std::vector<std::string> leftover_args;
@@ -340,6 +343,8 @@ main_selectsites(int argc, char *argv[]) {
     const std::string sites_file = leftover_args.back();
     /****************** END COMMAND LINE OPTIONS *****************/
 
+    MSite::no_extra_fields = (allow_extra_fields == false);
+
     selectsites_summary summary;
     summary.command_line = get_command_line(argc, argv);
 

From 2595624a279042e6821188887c80148e2733affc Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Tue, 22 Jul 2025 16:32:00 -0700
Subject: [PATCH 24/24] docs/content/radmeth.md: updating numbers for changes
 in radmeth code

---
 docs/content/radmeth.md | 52 ++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/docs/content/radmeth.md b/docs/content/radmeth.md
index 4bbcdc37..d46dc108 100644
--- a/docs/content/radmeth.md
+++ b/docs/content/radmeth.md
@@ -116,7 +116,15 @@ neighboring CpGs using [radadjust](../radadjust).
 
 ## Tutorial
 
-This tutorial aims to answer questions users often have about the assumptions
+The data used here is available at:
+
+```txt
+https://github.com/andrewdavidsmith/radmeth-demo-simulated-data
+```
+
+And the results below were generated with v1.5.1 on Linux.
+
+This tutorial should answer questions users often have about the assumptions
 of radmeth and how to interepret the results. We will assume a factor of
 interest, with levels A and B. We will also assume a potentially confounding
 factor, sex, with levels M and F. Further, we have 8 total samples, 2 samples
@@ -274,16 +282,16 @@ differential methylation between levels for our factor of interest (named
 
 ```console
 $ head sim/samples.radmeth
-chr1    163 +   CG  0.0864953   35  22  45  36
-chr1    206 +   CG  0.444024    37  30  43  38
-chr1    232 +   CG  0.756591    28  20  35  26
-chr1    278 +   CG  0.780385    43  35  37  31
-chr1    296 +   CG  0.64704 33  29  38  32
-chr1    310 +   CG  0.640826    37  28  31  22
-chr1    322 +   CG  0.0971364   38  33  47  35
-chr1    324 +   CG  0.257851    40  29  31  26
-chr1    350 +   CG  0.893476    36  30  35  29
-chr1    356 +   CG  0.0527032   37  35  42  33
+chr1    163 +   CG  0.0877822   35  22  45  36
+chr1    206 +   CG  0.442745    37  30  43  38
+chr1    232 +   CG  0.756925    28  20  35  26
+chr1    278 +   CG  0.794626    43  35  37  31
+chr1    296 +   CG  0.635986    33  29  38  32
+chr1    310 +   CG  0.641986    37  28  31  22
+chr1    322 +   CG  0.0973903   38  33  47  35
+chr1    324 +   CG  0.260703    40  29  31  26
+chr1    350 +   CG  0.888242    36  30  35  29
+chr1    356 +   CG  0.0535286   37  35  42  33
 ```
 
 Focusing on the 5th column, we see values between 0 and 1, but none of them
@@ -294,7 +302,7 @@ for all 4 different original sets of features:
 
 ```shell
 for i in M F A B; do
-    dnmtools selectsites -o sim/features_${i}.txt sim/features_${i}.bed sim/samples.radmeth;
+    dnmtools selectsites -relaxed -o sim/features_${i}.txt sim/features_${i}.bed sim/samples.radmeth;
 done
 ```
 
@@ -303,11 +311,11 @@ above identifies sites as significant when they differ between levels A and B
 of the factor of interest:
 
 ```console
-$ for i in sim/features_*.counts; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t
-sim/features_A.counts  0.255448  430  0.000594064
-sim/features_B.counts  0.114256  372  0.00030714
-sim/features_F.counts  196.093   400  0.490232
-sim/features_M.counts  295.234   588  0.502099
+$ for i in sim/features_[ABFM].txt; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t
+sim/features_A.txt  0.256072  430  0.000595517
+sim/features_B.txt  0.114786  372  0.000308564
+sim/features_F.txt  197.332   400  0.493329
+sim/features_M.txt  297.728   588  0.50634
 ```
 
 The p-values in features associated with either M or F are averaging around
@@ -344,12 +352,12 @@ analogous but with this modified design matrix:
 
 ```console
 $ dnmtools radmeth -o sim/samples_noint.radmeth -f factor design_noint.txt sim/table.txt
-$ for i in M F A B; do dnmtools selectsites -o sim/features_noint_${i}.txt sim/features_${i}.bed sim/samples_noint.radmeth; done
+$ for i in M F A B; do dnmtools selectsites -relaxed -o sim/features_noint_${i}.txt sim/features_${i}.bed sim/samples_noint.radmeth; done
 $ for i in sim/*_noint_*.txt; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t
-sim/features_noint_A.txt  8.27788  430  0.0192509
-sim/features_noint_B.txt  15.2391  372  0.0409653
-sim/features_noint_F.txt  77.2438  400  0.193109
-sim/features_noint_M.txt  93.7181  588  0.159385
+sim/features_noint_A.txt  8.27931  430  0.0192542
+sim/features_noint_B.txt  15.2397  372  0.040967
+sim/features_noint_F.txt  77.2656  400  0.193164
+sim/features_noint_M.txt  93.7312  588  0.159407
 ```
 
 Although the sites within features corresponding to A and B are much lower on