From 7f0db95d4d85294ff7057c361c57d4420c39db53 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Fri, 8 Aug 2025 14:55:34 -0700
Subject: [PATCH 1/2] src/radmeth/radmeth.cpp: some cleanup

---
 src/radmeth/radmeth.cpp | 98 ++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 54 deletions(-)
diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp
index 0fa8fcbe..b5b5ba09 100644
--- a/src/radmeth/radmeth.cpp
+++ b/src/radmeth/radmeth.cpp
@@ -1,9 +1,7 @@
-/* Copyright (C) 2013-2023 University of Southern California and
- *                         Egor Dolzhenko
- *                         Andrew D Smith
- *                         Guilherme Sena
+/* Copyright (C) 2013-2025 Andrew D Smith
  *
- * Authors: Andrew D. Smith and Egor Dolzhenko and Guilherme Sena
+ * Author: Andrew D. Smith
+ * Contributors: Andrew D. Smith and Egor Dolzhenko and Guilherme Sena
  *
  * This program is free software: you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -16,6 +14,15 @@
  * General Public License for more details.
  */
 
+#include "radmeth_model.hpp"
+#include "radmeth_optimize.hpp"
+
+// smithlab headers
+#include "GenomicRegion.hpp"
+#include "OptionParser.hpp"
+#include "smithlab_os.hpp"
+#include "smithlab_utils.hpp"
+
 #include <gsl/gsl_cdf.h>  // GSL header for chisqrd distribution
 
 #include <algorithm>
@@ -29,20 +36,6 @@
 #include <thread>
 #include <vector>
 
-// smithlab headers
-#include "GenomicRegion.hpp"
-#include "OptionParser.hpp"
-#include "smithlab_os.hpp"
-#include "smithlab_utils.hpp"
-
-#include "radmeth_model.hpp"
-#include "radmeth_optimize.hpp"
-
-using std::begin;
-using std::cout;
-using std::distance;
-using std::end;
-
 struct file_progress {
   double one_hundred_over_filesize{};
   std::size_t prev_offset{};
@@ -67,7 +60,6 @@ remove_factor(Design &design, const std::size_t factor_idx) {
     design.matrix[i].erase(std::begin(design.matrix[i]) + factor_idx);
 }
 
-/***************** RADMETH ALGORITHM *****************/
 static bool
 consistent_sample_names(const Regression &reg, const std::string &header) {
   std::istringstream iss(header);
@@ -84,7 +76,7 @@ consistent_sample_names(const Regression &reg, const std::string &header) {
 // function outputs the p-value of the log-likelihood ratio. *Note* that it is
 // assumed that the reduced model has one fewer factor than the reduced model.
 static double
-llr_test(double null_loglik, double full_loglik) {
+llr_test(const double null_loglik, const double full_loglik) {
   // The log-likelihood ratio statistic.
   const double log_lik_stat = -2 * (null_loglik - full_loglik);
 
@@ -100,16 +92,15 @@ llr_test(double null_loglik, double full_loglik) {
 }
 
 static bool
-has_low_coverage(const Regression &reg, const std::size_t test_fac) {
+has_low_coverage(const Regression &reg, const std::size_t test_factor) {
   bool cvrd_in_test_fact_smpls = false;
+  const auto &tcol = reg.design.tmatrix[test_factor];
   for (std::size_t i = 0; i < reg.n_samples() && !cvrd_in_test_fact_smpls; ++i)
-    cvrd_in_test_fact_smpls =
-      (reg.design.matrix[i][test_fac] == 1 && reg.props.mc[i].n_reads != 0);
+    cvrd_in_test_fact_smpls = (tcol[i] == 1 && reg.props.mc[i].n_reads != 0);
 
   bool cvrd_in_other_smpls = false;
   for (std::size_t i = 0; i < reg.n_samples() && !cvrd_in_other_smpls; ++i)
-    cvrd_in_other_smpls =
-      (reg.design.matrix[i][test_fac] != 1 && reg.props.mc[i].n_reads != 0);
+    cvrd_in_other_smpls = (tcol[i] != 1 && reg.props.mc[i].n_reads != 0);
 
   return !cvrd_in_test_fact_smpls || !cvrd_in_other_smpls;
 }
@@ -131,9 +122,9 @@ has_extreme_counts(const Regression &reg) {
 
 [[nodiscard]] static bool
 has_two_values(const Regression &reg, const std::size_t test_factor) {
-  const auto &v = reg.design.tmatrix[test_factor];
-  for (auto i = std::cbegin(v); i != std::cend(v); ++i)
-    if (*i != v[0])
+  const auto &tcol = reg.design.tmatrix[test_factor];
+  for (const auto x : tcol)
+    if (x != tcol[0])
       return true;
   return false;
 }
@@ -145,7 +136,7 @@ get_test_factor_idx(const Regression &model, const std::string &test_factor) {
     std::find(std::cbegin(factors), std::cend(factors), test_factor);
 
   if (itr == std::cend(factors))
-    throw std::runtime_error("Factor not part of design: " + test_factor);
+    throw std::runtime_error("factor not part of design: " + test_factor);
 
   return std::distance(std::cbegin(factors), itr);
 }
@@ -154,16 +145,15 @@ static void
 read_design(const bool verbose, const std::string &design_filename,
             Design &design) {
   if (verbose)
-    std::cerr << "design table filename: " << design_filename << std::endl;
+    std::cerr << "design table filename: " << design_filename << '\n';
   std::ifstream design_file(design_filename);
   if (!design_file)
     throw std::runtime_error("could not open file: " + design_filename);
 
   // initialize full design matrix from file
   design_file >> design;
-
   if (verbose)
-    std::cerr << design << std::endl;
+    std::cerr << design << '\n';
 }
 
 enum class row_status : std::uint8_t {
@@ -174,11 +164,11 @@ enum class row_status : std::uint8_t {
 };
 
 [[nodiscard]] static std::vector<double>
-drop_idx(const std::vector<double> &v, const std::size_t to_drop) {
+drop_idx(const std::vector<double> &v, const std::size_t idx_to_drop) {
   std::vector<double> u;
   u.reserve(std::size(v) - 1);
   for (auto i = 0u; i < std::size(v); ++i)
-    if (i != to_drop)
+    if (i != idx_to_drop)
       u.push_back(v[i]);
   return u;
 }
@@ -218,12 +208,16 @@ radmeth(const bool show_progress, const bool more_na_info,
   std::string sample_names_header;
   std::getline(table_file, sample_names_header);
 
-  if (!consistent_sample_names(alt_model, sample_names_header))
-    throw std::runtime_error(
-      "header:\n" + sample_names_header + "\n" +
-      "does not match factor names or their order in the\n"
-      "design matrix. Check that the design matrix and\n"
-      "the proportion table are correctly formatted.");
+  if (!consistent_sample_names(alt_model, sample_names_header)) {
+    // clang-format off
+    const auto message =
+      R"(
+does not match factor names or their order in the design matrix. Check
+that the design matrix and the proportion table are correctly formatted.
+)";
+    // clang-format on
+    throw std::runtime_error("header:\n" + sample_names_header + message);
+  }
 
   const std::size_t n_samples = alt_model.design.n_samples();
 
@@ -266,9 +260,9 @@ radmeth(const bool show_progress, const bool more_na_info,
             throw std::runtime_error("found row with wrong number of columns");
 
           const auto [p_val, status] = [&]() -> std::tuple<double, row_status> {
-            // Skip the test if (1) no coverage in all cases or in all controls,
-            // or (2) the site is completely methylated or completely
-            // unmethylated across all samples.
+            // Skip the test if (1) no coverage in all cases or in all
+            // controls, or (2) the site is completely methylated or
+            // completely unmethylated across all samples.
             if (has_low_coverage(t_alt_model, test_factor_idx))
               return std::tuple{1.0, row_status::na_low_cov};
 
@@ -336,11 +330,11 @@ radmeth(const bool show_progress, const bool more_na_info,
             cursor += n_pval_bytes;
 
             // clang-format off
-          const int n_suffix_bytes = std::sprintf(cursor, suffix_fmt,
-                                                  n_reads_factor,
-                                                  n_meth_factor,
-                                                  n_reads_others,
-                                                  n_meth_others);
+            const int n_suffix_bytes = std::sprintf(cursor, suffix_fmt,
+                                                    n_reads_factor,
+                                                    n_meth_factor,
+                                                    n_reads_others,
+                                                    n_meth_others);
             // clang-format on
             if (n_suffix_bytes < 0)
               return n_suffix_bytes;
@@ -368,10 +362,6 @@ radmeth(const bool show_progress, const bool more_na_info,
   }
 }
 
-/***********************************************************************
- * Run beta-binoimial regression using the specified table with
- * proportions and design matrix
- */
 int
 main_radmeth(int argc, char *argv[]) {
   try {
@@ -455,7 +445,7 @@ main_radmeth(int argc, char *argv[]) {
             orig_alt_model, orig_null_model, test_factor_idx);
   }
   catch (const std::exception &e) {
-    std::cerr << "ERROR: " << e.what() << std::endl;
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;

From b366af136a867fa390073165c6a3be91c5ae1b32 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sat, 9 Aug 2025 19:29:00 -0700
Subject: [PATCH 2/2] src/utils/merge-methcounts.cpp: modernizing some code

---
 src/utils/merge-methcounts.cpp | 429 ++++++++++++++++-----------------
 1 file changed, 204 insertions(+), 225 deletions(-)

diff --git a/src/utils/merge-methcounts.cpp b/src/utils/merge-methcounts.cpp
index 08470b83..8aec4083 100644
--- a/src/utils/merge-methcounts.cpp
+++ b/src/utils/merge-methcounts.cpp
@@ -1,9 +1,9 @@
 /* merge-methcounts: a program for merging methcounts files
  *
- * Copyright (C) 2011-2022 University of Southern California and
+ * Copyright (C) 2011-2025 University of Southern California and
  *                         Andrew D. Smith
  *
- * Authors: Benjamin E Decato, Meng Zhou and Andrew D Smith
+ * Authors: Andrew D Smith
  *
  * This program is free software: you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -16,64 +16,59 @@
  * General Public License for more details.
  */
 
+#include "MSite.hpp"
+#include "OptionParser.hpp"
+#include "smithlab_os.hpp"
+#include "smithlab_utils.hpp"
+
+#include <bamxx.hpp>
+
+#include <algorithm>
 #include <cmath>
-#include <string>
-#include <vector>
+#include <fstream>
 #include <iostream>
 #include <iterator>
-#include <fstream>
-#include <algorithm>
+#include <memory>
 #include <numeric>
+#include <queue>
 #include <stdexcept>
-#include <unordered_set>
+#include <string>
 #include <unordered_map>
-#include <queue>
-
-#include <bamxx.hpp>
-
-#include "OptionParser.hpp"
-#include "smithlab_utils.hpp"
-#include "smithlab_os.hpp"
-#include "MSite.hpp"
-
-using std::string;
-using std::vector;
-using std::cout;
-using std::cerr;
-using std::endl;
-using std::runtime_error;
-using std::numeric_limits;
-using std::unordered_set;
-using std::unordered_map;
-
-using bamxx::bgzf_file;
+#include <unordered_set>
+#include <vector>
 
 static void
-set_invalid(MSite &s) {s.pos = numeric_limits<size_t>::max();}
+set_invalid(MSite &s) {
+  s.pos = std::numeric_limits<std::size_t>::max();
+}
 
 static bool
-is_valid(const MSite &s) {return s.pos != numeric_limits<size_t>::max();}
+is_valid(const MSite &s) {
+  return s.pos != std::numeric_limits<std::size_t>::max();
+}
 
-static bool
-any_sites_unprocessed(const vector<string> &filenames,
-                      const vector<bgzf_file*> &infiles,
-                      vector<bool> &outdated, vector<MSite> &sites) {
+[[nodiscard]] static bool
+any_sites_unprocessed(
+  const std::vector<std::string> &filenames,
+  const std::vector<std::unique_ptr<bamxx::bgzf_file>> &infiles,
+  std::vector<bool> &outdated, std::vector<MSite> &sites) {
 
-  const size_t n_files = sites.size();
+  const std::size_t n_files = std::size(sites);
 
   bool sites_remain = false;
-  for (size_t i = 0; i < n_files; ++i) {
+  for (std::size_t i = 0; i < n_files; ++i) {
     if (outdated[i]) {
       outdated[i] = false;
       MSite tmp_site;
       if (read_site(*infiles[i], tmp_site)) {
         // ADS: chrom order within a file already tested
         if (tmp_site.pos <= sites[i].pos && tmp_site.chrom == sites[i].chrom)
-          throw runtime_error("sites not sorted in " + filenames[i]);
+          throw std::runtime_error("sites not sorted in " + filenames[i]);
         sites_remain = true;
         sites[i] = tmp_site;
       }
-      else set_invalid(sites[i]);
+      else
+        set_invalid(sites[i]);
     }
     else if (is_valid(sites[i]))
       sites_remain = true;
@@ -81,42 +76,41 @@ any_sites_unprocessed(const vector<string> &filenames,
   return sites_remain;
 }
 
-
-static bool
-site_precedes(const vector<MSite> &sites,
-              const unordered_map<string, size_t> &chroms_order,
-              const size_t a, const size_t b) {
+[[nodiscard]] static bool
+site_precedes(const std::vector<MSite> &sites,
+              const std::unordered_map<std::string, std::size_t> &chroms_order,
+              const std::size_t a, const std::size_t b) {
   if (chroms_order.empty())
     return sites[a] < sites[b];
 
-  const size_t a_chr = chroms_order.find(sites[a].chrom)->second;
-  const size_t b_chr = chroms_order.find(sites[b].chrom)->second;
+  const std::size_t a_chr = chroms_order.find(sites[a].chrom)->second;
+  const std::size_t b_chr = chroms_order.find(sites[b].chrom)->second;
 
-  return ((a_chr < b_chr) ||
-          (a_chr == b_chr && sites[a].pos < sites[b].pos));
+  return a_chr < b_chr || (a_chr == b_chr && sites[a].pos < sites[b].pos);
 }
 
+[[nodiscard]] static std::size_t
+find_minimum_site(
+  const std::vector<MSite> &sites,
+  const std::unordered_map<std::string, std::size_t> &chroms_order,
+  const std::vector<bool> &outdated) {
 
-static size_t
-find_minimum_site(const vector<MSite> &sites,
-                  const unordered_map<string, size_t> &chroms_order,
-                  const vector<bool> &outdated) {
-
-  const size_t n_files = sites.size();
+  const std::size_t n_files = std::size(sites);
 
   // ms_id is the id of the minimum site, the next one to print
-  size_t ms_id = numeric_limits<size_t>::max();
+  std::size_t ms_id = std::numeric_limits<std::size_t>::max();
 
   // make sure there is at least one remaining site to print
-  for (size_t i = 0; i < n_files && ms_id == numeric_limits<size_t>::max(); ++i)
+  for (std::size_t i = 0;
+       i < n_files && ms_id == std::numeric_limits<std::size_t>::max(); ++i)
     if (is_valid(sites[i]) && !outdated[i])
       ms_id = i;
 
-  if (ms_id == numeric_limits<size_t>::max())
-    throw runtime_error("failed in a next site to print");
+  if (ms_id == std::numeric_limits<std::size_t>::max())
+    throw std::runtime_error("failed in a next site to print");
 
   // now find the earliest site to print among those that could be
-  for (size_t i = 0; i < n_files; ++i)
+  for (std::size_t i = 0; i < n_files; ++i)
     if (!outdated[i] && is_valid(sites[i]) &&
         site_precedes(sites, chroms_order, i, ms_id))
       ms_id = i;
@@ -124,23 +118,23 @@ find_minimum_site(const vector<MSite> &sites,
   return ms_id;
 }
 
-
-static bool
+[[nodiscard]] static bool
 same_location(const MSite &a, const MSite &b) {
   return a.chrom == b.chrom && a.pos == b.pos;
 }
 
-static size_t
-collect_sites_to_print(const vector<MSite> &sites,
-                       const unordered_map<string, size_t> &chroms_order,
-                       const vector<bool> &outdated,
-                       vector<bool> &to_print) {
+[[nodiscard]] static std::size_t
+collect_sites_to_print(
+  const std::vector<MSite> &sites,
+  const std::unordered_map<std::string, std::size_t> &chroms_order,
+  const std::vector<bool> &outdated, std::vector<bool> &to_print) {
 
-  const size_t n_files = sites.size();
+  const std::size_t n_files = std::size(sites);
 
-  const size_t min_site_idx = find_minimum_site(sites, chroms_order, outdated);
+  const std::size_t min_site_idx =
+    find_minimum_site(sites, chroms_order, outdated);
 
-  for (size_t i = 0; i < n_files; ++i)
+  for (std::size_t i = 0; i < n_files; ++i)
     // condition below covers "is_valid(sites[i])"
     if (same_location(sites[min_site_idx], sites[i]))
       to_print[i] = true;
@@ -148,41 +142,35 @@ collect_sites_to_print(const vector<MSite> &sites,
   return min_site_idx;
 }
 
-
-static bool
-any_mutated(const vector<bool> &to_print,
-            const vector<MSite> &sites) {
-  size_t i = 0;
-  while (i < sites.size() && !(to_print[i] && sites[i].is_mutated()))
+[[nodiscard]] static bool
+any_mutated(const std::vector<bool> &to_print,
+            const std::vector<MSite> &sites) {
+  std::size_t i = 0;
+  while (i < std::size(sites) && !(to_print[i] && sites[i].is_mutated()))
     ++i;
-  return (i < sites.size());
+  return (i < std::size(sites));
 }
 
-
 static void
 write_line_for_tabular(const bool write_fractional,
                        const bool report_any_mutated,
-                       const size_t min_reads,
-                       std::ostream &out,
-                       const vector<bool> &to_print,
-                       const vector<MSite> &sites,
-                       MSite min_site) {
+                       const std::size_t min_reads, std::ostream &out,
+                       const std::vector<bool> &to_print,
+                       const std::vector<MSite> &sites, MSite min_site) {
 
-  const size_t n_files = sites.size();
+  const std::size_t n_files = std::size(sites);
 
   min_site.set_unmutated();
   if (report_any_mutated && any_mutated(to_print, sites))
     min_site.set_mutated();
 
   // ADS: is this the format we want for the row names?
-  out << min_site.chrom << ':'
-      << min_site.pos << ':'
-      << min_site.strand << ':'
+  out << min_site.chrom << ':' << min_site.pos << ':' << min_site.strand << ':'
       << min_site.context;
 
   if (write_fractional) {
-    for (size_t i = 0; i < n_files; ++i) {
-      const size_t r = sites[i].n_reads;
+    for (std::size_t i = 0; i < n_files; ++i) {
+      const std::size_t r = sites[i].n_reads;
       if (to_print[i] && r >= min_reads)
         out << '\t' << sites[i].meth;
       else
@@ -190,22 +178,21 @@ write_line_for_tabular(const bool write_fractional,
     }
   }
   else
-    for (size_t i = 0; i < n_files; ++i) {
+    for (std::size_t i = 0; i < n_files; ++i) {
       if (to_print[i])
         out << '\t' << sites[i].n_reads << '\t' << sites[i].n_meth();
-      else out << '\t' << 0 << '\t'<< 0;
+      else
+        out << '\t' << 0 << '\t' << 0;
     }
   out << '\n';
 }
 
 static void
-write_line_for_merged_counts(std::ostream &out,
-                             const bool report_any_mutated,
-                             const vector<bool> &to_print,
-                             const vector<MSite> &sites,
-                             MSite min_site) {
+write_line_for_merged_counts(std::ostream &out, const bool report_any_mutated,
+                             const std::vector<bool> &to_print,
+                             const std::vector<MSite> &sites, MSite min_site) {
 
-  const size_t n_files = sites.size();
+  const std::size_t n_files = std::size(sites);
 
   min_site.set_unmutated();
   if (report_any_mutated && any_mutated(to_print, sites))
@@ -213,49 +200,50 @@ write_line_for_merged_counts(std::ostream &out,
 
   double meth_sum = 0;
   min_site.n_reads = 0;
-  for (size_t i = 0; i < n_files; ++i)
+  for (std::size_t i = 0; i < n_files; ++i)
     if (to_print[i]) {
       meth_sum += sites[i].n_meth();
       min_site.n_reads += sites[i].n_reads;
     }
-  min_site.meth = meth_sum/std::max(1ul, min_site.n_reads);
+  min_site.meth = meth_sum / std::max(1ul, min_site.n_reads);
 
   out << min_site << '\n';
 }
 
-
-static string
+[[nodiscard]] static std::string
 remove_extension(const std::string &filename) {
-  const size_t last_dot = filename.find_last_of(".");
-  if (last_dot == std::string::npos) return filename;
-  else return filename.substr(0, last_dot);
+  const std::size_t last_dot = filename.find_last_of(".");
+  if (last_dot == std::string::npos)
+    return filename;
+  else
+    return filename.substr(0, last_dot);
 }
 
-
-static string
-remove_suffix(const string &suffix, const std::string &filename) {
-  if (filename.substr(filename.size() - suffix.size(), suffix.size()) == suffix)
-    return filename.substr(0, filename.size() - suffix.size());
+[[nodiscard]] static std::string
+remove_suffix(const std::string &suffix, const std::string &filename) {
+  if (filename.substr(std::size(filename) - std::size(suffix),
+                      std::size(suffix)) == suffix)
+    return filename.substr(0, std::size(filename) - std::size(suffix));
   return filename;
 }
 
-
 static void
-get_orders_by_file(const string &filename, vector<string> &chroms_order) {
-
-  bgzf_file in(filename, "r");
-  if (!in) throw runtime_error("bad file: " + filename);
+get_orders_by_file(const std::string &filename,
+                   std::vector<std::string> &chroms_order) {
+  bamxx::bgzf_file in(filename, "r");
+  if (!in)
+    throw std::runtime_error("bad file: " + filename);
   chroms_order.clear();
 
-  unordered_set<string> chroms_seen;
-  string line;
-  string prev_chrom;
+  std::unordered_set<std::string> chroms_seen;
+  std::string line;
+  std::string prev_chrom;
 
   while (getline(in, line)) {
     line.resize(line.find_first_of(" \t"));
     if (line != prev_chrom) {
-      if (chroms_seen.find(line) != end(chroms_seen))
-        throw runtime_error("chroms out of order in: " + filename);
+      if (chroms_seen.find(line) != std::cend(chroms_seen))
+        throw std::runtime_error("chroms out of order in: " + filename);
       chroms_seen.insert(line);
       chroms_order.push_back(line);
       std::swap(line, prev_chrom);
@@ -263,94 +251,88 @@ get_orders_by_file(const string &filename, vector<string> &chroms_order) {
   }
 }
 
-
 static void
-get_chroms_order(const vector<string> &filenames,
-                unordered_map<string, size_t> &chroms_order) {
+get_chroms_order(const std::vector<std::string> &filenames,
+                 std::unordered_map<std::string, std::size_t> &chroms_order) {
 
   // get order of chroms in each file
-  vector<vector<string>> orders(filenames.size());
-  for (size_t i = 0; i < filenames.size(); ++i)
+  std::vector<std::vector<std::string>> orders(std::size(filenames));
+  for (std::size_t i = 0; i < std::size(filenames); ++i)
     get_orders_by_file(filenames[i], orders[i]);
 
   // get the union of chrom sets
-  unordered_set<string> the_union;
-  for (size_t i = 0; i < orders.size(); ++i)
-    for (size_t j = 0; j < orders[i].size(); ++j)
+  std::unordered_set<std::string> the_union;
+  for (std::size_t i = 0; i < std::size(orders); ++i)
+    for (std::size_t j = 0; j < std::size(orders[i]); ++j)
       the_union.insert(orders[i][j]);
 
   // get an adjacency list and in-degree for each node
-  unordered_map<string, vector<string>> adj_lists;
-  unordered_map<string, size_t> in_degree;
+  std::unordered_map<std::string, std::vector<std::string>> adj_lists;
+  std::unordered_map<std::string, std::size_t> in_degree;
   for (auto &&i : the_union) {
     in_degree[i] = 0;
-    adj_lists[i] = vector<string>();
+    adj_lists[i] = std::vector<std::string>();
   }
   for (auto &&i : orders) {
-    auto j = begin(i);
-    for (auto k = j + 1; k != end(i); ++j, ++k) {
+    auto j = std::cbegin(i);
+    for (auto k = j + 1; k != std::cend(i); ++j, ++k) {
       adj_lists[*j].push_back(*k);
       ++in_degree[*k];
     }
   }
 
-  std::queue<string> q; // invariant: nodes with no incoming edge
+  std::queue<std::string> q;  // invariant: nodes with no incoming edge
   for (auto &&i : the_union)
     if (in_degree[i] == 0)
       q.push(i);
 
   while (!q.empty()) {
-    const string u = q.front();
+    const std::string u = q.front();
     q.pop();
 
     // iterate over the edges (u, v)
     for (auto &&v : adj_lists[u]) {
-      --in_degree[v]; // degree should not appear here as 0
-      if (in_degree[v] == 0) // this should only happen once per v
+      --in_degree[v];         // degree should not appear here as 0
+      if (in_degree[v] == 0)  // this should only happen once per v
         q.push(v);
     }
-    adj_lists[u].clear(); // delete node; already had in_degree 0
+    adj_lists[u].clear();  // delete node; already had in_degree 0
 
-    chroms_order.insert(make_pair(u, chroms_order.size()));
+    chroms_order.insert(std::make_pair(u, std::size(chroms_order)));
   }
 
   // finally, make sure we found a consistent order
   for (auto &&i : adj_lists)
     if (!i.second.empty())
-      throw runtime_error("inconsistent order of chroms between files");
+      throw std::runtime_error("inconsistent order of chroms between files");
 }
 
-
-
 /*
-  This utility does two things, and they are grouped together here
-  because of how they are done, not because the uses are related. (1)
-  merge-methcounts can take a set of methcounts output files and
-  combine them into one. There are several reasons a user might want
-  to do this. An example is when technical replicates are performed,
-  and analyzed separately to understand technical variance (e.g.,
-  between sequencing runs or library preps). After examining the
-  technical variation, subsequent analyses might be best conducted on
-  all the data together. So all the methcounts files can be combined
-  into one using merge-methcounts. In this case, the coverage at any
-  site is the sum of the coverages in the original methcounts files,
-  and the methylation level at any site is the weighted mean. (2)
-  merge-methcounts can take a set of methcounts output files, and
-  create a table that contains all the same information. The table
-  format is helpful if subsequent analyses are best done using a data
-  table, for example in R. When producing a tabular format,
-  merge-methcounts allows the user to select whether the desired
+  This utility does two things, and they are grouped together here because of
+  how they are done, not because the uses are related. (1) merge-methcounts
+  can take a set of methcounts output files and combine them into one. There
+  are several reasons a user might want to do this. An example is when
+  technical replicates are performed, and analyzed separately to understand
+  technical variance (e.g., between sequencing runs or library preps). After
+  examining the technical variation, subsequent analyses might be best
+  conducted on all the data together. So all the methcounts files can be
+  combined into one using merge-methcounts. In this case, the coverage at any
+  site is the sum of the coverages in the original methcounts files, and the
+  methylation level at any site is the weighted mean. (2) merge-methcounts can
+  take a set of methcounts output files, and create a table that contains all
+  the same information. The table format is helpful if subsequent analyses are
+  best done using a data table, for example in R. When producing a tabular
+  format, merge-methcounts allows the user to select whether the desired
   output is in counts or fractions.
  */
 int
 main_merge_methcounts(int argc, char *argv[]) {
-
   try {
 
-    static const string description = "merge multiple methcounts files";
+    static const std::string description = "merge multiple methcounts files";
 
-    string outfile;
-    bool VERBOSE;
+    std::string outfile;
+    bool verbose = false;
     bool report_any_mutated = false;
     bool write_tabular_format = false;
     bool write_fractional = false;
@@ -358,25 +340,27 @@ main_merge_methcounts(int argc, char *argv[]) {
     bool ignore_chroms_order = false;
     bool add_first_column_header = false;
 
-    string header_info;
-    string column_name_suffix = "RM";
-    string suffix_to_remove;
+    std::string header_info;
+    std::string column_name_suffix = "RM";
+    std::string suffix_to_remove;
 
-    size_t min_reads = 1;
+    std::size_t min_reads = 1;
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(strip_path(argv[0]), description,
                            "<methcounts-files>");
     opt_parse.add_opt("output", 'o', "output file name (default: stdout)",
                       false, outfile);
-    opt_parse.add_opt("header", 'H',"header to print (ignored for tabular)",
+    opt_parse.add_opt("header", 'H', "header to print (ignored for tabular)",
                       false, header_info);
-    opt_parse.add_opt("tabular", 't', "output as table",
-                      false, write_tabular_format);
-    opt_parse.add_opt("radmeth", '\0', "make radmeth input "
+    opt_parse.add_opt("tabular", 't', "output as table", false,
+                      write_tabular_format);
+    opt_parse.add_opt("radmeth", '\0',
+                      "make radmeth input "
                       "(use with tabular and without fractional)",
                       false, radmeth_format);
-    opt_parse.add_opt("remove", '\0', "remove this suffix from filenames when "
+    opt_parse.add_opt("remove", '\0',
+                      "remove this suffix from filenames when "
                       "making column names; default removes from final dot",
                       false, suffix_to_remove);
     opt_parse.add_opt("suff", 's',
@@ -386,117 +370,119 @@ main_merge_methcounts(int argc, char *argv[]) {
                       false, column_name_suffix);
     opt_parse.add_opt("fractional", 'f', "output fractions (requires tabular)",
                       false, write_fractional);
-    opt_parse.add_opt("reads", 'r', "min reads (for fractional)",
-                      false, min_reads);
-    opt_parse.add_opt("ignore", '\0',"Do not attempt to determine chromosome. "
+    opt_parse.add_opt("reads", 'r', "min reads (for fractional)", false,
+                      min_reads);
+    opt_parse.add_opt("ignore", '\0',
+                      "Do not attempt to determine chromosome. "
                       "Lexicographic order will be used.",
                       false, ignore_chroms_order);
-    opt_parse.add_opt("mut", 'm',"If any of the sites being merged indicates "
+    opt_parse.add_opt("mut", 'm',
+                      "If any of the sites being merged indicates "
                       "mutated, mark the result has mutated.",
                       false, report_any_mutated);
-    opt_parse.add_opt("1st-col-header", '\0',"add name for 1st col in header",
+    opt_parse.add_opt("1st-col-header", '\0', "add name for 1st col in header",
                       false, add_first_column_header);
-    opt_parse.add_opt("verbose", 'v',"print more run info", false, VERBOSE);
+    opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
     opt_parse.set_show_defaults();
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << endl;
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (write_fractional && !write_tabular_format) {
-      cerr << "fractional output only available for tabular format" << endl;
+      std::cerr << "fractional output only available for tabular format\n";
       return EXIT_SUCCESS;
     }
     if (leftover_args.empty()) {
-      cerr << opt_parse.help_message() << endl;
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
-    if (column_name_suffix.size() != 2) {
-      cerr << "column name suffix must be 2 letters" << endl;
+    if (std::size(column_name_suffix) != 2) {
+      std::cerr << "column name suffix must be 2 letters\n";
       return EXIT_SUCCESS;
     }
-    vector<string> meth_files(leftover_args);
+    std::vector<std::string> meth_files(leftover_args);
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    unordered_map<string, size_t> chroms_order;
+    std::unordered_map<std::string, std::size_t> chroms_order;
     if (!ignore_chroms_order) {
-      if (VERBOSE)
-        cerr << "resolving chromosome order" << endl;
+      if (verbose)
+        std::cerr << "resolving chromosome order\n";
       get_chroms_order(meth_files, chroms_order);
-      if (VERBOSE) {
-        cerr << "chromosome order" << endl;
-        vector<string> v(chroms_order.size());
+      if (verbose) {
+        std::cerr << "chromosome order\n";
+        std::vector<std::string> v(std::size(chroms_order));
         for (auto &&i : chroms_order)
           v[i.second] = i.first;
         for (auto &&i : v)
-          cerr << i << endl;
+          std::cerr << i << '\n';
       }
     }
 
-    const size_t n_files = meth_files.size();
+    const std::size_t n_files = std::size(meth_files);
 
-    vector<bgzf_file*> infiles(n_files);
-    for (size_t i = 0; i < n_files; ++i) {
-      infiles[i] = new bgzf_file(meth_files[i], "r");
+    std::vector<std::unique_ptr<bamxx::bgzf_file>> infiles(n_files);
+    for (std::size_t i = 0; i < n_files; ++i) {
+      infiles[i] = std::make_unique<bamxx::bgzf_file>(meth_files[i], "r");
       if (!(*infiles[i]))
-        throw runtime_error("cannot open file: " + meth_files[i]);
+        throw std::runtime_error("cannot open file: " + meth_files[i]);
     }
 
     std::ofstream of;
-    if (!outfile.empty()) of.open(outfile);
-    std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
+    if (!outfile.empty())
+      of.open(outfile);
+    std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
 
     // print header if user specifies or if tabular output format
     if (write_tabular_format) {
 
-      vector<string> colnames;
+      std::vector<std::string> colnames;
       for (const auto &i : meth_files)
         colnames.push_back(strip_path(i));
 
       for (auto &i : colnames)
-        i = suffix_to_remove.empty() ?
-          remove_extension(i) : remove_suffix(suffix_to_remove, i);
+        i = suffix_to_remove.empty() ? remove_extension(i)
+                                     : remove_suffix(suffix_to_remove, i);
 
       if (!write_fractional && !radmeth_format) {
-        vector<string> tmp;
+        std::vector<std::string> tmp;
         for (auto &&i : colnames) {
           tmp.push_back(i + "_" + column_name_suffix[0]);
           tmp.push_back(i + "_" + column_name_suffix[1]);
         }
-        swap(tmp, colnames);
+        std::swap(tmp, colnames);
       }
 
       if (add_first_column_header)
         out << "site\t";
-      copy(begin(colnames), end(colnames),
-           std::ostream_iterator<string>(out, "\t"));
-      out << endl;
+      std::copy(std::cbegin(colnames), std::cend(colnames),
+                std::ostream_iterator<std::string>(out, "\t"));
+      out << '\n';
     }
     else if (!header_info.empty())
-      out << "#" << header_info << endl;
+      out << "#" << header_info << '\n';
 
-    vector<MSite> sites(n_files);
-    vector<bool> outdated(n_files, true);
-    vector<bool> sites_to_print; // declared here to keep allocation
-    vector<unordered_set<string>> chroms_seen(n_files);
+    std::vector<MSite> sites(n_files);
+    std::vector<bool> outdated(n_files, true);
+    std::vector<bool> sites_to_print;  // declared here to keep allocation
+    std::vector<std::unordered_set<std::string>> chroms_seen(n_files);
 
     while (any_sites_unprocessed(meth_files, infiles, outdated, sites)) {
-
       sites_to_print.clear();
       sites_to_print.resize(n_files, false);
 
       // below idx is one index among the sites to print
-      const size_t idx =
+      const std::size_t idx =
         collect_sites_to_print(sites, chroms_order, outdated, sites_to_print);
 
       // output the appropriate sites' data
@@ -504,21 +490,14 @@ main_merge_methcounts(int argc, char *argv[]) {
         write_line_for_tabular(write_fractional, report_any_mutated, min_reads,
                                out, sites_to_print, sites, sites[idx]);
       else
-        write_line_for_merged_counts(out, report_any_mutated,
-                                     sites_to_print, sites, sites[idx]);
+        write_line_for_merged_counts(out, report_any_mutated, sites_to_print,
+                                     sites, sites[idx]);
 
-      swap(outdated, sites_to_print);
+      std::swap(outdated, sites_to_print);
     }
-
-    for (auto &&f : infiles)
-      delete f;
-  }
-  catch (const runtime_error &e)  {
-    cerr << e.what() << endl;
-    return EXIT_FAILURE;
   }
-  catch (std::bad_alloc &ba) {
-    cerr << "ERROR: could not allocate memory" << endl;
+  catch (const std::runtime_error &e) {
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;