From cef65e53a64906b53cbfb0521563d6e539df06bf Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Tue, 29 Jul 2025 18:47:57 -0700
Subject: [PATCH 1/2] Updating coding style in several sources

---
 src/utils/kmersites.cpp | 198 +++++++++++--------
 src/utils/recovered.cpp | 290 +++++++++++++--------------
 src/utils/uniq.cpp      | 311 ++++++++++++++---------------
 src/utils/unxcounts.cpp | 422 ++++++++++++++++++++--------------------
 4 files changed, 624 insertions(+), 597 deletions(-)
diff --git a/src/utils/kmersites.cpp b/src/utils/kmersites.cpp
index 19ab4fc4..06014369 100644
--- a/src/utils/kmersites.cpp
+++ b/src/utils/kmersites.cpp
@@ -1,6 +1,6 @@
-/* kmersites: a program to generate a wiggle format file (using the
- * UCSC Genome Browser wiggle format) to indicate the location of
- * sites matching a specific k-mer
+/* kmersites: a program to generate a wiggle format file (using the UCSC
+ * Genome Browser wiggle format) to indicate the location of sites matching a
+ * specific k-mer
  *
  * Copyright (C) 2023 Andrew D. Smith
  *
@@ -17,36 +17,26 @@
  * General Public License for more details.
  */
 
-#include <string>
-#include <vector>
+#include <algorithm>
+#include <cstdint>  // for [u]int[0-9]+_t
+#include <filesystem>
+#include <fstream>
 #include <iostream>
+#include <iterator>
 #include <numeric>
 #include <stdexcept>
-#include <cstdint> // for [u]int[0-9]+_t
-#include <filesystem>
-#include <iterator>
-#include <algorithm>
+#include <string>
+#include <vector>
 
 #include "OptionParser.hpp"
-#include "dnmt_error.hpp"
 #include "smithlab_os.hpp"
 
 #include <bamxx.hpp>
 
-namespace fs = std::filesystem;
-
-using bamxx::bgzf_file;
-
-using std::string;
-using std::vector;
-using std::cerr;
-using std::endl;
-using std::to_string;
-
 static inline auto
-process_chrom_wig(const string &kmer, const int offset, const string &name,
-                  const string &chrom, bgzf_file &out) -> void {
-
+process_chrom_wig(const std::string &kmer, const int offset,
+                  const std::string &name, const std::string &chrom,
+                  bamxx::bgzf_file &out) -> void {
   static const auto variable_step_chrom_header = "variableStep chrom=";
 
   out.write(variable_step_chrom_header + name + "\n");
@@ -54,138 +44,176 @@ process_chrom_wig(const string &kmer, const int offset, const string &name,
   const auto kmer_size = size(kmer);
   const auto chrom_size = size(chrom);
   if (kmer_size > chrom_size)
-    throw dnmt_error("kmer size " + to_string(kmer_size) +
-                     " larger than chrom size " + to_string(chrom_size));
+    throw std::runtime_error("kmer size " + std::to_string(kmer_size) +
+                             " larger than chrom size " +
+                             std::to_string(chrom_size));
 
-  const auto beg_kmer = cbegin(kmer);
-  const auto end_kmer = cend(kmer);
+  const auto beg_kmer = std::cbegin(kmer);
+  const auto end_kmer = std::cend(kmer);
 
-  const auto end_chrom = cend(chrom);
-  auto chrom_itr = cbegin(chrom);
+  const auto end_chrom = std::cend(chrom);
+  auto chrom_itr = std::cbegin(chrom);
   auto chrom_itr_k = chrom_itr + kmer_size;
 
   auto pos = 0;
   while (chrom_itr_k != end_chrom) {
     if (std::equal(beg_kmer, end_kmer, chrom_itr++, chrom_itr_k++))
-      out.write(to_string(pos + offset) + "\t1\n");
+      out.write(std::to_string(pos + offset) + "\t1\n");
     ++pos;
   }
 }
 
+// Read a FASTA file and return {names, sequences}; each name is the header
+// text after '>' up to the first space or tab. Throws std::runtime_error if
+// the file cannot be opened or sequence data precedes the first header.
+[[nodiscard]] static auto
+read_fasta_file(const std::string &filename)
+  -> std::tuple<std::vector<std::string>, std::vector<std::string>> {
+  std::ifstream in(filename);
+  if (!in)
+    throw std::runtime_error("cannot open input file " + filename);
+  // fill the returned tuple in place so the sequences are never copied
+  std::tuple<std::vector<std::string>, std::vector<std::string>> result;
+  auto &[names, sequences] = result;
+  std::string line;
+  while (std::getline(in, line)) {
+    if (line[0] == '>') {  // line[0] is '\0' for an empty line
+      // with no space or tab, npos - 1 still makes substr take the rest
+      names.push_back(line.substr(1, line.find_first_of(" \t", 1) - 1));
+      sequences.emplace_back();
+    }
+    else if (!sequences.empty())
+      sequences.back() += line;
+    else if (!line.empty())  // back() on an empty vector is undefined
+      throw std::runtime_error("sequence before first header: " + filename);
+  }
+  return result;
+}
+
 static inline auto
-process_chrom_with_named_lines(const string &kmer, const int offset,
-                               const string &name, const string &chrom,
-                               bgzf_file &out) -> void {
+process_chrom_with_named_lines(const std::string &kmer, const int offset,
+                               const std::string &name,
+                               const std::string &chrom,
+                               bamxx::bgzf_file &out) {
 
   const auto kmer_size = size(kmer);
   const auto chrom_size = size(chrom);
   if (kmer_size > chrom_size)
-    throw dnmt_error("kmer size " + to_string(kmer_size) +
-                     " larger than chrom size " + to_string(chrom_size));
+    throw std::runtime_error("kmer size " + std::to_string(kmer_size) +
+                             " larger than chrom size " +
+                             std::to_string(chrom_size));
 
-  const auto beg_kmer = cbegin(kmer);
-  const auto end_kmer = cend(kmer);
+  const auto beg_kmer = std::cbegin(kmer);
+  const auto end_kmer = std::cend(kmer);
 
-  const auto end_chrom = cend(chrom);
-  auto chrom_itr = cbegin(chrom);
+  const auto end_chrom = std::cend(chrom);
+  auto chrom_itr = std::cbegin(chrom);
   auto chrom_itr_k = chrom_itr + kmer_size;
 
   auto pos = 0;
   while (chrom_itr_k != end_chrom) {
     if (std::equal(beg_kmer, end_kmer, chrom_itr++, chrom_itr_k++))
-      out.write(name + "\t" + to_string(pos + offset) + "\t1\n");
+      out.write(name + "\t" + std::to_string(pos + offset) + "\t1\n");
     ++pos;
   }
 }
 
+// Report whether kmer contains any character other than an uppercase
+// 'A', 'C', 'G' or 'T'. An empty kmer has no such character, so it is
+// not reported as bad.
+[[nodiscard]] static inline auto
+bad_dna_kmer(const std::string &kmer) -> bool {
+  static constexpr auto dna_bases = "ACGT";
+  return kmer.find_first_not_of(dna_bases) != std::string::npos;
+}
+
 auto
 kmersites(const int argc, char *argv[]) -> int {
   try {
 
-    bool verbose = false;
-    bool show_progress = false;
-    bool compress_output = false;
-    bool name_each_line = false;
+    bool verbose{false};
+    bool show_progress{false};
+    bool compress_output{false};
+    bool name_each_line{false};
 
-    string kmer = "CG";
-    string outfile;
-    // int n_threads = 1;
+    std::string kmer = "CG";
+    std::string outfile;
     int offset = 1;
 
     /****************** COMMAND LINE OPTIONS ********************/
-    OptionParser opt_parse(fs::path(string(*argv)).filename(),
-                           "get sites matching kmer",
+    OptionParser opt_parse("dnmtools kmersites", "get sites matching kmer",
                            "<fasta-file>");
-    // opt_parse.add_opt("threads", 't', "threads to use (few needed)",
-    //                   false, n_threads);
     opt_parse.add_opt("output", 'o', "output file name (default: stdout)",
                       false, outfile);
-    opt_parse.add_opt("offset", 'O', "offset within kmer to report",
-                      false, offset);
+    opt_parse.add_opt("offset", 'O', "offset within kmer to report", false,
+                      offset);
     opt_parse.add_opt("kmer", 'k', "kmer to report", false, kmer);
     opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output);
     opt_parse.add_opt("name-each-line", '\0', "name each line with chrom",
                       false, name_each_line);
     opt_parse.add_opt("progress", '\0', "show progress", false, show_progress);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (opt_parse.about_requested() || opt_parse.help_requested() ||
         leftover_args.empty()) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << endl;
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string chroms_file = leftover_args.front();
+    const std::string chroms_file = leftover_args.front();
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    if (offset < 0)
-      throw dnmt_error("offset must be non-negative (specified=" +
-                       to_string(offset));
+    if (bad_dna_kmer(kmer)) {
+      std::cerr << "invalid DNA kmer: " << kmer << "\n";
+      return EXIT_FAILURE;
+    }
 
-    // if (n_threads < 0)
-    //   throw dnmt_error("thread count cannot be negative");
+    if (offset < 0)
+      throw std::runtime_error("offset must be non-negative (specified=" +
+                               std::to_string(offset) + ")");
 
     std::ostringstream cmd;
-    copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));
+    std::copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));
 
     // file types from HTSlib use "-" for the filename to go to stdout
-    if (outfile.empty()) outfile = "-";
+    if (outfile.empty())
+      outfile = "-";
 
     if (verbose)
-      cerr << "[input fastq file: " << chroms_file << "]" << endl
-           << "[output file: " << outfile << "]" << endl
-           << "[output format: " << (compress_output ? "bgzf" : "text") << "]"
-           << endl
-           // << "[threads requested: " << n_threads << "]" << endl
-           << "[k-mer to report: " << kmer << "]" << endl
-           << "[command line: \"" << cmd.str() << "\"]" << endl;
-
-    vector<string> names, chroms;
-    read_fasta_file_short_names(chroms_file, names, chroms);
+      std::cerr << "[input fastq file: " << chroms_file << "]\n"
+                << "[output file: " << outfile << "]\n"
+                << "[output format: " << (compress_output ? "bgzf" : "text")
+                << "]\n"
+                << "[k-mer sequence to report: " << kmer << "]\n"
+                << "[command line: " << cmd.str() << "]\n";
+
+    auto [names, chroms] = read_fasta_file(chroms_file);
     for (auto &chrom : chroms)
-      std::transform(cbegin(chrom), cend(chrom), begin(chrom),
+      std::transform(std::cbegin(chrom), std::cend(chrom), std::begin(chrom),
                      [](const char c) { return std::toupper(c); });
 
     // open the output file
-    const auto output_mode = compress_output ? "w" : "wu";
-    bamxx::bgzf_file out(outfile, output_mode);
-    if (!out) throw dnmt_error("error opening output file: " + outfile);
-
-    for (auto i = 0u; i < size(names); ++i) {
-      if (show_progress) cerr << "processing: " << names[i] << endl;
+    bamxx::bgzf_file out(outfile, compress_output ? "w" : "wu");
+    if (!out)
+      throw std::runtime_error("error opening output file: " + outfile);
+
+    auto chrom_itr = std::cbegin(chroms);
+    for (const auto &name : names) {
+      if (show_progress)
+        std::cerr << "processing: " << name << '\n';
       if (name_each_line)
-        process_chrom_with_named_lines(kmer, offset, names[i], chroms[i], out);
+        process_chrom_with_named_lines(kmer, offset, name, *chrom_itr++, out);
       else
-        process_chrom_wig(kmer, offset, names[i], chroms[i], out);
+        process_chrom_wig(kmer, offset, name, *chrom_itr++, out);
     }
   }
   catch (const std::exception &e) {
-    cerr << e.what() << endl;
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
diff --git a/src/utils/recovered.cpp b/src/utils/recovered.cpp
index 9775f9c4..7e834719 100644
--- a/src/utils/recovered.cpp
+++ b/src/utils/recovered.cpp
@@ -1,95 +1,86 @@
-/* recovered: for all sites not present in a counts file, add those
- * sites as non-covered and with the appropriate context.
+/* recovered: for all sites not present in a counts file, add those sites as
+ * non-covered and with the appropriate context.
  *
  * Copyright (C) 2023 Andrew D. Smith
  *
  * Authors: Andrew D. Smith
  *
- * This program is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
  *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
  */
 
-#include <string>
-#include <vector>
-#include <iostream>
-#include <stdexcept>
-#include <unordered_map>
-#include <unordered_set>
+#include "MSite.hpp"
+#include "bsutils.hpp"
+#include "counts_header.hpp"
+
 #include <bamxx.hpp>
 
 // from smithlab_cpp
 #include "OptionParser.hpp"
-#include "smithlab_utils.hpp"
 #include "smithlab_os.hpp"
-#include "bsutils.hpp"
-#include "dnmt_error.hpp"
-#include "counts_header.hpp"
-
-#include "MSite.hpp"
-
-using std::string;
-using std::vector;
-using std::cout;
-using std::cerr;
-using std::endl;
-using std::unordered_map;
-using std::unordered_set;
-using std::pair;
-using std::numeric_limits;
-using std::runtime_error;
-
-using bamxx::bgzf_file;
+#include "smithlab_utils.hpp"
 
-template<typename T> using num_lim = std::numeric_limits<T>;
+#include <cstdint>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
 
 static void
-verify_chrom_orders(const bool verbose, const uint32_t n_threads,
-                    const string &filename,
-                    const unordered_map<string, int32_t> &chroms_order) {
-  bgzf_file in(filename, "r");
-  if (!in) throw runtime_error("bad file: " + filename);
+verify_chrom_orders(
+  const bool verbose, const std::uint32_t n_threads,
+  const std::string &filename,
+  const std::unordered_map<std::string, std::int32_t> &chroms_order) {
+  bamxx::bgzf_file in(filename, "r");
+  if (!in)
+    throw std::runtime_error("bad file: " + filename);
 
   bamxx::bam_tpool tp(n_threads);
   // set the threads for the input file decompression
   if (n_threads > 1 && in.is_bgzf())
     tp.set_io(in);
 
-  unordered_set<int32_t> chroms_seen;
-  string line;
-  string prev_chrom;
+  std::unordered_set<std::int32_t> chroms_seen;
+  std::string line;
+  std::string prev_chrom;
 
-  int32_t prev_idx = -1;
+  std::int32_t prev_idx = -1;
 
   while (getline(in, line)) {
-    if (is_counts_header_line(line)) continue;
+    if (is_counts_header_line(line))
+      continue;
     line.resize(line.find_first_of(" \t"));
     if (line != prev_chrom) {
-      if (verbose) cerr << "verifying: " << line << endl;
+      if (verbose)
+        std::cerr << "verifying: " << line << "\n";
 
       const auto idx_itr = chroms_order.find(line);
       if (idx_itr == cend(chroms_order))
-        throw runtime_error("chrom not found genome file: " + line);
+        throw std::runtime_error("chrom not found genome file: " + line);
       const auto idx = idx_itr->second;
 
       if (chroms_seen.find(idx) != end(chroms_seen))
-        throw runtime_error("chroms out of order in: " + filename);
+        throw std::runtime_error("chroms out of order in: " + filename);
       chroms_seen.insert(idx);
 
       if (idx < prev_idx)
-        throw runtime_error("inconsistent chromosome order at: " + line);
+        throw std::runtime_error("inconsistent chromosome order at: " + line);
 
       prev_idx = idx;
       std::swap(line, prev_chrom);
     }
   }
-  if (verbose) cerr << "chrom orders are consistent" << endl;
+  if (verbose)
+    std::cerr << "chrom orders are consistent\n";
 }
 
 struct quick_buf : public std::ostringstream,
@@ -97,20 +88,21 @@ struct quick_buf : public std::ostringstream,
   // ADS: By user ecatmur on SO; very fast. Seems to work...
   quick_buf() {
     // ...but this seems to depend on data layout
-    static_cast<std::basic_ios<char>&>(*this).rdbuf(this);
+    static_cast<std::basic_ios<char> &>(*this).rdbuf(this);
   }
-  void clear() {
+  void
+  clear() {
     // reset buffer pointers (member functions)
     setp(pbase(), pbase());
   }
-  char const* c_str() {
+  char const *
+  c_str() {
     /* between c_str and insertion make sure to clear() */
     *pptr() = '\0';
     return pbase();
   }
 };
 
-
 /* The three functions below here should probably be moved into
    bsutils.hpp. I am not sure if the DDG function is needed, but it
    seems like if one considers strand, and the CHH is not symmetric,
@@ -118,132 +110,128 @@ struct quick_buf : public std::ostringstream,
    because he spent much time thinking about it in the context of
    plants. */
 static inline bool
-is_chh(const std::string &s, size_t i) {
-  return (i < (s.length() - 2)) &&
-    is_cytosine(s[i]) &&
-    !is_guanine(s[i + 1]) &&
-    !is_guanine(s[i + 2]);
+is_chh(const std::string &s, std::size_t i) {
+  return (i < (s.length() - 2)) && is_cytosine(s[i]) && !is_guanine(s[i + 1]) &&
+         !is_guanine(s[i + 2]);
 }
 
-
 static inline bool
-is_ddg(const std::string &s, size_t i) {
-  return (i < (s.length() - 2)) &&
-    !is_cytosine(s[i]) &&
-    !is_cytosine(s[i + 1]) &&
-    is_guanine(s[i + 2]);
+is_ddg(const std::string &s, std::size_t i) {
+  return (i < (s.length() - 2)) && !is_cytosine(s[i]) &&
+         !is_cytosine(s[i + 1]) && is_guanine(s[i + 2]);
 }
 
-
 static inline bool
-is_c_at_g(const std::string &s, size_t i) {
-  return (i < (s.length() - 2)) &&
-    is_cytosine(s[i]) &&
-    !is_cytosine(s[i + 1]) &&
-    !is_guanine(s[i + 1]) &&
-    is_guanine(s[i + 2]);
+is_c_at_g(const std::string &s, std::size_t i) {
+  return (i < (s.length() - 2)) && is_cytosine(s[i]) &&
+         !is_cytosine(s[i + 1]) && !is_guanine(s[i + 1]) &&
+         is_guanine(s[i + 2]);
 }
 
 /* The "tag" returned by this function should be exclusive, so that
  * the order of checking conditions doesn't matter. There is also a
  * bit of a hack in that the unsigned "pos" could wrap, but this still
  * works as long as the chromosome size is not the maximum size of a
- * size_t.
+ * std::size_t.
  */
-static inline uint32_t
-get_tag_from_genome_c(const string &s, const size_t pos) {
-  if (is_cpg(s, pos)) return 0;
-  else if (is_chh(s, pos)) return 1;
-  else if (is_c_at_g(s, pos)) return 2;
+static inline std::uint32_t
+get_tag_from_genome_c(const std::string &s, const std::size_t pos) {
+  if (is_cpg(s, pos))
+    return 0;
+  else if (is_chh(s, pos))
+    return 1;
+  else if (is_c_at_g(s, pos))
+    return 2;
   return 3;
 }
 
-static inline uint32_t
-get_tag_from_genome_g(const string &s, const size_t pos) {
-  if (is_cpg(s, pos - 1)) return 0;
-  else if (is_ddg(s, pos - 2)) return 1;
-  else if (is_c_at_g(s, pos - 2)) return 2;
+static inline std::uint32_t
+get_tag_from_genome_g(const std::string &s, const std::size_t pos) {
+  if (is_cpg(s, pos - 1))
+    return 0;
+  else if (is_ddg(s, pos - 2))
+    return 1;
+  else if (is_c_at_g(s, pos - 2))
+    return 2;
   return 3;
 }
 
 static const char *tag_values[] = {
-  "CpG", // 0
-  "CHH", // 1
-  "CXG", // 2
-  "CCG", // 3
-  "N"    // 4
+  "CpG",  // 0
+  "CHH",  // 1
+  "CXG",  // 2
+  "CCG",  // 3
+  "N"     // 4
 };
 
 static void
-write_missing_sites(const string &name, const string &chrom,
-                const uint64_t start_pos, const uint64_t end_pos,
-                bgzf_file &out) {
-  const string name_tab = name + "\t";
+write_missing_sites(const std::string &name, const std::string &chrom,
+                    const std::uint64_t start_pos, const std::uint64_t end_pos,
+                    bamxx::bgzf_file &out) {
+  const std::string name_tab = name + "\t";
   quick_buf buf;
   for (auto pos = start_pos; pos < end_pos; ++pos) {
     const char base = chrom[pos];
     if (is_cytosine(base) || is_guanine(base)) {
       const bool is_c = is_cytosine(base);
-      const uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos)
-                                    : get_tag_from_genome_g(chrom, pos);
+      const std::uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos)
+                                         : get_tag_from_genome_g(chrom, pos);
       buf.clear();
-      buf << name_tab << pos
-          << (is_c ? "\t+\t" : "\t-\t")
-          << tag_values[the_tag]
-          << "\t0\t0\n";
+      buf << name_tab << pos << (is_c ? "\t+\t" : "\t-\t")
+          << tag_values[the_tag] << "\t0\t0\n";
       if (!out.write(buf.c_str(), buf.tellp()))
-        throw dnmt_error("error writing output");
+        throw std::runtime_error("error writing output");
     }
   }
 }
 
 static void
-write_current_site(const MSite &site, bgzf_file &out) {
-  quick_buf buf; // keep underlying buffer space?
+write_current_site(const MSite &site, bamxx::bgzf_file &out) {
+  quick_buf buf;  // keep underlying buffer space?
   buf << site << '\n';
   if (!out.write(buf.c_str(), buf.tellp()))
-    throw dnmt_error("error writing site: " + site.tostring());
+    throw std::runtime_error("error writing site: " + site.tostring());
 }
 
-typedef vector<string>::const_iterator chrom_itr_t;
+typedef std::vector<std::string>::const_iterator chrom_itr_t;
 
 static chrom_itr_t
-get_chrom(const unordered_map<string, chrom_itr_t> &chrom_lookup,
-          const string &chrom_name) {
+get_chrom(const std::unordered_map<std::string, chrom_itr_t> &chrom_lookup,
+          const std::string &chrom_name) {
   const auto chrom_idx = chrom_lookup.find(chrom_name);
   if (chrom_idx == cend(chrom_lookup))
-    throw dnmt_error("chromosome not found: " + chrom_name);
+    throw std::runtime_error("chromosome not found: " + chrom_name);
   return chrom_idx->second;
 }
 
-static int32_t
-get_chrom_idx(const unordered_map<string, int32_t> &name_to_idx,
-              const string &chrom_name) {
+static std::int32_t
+get_chrom_idx(const std::unordered_map<std::string, std::int32_t> &name_to_idx,
+              const std::string &chrom_name) {
   const auto chrom_idx = name_to_idx.find(chrom_name);
   if (chrom_idx == cend(name_to_idx))
-    throw dnmt_error("chromosome not found: " + chrom_name);
+    throw std::runtime_error("chromosome not found: " + chrom_name);
   return chrom_idx->second;
 }
 
 static void
 process_sites(const bool verbose, const bool add_missing_chroms,
-              const bool compress_output, const size_t n_threads,
-              const string &infile, const string &outfile,
-              const string &chroms_file) {
+              const bool compress_output, const std::size_t n_threads,
+              const std::string &infile, const std::string &outfile,
+              const std::string &chroms_file) {
 
   // first get the chromosome names and sequences from the FASTA file
-  vector<string> chroms, names;
+  std::vector<std::string> chroms, names;
   read_fasta_file_short_names(chroms_file, names, chroms);
   for (auto &i : chroms)
     transform(cbegin(i), cend(i), begin(i),
               [](const char c) { return std::toupper(c); });
   if (verbose)
-    cerr << "[n chroms in reference: " << chroms.size() << "]" << endl;
+    std::cerr << "[n chroms in reference: " << chroms.size() << "]\n";
 
-  unordered_map<string, chrom_itr_t> chrom_lookup;
-  unordered_map<string, int32_t> name_to_idx;
-  vector<uint64_t> chrom_sizes(size(chroms), 0);
-  for (size_t i = 0; i < size(chroms); ++i) {
+  std::unordered_map<std::string, chrom_itr_t> chrom_lookup;
+  std::unordered_map<std::string, std::int32_t> name_to_idx;
+  std::vector<std::uint64_t> chrom_sizes(size(chroms), 0);
+  for (std::size_t i = 0; i < size(chroms); ++i) {
     chrom_lookup[names[i]] = cbegin(chroms) + i;
     name_to_idx[names[i]] = i;
     chrom_sizes[i] = size(chroms[i]);
@@ -255,11 +243,13 @@ process_sites(const bool verbose, const bool add_missing_chroms,
   bamxx::bam_tpool tp(n_threads);
 
   bamxx::bgzf_file in(infile, "r");
-  if (!in) throw dnmt_error("failed to open input file");
+  if (!in)
+    throw std::runtime_error("failed to open input file");
 
-  const string output_mode = compress_output ? "w" : "wu";
-  bgzf_file out(outfile, output_mode);
-  if (!out) throw dnmt_error("error opening output file: " + outfile);
+  const std::string output_mode = compress_output ? "w" : "wu";
+  bamxx::bgzf_file out(outfile, output_mode);
+  if (!out)
+    throw std::runtime_error("error opening output file: " + outfile);
 
   // set the threads for the input file decompression
   if (n_threads > 1) {
@@ -268,14 +258,14 @@ process_sites(const bool verbose, const bool add_missing_chroms,
   }
 
   MSite site;
-  string chrom_name;
-  int32_t prev_chrom_idx = -1;
-  uint64_t pos = num_lim<uint64_t>::max();
+  std::string chrom_name;
+  std::int32_t prev_chrom_idx = -1;
+  std::uint64_t pos = std::numeric_limits<std::uint64_t>::max();
 
   // ADS: this is probably a poor strategy since we already would know
   // the index of the chrom sequence in the vector.
   chrom_itr_t chrom_itr;
-  string line;
+  std::string line;
 
   while (getline(in, line)) {
     if (is_counts_header_line(line)) {
@@ -285,15 +275,15 @@ process_sites(const bool verbose, const bool add_missing_chroms,
     site.initialize(line.data(), line.data() + size(line));
     if (site.chrom != chrom_name) {
 
-      if (pos != num_lim<uint64_t>::max())
+      if (pos != std::numeric_limits<std::uint64_t>::max())
         write_missing_sites(chrom_name, *chrom_itr, pos, size(*chrom_itr), out);
 
-      const int32_t chrom_idx = get_chrom_idx(name_to_idx, site.chrom);
+      const std::int32_t chrom_idx = get_chrom_idx(name_to_idx, site.chrom);
 
       if (add_missing_chroms)
         for (auto i = prev_chrom_idx + 1; i < chrom_idx; ++i) {
           if (verbose)
-            cerr << "processing: " << names[i] << " (missing)" << endl;
+            std::cerr << "processing: " << names[i] << " (missing)\n";
           write_missing_sites(names[i], chroms[i], 0u, size(chroms[i]), out);
         }
 
@@ -302,7 +292,7 @@ process_sites(const bool verbose, const bool add_missing_chroms,
       pos = 0;
       prev_chrom_idx = chrom_idx;
       if (verbose)
-        cerr << "processing: " << chrom_name << endl;
+        std::cerr << "processing: " << chrom_name << "\n";
     }
     if (pos < site.pos)
       write_missing_sites(chrom_name, *chrom_itr, pos, site.pos, out);
@@ -312,16 +302,15 @@ process_sites(const bool verbose, const bool add_missing_chroms,
   write_missing_sites(chrom_name, *chrom_itr, pos, size(*chrom_itr), out);
 
   if (add_missing_chroms) {
-    const int32_t chrom_idx = size(chroms);
+    const std::int32_t chrom_idx = size(chroms);
     for (auto i = prev_chrom_idx + 1; i < chrom_idx; ++i) {
       if (verbose)
-        cerr << "processing: " << names[i] << " (missing)" << endl;
+        std::cerr << "processing: " << names[i] << " (missing)\n";
       write_missing_sites(names[i], chroms[i], 0u, size(chroms[i]), out);
     }
   }
 }
 
-
 int
 main_recovered(int argc, char *argv[]) {
   try {
@@ -329,50 +318,51 @@ main_recovered(int argc, char *argv[]) {
     bool verbose = false;
     bool add_missing_chroms = false;
     bool compress_output = false;
-    size_t n_threads = 1;
+    std::size_t n_threads = 1;
 
-    string outfile;
-    string chroms_file;
-    const string description =
+    std::string outfile;
+    std::string chroms_file;
+    const std::string description =
       "add sites that are missing as non-covered sites";
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(strip_path(argv[0]), description,
                            "<methcounts-file>");
     opt_parse.add_opt("output", 'o', "output file (required)", true, outfile);
-    opt_parse.add_opt("missing", 'm', "add missing chroms", false, add_missing_chroms);
+    opt_parse.add_opt("missing", 'm', "add missing chroms", false,
+                      add_missing_chroms);
     opt_parse.add_opt("threads", 't', "number of threads", false, n_threads);
     opt_parse.add_opt("chrom", 'c', "reference genome file (FASTA format)",
-                      true , chroms_file);
+                      true, chroms_file);
     opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
-    std::vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << "\n"
+                << opt_parse.about_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.about_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << endl;
+      std::cerr << opt_parse.option_missing_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (leftover_args.size() != 1) {
-      cerr << opt_parse.help_message() << endl;
+      std::cerr << opt_parse.help_message() << "\n";
       return EXIT_SUCCESS;
     }
-    const string filename(leftover_args.front());
+    const std::string filename(leftover_args.front());
     /****************** END COMMAND LINE OPTIONS *****************/
 
     process_sites(verbose, add_missing_chroms, compress_output, n_threads,
                   filename, outfile, chroms_file);
   }
-  catch (const std::runtime_error &e) {
-    cerr << e.what() << endl;
+  catch (const std::exception &e) {
+    std::cerr << e.what() << "\n";
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
diff --git a/src/utils/uniq.cpp b/src/utils/uniq.cpp
index 107b0da9..44f11ffb 100644
--- a/src/utils/uniq.cpp
+++ b/src/utils/uniq.cpp
@@ -1,86 +1,79 @@
-/* uniq: remove duplicate reads from a file of mapped reads in the
- * dnmtools format (as output from format_reads), based on identical
- * mapping location and alignment to the reference.
+/* uniq: remove duplicate reads from a file of mapped reads in the dnmtools
+ * format (as output from format_reads), based on identical mapping location
+ * and alignment to the reference.
  *
  * Copyright (C) 2013-2023 University of Southern California and
  *                         Andrew D. Smith
  *
  * Author: Andrew D. Smith
  *
- * This program is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
  *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
  */
 
-#include <cstdint>  // for [u]int[0-9]+_t
-#include <iostream>
-#include <random>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-// generated by autotools
-#include <config.h>
-
 #include "GenomicRegion.hpp"
 #include "OptionParser.hpp"
+#include "bam_record_utils.hpp"
 #include "bsutils.hpp"
 #include "dnmt_error.hpp"
 #include "smithlab_os.hpp"
 #include "smithlab_utils.hpp"
-#include "bam_record_utils.hpp"
 
-using std::cerr;
-using std::endl;
-using std::ifstream;
-using std::ofstream;
-using std::runtime_error;
-using std::string;
-using std::to_string;
-using std::vector;
+// generated by autotools
+#include <config.h>
 
-using bamxx::bam_rec;
+#include <cstdint>  // for [u]int[0-9]+_t
+#include <iostream>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <vector>
 
 namespace uniq_random {
-  // ADS: I made this namespace and functions because different
-  // implementations of rand() on different OS meant that even with
-  // the same seed, the results could be different. This meant testing
-  // didn't work.
-  // ADS: (TODO) refactor this
-  bool initialized = false;
-  std::default_random_engine e;
-  std::uniform_int_distribution<int> di;
-  void initialize(const size_t the_seed) {
-    e = std::default_random_engine(the_seed);
-    initialized = true;
-  }
-  int rand() {
-    // ADS: should have same range as ordinary rand() by properties of
-    // std::uniform_int_distribution default constructor.
-    // assert(initialized);
-    return di(e);
-  }
+// ADS: I made this namespace and functions because different implementations
+// of rand() on different OS meant that even with the same seed, the results
+// could be different. This meant testing didn't work.
+
+// ADS: (TODO) refactor this
+bool initialized{false};
+std::default_random_engine e;
+std::uniform_int_distribution<int> di;
+
+void
+initialize(const std::size_t the_seed) {
+  e = std::default_random_engine(the_seed);
+  initialized = true;
+}
+
+int
+rand() {
+  // ADS: should have same range as ordinary rand() by properties of
+  // std::uniform_int_distribution default constructor.
+  // assert(initialized);
+  return di(e);
+}
 }  // namespace uniq_random
 
 struct rd_stats {  // keep track of good bases/reads in and out
-  size_t bases{};
-  size_t reads{};
-  void update(const bam_rec &b) {
+  std::size_t bases{};
+  std::size_t reads{};
+  void
+  update(const bamxx::bam_rec &b) {
     bases += get_l_qseq(b);
     ++reads;
   }
 };
 
-
 struct uniq_summary {
   uniq_summary(const rd_stats &rs_in, const rd_stats &rs_out,
-               const size_t reads_duped) {
+               const std::size_t reads_duped) {
     total_reads = rs_in.reads;
     total_bases = rs_in.bases;
     unique_reads = rs_out.reads;
@@ -94,60 +87,58 @@ struct uniq_summary {
   }
 
   // total_reads is the number of input reads
-  size_t total_reads{};
+  std::size_t total_reads{};
   // total_bases is the total number of input bases
-  size_t total_bases{};
+  std::size_t total_bases{};
   // unique_reads is the number of unique reads
-  size_t unique_reads{};
+  std::size_t unique_reads{};
   // unique_read_bases is the total number of bases for the unique reads
-  size_t unique_read_bases{};
+  std::size_t unique_read_bases{};
   // non_duplicate_fraction is the ratio of the number of unique reads with
   // no duplicates to that of the input reads
   double non_duplicate_fraction{};
   // duplicate_reads is the number of unique reads with at least one duplicate
-  size_t duplicate_reads{};
+  std::size_t duplicate_reads{};
   // reads_removed is the number of duplicate reads that have been removed
-  size_t reads_removed{};
+  std::size_t reads_removed{};
   // duplication_rate is the average number of duplicates for the reads with
   // at least one duplicate (>1 by definition)
   double duplication_rate{};
 
-  string tostring() {
+  std::string
+  to_string() {
     std::ostringstream oss;
-    oss << "total_reads: " << total_reads << endl
-        << "total_bases: " << total_bases << endl
-        << "unique_reads: " << unique_reads << endl
-        << "unique_read_bases: " << unique_read_bases << endl
-        << "non_duplicate_fraction: " << non_duplicate_fraction << endl
-        << "duplicate_reads: " << duplicate_reads << endl
-        << "reads_removed: " << reads_removed << endl
+    oss << "total_reads: " << total_reads << "\n"
+        << "total_bases: " << total_bases << "\n"
+        << "unique_reads: " << unique_reads << "\n"
+        << "unique_read_bases: " << unique_read_bases << "\n"
+        << "non_duplicate_fraction: " << non_duplicate_fraction << "\n"
+        << "duplicate_reads: " << duplicate_reads << "\n"
+        << "reads_removed: " << reads_removed << "\n"
         << "duplication_rate: " << duplication_rate;
-
     return oss.str();
   }
 };
 
-
-
 static void
 write_stats_output(const rd_stats &rs_in, const rd_stats &rs_out,
-                   const size_t reads_duped, const string &statfile) {
-  if (!statfile.empty()) {
-    uniq_summary summary(rs_in, rs_out, reads_duped);
-    ofstream out_stat(statfile);
-    if (!out_stat) throw runtime_error("bad stats output file");
-    out_stat << summary.tostring() << endl;
-  }
+                   const std::size_t reads_duped, const std::string &statfile) {
+  uniq_summary summary(rs_in, rs_out, reads_duped);
+  std::ofstream out_stat(statfile);
+  if (!out_stat)
+    throw std::runtime_error("bad stats output file");
+  out_stat << summary.to_string() << "\n";
 }
 
 static void
-write_hist_output(const vector<size_t> &hist, const string &histfile) {
-  if (!histfile.empty()) {
-    ofstream out_hist(histfile);
-    if (!out_hist) throw runtime_error("bad hist output file");
-    for (size_t i = 0; i < hist.size(); ++i)
-      if (hist[i] > 0) out_hist << i << '\t' << hist[i] << '\n';
-  }
+write_hist_output(const std::vector<std::size_t> &hist,
+                  const std::string &histfile) {
+  std::ofstream out_hist(histfile);
+  if (!out_hist)
+    throw std::runtime_error("bad hist output file");
+  for (std::size_t i = 0; i < std::size(hist); ++i)
+    if (hist[i] > 0)
+      out_hist << i << '\t' << hist[i] << '\n';
 }
 
 /* The "inner" buffer corresponds to all reads sharing chrom, start,
@@ -155,22 +146,25 @@ write_hist_output(const vector<size_t> &hist, const string &histfile) {
    that shares the same end and strand. */
 static void
 process_inner_buffer(const bool add_dup_count,
-                     const vector<bam_rec>::iterator it,
-                     const vector<bam_rec>::iterator jt, bamxx::bam_header &hdr,
-                     bamxx::bam_out &out, rd_stats &rs_out, size_t &reads_duped,
-                     vector<size_t> &hist) {
+                     const std::vector<bamxx::bam_rec>::iterator it,
+                     const std::vector<bamxx::bam_rec>::iterator jt,
+                     bamxx::bam_header &hdr, bamxx::bam_out &out,
+                     rd_stats &rs_out, std::size_t &reads_duped,
+                     std::vector<std::size_t> &hist) {
   constexpr char du_tag[2] = {'D', 'U'};
-  const size_t n_reads = std::distance(it, jt);
-  const size_t selected = uniq_random::rand() % n_reads;
+  const std::size_t n_reads = std::distance(it, jt);
+  const std::size_t selected = uniq_random::rand() % n_reads;
 
   if (add_dup_count) {
     const int ret = bam_aux_update_int(*(it + selected), du_tag, n_reads);
-    if (ret < 0) throw dnmt_error("error adding duplicate count aux field");
+    if (ret < 0)
+      throw dnmt_error("error adding duplicate count aux field");
   }
 
   if (!out.write(hdr, *(it + selected)))
-    throw runtime_error("failed writing bam record");
-  if (hist.size() <= n_reads) hist.resize(n_reads + 1);
+    throw std::runtime_error("failed writing bam record");
+  if (hist.size() <= n_reads)
+    hist.resize(n_reads + 1);
   hist[n_reads]++;
   rs_out.update(*(it + selected));
   reads_duped += (n_reads > 1);
@@ -179,13 +173,14 @@ process_inner_buffer(const bool add_dup_count,
 /* The buffer corresponds to reads sharing the same mapping chromosome
    and start position. These are gathered and then processed together. */
 static void
-process_buffer(const bool add_dup_count, rd_stats &rs_out, size_t &reads_duped,
-               vector<size_t> &hist, vector<bam_rec> &buffer, bamxx::bam_header &hdr,
+process_buffer(const bool add_dup_count, rd_stats &rs_out,
+               std::size_t &reads_duped, std::vector<std::size_t> &hist,
+               std::vector<bamxx::bam_rec> &buffer, bamxx::bam_header &hdr,
                bamxx::bam_out &out) {
-  sort(begin(buffer), end(buffer), precedes_by_end_and_strand);
-  auto it(begin(buffer));
+  std::sort(std::begin(buffer), std::end(buffer), precedes_by_end_and_strand);
+  auto it = std::begin(buffer);
   auto jt = it + 1;
-  for (; jt != end(buffer); ++jt)
+  for (; jt != std::end(buffer); ++jt)
     if (!equivalent_end_and_strand(*it, *jt)) {
       process_inner_buffer(add_dup_count, it, jt, hdr, out, rs_out, reads_duped,
                            hist);
@@ -197,28 +192,33 @@ process_buffer(const bool add_dup_count, rd_stats &rs_out, size_t &reads_duped,
 }
 
 static void
-uniq(const bool add_dup_count, const uint32_t max_buffer_size,
-     const size_t n_threads, const string &cmd, const string &infile,
-     const string &statfile, const string &histfile, const bool bam_format,
-     const string &outfile) {
+uniq(const bool add_dup_count, const std::uint32_t max_buffer_size,
+     const std::size_t n_threads, const std::string &cmd,
+     const std::string &infile, const std::string &statfile,
+     const std::string &histfile, const bool bam_format,
+     const std::string &outfile) {
   // values to tabulate stats; no real cost
   rd_stats rs_in, rs_out;
-  size_t reads_duped = 0;
-  vector<size_t> hist;
+  std::size_t reads_duped = 0;
+  std::vector<std::size_t> hist;
 
   bamxx::bam_tpool tpool(n_threads);  // outer scope: must be destroyed last
 
   bamxx::bam_in hts(infile);
-  if (!hts) throw dnmt_error("failed to open input file: " + infile);
+  if (!hts)
+    throw dnmt_error("failed to open input file: " + infile);
   bamxx::bam_header hdr(hts);
-  if (!hdr) throw dnmt_error("failed to read header");
+  if (!hdr)
+    throw dnmt_error("failed to read header");
 
   bamxx::bam_out out(outfile, bam_format);
   {
     bamxx::bam_header hdr_out(hdr);
-    if (!hdr_out) throw dnmt_error("failed create header");
+    if (!hdr_out)
+      throw dnmt_error("failed create header");
     hdr_out.add_pg_line(cmd, "DNMTOOLS", VERSION);
-    if (!out.write(hdr_out)) throw dnmt_error("failed to write header");
+    if (!out.write(hdr_out))
+      throw dnmt_error("failed to write header");
   }
 
   if (n_threads > 1) {
@@ -226,22 +226,22 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size,
     tpool.set_io(out);
   }
 
-  bam_rec aln;
+  bamxx::bam_rec aln;
   bool found_mapped_read{false};  // valid SAM/BAM can have 0 reads
-  while (!found_mapped_read && hts.read(hdr, aln)) {
+  while (!found_mapped_read && hts.read(hdr, aln))
     // ADS: skip reads that have no tid -- they are not mapped
     if (get_tid(aln) != -1)
       found_mapped_read = true;
-  }
 
   if (found_mapped_read) {
     rs_in.update(aln);  // update stats for input we just got
 
-    vector<bam_rec> buffer(1, aln);  // select output from this buffer
+    // select output from this buffer
+    std::vector<bamxx::bam_rec> buffer(1, aln);
 
     // to check that reads are sorted properly
-    vector<bool> chroms_seen(get_n_targets(hdr), false);
-    int32_t cur_chrom = get_tid(aln);
+    std::vector<bool> chroms_seen(get_n_targets(hdr), false);
+    std::int32_t cur_chrom = get_tid(aln);
 
     while (hts.read(hdr, aln)) {
       // ADS: skip reads that have no tid -- they are not mapped
@@ -251,12 +251,13 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size,
 
       // below works because buffer reset at every new chrom
       if (precedes_by_start(aln, buffer[0]))
-        throw runtime_error("not sorted: " + get_qname(buffer[0]) + " " +
-                            get_qname(aln));
+        throw std::runtime_error("not sorted: " + get_qname(buffer[0]) + " " +
+                                 get_qname(aln));
 
-      const int32_t chrom = get_tid(aln);
+      const std::int32_t chrom = get_tid(aln);
       if (chrom != cur_chrom) {
-        if (chroms_seen[chrom]) throw runtime_error("input not sorted");
+        if (chroms_seen[chrom])
+          throw std::runtime_error("input not sorted");
         chroms_seen[chrom] = true;
         cur_chrom = chrom;
       }
@@ -271,15 +272,18 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size,
     }
     process_buffer(add_dup_count, rs_out, reads_duped, hist, buffer, hdr, out);
   }
+
   // write any additional output requested
-  write_stats_output(rs_in, rs_out, reads_duped, statfile);
-  write_hist_output(hist, histfile);
+  if (!statfile.empty())
+    write_stats_output(rs_in, rs_out, reads_duped, statfile);
+  if (!histfile.empty())
+    write_hist_output(hist, histfile);
 }
 
 int
 main_uniq(int argc, char *argv[]) {
   try {
-    uint32_t max_buffer_size = std::numeric_limits<uint32_t>::max();
+    std::uint32_t max_buffer_size = std::numeric_limits<std::uint32_t>::max();
     bool VERBOSE = false;
 
     bool bam_format = false;
@@ -288,19 +292,20 @@ main_uniq(int argc, char *argv[]) {
 
     // ADS: Not recommended to change this seed. It shouldn't matter
     // at all, and we want results to behave as deterministic.
-    size_t the_seed = 408;
-    string outfile;
-    string statfile;
-    string histfile;
-    size_t n_threads = 1;
+    std::size_t the_seed = 408;
+    std::string outfile;
+    std::string statfile;
+    std::string histfile;
+    std::size_t n_threads = 1;
 
     /****************** COMMAND LINE OPTIONS ********************/
-    OptionParser opt_parse(strip_path(argv[0]),
+    OptionParser opt_parse("dnmtools uniq",
                            "program to remove duplicate reads from "
                            "sorted mapped reads",
                            "<in-file> [out-file]", 2);
     opt_parse.add_opt("threads", 't', "number of threads", false, n_threads);
-    opt_parse.add_opt("summary", 'S', "statistics output file", false, statfile);
+    opt_parse.add_opt("summary", 'S', "statistics output file", false,
+                      statfile);
     opt_parse.add_opt("add-count", 'a', "add duplicate counts to reads", false,
                       add_dup_count);
     opt_parse.add_opt("hist", '\0',
@@ -311,33 +316,33 @@ main_uniq(int argc, char *argv[]) {
     opt_parse.add_opt("stdout", '\0', "write to standard output", false,
                       use_stdout);
     opt_parse.add_opt("seed", 's', "random seed", false, the_seed);
-    opt_parse.add_opt("max", 'm', "max duplicates to consider",
-                      false, max_buffer_size);
+    opt_parse.add_opt("max", 'm', "max duplicates to consider", false,
+                      max_buffer_size);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
     opt_parse.set_show_defaults();
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (opt_parse.about_requested() || opt_parse.help_requested() ||
         leftover_args.empty()) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << "\n"
+                << opt_parse.about_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << endl;
+      std::cerr << opt_parse.option_missing_message() << "\n";
       return EXIT_SUCCESS;
     }
     if ((leftover_args.size() == 1 && !use_stdout) ||
         (leftover_args.size() == 2 && use_stdout)) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << "\n"
+                << opt_parse.about_message() << "\n";
       return EXIT_SUCCESS;
     }
-    const string infile(leftover_args.front());
+    const std::string infile(leftover_args.front());
     if (leftover_args.size() == 2 && !use_stdout)
       outfile = leftover_args.back();
     else
-      outfile = string("-");  // so htslib can write to stdout
+      outfile = std::string("-");  // so htslib can write to stdout
     /****************** END COMMAND LINE OPTIONS *****************/
 
     // ADS: Random here is because we choose randomly when keeping one
@@ -345,26 +350,26 @@ main_uniq(int argc, char *argv[]) {
     uniq_random::initialize(the_seed);
 
     std::ostringstream cmd;
-    copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));
+    std::copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));
 
     if (VERBOSE)
-      cerr << "[output file: " << outfile << "]" << endl
-           << "[output format: " << (bam_format ? "B" : "S") << "AM]" << endl
-           << "[stats file: " << (statfile.empty() ? "none" : statfile) << "]"
-           << endl
-           << "[hist file: " << (histfile.empty() ? "none" : histfile) << "]"
-           << endl
-           << "[add duplicate count: " << (add_dup_count ? "yes" : "no") << "]"
-           << endl
-           << "[threads requested: " << n_threads << "]" << endl
-           << "[command line: \"" << cmd.str() << "\"]" << endl
-           << "[random number seed: " << the_seed << "]" << endl;
+      std::cerr << "[output file: " << outfile << "]\n"
+                << "[output format: " << (bam_format ? "B" : "S") << "AM]\n"
+                << "[stats file: " << (statfile.empty() ? "none" : statfile)
+                << "]\n"
+                << "[hist file: " << (histfile.empty() ? "none" : histfile)
+                << "]\n"
+                << "[add duplicate count: " << (add_dup_count ? "yes" : "no")
+                << "]\n"
+                << "[threads requested: " << n_threads << "]\n"
+                << "[command line: \"" << cmd.str() << "\"]\n"
+                << "[random number seed: " << the_seed << "]\n";
 
     uniq(add_dup_count, max_buffer_size, n_threads, cmd.str(), infile, statfile,
          histfile, bam_format, outfile);
   }
-  catch (const runtime_error &e) {
-    cerr << e.what() << endl;
+  catch (const std::exception &e) {
+    std::cerr << e.what() << "\n";
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
diff --git a/src/utils/unxcounts.cpp b/src/utils/unxcounts.cpp
index d3360d87..6a7bd519 100644
--- a/src/utils/unxcounts.cpp
+++ b/src/utils/unxcounts.cpp
@@ -16,8 +16,17 @@
  * General Public License for more details.
  */
 
+#include "MSite.hpp"
+#include "bsutils.hpp"
+#include "counts_header.hpp"
+
 #include <bamxx.hpp>
 
+// from smithlab_cpp
+#include "OptionParser.hpp"
+#include "smithlab_os.hpp"
+#include "smithlab_utils.hpp"
+
 #include <charconv>
 #include <iostream>
 #include <stdexcept>
@@ -26,91 +35,67 @@
 #include <unordered_set>
 #include <vector>
 
-// from smithlab_cpp
-#include "MSite.hpp"
-#include "OptionParser.hpp"
-#include "bsutils.hpp"
-#include "counts_header.hpp"
-#include "dnmt_error.hpp"
-#include "smithlab_os.hpp"
-#include "smithlab_utils.hpp"
-
-using std::cbegin;
-using std::cend;
-using std::cerr;
-using std::copy;
-using std::copy_n;
-using std::cout;
-using std::endl;
-using std::from_chars;
-using std::numeric_limits;
-using std::pair;
-using std::runtime_error;
-using std::string;
-using std::to_chars;
-using std::to_string;
-using std::unordered_map;
-using std::unordered_set;
-using std::vector;
-
-using bamxx::bgzf_file;
-
-template<typename T> using num_lim = std::numeric_limits<T>;
-
 static void
-read_fasta_file_short_names_uppercase(const string &chroms_file,
-                                      vector<string> &names,
-                                      vector<string> &chroms) {
+read_fasta_file_short_names_uppercase(const std::string &chroms_file,
+                                      std::vector<std::string> &names,
+                                      std::vector<std::string> &chroms) {
   chroms.clear();
   names.clear();
   read_fasta_file_short_names(chroms_file, names, chroms);
   for (auto &i : chroms)
-    transform(cbegin(i), cend(i), begin(i),
+    std::transform(std::cbegin(i), std::cend(i), std::begin(i),
               [](const char c) { return std::toupper(c); });
 }
 
-
 static void
-verify_chrom_orders(const bool verbose, const uint32_t n_threads,
-                    const string &filename,
-                    const unordered_map<string, int32_t> &chroms_order) {
+verify_chrom_orders(
+  const bool verbose, const std::uint32_t n_threads,
+  const std::string &filename,
+  const std::unordered_map<std::string, std::int32_t> &chroms_order) {
   bamxx::bam_tpool tp(n_threads);
 
-  bgzf_file in(filename, "r");
-  if (!in) throw runtime_error("bad file: " + filename);
+  bamxx::bgzf_file in(filename, "r");
+  if (!in)
+    throw std::runtime_error("bad file: " + filename);
 
   // set the threads for the input file decompression
-  if (n_threads > 1 && in.is_bgzf()) tp.set_io(in);
+  if (n_threads > 1 && in.is_bgzf())
+    tp.set_io(in);
 
-  unordered_set<int32_t> chroms_seen;
-  int32_t prev_id = -1;
+  std::unordered_set<std::int32_t> chroms_seen;
+  std::int32_t prev_id = -1;
 
-  kstring_t line{0, 0, nullptr};
+  kstring_t line = KS_INITIALIZE;
   const int ret = ks_resize(&line, 1024);
-  if (ret) throw runtime_error("failed to acquire buffer");
+  if (ret)
+    throw std::runtime_error("failed to acquire buffer");
 
   while (bamxx::getline(in, line)) {
-    if (std::isdigit(line.s[0])) continue;
-    if (is_counts_header_line(line.s)) continue;
+    if (std::isdigit(line.s[0]))
+      continue;
+    if (is_counts_header_line(line.s))
+      continue;
 
-    string chrom{line.s};
-    if (verbose) cerr << "verifying: " << chrom << endl;
+    std::string chrom{line.s};
+    if (verbose)
+      std::cerr << "verifying: " << chrom << "\n";
 
     const auto idx_itr = chroms_order.find(chrom);
-    if (idx_itr == cend(chroms_order))
-      throw runtime_error("chrom not found genome file: " + chrom);
+    if (idx_itr == std::cend(chroms_order))
+      throw std::runtime_error("chrom not found genome file: " + chrom);
     const auto idx = idx_itr->second;
 
     if (chroms_seen.find(idx) != end(chroms_seen))
-      throw runtime_error("chroms out of order in: " + filename);
+      throw std::runtime_error("chroms out of order in: " + filename);
     chroms_seen.insert(idx);
 
     if (idx < prev_id)
-      throw runtime_error("inconsistent chromosome order at: " + chrom);
+      throw std::runtime_error("inconsistent chromosome order at: " + chrom);
 
     prev_id = idx;
   }
-  if (verbose) cerr << "chrom orders are consistent" << endl;
+  if (verbose)
+    std::cerr << "chrom orders are consistent\n";
 }
 
 static const char *tag_values[] = {
@@ -126,7 +111,7 @@ static const int tag_sizes[] = {3, 3, 3, 3, 1};
 // ADS: the values below allow for things like CHH where the is a N in
 // the triplet; I'm allowing that for consistency with the weird logic
 // from earlier versions.
-const uint32_t context_codes[] = {
+const std::uint32_t context_codes[] = {
   /*CAA CHH*/ 1,
   /*CAC CHH*/ 1,
   /*CAG CXG*/ 2,
@@ -154,23 +139,23 @@ const uint32_t context_codes[] = {
   /*CNN ---*/ 1   // 4
 };
 
-static inline uint32_t
-get_tag_from_genome_c(const string &s, const size_t pos) {
+static inline std::uint32_t
+get_tag_from_genome_c(const std::string &s, const std::size_t pos) {
   const auto val = base2int(s[pos + 1]) * 5 + base2int(s[pos + 2]);
   return context_codes[val];
 }
 
-static inline uint32_t
-get_tag_from_genome_g(const string &s, const size_t pos) {
+static inline std::uint32_t
+get_tag_from_genome_g(const std::string &s, const std::size_t pos) {
   const auto val =
     base2int(complement(s[pos - 1])) * 5 + base2int(complement(s[pos - 2]));
   return context_codes[val];
 }
 
 static bool
-write_missing(const uint32_t name_size, const string &chrom,
-              const uint64_t start_pos, const uint64_t end_pos,
-              vector<char> &buf, bgzf_file &out) {
+write_missing(const std::uint32_t name_size, const std::string &chrom,
+              const std::uint64_t start_pos, const std::uint64_t end_pos,
+              std::vector<char> &buf, bamxx::bgzf_file &out) {
   static constexpr auto zeros = "\t0\t0\n";
   static constexpr auto pos_strand = "\t+\t";
   static constexpr auto neg_strand = "\t-\t";
@@ -181,27 +166,28 @@ write_missing(const uint32_t name_size, const string &chrom,
     const char base = chrom[pos];
     if (is_cytosine(base) || is_guanine(base)) {
       const bool is_c = is_cytosine(base);
-      const uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos)
-                                    : get_tag_from_genome_g(chrom, pos);
+      const std::uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos)
+                                         : get_tag_from_genome_g(chrom, pos);
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wstringop-overflow=0"
-      auto [ptr, ec] = to_chars(cursor, buf_end, pos);
-      ptr = copy_n(is_c ? pos_strand : neg_strand, 3, ptr);
-      ptr = copy_n(tag_values[the_tag], tag_sizes[the_tag], ptr);
-      ptr = copy_n(zeros, 5, ptr);
+      auto [ptr, ec] = std::to_chars(cursor, buf_end, pos);
+      ptr = std::copy_n(is_c ? pos_strand : neg_strand, 3, ptr);
+      ptr = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], ptr);
+      ptr = std::copy_n(zeros, 5, ptr);
       const auto sz = std::distance(buf.data(), ptr);
 #pragma GCC diagnostic push
 
-      if (bgzf_write(out.f, buf.data(), sz) != sz) return false;
+      if (bgzf_write(out.f, buf.data(), sz) != sz)
+        return false;
     }
   }
   return true;
 }
 
 static bool
-write_missing_cpg(const uint32_t &name_size, const string &chrom,
-                  const uint64_t start_pos, const uint64_t end_pos,
-                  vector<char> &buf, bgzf_file &out) {
+write_missing_cpg(const std::uint32_t &name_size, const std::string &chrom,
+                  const std::uint64_t start_pos, const std::uint64_t end_pos,
+                  std::vector<char> &buf, bamxx::bgzf_file &out) {
   static constexpr auto zeros = "\t0\t0\n";
   static constexpr auto pos_strand = "\t+\t";
   const auto buf_end = buf.data() + size(buf);
@@ -211,25 +197,27 @@ write_missing_cpg(const uint32_t &name_size, const string &chrom,
     // When this function is called, the "end_pos" is either the chrom
     // size or the position of a base known to be a C. So we never
     // have to allow pos+1 to equal end_pos.
-    if (is_cytosine(chrom[pos]) && is_guanine(chrom[pos+1])) {
+    if (is_cytosine(chrom[pos]) && is_guanine(chrom[pos + 1])) {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wstringop-overflow=0"
-      auto [ptr, ec] = to_chars(cursor, buf_end, pos);
-      ptr = copy_n(pos_strand, 3, ptr);
-      ptr = copy_n("CpG", 3, ptr);
-      ptr = copy_n(zeros, 5, ptr);
+      auto [ptr, ec] = std::to_chars(cursor, buf_end, pos);
+      ptr = std::copy_n(pos_strand, 3, ptr);
+      ptr = std::copy_n("CpG", 3, ptr);
+      ptr = std::copy_n(zeros, 5, ptr);
       const auto sz = std::distance(buf.data(), ptr);
 #pragma GCC diagnostic push
-      if (bgzf_write(out.f, buf.data(), sz) != sz) return false;
+      if (bgzf_write(out.f, buf.data(), sz) != sz)
+        return false;
     }
   }
   return true;
 }
 
 static bool
-write_site(const uint32_t name_size, const string &chrom, const uint32_t pos,
-           const uint32_t n_meth, const uint32_t n_unmeth, vector<char> &buf,
-           bgzf_file &out) {
+write_site(const std::uint32_t name_size, const std::string &chrom,
+           const std::uint32_t pos, const std::uint32_t n_meth,
+           const std::uint32_t n_unmeth, std::vector<char> &buf,
+           bamxx::bgzf_file &out) {
   static constexpr auto pos_strand = "\t+\t";
   static constexpr auto neg_strand = "\t-\t";
   static constexpr auto fmt = std::chars_format::general;
@@ -238,8 +226,8 @@ write_site(const uint32_t name_size, const string &chrom, const uint32_t pos,
   const char base = chrom[pos];
   assert(is_cytosine(base) || is_guanine(base));
   const bool is_c = is_cytosine(base);
-  const uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos)
-                                : get_tag_from_genome_g(chrom, pos);
+  const std::uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos)
+                                     : get_tag_from_genome_g(chrom, pos);
   const auto n_reads = n_meth + n_unmeth;
   const auto meth = static_cast<double>(n_meth) / std::max(n_reads, 1u);
 
@@ -248,20 +236,20 @@ write_site(const uint32_t name_size, const string &chrom, const uint32_t pos,
   // chrom name is already in the buffer so move past it
   auto cursor = buf.data() + name_size + 1;
   {
-    auto [ptr, ec] = to_chars(cursor, buf_end, pos);
+    auto [ptr, ec] = std::to_chars(cursor, buf_end, pos);
     cursor = ptr;
   }
-  cursor = copy_n(is_c ? pos_strand : neg_strand, 3, cursor);
-  cursor = copy_n(tag_values[the_tag], tag_sizes[the_tag], cursor);
+  cursor = std::copy_n(is_c ? pos_strand : neg_strand, 3, cursor);
+  cursor = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], cursor);
   *cursor++ = '\t';
   {
-    // use default precision, 6, same as cout default
-    auto [ptr, ec] = to_chars(cursor, buf_end, meth, fmt, 6);
+    // use default precision, 6, same as std::cout default
+    auto [ptr, ec] = std::to_chars(cursor, buf_end, meth, fmt, 6);
     cursor = ptr;
   }
   *cursor++ = '\t';
   {
-    auto [ptr, ec] = to_chars(cursor, buf_end, n_reads);
+    auto [ptr, ec] = std::to_chars(cursor, buf_end, n_reads);
     cursor = ptr;
   }
   *cursor++ = '\n';
@@ -271,81 +259,85 @@ write_site(const uint32_t name_size, const string &chrom, const uint32_t pos,
   return bgzf_write(out.f, buf.data(), sz) == sz;
 }
 
-typedef vector<string>::const_iterator chrom_itr_t;
+typedef std::vector<std::string>::const_iterator chrom_itr_t;
 
 static chrom_itr_t
-get_chrom(const unordered_map<string, chrom_itr_t> &chrom_lookup,
-          const string &chrom_name) {
+get_chrom(const std::unordered_map<std::string, chrom_itr_t> &chrom_lookup,
+          const std::string &chrom_name) {
   const auto chr_id = chrom_lookup.find(chrom_name);
-  if (chr_id == cend(chrom_lookup))
-    throw dnmt_error("chromosome not found: " + chrom_name);
+  if (chr_id == std::cend(chrom_lookup))
+    throw std::runtime_error("chromosome not found: " + chrom_name);
   return chr_id->second;
 }
 
-static int32_t
-get_chrom_id(const unordered_map<string, int32_t> &name_to_id,
-              const string &chrom_name) {
+static std::int32_t
+get_chrom_id(const std::unordered_map<std::string, std::int32_t> &name_to_id,
+             const std::string &chrom_name) {
   const auto chr_id = name_to_id.find(chrom_name);
-  if (chr_id == cend(name_to_id))
-    throw dnmt_error("chromosome not found: " + chrom_name);
+  if (chr_id == std::cend(name_to_id))
+    throw std::runtime_error("chromosome not found: " + chrom_name);
   return chr_id->second;
 }
 
 static bool
-verify_chrom(const string &header_line,
-             const unordered_map<string, int32_t> &name_to_id,
-             const vector<uint64_t> &chrom_sizes) {
-  if (is_counts_header_version_line(header_line)) return true;
+verify_chrom(const std::string &header_line,
+             const std::unordered_map<std::string, std::int32_t> &name_to_id,
+             const std::vector<std::uint64_t> &chrom_sizes) {
+  if (is_counts_header_version_line(header_line))
+    return true;
   std::istringstream iss(header_line.substr(1));
-  string name;
-  uint64_t chrom_size = 0;
-  if (!(iss >> name >> chrom_size)) return false;
+  std::string name;
+  std::uint64_t chrom_size = 0;
+  if (!(iss >> name >> chrom_size))
+    return false;
 
   const auto idx = name_to_id.find(name);
-  if (idx == cend(name_to_id)) return false;
+  if (idx == std::cend(name_to_id))
+    return false;
 
   return chrom_size == chrom_sizes[idx->second];
 }
 
 static void
-get_lookups(const vector<string> &names, const vector<string> &chroms,
-            unordered_map<string, chrom_itr_t> &chrom_lookup,
-            unordered_map<string, int32_t> &name_to_id,
-            vector<uint64_t> &chrom_sizes) {
+get_lookups(const std::vector<std::string> &names,
+            const std::vector<std::string> &chroms,
+            std::unordered_map<std::string, chrom_itr_t> &chrom_lookup,
+            std::unordered_map<std::string, std::int32_t> &name_to_id,
+            std::vector<std::uint64_t> &chrom_sizes) {
   chrom_lookup.clear();
   name_to_id.clear();
-  chrom_sizes = vector<uint64_t>(size(chroms), 0);
+  chrom_sizes = std::vector<std::uint64_t>(size(chroms), 0);
   for (size_t i = 0; i < size(chroms); ++i) {
-    chrom_lookup[names[i]] = cbegin(chroms) + i;
+    chrom_lookup[names[i]] = std::cbegin(chroms) + i;
     name_to_id[names[i]] = i;
     chrom_sizes[i] = size(chroms[i]);
   }
 }
 
 static void
-process_header_line(const unordered_map<string, int32_t> &name_to_id,
-                    const vector<uint64_t> &chrom_sizes, const kstring_t &line,
-                    bgzf_file &out) {
-  string hdr_line{line.s};
+process_header_line(
+  const std::unordered_map<std::string, std::int32_t> &name_to_id,
+  const std::vector<std::uint64_t> &chrom_sizes, const kstring_t &line,
+  bamxx::bgzf_file &out) {
+  std::string hdr_line{line.s};
   if (size(hdr_line) > 1 && !verify_chrom(hdr_line, name_to_id, chrom_sizes))
-    throw runtime_error{"failed to verify header for: " + hdr_line};
+    throw std::runtime_error{"failed to verify header for: " + hdr_line};
   if (!write_counts_header_line(hdr_line, out))
-    throw runtime_error{"failed to write header line: " + hdr_line};
+    throw std::runtime_error{"failed to write header line: " + hdr_line};
 }
 
-
 // write all sites for chroms in the given range
 static void
-write_all_sites(const bool verbose,
-                const uint32_t prev_chr_id,
-                const uint32_t chr_id,
-                const vector<string> &names,
-                const vector<string> &chroms,
-                vector<char> &buf, bgzf_file &out) {
+write_all_sites(const bool verbose, const std::uint32_t prev_chr_id,
+                const std::uint32_t chr_id,
+                const std::vector<std::string> &names,
+                const std::vector<std::string> &chroms, std::vector<char> &buf,
+                bamxx::bgzf_file &out) {
   for (auto i = prev_chr_id + 1; i < chr_id; ++i) {
     if (verbose)
-      cerr << "processing: " << names[i] << " (missing)" << endl;
-    auto res = copy(cbegin(names[i]), cend(names[i]), buf.data());
+      std::cerr << "processing: " << names[i] << " (missing)" << "\n";
+    auto res =
+      std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data());
     *res = '\t';
     write_missing(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out);
   }
@@ -354,17 +346,17 @@ write_all_sites(const bool verbose,
 static void
 process_sites(const bool verbose, const bool add_missing_chroms,
               const bool require_covered, const bool compress_output,
-              const size_t n_threads, const string &infile,
-              const string &outfile, const string &chroms_file) {
+              const size_t n_threads, const std::string &infile,
+              const std::string &outfile, const std::string &chroms_file) {
   // first get the chromosome names and sequences from the FASTA file
-  vector<string> chroms, names;
+  std::vector<std::string> chroms, names;
   read_fasta_file_short_names_uppercase(chroms_file, names, chroms);
   if (verbose)
-    cerr << "[n chroms in reference: " << chroms.size() << "]" << endl;
+    std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n";
 
-  unordered_map<string, chrom_itr_t> chrom_lookup;
-  unordered_map<string, int32_t> name_to_id;
-  vector<uint64_t> chrom_sizes(size(chroms), 0);
+  std::unordered_map<std::string, chrom_itr_t> chrom_lookup;
+  std::unordered_map<std::string, std::int32_t> name_to_id;
+  std::vector<std::uint64_t> chrom_sizes(size(chroms), 0);
   get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes);
 
   if (add_missing_chroms)
@@ -373,29 +365,33 @@ process_sites(const bool verbose, const bool add_missing_chroms,
   bamxx::bam_tpool tp(n_threads);
 
   bamxx::bgzf_file in(infile, "r");
-  if (!in) throw dnmt_error("failed to open input file");
+  if (!in)
+    throw std::runtime_error("failed to open input file");
 
-  const string output_mode = compress_output ? "w" : "wu";
-  bgzf_file out(outfile, output_mode);
-  if (!out) throw dnmt_error("error opening output file: " + outfile);
+  const std::string output_mode = compress_output ? "w" : "wu";
+  bamxx::bgzf_file out(outfile, output_mode);
+  if (!out)
+    throw std::runtime_error("error opening output file: " + outfile);
 
   // set the threads for the input file decompression
   if (n_threads > 1) {
-    if (in.is_bgzf()) tp.set_io(in);
+    if (in.is_bgzf())
+      tp.set_io(in);
     tp.set_io(out);
   }
 
-  static constexpr uint32_t output_buffer_size = 1024;
-  vector<char> buf(output_buffer_size, '\0');
+  static constexpr std::uint32_t output_buffer_size = 1024;
+  std::vector<char> buf(output_buffer_size, '\0');
 
-  kstring_t line{0, 0, nullptr};
+  kstring_t line = KS_INITIALIZE;
   const int ret = ks_resize(&line, output_buffer_size);
-  if (ret) throw runtime_error("failed to acquire buffer");
+  if (ret)
+    throw std::runtime_error("failed to acquire buffer");
 
-  string chrom_name;
-  uint32_t nm_sz{};
-  int32_t prev_chr_id = -1;
-  uint64_t pos = num_lim<uint64_t>::max();
+  std::string chrom_name;
+  std::uint32_t nm_sz{};
+  std::int32_t prev_chr_id = -1;
+  std::uint64_t pos = std::numeric_limits<std::uint64_t>::max();
 
   // ADS: this is probably a poor strategy since we already would know
   // the index of the chrom sequence in the vector.
@@ -409,12 +405,12 @@ process_sites(const bool verbose, const bool add_missing_chroms,
 
     if (!std::isdigit(line.s[0])) {  // check if we have a chrom line
 
-      if (!require_covered && pos != num_lim<uint64_t>::max())
+      if (!require_covered && pos != std::numeric_limits<std::uint64_t>::max())
         write_missing(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out);
 
-      chrom_name = string{line.s};
+      chrom_name = std::string{line.s};
       nm_sz = size(chrom_name);
-      const int32_t chr_id = get_chrom_id(name_to_id, chrom_name);
+      const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name);
 
       if (add_missing_chroms)
         write_all_sites(verbose, prev_chr_id, chr_id, names, chroms, buf, out);
@@ -422,17 +418,19 @@ process_sites(const bool verbose, const bool add_missing_chroms,
       ch_itr = get_chrom(chrom_lookup, chrom_name);
       pos = 0;
       prev_chr_id = chr_id;
-      if (verbose) cerr << "processing: " << chrom_name << endl;
+      if (verbose)
+        std::cerr << "processing: " << chrom_name << "\n";
 
-      auto res = copy(cbegin(chrom_name), cend(chrom_name), buf.data());
+      auto res =
+        std::copy(std::cbegin(chrom_name), std::cend(chrom_name), buf.data());
       *res = '\t';
     }
     else {
-      uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0;
+      std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0;
       const auto end_line = line.s + line.l;
-      auto res = from_chars(line.s, end_line, pos_step);
-      res = from_chars(res.ptr + 1, end_line, n_meth);
-      res = from_chars(res.ptr + 1, end_line, n_unmeth);
+      auto res = std::from_chars(line.s, end_line, pos_step);
+      res = std::from_chars(res.ptr + 1, end_line, n_meth);
+      res = std::from_chars(res.ptr + 1, end_line, n_unmeth);
 
       const auto curr_pos = pos + pos_step;
       if (!require_covered && pos + 1 < curr_pos)
@@ -451,16 +449,16 @@ process_sites(const bool verbose, const bool add_missing_chroms,
 
 // write all cpg sites for chroms in the given range
 static void
-write_all_cpgs(const bool verbose,
-               const uint32_t prev_chr_id,
-               const uint32_t chr_id,
-               const vector<string> &names,
-               const vector<string> &chroms,
-               vector<char> &buf, bgzf_file &out) {
+write_all_cpgs(const bool verbose, const std::uint32_t prev_chr_id,
+               const std::uint32_t chr_id,
+               const std::vector<std::string> &names,
+               const std::vector<std::string> &chroms, std::vector<char> &buf,
+               bamxx::bgzf_file &out) {
   for (auto i = prev_chr_id + 1; i < chr_id; ++i) {
     if (verbose)
-      cerr << "processing: " << names[i] << " (missing)" << endl;
-    auto res = copy(cbegin(names[i]), cend(names[i]), buf.data());
+      std::cerr << "processing: " << names[i] << " (missing)" << "\n";
+    auto res =
+      std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data());
     *res = '\t';
     write_missing_cpg(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out);
   }
@@ -469,17 +467,17 @@ write_all_cpgs(const bool verbose,
 static void
 process_cpg_sites(const bool verbose, const bool add_missing_chroms,
                   const bool require_covered, const bool compress_output,
-                  const size_t n_threads, const string &infile,
-                  const string &outfile, const string &chroms_file) {
+                  const size_t n_threads, const std::string &infile,
+                  const std::string &outfile, const std::string &chroms_file) {
   // first get the chromosome names and sequences from the FASTA file
-  vector<string> chroms, names;
+  std::vector<std::string> chroms, names;
   read_fasta_file_short_names_uppercase(chroms_file, names, chroms);
   if (verbose)
-    cerr << "[n chroms in reference: " << chroms.size() << "]" << endl;
+    std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n";
 
-  unordered_map<string, chrom_itr_t> chrom_lookup;
-  unordered_map<string, int32_t> name_to_id;
-  vector<uint64_t> chrom_sizes(size(chroms), 0);
+  std::unordered_map<std::string, chrom_itr_t> chrom_lookup;
+  std::unordered_map<std::string, std::int32_t> name_to_id;
+  std::vector<std::uint64_t> chrom_sizes(size(chroms), 0);
   get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes);
 
   if (add_missing_chroms)
@@ -488,29 +486,33 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
   bamxx::bam_tpool tp(n_threads);
 
   bamxx::bgzf_file in(infile, "r");
-  if (!in) throw dnmt_error("failed to open input file");
+  if (!in)
+    throw std::runtime_error("failed to open input file");
 
-  const string output_mode = compress_output ? "w" : "wu";
-  bgzf_file out(outfile, output_mode);
-  if (!out) throw dnmt_error("error opening output file: " + outfile);
+  const std::string output_mode = compress_output ? "w" : "wu";
+  bamxx::bgzf_file out(outfile, output_mode);
+  if (!out)
+    throw std::runtime_error("error opening output file: " + outfile);
 
   // set the threads for the input file decompression
   if (n_threads > 1) {
-    if (in.is_bgzf()) tp.set_io(in);
+    if (in.is_bgzf())
+      tp.set_io(in);
     tp.set_io(out);
   }
 
-  static constexpr uint32_t output_buffer_size = 1024;
-  vector<char> buf(output_buffer_size, '\0');
+  static constexpr std::uint32_t output_buffer_size = 1024;
+  std::vector<char> buf(output_buffer_size, '\0');
 
-  kstring_t line{0, 0, nullptr};
+  kstring_t line = KS_INITIALIZE;
   const int ret = ks_resize(&line, output_buffer_size);
-  if (ret) throw runtime_error("failed to acquire buffer");
+  if (ret)
+    throw std::runtime_error("failed to acquire buffer");
 
-  string chrom_name;
-  uint32_t nm_sz{};
-  int32_t prev_chr_id = -1;
-  uint64_t pos = num_lim<uint64_t>::max();
+  std::string chrom_name;
+  std::uint32_t nm_sz{};
+  std::int32_t prev_chr_id = -1;
+  std::uint64_t pos = std::numeric_limits<std::uint64_t>::max();
 
   // ADS: this is probably a poor strategy since we already would know
   // the index of the chrom sequence in the vector.
@@ -524,12 +526,12 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
 
     if (!std::isdigit(line.s[0])) {  // check if we have a chrom line
 
-      if (!require_covered && pos != num_lim<uint64_t>::max())
+      if (!require_covered && pos != std::numeric_limits<std::uint64_t>::max())
         write_missing_cpg(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out);
 
-      chrom_name = string{line.s};
+      chrom_name = std::string{line.s};
       nm_sz = size(chrom_name);
-      const int32_t chr_id = get_chrom_id(name_to_id, chrom_name);
+      const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name);
 
       if (add_missing_chroms)
         write_all_cpgs(verbose, prev_chr_id, chr_id, names, chroms, buf, out);
@@ -537,17 +539,19 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
       ch_itr = get_chrom(chrom_lookup, chrom_name);
       pos = 0;
       prev_chr_id = chr_id;
-      if (verbose) cerr << "processing: " << chrom_name << endl;
+      if (verbose)
+        std::cerr << "processing: " << chrom_name << "\n";
 
-      auto res = copy(cbegin(chrom_name), cend(chrom_name), buf.data());
+      auto res =
+        std::copy(std::cbegin(chrom_name), std::cend(chrom_name), buf.data());
       *res = '\t';
     }
     else {
-      uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0;
+      std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0;
       const auto end_line = line.s + line.l;
-      auto res = from_chars(line.s, end_line, pos_step);
-      res = from_chars(res.ptr + 1, end_line, n_meth);
-      res = from_chars(res.ptr + 1, end_line, n_unmeth);
+      auto res = std::from_chars(line.s, end_line, pos_step);
+      res = std::from_chars(res.ptr + 1, end_line, n_meth);
+      res = std::from_chars(res.ptr + 1, end_line, n_unmeth);
 
       const auto curr_pos = pos + pos_step;
       if (!require_covered && pos + 1 < curr_pos)
@@ -573,9 +577,9 @@ main_unxcounts(int argc, char *argv[]) {
     bool assume_cpg_only = false;
     size_t n_threads = 1;
 
-    string outfile;
-    string chroms_file;
-    const string description =
+    std::string outfile;
+    std::string chroms_file;
+    const std::string description =
       "convert compressed counts format back to full counts";
 
     /****************** COMMAND LINE OPTIONS ********************/
@@ -592,34 +596,34 @@ main_unxcounts(int argc, char *argv[]) {
                       true, chroms_file);
     opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
-    std::vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << endl
-           << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.help_message() << "\n"
+                << opt_parse.about_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << endl;
+      std::cerr << opt_parse.about_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << endl;
+      std::cerr << opt_parse.option_missing_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (leftover_args.size() != 1) {
-      cerr << opt_parse.help_message() << endl;
+      std::cerr << opt_parse.help_message() << "\n";
       return EXIT_SUCCESS;
     }
     if (require_covered && add_missing_chroms) {
-      cerr << "options mutually exclusive: reads and missing" << endl;
+      std::cerr << "options mutually exclusive: reads and missing" << "\n";
       return EXIT_FAILURE;
     }
-    const string filename(leftover_args.front());
+    const std::string filename(leftover_args.front());
     /****************** END COMMAND LINE OPTIONS *****************/
 
     if (require_covered && add_missing_chroms) {
-      cerr << "options mutually exclusive: reads and missing" << endl;
+      std::cerr << "options mutually exclusive: reads and missing" << "\n";
       return EXIT_FAILURE;
     }
 
@@ -631,8 +635,8 @@ main_unxcounts(int argc, char *argv[]) {
       process_sites(verbose, add_missing_chroms, require_covered,
                     compress_output, n_threads, filename, outfile, chroms_file);
   }
-  catch (const std::runtime_error &e) {
-    cerr << e.what() << endl;
+  catch (const std::exception &e) {
+    std::cerr << e.what() << "\n";
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;

From d6e408cf84979389ba39444c5fffcfe05ab011e7 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Wed, 30 Jul 2025 11:13:58 -0700
Subject: [PATCH 2/2] src/utils/kmersites.cpp: adding forgotten header

---
 src/utils/kmersites.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/utils/kmersites.cpp b/src/utils/kmersites.cpp
index 06014369..15318cda 100644
--- a/src/utils/kmersites.cpp
+++ b/src/utils/kmersites.cpp
@@ -24,6 +24,7 @@
 #include <iostream>
 #include <iterator>
 #include <numeric>
+#include <sstream>
 #include <stdexcept>
 #include <string>
 #include <vector>