diff --git a/Makefile.am b/Makefile.am
index c79401c4..9e832e16 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -205,6 +205,7 @@ dnmtools_SOURCES += src/analysis/methstates.cpp
 dnmtools_SOURCES += src/analysis/bsrate.cpp
 dnmtools_SOURCES += src/analysis/methentropy.cpp
 dnmtools_SOURCES += src/analysis/methcounts.cpp
+dnmtools_SOURCES += src/analysis/nanopore.cpp
 dnmtools_SOURCES += src/analysis/roimethstat.cpp
 dnmtools_SOURCES += src/analysis/cpgbins.cpp
 dnmtools_SOURCES += src/analysis/multimethstat.cpp
diff --git a/data/md5sum.txt b/data/md5sum.txt
index 91c77bbf..d796535c 100644
--- a/data/md5sum.txt
+++ b/data/md5sum.txt
@@ -4,7 +4,6 @@ ae05a28de5643a512386e767b3aa963a  tests/araTha1_simulated.hypermr
 75777c209bf820ab700801d87a0a3615  tests/reads.bsrate
 e73facd597c3b903cbfe29afa9f58371  tests/reads.counts
 56575da7d3af9b696258512142903d1e  tests/reads.counts.select
-34bf6331c3e6c59fd8709fe112960ec4  tests/reads.counts.sym
 0f72560aa101e85679783a1ecaf80615  tests/reads.epiread
 9dbd476424d48a8d0f043dfc00af0d23  tests/reads.fmt.srt.sam
 4085cc74b003a918b4a4743fca7922a4  tests/reads.hmr
@@ -12,7 +11,6 @@ e73facd597c3b903cbfe29afa9f58371  tests/reads.counts
 a389e4ee1687895517b0b0d26e9338ee  tests/reads.mstats
 d8856f9731af76b8a4ab3cc7d667cdb2  tests/reads.ustats
 bcbf01be810cbf4051292813eb6b9225  tests/tRex1.idx
-c0951c0682abfb4fb6383c94b96c84cf  tests/tRex1_promoters.roi.bed
 ec6a686617cad31e9f7a37a3d378e6ed  tests/two_epialleles.states
 93e38b20d162062a5d147c4290095a13  tests/mlml.out
 d947fe3d61ef7b1564558a69608f0e64  tests/methylome.pmd
@@ -22,3 +20,5 @@ d6c983396860541a29b730e4acd0ef88  tests/reads.sam
 1251050c362589319ca65151708ff998  tests/reads.fmt.srt.uniq.sam
 534ed1d8320e4b250c6d675bba23fac9  tests/reads.xcounts
 838e61c5a3155db8075eeeec53d25efa  tests/reads.unxcounts
+001b9d966f62fa439b24cf2198cc3de5  tests/reads.counts.sym
+2b8a0406015458be51b8b1c9e58b3602  tests/tRex1_promoters.roi.bed
diff --git a/src/analysis/nanopore.cpp b/src/analysis/nanopore.cpp
new file mode 100644
index 00000000..c53cdfd0
--- /dev/null
+++ b/src/analysis/nanopore.cpp
@@ -0,0 +1,997 @@
+/* nanocount: methylation counts for nanopore data; see docs for 'counts'
+ * command for details.
+ *
+ * Copyright (C) 2025 Andrew D. Smith
+ *
+ * Author: Andrew D. Smith
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <array>
+#include <cstdint>  // for [u]int[0-9]+_t
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "OptionParser.hpp"
+#include "bam_record_utils.hpp"
+#include "bsutils.hpp"
+#include "counts_header.hpp"
+#include "dnmt_error.hpp"
+
+/* HTSlib */
+#include <htslib/sam.h>
+
+using bamxx::bam_header;
+using bamxx::bam_rec;
+using bamxx::bgzf_file;
+
+[[nodiscard]] static std::string
+get_basecall_model(const bam_header &hdr) {
+  kstring_t ks{};
+
+  ks = {0, 0, nullptr};
+  const auto rg_ret = sam_hdr_find_line_pos(hdr.h, "RG", 0, &ks);
+  if (rg_ret == -2)
+    throw std::runtime_error("failure of sam_hdr_find_line_pos");
+  if (rg_ret == -1)
+    return {};
+  ks_free(&ks);
+
+  ks = {0, 0, nullptr};
+  const auto ds_ret = sam_hdr_find_tag_pos(hdr.h, "RG", 0, "DS", &ks);
+  if (ds_ret == -2)
+    throw std::runtime_error("failure of sam_hdr_find_tag_pos");
+  if (ds_ret == -1)
+    return {};
+  std::istringstream buffer(ks.s);
+  ks_free(&ks);
+
+  const std::vector<std::string> parts(
+    (std::istream_iterator<std::string>(buffer)), {});
+
+  std::string basecall_model;
+  for (const auto &key_val : parts) {
+    const auto equal_pos = key_val.find('=');
+    if (equal_pos == std::string::npos)
+      throw std::runtime_error("malformed DS key=val pair: " + key_val);
+    if (key_val.substr(0, equal_pos) == "basecall_model")
+      return key_val.substr(equal_pos + 1);
+  }
+
+  return {};
+}
+
+// ADS: here the std::uint16_t allows for up to 256 reads, each contributing up
+// to 256 "counts" in the probability encoding.
+typedef std::uint16_t count_type;
+
+[[nodiscard]] static inline bool
+eats_ref(const std::uint32_t c) {
+  return bam_cigar_type(bam_cigar_op(c)) & 2;
+}
+
+[[nodiscard]] static inline bool
+eats_query(const std::uint32_t c) {
+  return bam_cigar_type(bam_cigar_op(c)) & 1;
+}
+
+/* The three functions below here should probably be moved into
+   bsutils.hpp. I am not sure if the DDG function is needed, but it
+   seems like if one considers strand, and the CHH is not symmetric,
+   then one needs this. Also, Qiang should be consulted on this
+   because he spent much time thinking about it in the context of
+   plants. */
+[[nodiscard]] static bool
+is_chh(const std::string &s, std::size_t i) {
+  return i + 2 < std::size(s) && is_cytosine(s[i]) && !is_guanine(s[i + 1]) &&
+         !is_guanine(s[i + 2]);
+}
+
+[[nodiscard]] static bool
+is_ddg(const std::string &s, std::size_t i) {
+  return i + 2 < std::size(s) && !is_cytosine(s[i]) && !is_cytosine(s[i + 1]) &&
+         is_guanine(s[i + 2]);
+}
+
+[[nodiscard]] static bool
+is_c_at_g(const std::string &s, std::size_t i) {
+  return i + 2 < std::size(s) && is_cytosine(s[i]) && !is_cytosine(s[i + 1]) &&
+         !is_guanine(s[i + 1]) && is_guanine(s[i + 2]);
+}
+
+/* Right now the CountSet objects below are much larger than they need
+   to be, for the things we are computing. However, it's not clear
+   that the minimum information would really put the memory
+   requirement of the program into a more reasonable range, so keeping
+   all the information seems reasonable. */
+struct CountSet {
+  std::string
+  tostring() const {
+    // clang-format off
+    std::ostringstream oss;
+    oss << hydroxy_pos << '\t' << hydroxy_neg << '\t'
+        << methyl_pos << '\t' << methyl_neg << '\t'
+        << n_reads_pos << '\t' << n_reads_neg << '\n';
+    // clang-format on
+    return oss.str();
+  }
+  void
+  add_count_pos(const std::uint8_t h, const std::uint8_t m) {
+    hydroxy_pos += h;
+    methyl_pos += m;
+    ++n_reads_pos;
+  }
+  void
+  add_count_neg(const std::uint8_t h, const std::uint8_t m) {
+    hydroxy_neg += h;
+    methyl_neg += m;
+    ++n_reads_neg;
+  }
+  double
+  get_hydroxy(const bool is_c) const {
+    return is_c ? hydroxy_pos : hydroxy_neg;
+  }
+  double
+  get_methyl(const bool is_c) const {
+    return is_c ? methyl_pos : methyl_neg;
+  }
+  double
+  get_mods(const bool is_c) const {
+    return get_hydroxy(is_c) + get_methyl(is_c);
+  }
+  double
+  get_n_reads(const bool is_c) const {
+    return is_c ? n_reads_pos : n_reads_neg;
+  }
+  count_type hydroxy_pos{0};
+  count_type hydroxy_neg{0};
+  count_type methyl_pos{0};
+  count_type methyl_neg{0};
+  count_type n_reads_pos{0};
+  count_type n_reads_neg{0};
+};
+
+/* The "tag" returned by this function should be exclusive, so that
+ * the order of checking conditions doesn't matter. There is also a
+ * bit of a hack in that the unsigned "pos" could wrap, but this still
+ * works as long as the chromosome size is not the maximum size of a
+ * std::size_t.
+ */
+[[nodiscard]] static std::uint32_t
+get_tag_from_genome(const std::string &s, const std::size_t pos) {
+  if (is_cytosine(s[pos])) {
+    if (is_cpg(s, pos))
+      return 0;
+    else if (is_chh(s, pos))
+      return 1;
+    else if (is_c_at_g(s, pos))
+      return 2;
+    else
+      return 3;
+  }
+  if (is_guanine(s[pos])) {
+    if (is_cpg(s, pos - 1))
+      return 0;
+    else if (is_ddg(s, pos - 2))
+      return 1;
+    else if (is_c_at_g(s, pos - 2))
+      return 2;
+    else
+      return 3;
+  }
+  return 4;  // shouldn't be used for anything
+}
+
+static const char *tag_values[] = {
+  "CpG",  // 0
+  "CHH",  // 1
+  "CXG",  // 2
+  "CCG",  // 3
+  "N",    // 4
+};
+
+template <typename T>
+[[nodiscard]] static std::tuple<T, T>
+get_hydroxy_sites(T mod_pos_beg, T mod_pos_end) {
+  const char hydroxy_tag[] = "C+h?";
+  const auto hydroxy_tag_size = 4;
+  auto hydroxy_beg = strstr(mod_pos_beg, hydroxy_tag);
+  if (hydroxy_beg == mod_pos_end)
+    return std::tuple<T, T>{};
+  auto hydroxy_end = std::find(hydroxy_beg, mod_pos_end, ';');
+  if (hydroxy_end == mod_pos_end)
+    return std::tuple<T, T>{};
+  hydroxy_beg += hydroxy_tag_size;
+  return std::tuple<T, T>(hydroxy_beg, hydroxy_end);
+}
+
+template <typename T>
+[[nodiscard]] static std::tuple<T, T>
+get_methyl_sites(T mod_pos_beg, T mod_pos_end) {
+  const char methyl_tag[] = "C+h?";
+  const auto methyl_tag_size = 4;
+  auto methyl_beg = strstr(mod_pos_beg, methyl_tag);
+  if (methyl_beg == mod_pos_end)
+    return std::tuple<T, T>{};
+  auto methyl_end = std::find(methyl_beg, mod_pos_end, ';');
+  if (methyl_end == mod_pos_end)
+    return std::tuple<T, T>{};
+  methyl_beg += methyl_tag_size;
+  return std::tuple<T, T>(methyl_beg, methyl_end);
+}
+
+[[nodiscard]] static std::tuple<char *, char *>
+get_modification_positions(const bamxx::bam_rec &aln) {
+  const auto mm_aux = bam_aux_get(aln.b, "MM");
+  if (mm_aux == nullptr)
+    return {};
+  auto mod_pos_beg = bam_aux2Z(mm_aux);
+  auto mod_pos_end = mod_pos_beg + std::strlen(mod_pos_beg);
+  return {mod_pos_beg, mod_pos_end};
+}
+
+struct mod_prob_buffer {
+  static constexpr auto init_capacity{128 * 1024};
+
+  std::vector<std::uint8_t> hydroxy_probs;
+  std::vector<std::uint8_t> methyl_probs;
+
+  mod_prob_buffer() {
+    methyl_probs.reserve(init_capacity);
+    hydroxy_probs.reserve(init_capacity);
+  }
+
+  bool
+  set_probs(const bamxx::bam_rec &aln) {
+    const auto get_next_mod_pos = [](auto &b, const auto e) -> std::int32_t {
+      const auto isdig = [](const auto x) {
+        return std::isdigit(static_cast<unsigned char>(x));
+      };
+      b = std::find_if(b, e, isdig);
+      if (b == e)
+        return -1;
+      auto r = atoi(b);
+      b = std::find_if_not(b, e, isdig);
+      return r;
+    };
+
+    auto [mod_pos_beg, mod_pos_end] = get_modification_positions(aln);
+    auto [hydroxy_beg, hydroxy_end] =
+      get_hydroxy_sites(mod_pos_beg, mod_pos_end);
+    auto [methyl_beg, methyl_end] = get_methyl_sites(mod_pos_beg, mod_pos_end);
+
+    // check if any are false
+    if (mod_pos_beg == nullptr || hydroxy_beg == nullptr ||
+        methyl_beg == nullptr)
+      return false;
+
+    // assume that hydroxy and methyl both point to CpG sites
+    if (!std::equal(hydroxy_beg, hydroxy_end, methyl_beg, methyl_end))
+      return false;
+
+    const auto mod_prob = bam_aux_get(aln.b, "ML");
+    if (mod_prob == nullptr)
+      return false;
+
+    // number of commas is number of hydroxy substrates = CpG sites
+    const auto n_cpgs = std::count(hydroxy_beg, hydroxy_end, ',');
+
+    const auto qlen = get_l_qseq(aln);
+    const auto seq = bam_get_seq(aln);
+
+    methyl_probs.clear();
+    methyl_probs.resize(qlen, 0);
+
+    hydroxy_probs.clear();
+    hydroxy_probs.resize(qlen, 0);
+
+    std::int32_t delta = get_next_mod_pos(hydroxy_beg, hydroxy_end);
+
+    auto hydroxy_prob_idx = 0;      // start of modifications
+    auto methyl_prob_idx = n_cpgs;  // start methyl after hydroxy
+
+    if (bam_is_rev(aln)) {
+      for (auto i = 0; i < qlen; ++i) {
+        const auto nuc = seq_nt16_str[bam_seqi(seq, qlen - i - 1)];
+        if (nuc == 'G') {
+          if (seq_nt16_str[bam_seqi(seq, qlen - i - 2)] == 'C') {
+            // assume that when delta hits 0 we have a CpG site
+            if (delta != 0)
+              return false;
+            methyl_probs[i] = bam_auxB2i(mod_prob, methyl_prob_idx++);
+            hydroxy_probs[i] = bam_auxB2i(mod_prob, hydroxy_prob_idx++);
+          }
+          --delta;
+          if (delta < 0)
+            delta = get_next_mod_pos(hydroxy_beg, hydroxy_end);
+        }
+      }
+    }
+    else {
+      for (auto i = 0; i + 1 < qlen; ++i) {
+        const auto nuc = seq_nt16_str[bam_seqi(seq, i)];
+        if (nuc == 'C') {
+          if (seq_nt16_str[bam_seqi(seq, i + 1)] == 'G') {
+            // assume that when delta hits 0 we have a CpG site
+            if (delta != 0)
+              return false;
+            methyl_probs[i] = bam_auxB2i(mod_prob, methyl_prob_idx++);
+            hydroxy_probs[i] = bam_auxB2i(mod_prob, hydroxy_prob_idx++);
+          }
+          --delta;
+          if (delta < 0)
+            delta = get_next_mod_pos(hydroxy_beg, hydroxy_end);
+        }
+      }
+    }
+    return true;
+  }
+};
+
+// clang-format off
+static const std::uint8_t enc[] = {
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 16
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 32
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 48
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 64
+  4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,  // 80
+  4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 96
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 112
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 128
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 144
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 160
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 176
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 192
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 208
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 224
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 240
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4   // 256
+};
+// clang-format on
+
+static constexpr auto n_dinucs = 16u;
+// clang-format off
+static const auto dinucs = std::vector{
+  "AA", "AC", "AG", "AT",
+  "CA", "CC", "CG", "CT",
+  "GA", "GC", "GG", "GT",
+  "TA", "TC", "TG", "TT",
+};
+// clang-format on
+
+struct match_counter {
+  typedef std::array<std::uint64_t, 256> container;
+  container pos{};
+  container neg{};
+
+  void
+  add_pos(const std::uint8_t r1, const std::uint8_t r2, const std::uint8_t q1,
+          const std::uint8_t q2) {
+    const auto r1e = enc[r1];
+    const auto r2e = enc[r2];
+    const auto q1e = enc[q1];
+    const auto q2e = enc[q2];
+    if ((r1e | r2e | q1e | q2e) & 4u)
+      return;
+    pos[(r1e * 64) + (r2e * 16) + (q1e * 4) + (q2e * 1)]++;
+  }
+
+  void
+  add_neg(const std::uint8_t r1, const std::uint8_t r2, const std::uint8_t q1,
+          const std::uint8_t q2) {
+    const auto r1e = enc[r1];
+    const auto r2e = enc[r2];
+    const auto q1e = enc[q1];
+    const auto q2e = enc[q2];
+    if ((r1e | r2e | q1e | q2e) & 4u)
+      return;
+    neg[(r1e * 64) + (r2e * 16) + (q1e * 4) + (q2e * 1)]++;
+  }
+
+  [[nodiscard]] std::string
+  tostring() const {
+    std::ostringstream oss;
+    oss << "target_query_fwd" << '\n';
+    for (auto i = 0u; i < n_dinucs; ++i) {
+      const auto beg = std::cbegin(pos) + (i * n_dinucs);
+      oss << dinucs[i];
+      for (auto itr = beg; itr != beg + n_dinucs; ++itr)
+        oss << '\t' << *itr;
+      oss << '\n';
+    }
+    oss << "target_query_rev" << '\n';
+    for (auto i = 0u; i < n_dinucs; ++i) {
+      const auto beg = std::cbegin(neg) + (i * n_dinucs);
+      oss << dinucs[i];
+      for (auto itr = beg; itr != beg + n_dinucs; ++itr)
+        oss << '\t' << *itr;
+      oss << '\n';
+    }
+    return oss.str();
+  }
+
+  [[nodiscard]] std::string
+  json_row(const container::const_iterator beg) const {
+    std::ostringstream oss;
+    oss << "{";
+    auto j = 0;
+    for (auto itr = beg; itr != beg + n_dinucs; ++itr) {
+      if (j > 0)
+        oss << ",";
+      oss << R"(")" << dinucs[j++] << R"(":")" << *itr << R"(")";
+    }
+    oss << "}";
+    return oss.str();
+  }
+
+  [[nodiscard]] std::string
+  json_table(const container &a) const {
+    std::ostringstream oss;
+    const auto beg = std::cbegin(a);
+    oss << "{";
+    for (auto i = 0u; i < n_dinucs; ++i) {
+      if (i > 0)
+        oss << ",";
+      oss << R"(")" << dinucs[i] << R"(":)" << json_row(beg + (i * n_dinucs));
+    }
+    oss << "}";
+    return oss.str();
+  }
+
+  [[nodiscard]] std::string
+  json() const {
+    // clang-format off
+    return (std::ostringstream() << "{"
+            << R"("map_fwd":)" << json_table(pos) << ","
+            << R"("map_rev":)" << json_table(neg)
+            << "}").str();
+    // clang-format on
+  }
+
+  [[nodiscard]] std::string
+  tostring_frac() const {
+    static constexpr auto width = 8;
+    static constexpr auto prec = 3;
+    std::ostringstream oss;
+    oss.precision(prec);
+    oss.setf(std::ios::fixed, std::ios::floatfield);
+    oss << "map_fwd\n";
+    for (auto i = 0u; i < n_dinucs; ++i) {
+      oss << dinucs[i];
+      const auto beg = std::cbegin(pos) + (i * n_dinucs);
+      const auto tot = std::max(std::accumulate(beg, beg + n_dinucs, 0.0), 1.0);
+      for (auto itr = beg; itr != beg + n_dinucs; ++itr)
+        oss << std::setw(width) << *itr / tot;
+      oss << '\n';
+    }
+    oss << "map_ref\n";
+    for (auto i = 0u; i < n_dinucs; ++i) {
+      oss << dinucs[i];
+      const auto beg = std::cbegin(neg) + (i * n_dinucs);
+      const auto tot = std::max(std::accumulate(beg, beg + n_dinucs, 0.0), 1.0);
+      for (auto itr = beg; itr != beg + n_dinucs; ++itr)
+        oss << std::setw(width) << *itr / tot;
+      oss << '\n';
+    }
+    return oss.str();
+  }
+};
+
+static void
+count_states_pos(const bam_rec &aln, std::vector<CountSet> &counts,
+                 mod_prob_buffer &mod_buf, const std::string &chrom,
+                 match_counter &mc) {
+  /* Move through cigar, reference and read positions without
+     inflating cigar or read sequence */
+  const auto seq = bam_get_seq(aln);
+  const auto beg_cig = bam_get_cigar(aln);
+  const auto end_cig = beg_cig + get_n_cigar(aln);
+  const auto end_ref = std::cend(chrom);
+
+  auto rpos = get_pos(aln);
+  auto ref_itr = std::cbegin(chrom) + rpos;
+
+  if (!mod_buf.set_probs(aln))
+    return;
+
+  auto qpos = 0;                     // to match type with b->core.l_qseq
+  auto q_lim = get_l_qseq(aln) - 1;  // if here, this is >= 0
+
+  auto hydroxy_prob_itr = std::cbegin(mod_buf.hydroxy_probs);
+  auto methyl_prob_itr = std::cbegin(mod_buf.methyl_probs);
+
+  for (auto c_itr = beg_cig; c_itr != end_cig; ++c_itr) {
+    const char op = bam_cigar_op(*c_itr);
+    const std::uint32_t n = bam_cigar_oplen(*c_itr);
+    if (eats_ref(op) && eats_query(op)) {
+      const decltype(qpos) end_qpos = qpos + n;
+      for (; qpos < end_qpos; ++qpos) {
+        const auto r_nuc = *ref_itr++;
+        mc.add_pos(r_nuc, ref_itr == end_ref ? 'N' : *ref_itr,
+                   seq_nt16_str[bam_seqi(seq, qpos)],
+                   qpos == q_lim ? 'N' : seq_nt16_str[bam_seqi(seq, qpos + 1)]);
+        counts[rpos++].add_count_pos(*hydroxy_prob_itr, *methyl_prob_itr);
+        ++methyl_prob_itr;
+        ++hydroxy_prob_itr;
+      }
+    }
+    else if (eats_query(op)) {
+      qpos += n;
+      methyl_prob_itr += n;
+      hydroxy_prob_itr += n;
+    }
+    else if (eats_ref(op)) {
+      rpos += n;
+      ref_itr += n;
+    }
+  }
+  // ADS: somehow previous code included a correction for rpos going
+  // past the end of the chromosome; this should result at least in a
+  // soft-clip by any mapper. I'm not checking it here as even if it
+  // happens I don't want to terminate.
+  assert(qpos == get_l_qseq(aln));
+}
+
+static void
+count_states_neg(const bam_rec &aln, std::vector<CountSet> &counts,
+                 mod_prob_buffer &mod_buf, const std::string &chrom,
+                 match_counter &mc) {
+  /* Move through cigar, reference and (*backward*) through read
+     positions without inflating cigar or read sequence */
+  const auto seq = bam_get_seq(aln);
+  const auto beg_cig = bam_get_cigar(aln);
+  const auto end_cig = beg_cig + get_n_cigar(aln);
+  const auto end_ref = std::cend(chrom);
+  auto rpos = get_pos(aln);
+  auto ref_itr = std::cbegin(chrom) + rpos;
+
+  std::size_t qpos = get_l_qseq(aln);  // to match type with b->core.l_qseq
+  std::size_t q_idx = 0;
+  std::size_t l_qseq = get_l_qseq(aln);
+
+  if (!mod_buf.set_probs(aln))
+    return;
+
+  auto hydroxy_prob_itr = std::cend(mod_buf.hydroxy_probs);
+  auto methyl_prob_itr = std::cend(mod_buf.methyl_probs);
+
+  if (qpos == 0)
+    return;
+  for (auto c_itr = beg_cig; c_itr != end_cig; ++c_itr) {
+    const char op = bam_cigar_op(*c_itr);
+    const std::uint32_t n = bam_cigar_oplen(*c_itr);
+    if (eats_ref(op) && eats_query(op)) {
+      assert(qpos >= n);
+      const std::size_t end_qpos = qpos - n;  // to match type with qpos
+      for (; qpos > end_qpos; --qpos) {
+        const auto r_nuc = *ref_itr++;
+        const auto q_nuc = seq_nt16_str[bam_seqi(seq, q_idx)];
+        ++q_idx;
+        mc.add_neg(r_nuc, ref_itr == end_ref ? 'N' : *ref_itr, q_nuc,
+                   q_idx == l_qseq ? 'N' : seq_nt16_str[bam_seqi(seq, q_idx)]);
+        --methyl_prob_itr;
+        --hydroxy_prob_itr;
+        counts[rpos++].add_count_neg(*hydroxy_prob_itr, *methyl_prob_itr);
+      }
+    }
+    else if (eats_query(op)) {
+      qpos -= n;
+      q_idx += n;
+      methyl_prob_itr -= n;
+      hydroxy_prob_itr -= n;
+    }
+    else if (eats_ref(op)) {
+      rpos += n;
+      ref_itr += n;
+    }
+  }
+
+  /* qpos is unsigned; would wrap around if < 0 */
+  // ADS: Same as count_states_pos; see comment there
+  assert(qpos == 0);
+}
+
+[[nodiscard]] static std::unordered_map<std::int32_t, std::size_t>
+get_tid_to_idx(const bam_header &hdr,
+               const std::unordered_map<std::string, std::size_t> name_to_idx) {
+  std::unordered_map<std::int32_t, std::size_t> tid_to_idx;
+  for (std::int32_t i = 0; i < hdr.h->n_targets; ++i) {
+    // "curr_name" gives a "tid_to_name" mapping allowing to jump
+    // through "name_to_idx" and get "tid_to_idx"
+    const std::string curr_name(hdr.h->target_name[i]);
+    const auto name_itr(name_to_idx.find(curr_name));
+    if (name_itr == end(name_to_idx))
+      throw std::runtime_error("failed to find chrom: " + curr_name);
+    tid_to_idx[i] = name_itr->second;
+  }
+  return tid_to_idx;
+}
+
+[[nodiscard]] static bool
+consistent_targets(
+  const bam_header &hdr,
+  const std::unordered_map<std::int32_t, std::size_t> &tid_to_idx,
+  const std::vector<std::string> &names,
+  const std::vector<std::size_t> &sizes) {
+  const std::size_t n_targets = hdr.h->n_targets;
+  if (n_targets != names.size())
+    return false;
+
+  for (std::size_t tid = 0; tid < n_targets; ++tid) {
+    const std::string tid_name_sam = sam_hdr_tid2name(hdr, tid);
+    const std::size_t tid_size_sam = sam_hdr_tid2len(hdr, tid);
+    const auto idx_itr = tid_to_idx.find(tid);
+    if (idx_itr == std::cend(tid_to_idx))
+      return false;
+    const auto idx = idx_itr->second;
+    if (tid_name_sam != names[idx] || tid_size_sam != sizes[idx])
+      return false;
+  }
+  return true;
+}
+
+struct read_processor {
+  static constexpr auto default_expected_basecall_model =
+    "dna_r10.4.1_e8.2_400bps_hac@v4.3.0";
+
+  bool verbose{};
+  bool show_progress{};
+  bool compress_output{};
+  bool include_header{};
+  bool require_covered{};
+  bool cpg_only{};
+  bool force{};
+  std::uint32_t n_threads{1};
+  int strand{0};
+  std::string expected_basecall_model{};
+
+  [[nodiscard]] std::string
+  tostring() const {
+    std::ostringstream oss;
+    oss << std::boolalpha;
+    oss << "[verbose: " << verbose << "]\n"
+        << "[show_progress: " << show_progress << "]\n"
+        << "[compress_output: " << compress_output << "]\n"
+        << "[include_header: " << include_header << "]\n"
+        << "[require_covered: " << require_covered << "]\n"
+        << "[cpg_only: " << cpg_only << "]\n"
+        << "[force: " << force << "]\n"
+        << "[n_threads: " << n_threads << "]\n"
+        << "[strand: " << strand << "]\n"
+        << "[expected_basecall_model: " << expected_basecall_model_str()
+        << "]\n";
+    return oss.str();
+  }
+
+  read_processor() : expected_basecall_model{default_expected_basecall_model} {}
+
+  [[nodiscard]] std::string
+  expected_basecall_model_str() const {
+    return expected_basecall_model.empty() ? "NA" : expected_basecall_model;
+  }
+
+  void
+  output_skipped_chromosome(
+    const std::int32_t tid,
+    const std::unordered_map<std::int32_t, std::size_t> &tid_to_idx,
+    const bam_header &hdr,
+    const std::vector<std::string>::const_iterator chroms_beg,
+    const std::vector<std::size_t> &chrom_sizes, std::vector<CountSet> &counts,
+    bgzf_file &out) const {
+
+    // get the index of the next chrom sequence
+    const auto chrom_idx = tid_to_idx.find(tid);
+    if (chrom_idx == std::cend(tid_to_idx))
+      throw std::runtime_error("chrom not found: " +
+                               sam_hdr_tid2name(hdr, tid));
+
+    const auto chrom_itr = chroms_beg + chrom_idx->second;
+
+    // reset the counts
+    counts.clear();
+    counts.resize(chrom_sizes[chrom_idx->second]);
+
+    // ADS: 'false' below for require_covered b/c these sites can't be covered.
+    write_output(hdr, out, tid, *chrom_itr, counts);
+  }
+
+  void
+  write_output(const bam_header &hdr, bgzf_file &out, const std::int32_t tid,
+               const std::string &chrom,
+               const std::vector<CountSet> &counts) const {
+    static constexpr auto out_fmt = "%ld\t%c\t%s\t%.6g\t%d\t%.6g\t%.6g\n";
+    static constexpr auto buf_size = 1024;
+    char buffer[buf_size];
+
+    // Put chrom name in buffer and then skip that part for each site because
+    // it doesn't change.
+    const auto chrom_name = sam_hdr_tid2name(hdr.h, tid);
+    if (chrom_name == nullptr)
+      throw std::runtime_error("failed to identify chrom for tid: " +
+                               std::to_string(tid));
+    const auto chrom_name_offset = std::sprintf(buffer, "%s\t", chrom_name);
+    if (chrom_name_offset < 0)
+      throw std::runtime_error("failed to write to output buffer");
+    auto buffer_after_chrom = buffer + chrom_name_offset;
+
+    for (std::size_t i = 0; i < std::size(chrom); ++i) {
+      const char base = chrom[i];
+      if (is_cytosine(base) || is_guanine(base)) {
+
+        const std::uint32_t the_tag = get_tag_from_genome(chrom, i);
+        if (cpg_only && the_tag != 0)
+          continue;
+
+        const bool is_c = is_cytosine(base);
+
+        const std::uint32_t n_reads = counts[i].get_n_reads(is_c);
+        if (require_covered && n_reads == 0)
+          continue;
+        const double denom = std::max(n_reads, 1u);
+        const double hydroxy = counts[i].get_hydroxy(is_c) / 256.0 / denom;
+        const double methyl = counts[i].get_methyl(is_c) / 256.0 / denom;
+        const double mods = counts[i].get_mods(is_c) / 256.0 / denom;
+
+        // clang-format off
+        int r = std::sprintf(buffer_after_chrom, out_fmt,
+                             i,
+                             is_c ? '+' : '-',
+                             tag_values[the_tag],
+                             mods,
+                             n_reads,
+                             hydroxy,
+                             methyl);
+        // clang-format on
+
+        if (r < 0)
+          throw std::runtime_error("failed to write to output buffer");
+        out.write(buffer);
+      }
+    }
+  }
+
+  [[nodiscard]] match_counter
+  operator()(const std::string &infile, const std::string &outfile,
+             const std::string &chroms_file) const {
+    // first get the chromosome names and sequences from the FASTA file
+    std::vector<std::string> chroms, names;
+    read_fasta_file_short_names(chroms_file, names, chroms);
+    for (auto &i : chroms)
+      transform(std::cbegin(i), std::cend(i), std::begin(i),
+                [](const char c) { return std::toupper(c); });
+    if (verbose)
+      std::cerr << "[n chroms in reference: " << chroms.size() << "]"
+                << std::endl;
+    const auto chroms_beg = std::cbegin(chroms);
+
+    std::unordered_map<std::string, std::size_t> name_to_idx;
+    std::vector<std::size_t> chrom_sizes(chroms.size(), 0);
+    for (std::size_t i = 0; i < chroms.size(); ++i) {
+      name_to_idx[names[i]] = i;
+      chrom_sizes[i] = chroms[i].size();
+    }
+
+    bamxx::bam_tpool tp(n_threads);  // Must be destroyed after hts
+
+    // open the hts SAM/BAM input file and get the header
+    bamxx::bam_in hts(infile);
+    if (!hts)
+      throw std::runtime_error("failed to open input file");
+    // load the input file's header
+    bam_header hdr(hts);
+    if (!hdr)
+      throw std::runtime_error("failed to read header");
+
+    std::unordered_map<std::int32_t, std::size_t> tid_to_idx =
+      get_tid_to_idx(hdr, name_to_idx);
+
+    if (!consistent_targets(hdr, tid_to_idx, names, chrom_sizes))
+      throw std::runtime_error("inconsistent reference genome information");
+
+    // open the output file
+    const std::string output_mode = compress_output ? "w" : "wu";
+    bgzf_file out(outfile, output_mode);
+    if (!out)
+      throw std::runtime_error("error opening output file: " + outfile);
+
+    // set the threads for the input file decompression
+    if (n_threads > 1) {
+      tp.set_io(hts);
+      tp.set_io(out);
+    }
+
+    // validate the basecall model
+    const auto basecall_model = get_basecall_model(hdr);
+    if (verbose)
+      std::cerr << "[observed basecall model: "
+                << (basecall_model.empty() ? "NA" : basecall_model) << "]"
+                << std::endl;
+    if (!expected_basecall_model.empty() &&
+        basecall_model != expected_basecall_model) {
+      std::cerr << "failed to match basecall model:" << "\n"
+                << "observed="
+                << (basecall_model.empty() ? "NA" : basecall_model) << "\n"
+                << "expected=" << expected_basecall_model_str() << std::endl;
+      return {};
+    }
+
+    if (include_header)
+      write_counts_header_from_bam_header(hdr, out);
+
+    // this is where all the counts are accumulated
+    std::vector<CountSet> counts;
+
+    // now iterate over the reads, switching chromosomes and writing
+    // output as needed
+    bam_rec aln;
+    std::int32_t prev_tid = -1;
+
+    std::vector<std::string>::const_iterator chrom_itr{};
+
+    match_counter mc;
+    mod_prob_buffer mod_buf;
+
+    while (hts.read(hdr, aln)) {
+      const std::int32_t tid = get_tid(aln);
+      if (get_l_qseq(aln) == 0)
+        continue;
+
+      if (tid == -1)  // ADS: skip reads that have no tid -- they are not mapped
+        continue;
+      if (tid == prev_tid) {
+        const bool is_rev = bam_is_rev(aln);
+        if (is_rev && strand != 1)
+          count_states_neg(aln, counts, mod_buf, *chrom_itr, mc);
+        if (!is_rev && strand != 2)
+          count_states_pos(aln, counts, mod_buf, *chrom_itr, mc);
+      }
+      else {  // chrom has changed, so output results and get the next chrom
+
+        // write output if there is any; counts is empty only for first chrom
+        if (!counts.empty())
+          write_output(hdr, out, prev_tid, *chrom_itr, counts);
+        // make sure reads are sorted chrom tid number in header
+        if (tid < prev_tid) {
+          const std::string message = "SAM file is not sorted "
+                                      "previous tid: " +
+                                      std::to_string(prev_tid) +
+                                      " current tid: " + std::to_string(tid);
+          throw std::runtime_error(message);
+        }
+
+        if (!require_covered)
+          for (auto i = prev_tid + 1; i < tid; ++i)
+            output_skipped_chromosome(i, tid_to_idx, hdr, chroms_beg,
+                                      chrom_sizes, counts, out);
+
+        // get the next chrom to process
+        auto chrom_idx(tid_to_idx.find(tid));
+        if (chrom_idx == end(tid_to_idx))
+          throw std::runtime_error("chromosome not found: " +
+                                   std::string(sam_hdr_tid2name(hdr, tid)));
+        if (show_progress)
+          std::cerr << "processing " << sam_hdr_tid2name(hdr, tid) << std::endl;
+
+        prev_tid = tid;
+        chrom_itr = chroms_beg + chrom_idx->second;
+
+        // reset the counts
+        counts.clear();
+        counts.resize(chrom_sizes[chrom_idx->second]);
+      }
+    }
+    if (!counts.empty())
+      write_output(hdr, out, prev_tid, *chrom_itr, counts);
+
+    // ADS: if some chroms might not be covered by reads, we have to
+    // iterate over what remains
+    if (!require_covered)
+      for (auto i = prev_tid + 1; i < hdr.h->n_targets; ++i)
+        output_skipped_chromosome(i, tid_to_idx, hdr, chroms_beg, chrom_sizes,
+                                  counts, out);
+    return mc;
+  }
+};
+
+int
+main_nanocount(int argc, const char **argv) {
+
+  try {
+
+    read_processor rp;
+
+    std::string chroms_file;
+    std::string outfile;
+    std::string stats_file;
+
+    /****************** COMMAND LINE OPTIONS ********************/
+    OptionParser opt_parse(strip_path(argv[0]),
+                           "get methylation levels from mapped nanopore reads",
+                           "-c <chroms> <mapped-reads>");
+    opt_parse.add_opt("threads", 't', "threads to use (few needed)", false,
+                      rp.n_threads);
+    opt_parse.add_opt("output", 'o', "output file name (default: stdout)",
+                      false, outfile);
+    opt_parse.add_opt("chrom", 'c', "reference genome file (FASTA format)",
+                      true, chroms_file);
+    opt_parse.add_opt("cpg-only", 'n', "print only CpG context cytosines",
+                      false, rp.cpg_only);
+    opt_parse.add_opt("require-covered", 'r', "only output covered sites",
+                      false, rp.require_covered);
+    opt_parse.add_opt("strand", '\0',
+                      "use strand (1=positive, 2=negative, 0=default)", false,
+                      rp.strand);
+    opt_parse.add_opt("stats", 's', "output match/mismatch stats to this file",
+                      false, stats_file);
+    opt_parse.add_opt("force", '\0', "skip consistency checks", false,
+                      rp.force);
+    opt_parse.add_opt("header", 'H', "add a header to identify the reference",
+                      false, rp.include_header);
+    opt_parse.add_opt("zip", 'z', "output gzip format", false,
+                      rp.compress_output);
+    opt_parse.add_opt("progress", '\0', "show progress", false,
+                      rp.show_progress);
+    opt_parse.add_opt("verbose", 'v', "print more run info", false, rp.verbose);
+    std::vector<std::string> leftover_args;
+    opt_parse.parse(argc, argv, leftover_args);
+    if (opt_parse.about_requested() || opt_parse.help_requested() ||
+        leftover_args.empty()) {
+      std::cerr << opt_parse.help_message() << std::endl
+                << opt_parse.about_message() << std::endl;
+      return EXIT_SUCCESS;
+    }
+    if (opt_parse.option_missing()) {
+      std::cerr << opt_parse.option_missing_message() << std::endl;
+      return EXIT_SUCCESS;
+    }
+    const std::string mapped_reads_file = leftover_args.front();
+    /****************** END COMMAND LINE OPTIONS *****************/
+
+    if (rp.force)
+      rp.expected_basecall_model.clear();
+
+    if (rp.n_threads <= 0)
+      throw std::runtime_error("thread count cannot be negative");
+
+    std::ostringstream cmd;
+    copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));
+
+    // file types from HTSlib use "-" for the filename to go to stdout
+    if (outfile.empty())
+      outfile = "-";
+
+    if (rp.verbose)
+      std::cerr << "[input BAM/SAM file: " << mapped_reads_file << "]\n"
+                << "[output file: " << outfile << "]\n"
+                << "[genome file: " << chroms_file << "]\n"
+                << rp.tostring();
+
+    const auto mc = rp(mapped_reads_file, outfile, chroms_file);
+    if (!stats_file.empty()) {
+      std::ofstream stats_out(stats_file);
+      if (!stats_out)
+        throw std::runtime_error("Error opening stats file: " + stats_file);
+      stats_out << mc.json() << std::endl;
+    }
+  }
+  catch (const std::exception &e) {
+    std::cerr << e.what() << std::endl;
+    return EXIT_FAILURE;
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/src/common/MSite.cpp b/src/common/MSite.cpp
index 27bd1a22..a9275929 100644
--- a/src/common/MSite.cpp
+++ b/src/common/MSite.cpp
@@ -15,26 +15,27 @@
 
 #include "MSite.hpp"
 
-#include <string>
-#include <iostream>
+#include <charconv>
 #include <fstream>
+#include <iostream>
+#include <regex>
 #include <sstream>
 #include <stdexcept>
-#include <regex>
-#include <charconv>
+#include <string>
 
-#include "smithlab_utils.hpp"
 #include "counts_header.hpp"
+#include "smithlab_utils.hpp"
 
-using std::string;
-using std::runtime_error;
-using std::regex_match;
-using std::from_chars;
-using std::find_if;
 using std::cbegin;
 using std::cend;
 using std::end;
+using std::find_if;
+using std::from_chars;
+using std::regex_match;
+using std::runtime_error;
+using std::string;
 
+bool MSite::no_extra_fields = true;
 
 bool
 MSite::initialize(const char *c, const char *c_end) {
@@ -45,7 +46,8 @@ MSite::initialize(const char *c, const char *c_end) {
 
   auto field_s = c;
   auto field_e = find_if(field_s + 1, c_end, is_sep);
-  if (field_e == c_end) failed = true;
+  if (field_e == c_end)
+    failed = true;
 
   {
     const uint32_t d = std::distance(field_s, field_e);
@@ -95,7 +97,7 @@ MSite::initialize(const char *c, const char *c_end) {
 
   {
     const auto [ptr, ec] = from_chars(field_s, c_end, n_reads);
-    failed = failed || (ptr != c_end);
+    failed = failed || (no_extra_fields && ptr != c_end);
   }
   // ADS: the value below would work for a flag
   // pos = std::numeric_limits<decltype(pos)>::max();
@@ -103,36 +105,28 @@ MSite::initialize(const char *c, const char *c_end) {
   return !failed;
 }
 
-
 MSite::MSite(const string &line) {
   if (!initialize(line.data(), line.data() + size(line)))
     throw runtime_error("bad count line: " + line);
 }
 
-
 MSite::MSite(const char *line, const int n) {
   if (!initialize(line, line + n))
     throw runtime_error("bad count line: " + string(line));
 }
 
-
 string
 MSite::tostring() const {
   std::ostringstream oss;
-  oss << chrom << '\t'
-      << pos << '\t'
-      << strand << '\t'
-      << context << '\t'
-      << meth << '\t'
-      << n_reads;
+  oss << chrom << '\t' << pos << '\t' << strand << '\t' << context << '\t'
+      << meth << '\t' << n_reads;
   return oss.str();
 }
 
 size_t
 distance(const MSite &a, const MSite &b) {
-  return a.chrom == b.chrom ?
-    std::max(a.pos, b.pos) - std::min(a.pos, b.pos) :
-    std::numeric_limits<size_t>::max();
+  return a.chrom == b.chrom ? std::max(a.pos, b.pos) - std::min(a.pos, b.pos)
+                            : std::numeric_limits<size_t>::max();
 }
 
 using std::ifstream;
@@ -152,10 +146,8 @@ move_to_start_of_line(ifstream &in) {
     in.clear();
 }
 
-
 void
-find_offset_for_msite(const std::string &chr,
-                      const size_t idx,
+find_offset_for_msite(const std::string &chr, const size_t idx,
                       std::ifstream &site_in) {
 
   site_in.seekg(0, ios_base::beg);
@@ -166,7 +158,7 @@ find_offset_for_msite(const std::string &chr,
   if (end_pos - begin_pos < 2)
     throw runtime_error("empty counts file");
 
-  size_t step_size = (end_pos - begin_pos)/2;
+  size_t step_size = (end_pos - begin_pos) / 2;
 
   site_in.seekg(0, ios_base::beg);
   string low_chr;
@@ -185,7 +177,7 @@ find_offset_for_msite(const std::string &chr,
   size_t pos = step_size;
   site_in.seekg(pos, ios_base::beg);
   move_to_start_of_line(site_in);
-  size_t prev_pos = 0; // keep track of previous position in file
+  size_t prev_pos = 0;  // keep track of previous position in file
 
   // binary search inside sorted file on disk
   // iterate until step size is 0 or positions are identical
@@ -194,8 +186,8 @@ find_offset_for_msite(const std::string &chr,
     // track (mid) position in file to make sure it keeps moving
     prev_pos = pos;
 
-    string mid_chr; // chromosome name at mid point
-    size_t mid_idx = 0; // position at mid point
+    string mid_chr;      // chromosome name at mid point
+    size_t mid_idx = 0;  // position at mid point
     if (!(site_in >> mid_chr >> mid_idx))
       throw runtime_error("failed navigating inside file");
 
@@ -203,10 +195,10 @@ find_offset_for_msite(const std::string &chr,
     // might catch an unsorted file
     if (mid_chr < low_chr || mid_chr > high_chr)
       throw runtime_error("chromosomes unsorted inside file: "
-                          "low=" + low_chr + ",mid=" + mid_chr +
-                          ",high=" + high_chr);
+                          "low=" +
+                          low_chr + ",mid=" + mid_chr + ",high=" + high_chr);
 
-    step_size /= 2; // cut the range in half
+    step_size /= 2;  // cut the range in half
 
     if (chr < mid_chr || (chr == mid_chr && idx <= mid_idx)) {
       // move to the left
@@ -229,8 +221,7 @@ find_offset_for_msite(const std::string &chr,
 using std::unordered_map;
 void
 find_offset_for_msite(const unordered_map<string, uint32_t> &chrom_order,
-                      const std::string &chr,
-                      const size_t idx,
+                      const std::string &chr, const size_t idx,
                       std::ifstream &site_in) {
 
   site_in.seekg(0, ios_base::beg);
@@ -241,7 +232,7 @@ find_offset_for_msite(const unordered_map<string, uint32_t> &chrom_order,
   if (end_pos - begin_pos < 2)
     throw runtime_error("empty counts file");
 
-  size_t step_size = (end_pos - begin_pos)/2;
+  size_t step_size = (end_pos - begin_pos) / 2;
 
   const auto chr_idx_itr = chrom_order.find(chr);
   if (chr_idx_itr == end(chrom_order)) {
@@ -277,7 +268,7 @@ find_offset_for_msite(const unordered_map<string, uint32_t> &chrom_order,
   size_t pos = step_size;
   site_in.seekg(pos, ios_base::beg);
   move_to_start_of_line(site_in);
-  size_t prev_pos = 0; // keep track of previous position in file
+  size_t prev_pos = 0;  // keep track of previous position in file
 
   // binary search inside sorted file on disk
   // iterate until step size is 0 or positions are identical
@@ -286,8 +277,8 @@ find_offset_for_msite(const unordered_map<string, uint32_t> &chrom_order,
     // track (mid) position in file to make sure it keeps moving
     prev_pos = pos;
 
-    string mid_chr; // chromosome name at mid point
-    size_t mid_idx{}; // position at mid point
+    string mid_chr;    // chromosome name at mid point
+    size_t mid_idx{};  // position at mid point
     if (!(site_in >> mid_chr >> mid_idx))
       throw runtime_error("failed navigating inside file");
 
@@ -301,10 +292,12 @@ find_offset_for_msite(const unordered_map<string, uint32_t> &chrom_order,
     using std::to_string;
     if (mid_chr_idx < low_chr_idx || mid_chr_idx > high_chr_idx)
       throw runtime_error("chromosomes unsorted inside file: "
-                          "low=" + to_string(low_chr_idx) + ",mid=" + to_string(mid_chr_idx) +
+                          "low=" +
+                          to_string(low_chr_idx) +
+                          ",mid=" + to_string(mid_chr_idx) +
                           ",high=" + to_string(high_chr_idx));
 
-    step_size /= 2; // cut the range in half
+    step_size /= 2;  // cut the range in half
 
     if (chr_idx < mid_chr_idx || (chr_idx == mid_chr_idx && idx <= mid_idx)) {
       // move to the left
@@ -328,10 +321,12 @@ is_msite_line(const string &line) {
   std::istringstream iss(line);
 
   string chrom;
-  if (!(iss >> chrom)) return false;
+  if (!(iss >> chrom))
+    return false;
 
   int64_t pos = 0;
-  if (!(iss >> pos || pos < 0)) return false;
+  if (!(iss >> pos || pos < 0))
+    return false;
 
   string strand;
   if (!(iss >> strand) || (strand.size() != 1) ||
@@ -341,13 +336,16 @@ is_msite_line(const string &line) {
   string context;
   // ADS: below, allowing any context
   // std::regex pattern("^C[pHWX][GH]$");
-  if (!(iss >> context)) return false;
+  if (!(iss >> context))
+    return false;
 
   double level = 0.0;
-  if (!(iss >> level) || level < 0.0 || level > 1.0) return false;
+  if (!(iss >> level) || level < 0.0 || level > 1.0)
+    return false;
 
   int64_t n_reads = 0;
-  if (!(iss >> n_reads || n_reads < 0)) return false;
+  if (!(iss >> n_reads || n_reads < 0))
+    return false;
 
   string temp;
   if (iss >> temp)
@@ -359,7 +357,8 @@ is_msite_line(const string &line) {
 bool
 is_msite_file(const string &file) {
   bamxx::bgzf_file in(file, "r");
-  if (!in) throw runtime_error("cannot open file: " + file);
+  if (!in)
+    throw runtime_error("cannot open file: " + file);
 
   string line;
   while (getline(in, line) && is_counts_header_line(line))
diff --git a/src/common/MSite.hpp b/src/common/MSite.hpp
index 355aad72..a74fc36a 100644
--- a/src/common/MSite.hpp
+++ b/src/common/MSite.hpp
@@ -18,12 +18,20 @@
 #ifndef MSITE_HPP
 #define MSITE_HPP
 
-#include <string>
+#include <bamxx.hpp>
+
 #include <cmath>
 #include <cstdint>
+#include <sstream>
+#include <string>
 #include <unordered_map>
 
 struct MSite {
+
+  // This defaults to true, and when true MSite lines cannot be parsed if they
+  // have additional columns in the file.
+  static bool no_extra_fields;
+
   std::string chrom{};
   size_t pos{};
   char strand{};
@@ -32,34 +40,37 @@ struct MSite {
   size_t n_reads{};
 
   MSite() = default;
-  MSite(const std::string &chrom,
-        const size_t pos,
-        const char strand,
-        const std::string &context,
-        const double meth,
-        const size_t n_reads) :
-    chrom{chrom}, pos{pos}, strand{strand},
-    context{context}, meth{meth}, n_reads{n_reads} {}
+  MSite(const std::string &chrom, const size_t pos, const char strand,
+        const std::string &context, const double meth, const size_t n_reads) :
+    chrom{chrom}, pos{pos}, strand{strand}, context{context}, meth{meth},
+    n_reads{n_reads} {}
   explicit MSite(const std::string &line);
   explicit MSite(const char *line, const int n);
 
-  bool initialize(const char *c, const char *c_end);
+  bool
+  initialize(const char *c, const char *c_end);
 
-  bool operator<(const MSite &other) const {
+  bool
+  operator<(const MSite &other) const {
     int r = chrom.compare(other.chrom);
-    return (r < 0 ||
-            (r == 0 &&
-             (pos < other.pos ||
-              (pos == other.pos && strand < other.strand))));
+    return (r < 0 || (r == 0 && (pos < other.pos ||
+                                 (pos == other.pos && strand < other.strand))));
   }
 
-  size_t n_meth() const {return std::round(meth*n_reads);}
-  size_t n_unmeth() const {return n_reads - n_meth();}
+  size_t
+  n_meth() const {
+    return std::round(meth * n_reads);
+  }
+  size_t
+  n_unmeth() const {
+    return n_reads - n_meth();
+  }
 
   //////////////////////////////////////////////////////////////
   /// FUNCTIONS BELOW ARE FOR MANIPULATING SYMMETRIC CPG SITES
   //////////////////////////////////////////////////////////////
-  void add(const MSite &other) {
+  void
+  add(const MSite &other) {
     // ADS: possible that this function has specific behavior that
     // should be placed in the 'sym' source, since we might not want
     // this line below to operate generally.
@@ -67,14 +78,15 @@ struct MSite {
       context += 'x';
     // ADS: order matters below as n_reads update invalidates n_meth()
     // function until meth has been updated
-    const size_t total_c_reads = n_meth() + other.n_meth();
+    const double total_c_reads = (meth * n_reads + other.meth * other.n_reads);
     n_reads += other.n_reads;
-    meth = static_cast<double>(total_c_reads)/std::max(1ul, n_reads);
+    meth = total_c_reads / std::max(1ul, n_reads);
   }
 
   // ADS: function below has redundant check for is_cpg, which is
   // expensive and might be ok to remove
-  bool is_mate_of(const MSite &first) {
+  bool
+  is_mate_of(const MSite &first) {
     return (first.pos + 1 == pos && first.is_cpg() && is_cpg() &&
             first.strand == '+' && strand == '-');
   }
@@ -85,39 +97,48 @@ struct MSite {
   /////  CpG within. Also included is a function that tests if a site
   /////  has a mutation.
   ////////////////////////////////////////////////////////////////////////
-  bool is_cpg() const {
+  bool
+  is_cpg() const {
     return context.length() >= 3 &&
-      (context[0] == 'C' && context[1] == 'p' && context[2] == 'G');
+           (context[0] == 'C' && context[1] == 'p' && context[2] == 'G');
   }
-  bool is_chh() const {
+  bool
+  is_chh() const {
     return context.length() >= 3 &&
-      (context[0] == 'C' && context[1] == 'H' && context[2] == 'H');
+           (context[0] == 'C' && context[1] == 'H' && context[2] == 'H');
   }
-  bool is_ccg() const {
+  bool
+  is_ccg() const {
     return context.length() >= 3 &&
-      (context[0] == 'C' && context[1] == 'C' && context[2] == 'G');
+           (context[0] == 'C' && context[1] == 'C' && context[2] == 'G');
   }
-  bool is_cxg() const {
+  bool
+  is_cxg() const {
     return context.length() >= 3 &&
-      (context[0] == 'C' && context[1] == 'X' && context[2] == 'G');
+           (context[0] == 'C' && context[1] == 'X' && context[2] == 'G');
   }
-  bool is_mutated() const {
+  bool
+  is_mutated() const {
     return context.length() == 4 && context[3] == 'x';
   }
 
-  void set_mutated() {
+  void
+  set_mutated() {
     if (!is_mutated())
       context += 'x';
   }
-  void set_unmutated() {
+  void
+  set_unmutated() {
     if (is_mutated())
       context.resize(context.length() - 1);
   }
 
-  std::string tostring() const;
+  std::string
+  tostring() const;
 };
 
-template <class T> T &
+template <class T>
+T &
 operator>>(T &in, MSite &s) {
   std::string line;
   if (getline(in, line))
@@ -125,9 +146,10 @@ operator>>(T &in, MSite &s) {
   return in;
 }
 
-template <class T> T &
+template <class T>
+T &
 operator<<(T &out, const MSite &s) {
-  out << s.tostring(); // seems to be an issue returning this directly
+  out << s.tostring();  // seems to be an issue returning this directly
   return out;
 }
 
@@ -138,15 +160,13 @@ distance(const MSite &a, const MSite &b);
 // file (for the specified stream) that does not precede the specified
 // position given by the (chrom, start_pos) pair.
 void
-find_offset_for_msite(const std::string &chrom,
-                      const size_t start_pos,
+find_offset_for_msite(const std::string &chrom, const size_t start_pos,
                       std::ifstream &site_in);
 
 void
-find_offset_for_msite(const std::unordered_map<std::string, uint32_t> &chrom_order,
-                      const std::string &chr,
-                      const size_t idx,
-                      std::ifstream &site_in);
+find_offset_for_msite(
+  const std::unordered_map<std::string, uint32_t> &chrom_order,
+  const std::string &chr, const size_t idx, std::ifstream &site_in);
 
 // ADS: using the functions below for now for static polymorphism
 // because I don't want to have insertion and extraction operators
@@ -154,9 +174,6 @@ find_offset_for_msite(const std::unordered_map<std::string, uint32_t> &chrom_ord
 // if we are going to use them, we need to be aware of which functions
 // we are calling, and not just do it unconsciously.
 
-#include <bamxx.hpp>
-#include <sstream>
-
 inline bamxx::bgzf_file &
 write_site(bamxx::bgzf_file &f, const MSite &s) {
   // ADS: to slow??
diff --git a/src/common/bam_record_utils.cpp b/src/common/bam_record_utils.cpp
index ec92f811..69f041cd 100644
--- a/src/common/bam_record_utils.cpp
+++ b/src/common/bam_record_utils.cpp
@@ -980,7 +980,8 @@ to_string(const bam_header &hdr, const bam_rec &aln) {
   if (ret < 0) {
     throw runtime_error("Can't format record: " + to_string(hdr, aln));
   }
+  const std::string s = string(ks.s);
   if (ks.s != nullptr)
     free(ks.s);
-  return string(ks.s);
+  return s;
 }
diff --git a/src/dnmtools.cpp b/src/dnmtools.cpp
index 5ba90743..ff5881d7 100644
--- a/src/dnmtools.cpp
+++ b/src/dnmtools.cpp
@@ -24,8 +24,8 @@
 #include <vector>
 
 using std::begin;
-using std::end;
 using std::cout;
+using std::end;
 using std::endl;
 using std::pair;
 using std::string;
@@ -39,7 +39,8 @@ struct dnmtools_command {
   string description;
   std::function<int(const int, const char **)> fun;
 
-  auto operator()(const int argc, const char **argv) const -> int {
+  auto
+  operator()(const int argc, const char **argv) const -> int {
     return fun(argc - 1, argv + 1);
   }
 };
@@ -64,6 +65,8 @@ simreads(int argc, const char **argv);
 int
 main_counts(int argc, const char **argv);
 int
+main_nanocount(int argc, const char **argv);
+int
 main_allelicmeth(int argc, const char **argv);
 int
 main_amrfinder(int argc, const char **argv);
@@ -145,7 +148,8 @@ print_help(
        << "Commands:" << endl;
   for (auto &&g : command_groups) {
     cout << "  " << g.first << ":" << endl;
-    for (auto &&c : g.second) cout << c << endl;
+    for (auto &&c : g.second)
+      cout << c << endl;
     cout << endl;
   }
 }
@@ -154,7 +158,7 @@ int
 main(int argc, const char **argv) {
   try {
     vector<pair<string, vector<dnmtools_command>>> command_groups = {
-// clang-format off
+      // clang-format off
 {{"mapping",
  {{{"abismal",    "map FASTQ reads to a FASTA reference genome or an index", abismal},
    {"abismalidx", "convert a FASTA reference genome to an abismal index",    abismalidx},
@@ -165,6 +169,7 @@ main(int argc, const char **argv) {
    {"uniq",      "remove duplicate reads from sorted mapped reads",           main_uniq},
    {"bsrate",    "compute the BS conversion rate from BS-seq reads mapped to a genome",  main_bsrate},
    {"counts",    "get methylation levels from mapped WGBS reads",             main_counts},
+   {"counts-nano", "get methylation levels from mapped nanopore reads",       main_nanocount},
    {"sym",       "get CpG sites and make methylation levels symmetric",       main_symmetric_cpgs},
    {"levels",    "compute methylation summary statistics from a counts file", main_levels}}}},
 
@@ -208,7 +213,7 @@ main(int argc, const char **argv) {
    {"unxcounts",     "reverse the xcounts process yielding a counts file", main_unxcounts},
    {"selectsites",   "sites inside a set of genomic intervals", main_selectsites},
    {"kmersites",     "make track file for sites matching kmer", kmersites}}}}}};
-// clang-format on
+    // clang-format on
 
     if (argc < 2) {
       print_help(command_groups);
@@ -221,11 +226,11 @@ main(int argc, const char **argv) {
 
     for (auto &g : command_groups) {
       const auto the_cmd = find_if(begin(g.second), end(g.second), has_tag);
-      if (the_cmd != end(g.second)) return (*the_cmd)(argc, argv);
+      if (the_cmd != end(g.second))
+        return (*the_cmd)(argc, argv);
     }
 
     std::cerr << "ERROR: invalid command " << argv[1] << std::endl;
-
   }
   catch (const std::exception &e) {
     std::cerr << "ERROR:\t" << e.what() << endl;
diff --git a/src/utils/symmetric-cpgs.cpp b/src/utils/symmetric-cpgs.cpp
index b1d0cd9f..5b7b9620 100644
--- a/src/utils/symmetric-cpgs.cpp
+++ b/src/utils/symmetric-cpgs.cpp
@@ -1,8 +1,7 @@
 /* symmetric-cpgs: extract the CpG sites from a methcounts output
  * file and produce a new one with the CpGs treated unstranded.
  *
- * Copyright (C) 2014 University of Southern California and
- *                    Andrew D. Smith
+ * Copyright (C) 2014-2025 Andrew D. Smith
  *
  * Authors: Andrew D. Smith
  *
@@ -56,7 +55,8 @@ ensure_positive_cpg(MSite &s) {
   s.strand = '+';
 }
 
-template<class T> static std::tuple<MSite, bool>
+template <class T>
+static std::tuple<MSite, bool>
 get_first_site(T &in, T &out) {
   bool prev_is_cpg = false;
   MSite prev_site;
@@ -68,14 +68,16 @@ get_first_site(T &in, T &out) {
     }
     else {
       prev_site.initialize(line.data(), line.data() + size(line));
-      if (prev_site.is_cpg()) prev_is_cpg = true;
+      if (prev_site.is_cpg())
+        prev_is_cpg = true;
       within_header = false;
     }
   }
   return {prev_site, prev_is_cpg};
 }
 
-template<class T> static bool
+template <class T>
+static bool
 process_sites(const bool verbose, T &in, T &out) {
 
   // get the first site while dealing with the header
@@ -90,7 +92,8 @@ process_sites(const bool verbose, T &in, T &out) {
   while (read_site(in, curr_site)) {
     const bool same_chrom = prev_site.chrom == curr_site.chrom;
     if (same_chrom) {
-      if (curr_site.pos <= prev_site.pos) return false;
+      if (curr_site.pos <= prev_site.pos)
+        return false;
       if (prev_is_cpg) {
         if (curr_site.is_mate_of(prev_site))
           curr_site.add(prev_site);
@@ -103,7 +106,8 @@ process_sites(const bool verbose, T &in, T &out) {
     else {
       if (verbose)
         cerr << "processing: " << curr_site.chrom << endl;
-      if (chroms_seen.find(curr_site.chrom) != cend(chroms_seen)) return false;
+      if (chroms_seen.find(curr_site.chrom) != cend(chroms_seen))
+        return false;
       chroms_seen.insert(curr_site.chrom);
 
       if (prev_is_cpg) {
@@ -129,6 +133,7 @@ main_symmetric_cpgs(int argc, const char **argv) {
     string outfile{"-"};
     bool verbose = false;
     bool compress_output = false;
+    bool allow_extra_fields = false;
     int32_t n_threads = 1;
 
     const string description =
@@ -141,6 +146,8 @@ main_symmetric_cpgs(int argc, const char **argv) {
                       outfile);
     opt_parse.add_opt("threads", 't', "number of threads", false, n_threads);
     opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output);
+    opt_parse.add_opt("relaxed", '\0', "allow extra fields in input", false,
+                      allow_extra_fields);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
     std::vector<string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
@@ -164,20 +171,26 @@ main_symmetric_cpgs(int argc, const char **argv) {
     const string filename(leftover_args.front());
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    if (n_threads <= 0) throw runtime_error("threads must be positive");
+    MSite::no_extra_fields = (allow_extra_fields == false);
+
+    if (n_threads <= 0)
+      throw runtime_error("threads must be positive");
     bamxx::bam_tpool tp(n_threads);
 
     // const bool show_progress = VERBOSE && isatty(fileno(stderr));
     bgzf_file in(filename, "r");
-    if (!in) throw runtime_error("could not open file: " + filename);
+    if (!in)
+      throw runtime_error("could not open file: " + filename);
 
     // open the output file
     const string output_mode = compress_output ? "w" : "wu";
     bamxx::bgzf_file out(outfile, output_mode);
-    if (!out) throw runtime_error("error opening output file: " + outfile);
+    if (!out)
+      throw runtime_error("error opening output file: " + outfile);
 
     if (n_threads > 1) {
-      if (in.is_bgzf()) tp.set_io(in);
+      if (in.is_bgzf())
+        tp.set_io(in);
       tp.set_io(out);
     }
 
@@ -187,7 +200,8 @@ main_symmetric_cpgs(int argc, const char **argv) {
       cerr << "sites are not sorted in: " << filename << endl;
       namespace fs = std::filesystem;
       const fs::path outpath{outfile};
-      if (fs::exists(outpath)) fs::remove(outpath);
+      if (fs::exists(outpath))
+        fs::remove(outpath);
       return EXIT_FAILURE;
     }
   }