From cef65e53a64906b53cbfb0521563d6e539df06bf Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Tue, 29 Jul 2025 18:47:57 -0700 Subject: [PATCH 1/2] Updating coding style in several sources --- src/utils/kmersites.cpp | 198 +++++++++++-------- src/utils/recovered.cpp | 290 +++++++++++++-------------- src/utils/uniq.cpp | 311 ++++++++++++++--------------- src/utils/unxcounts.cpp | 422 ++++++++++++++++++++-------------------- 4 files changed, 624 insertions(+), 597 deletions(-) diff --git a/src/utils/kmersites.cpp b/src/utils/kmersites.cpp index 19ab4fc4..06014369 100644 --- a/src/utils/kmersites.cpp +++ b/src/utils/kmersites.cpp @@ -1,6 +1,6 @@ -/* kmersites: a program to generate a wiggle format file (using the - * UCSC Genome Browser wiggle format) to indicate the location of - * sites matching a specific k-mer +/* kmersites: a program to generate a wiggle format file (using the UCSC + * Genome Browser wiggle format) to indicate the location of sites matching a + * specific k-mer * * Copyright (C) 2023 Andrew D. Smith * @@ -17,36 +17,26 @@ * General Public License for more details. */ -#include -#include +#include +#include // for [u]int[0-9]+_t +#include +#include #include +#include #include #include -#include // for [u]int[0-9]+_t -#include -#include -#include +#include +#include #include "OptionParser.hpp" -#include "dnmt_error.hpp" #include "smithlab_os.hpp" #include -namespace fs = std::filesystem; - -using bamxx::bgzf_file; - -using std::string; -using std::vector; -using std::cerr; -using std::endl; -using std::to_string; - static inline auto -process_chrom_wig(const string &kmer, const int offset, const string &name, - const string &chrom, bgzf_file &out) -> void { - +process_chrom_wig(const std::string &kmer, const int offset, + const std::string &name, const std::string &chrom, + bamxx::bgzf_file &out) -> void { static const auto variable_step_chrom_header = "variableStep chrom="; out.write(variable_step_chrom_header + name + "\n"); @@ -54,138 +44,176 @@ process_chrom_wig(const string &kmer, const int offset, const string &name, const auto kmer_size = size(kmer); const auto chrom_size = size(chrom); if (kmer_size > chrom_size) - throw dnmt_error("kmer size " + to_string(kmer_size) + - " larger than chrom size " + to_string(chrom_size)); + throw std::runtime_error("kmer size " + std::to_string(kmer_size) + + " larger than chrom size " + + std::to_string(chrom_size)); - const auto beg_kmer = cbegin(kmer); - const auto end_kmer = cend(kmer); + const auto beg_kmer = std::cbegin(kmer); + const auto end_kmer = std::cend(kmer); - const auto end_chrom = cend(chrom); - auto chrom_itr = cbegin(chrom); + const auto end_chrom = std::cend(chrom); + auto chrom_itr = std::cbegin(chrom); auto chrom_itr_k = chrom_itr + kmer_size; auto pos = 0; while (chrom_itr_k != end_chrom) { if (std::equal(beg_kmer, end_kmer, chrom_itr++, chrom_itr_k++)) - out.write(to_string(pos + offset) + "\t1\n"); + out.write(std::to_string(pos + offset) + "\t1\n"); ++pos; } } +[[nodiscard]] static auto +read_fasta_file(const std::string &filename) + -> std::tuple, std::vector> { + + std::ifstream in(filename); + if (!in) + throw std::runtime_error("cannot open input file " + filename); + + std::vector names; + std::vector sequences; + + std::string line; + while (std::getline(in, line)) { + if (line[0] == '>') { + const auto first_space = line.find_first_of(" \t", 1); + if (first_space == std::string::npos) + names.push_back(line.substr(1)); + else + names.push_back(line.substr(1, first_space - 1)); + sequences.emplace_back(); + } + else + sequences.back() += line; + } + return {names, sequences}; +} + static inline auto -process_chrom_with_named_lines(const string &kmer, const int offset, - const string &name, const string &chrom, - bgzf_file &out) -> void { +process_chrom_with_named_lines(const std::string &kmer, const int offset, + const std::string &name, + const std::string &chrom, + bamxx::bgzf_file &out) { const auto kmer_size = size(kmer); const auto chrom_size = size(chrom); if (kmer_size > chrom_size) - throw dnmt_error("kmer size " + to_string(kmer_size) + - " larger than chrom size " + to_string(chrom_size)); + throw std::runtime_error("kmer size " + std::to_string(kmer_size) + + " larger than chrom size " + + std::to_string(chrom_size)); - const auto beg_kmer = cbegin(kmer); - const auto end_kmer = cend(kmer); + const auto beg_kmer = std::cbegin(kmer); + const auto end_kmer = std::cend(kmer); - const auto end_chrom = cend(chrom); - auto chrom_itr = cbegin(chrom); + const auto end_chrom = std::cend(chrom); + auto chrom_itr = std::cbegin(chrom); auto chrom_itr_k = chrom_itr + kmer_size; auto pos = 0; while (chrom_itr_k != end_chrom) { if (std::equal(beg_kmer, end_kmer, chrom_itr++, chrom_itr_k++)) - out.write(name + "\t" + to_string(pos + offset) + "\t1\n"); + out.write(name + "\t" + std::to_string(pos + offset) + "\t1\n"); ++pos; } } +[[nodiscard]] static inline auto +bad_dna_kmer(const std::string &kmer) -> bool { + const auto x = + std::find_if(std::cbegin(kmer), std::cend(kmer), [](const auto c) { + return c != 'A' && c != 'C' && c != 'G' && c != 'T'; + }); + return x != std::cend(kmer); +} + auto kmersites(const int argc, char *argv[]) -> int { try { - bool verbose = false; - bool show_progress = false; - bool compress_output = false; - bool name_each_line = false; + bool verbose{false}; + bool show_progress{false}; + bool compress_output{false}; + bool name_each_line{false}; - string kmer = "CG"; - string outfile; - // int n_threads = 1; + std::string kmer = "CG"; + std::string outfile; int offset = 1; /****************** COMMAND LINE OPTIONS ********************/ - OptionParser opt_parse(fs::path(string(*argv)).filename(), - "get sites matching kmer", + OptionParser opt_parse("dnmtools kmersites", "get sites matching kmer", ""); - // opt_parse.add_opt("threads", 't', "threads to use (few needed)", - // false, n_threads); opt_parse.add_opt("output", 'o', "output file name (default: stdout)", false, outfile); - opt_parse.add_opt("offset", 'O', "offset within kmer to report", - false, offset); + opt_parse.add_opt("offset", 'O', "offset within kmer to report", false, + offset); opt_parse.add_opt("kmer", 'k', "kmer to report", false, kmer); opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output); opt_parse.add_opt("name-each-line", '\0', "name each line with chrom", false, name_each_line); opt_parse.add_opt("progress", '\0', "show progress", false, show_progress); opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); - vector leftover_args; + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (opt_parse.about_requested() || opt_parse.help_requested() || leftover_args.empty()) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << '\n' + << opt_parse.about_message() << '\n'; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - cerr << opt_parse.option_missing_message() << endl; + std::cerr << opt_parse.option_missing_message() << '\n'; return EXIT_SUCCESS; } - const string chroms_file = leftover_args.front(); + const std::string chroms_file = leftover_args.front(); /****************** END COMMAND LINE OPTIONS *****************/ - if (offset < 0) - throw dnmt_error("offset must be non-negative (specified=" + - to_string(offset)); + if (bad_dna_kmer(kmer)) { + std::cerr << "invalid DNA kmer: " << kmer << "\n"; + return EXIT_FAILURE; + } - // if (n_threads < 0) - // throw dnmt_error("thread count cannot be negative"); + if (offset < 0) + throw std::runtime_error("offset must be non-negative (specified=" + + std::to_string(offset) + ")"); std::ostringstream cmd; - copy(argv, argv + argc, std::ostream_iterator(cmd, " ")); + std::copy(argv, argv + argc, std::ostream_iterator(cmd, " ")); // file types from HTSlib use "-" for the filename to go to stdout - if (outfile.empty()) outfile = "-"; + if (outfile.empty()) + outfile = "-"; if (verbose) - cerr << "[input fastq file: " << chroms_file << "]" << endl - << "[output file: " << outfile << "]" << endl - << "[output format: " << (compress_output ? "bgzf" : "text") << "]" - << endl - // << "[threads requested: " << n_threads << "]" << endl - << "[k-mer to report: " << kmer << "]" << endl - << "[command line: \"" << cmd.str() << "\"]" << endl; - - vector names, chroms; - read_fasta_file_short_names(chroms_file, names, chroms); + std::cerr << "[input fastq file: " << chroms_file << "]\n" + << "[output file: " << outfile << "]\n" + << "[output format: " << (compress_output ? "bgzf" : "text") + << "]\n" + << "[k-mer sequence to report: " << kmer << "]\n" + << "[command line: " << cmd.str() << "]\n"; + + auto [names, chroms] = read_fasta_file(chroms_file); for (auto &chrom : chroms) - std::transform(cbegin(chrom), cend(chrom), begin(chrom), + std::transform(std::cbegin(chrom), std::cend(chrom), std::begin(chrom), [](const char c) { return std::toupper(c); }); // open the output file - const auto output_mode = compress_output ? "w" : "wu"; - bamxx::bgzf_file out(outfile, output_mode); - if (!out) throw dnmt_error("error opening output file: " + outfile); - - for (auto i = 0u; i < size(names); ++i) { - if (show_progress) cerr << "processing: " << names[i] << endl; + bamxx::bgzf_file out(outfile, compress_output ? "w" : "wu"); + if (!out) + throw std::runtime_error("error opening output file: " + outfile); + + auto chrom_itr = std::cbegin(chroms); + for (const auto &name : names) { + if (show_progress) + std::cerr << "processing: " << name << '\n'; if (name_each_line) - process_chrom_with_named_lines(kmer, offset, names[i], chroms[i], out); + process_chrom_with_named_lines(kmer, offset, name, *chrom_itr++, out); else - process_chrom_wig(kmer, offset, names[i], chroms[i], out); + process_chrom_wig(kmer, offset, name, *chrom_itr++, out); } } catch (const std::exception &e) { - cerr << e.what() << endl; + std::cerr << e.what() << '\n'; return EXIT_FAILURE; } return EXIT_SUCCESS; diff --git a/src/utils/recovered.cpp b/src/utils/recovered.cpp index 9775f9c4..7e834719 100644 --- a/src/utils/recovered.cpp +++ b/src/utils/recovered.cpp @@ -1,95 +1,86 @@ -/* recovered: for all sites not present in a counts file, add those - * sites as non-covered and with the appropriate context. +/* recovered: for all sites not present in a counts file, add those sites as + * non-covered and with the appropriate context. * * Copyright (C) 2023 Andrew D. Smith * * Authors: Andrew D. Smith * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. */ -#include -#include -#include -#include -#include -#include +#include "MSite.hpp" +#include "bsutils.hpp" +#include "counts_header.hpp" + #include // from smithlab_cpp #include "OptionParser.hpp" -#include "smithlab_utils.hpp" #include "smithlab_os.hpp" -#include "bsutils.hpp" -#include "dnmt_error.hpp" -#include "counts_header.hpp" - -#include "MSite.hpp" - -using std::string; -using std::vector; -using std::cout; -using std::cerr; -using std::endl; -using std::unordered_map; -using std::unordered_set; -using std::pair; -using std::numeric_limits; -using std::runtime_error; - -using bamxx::bgzf_file; +#include "smithlab_utils.hpp" -template using num_lim = std::numeric_limits; +#include +#include +#include +#include +#include +#include +#include static void -verify_chrom_orders(const bool verbose, const uint32_t n_threads, - const string &filename, - const unordered_map &chroms_order) { - bgzf_file in(filename, "r"); - if (!in) throw runtime_error("bad file: " + filename); +verify_chrom_orders( + const bool verbose, const std::uint32_t n_threads, + const std::string &filename, + const std::unordered_map &chroms_order) { + bamxx::bgzf_file in(filename, "r"); + if (!in) + throw std::runtime_error("bad file: " + filename); bamxx::bam_tpool tp(n_threads); // set the threads for the input file decompression if (n_threads > 1 && in.is_bgzf()) tp.set_io(in); - unordered_set chroms_seen; - string line; - string prev_chrom; + std::unordered_set chroms_seen; + std::string line; + std::string prev_chrom; - int32_t prev_idx = -1; + std::int32_t prev_idx = -1; while (getline(in, line)) { - if (is_counts_header_line(line)) continue; + if (is_counts_header_line(line)) + continue; line.resize(line.find_first_of(" \t")); if (line != prev_chrom) { - if (verbose) cerr << "verifying: " << line << endl; + if (verbose) + std::cerr << "verifying: " << line << "\n"; const auto idx_itr = chroms_order.find(line); if (idx_itr == cend(chroms_order)) - throw runtime_error("chrom not found genome file: " + line); + throw std::runtime_error("chrom not found genome file: " + line); const auto idx = idx_itr->second; if (chroms_seen.find(idx) != end(chroms_seen)) - throw runtime_error("chroms out of order in: " + filename); + throw std::runtime_error("chroms out of order in: " + filename); chroms_seen.insert(idx); if (idx < prev_idx) - throw runtime_error("inconsistent chromosome order at: " + line); + throw std::runtime_error("inconsistent chromosome order at: " + line); prev_idx = idx; std::swap(line, prev_chrom); } } - if (verbose) cerr << "chrom orders are consistent" << endl; + if (verbose) + std::cerr << "chrom orders are consistent\n"; } struct quick_buf : public std::ostringstream, @@ -97,20 +88,21 @@ struct quick_buf : public std::ostringstream, // ADS: By user ecatmur on SO; very fast. Seems to work... quick_buf() { // ...but this seems to depend on data layout - static_cast&>(*this).rdbuf(this); + static_cast &>(*this).rdbuf(this); } - void clear() { + void + clear() { // reset buffer pointers (member functions) setp(pbase(), pbase()); } - char const* c_str() { + char const * + c_str() { /* between c_str and insertion make sure to clear() */ *pptr() = '\0'; return pbase(); } }; - /* The three functions below here should probably be moved into bsutils.hpp. I am not sure if the DDG function is needed, but it seems like if one considers strand, and the CHH is not symmetric, @@ -118,132 +110,128 @@ struct quick_buf : public std::ostringstream, because he spent much time thinking about it in the context of plants. */ static inline bool -is_chh(const std::string &s, size_t i) { - return (i < (s.length() - 2)) && - is_cytosine(s[i]) && - !is_guanine(s[i + 1]) && - !is_guanine(s[i + 2]); +is_chh(const std::string &s, std::size_t i) { + return (i < (s.length() - 2)) && is_cytosine(s[i]) && !is_guanine(s[i + 1]) && + !is_guanine(s[i + 2]); } - static inline bool -is_ddg(const std::string &s, size_t i) { - return (i < (s.length() - 2)) && - !is_cytosine(s[i]) && - !is_cytosine(s[i + 1]) && - is_guanine(s[i + 2]); +is_ddg(const std::string &s, std::size_t i) { + return (i < (s.length() - 2)) && !is_cytosine(s[i]) && + !is_cytosine(s[i + 1]) && is_guanine(s[i + 2]); } - static inline bool -is_c_at_g(const std::string &s, size_t i) { - return (i < (s.length() - 2)) && - is_cytosine(s[i]) && - !is_cytosine(s[i + 1]) && - !is_guanine(s[i + 1]) && - is_guanine(s[i + 2]); +is_c_at_g(const std::string &s, std::size_t i) { + return (i < (s.length() - 2)) && is_cytosine(s[i]) && + !is_cytosine(s[i + 1]) && !is_guanine(s[i + 1]) && + is_guanine(s[i + 2]); } /* The "tag" returned by this function should be exclusive, so that * the order of checking conditions doesn't matter. There is also a * bit of a hack in that the unsigned "pos" could wrap, but this still * works as long as the chromosome size is not the maximum size of a - * size_t. + * std::size_t. */ -static inline uint32_t -get_tag_from_genome_c(const string &s, const size_t pos) { - if (is_cpg(s, pos)) return 0; - else if (is_chh(s, pos)) return 1; - else if (is_c_at_g(s, pos)) return 2; +static inline std::uint32_t +get_tag_from_genome_c(const std::string &s, const std::size_t pos) { + if (is_cpg(s, pos)) + return 0; + else if (is_chh(s, pos)) + return 1; + else if (is_c_at_g(s, pos)) + return 2; return 3; } -static inline uint32_t -get_tag_from_genome_g(const string &s, const size_t pos) { - if (is_cpg(s, pos - 1)) return 0; - else if (is_ddg(s, pos - 2)) return 1; - else if (is_c_at_g(s, pos - 2)) return 2; +static inline std::uint32_t +get_tag_from_genome_g(const std::string &s, const std::size_t pos) { + if (is_cpg(s, pos - 1)) + return 0; + else if (is_ddg(s, pos - 2)) + return 1; + else if (is_c_at_g(s, pos - 2)) + return 2; return 3; } static const char *tag_values[] = { - "CpG", // 0 - "CHH", // 1 - "CXG", // 2 - "CCG", // 3 - "N" // 4 + "CpG", // 0 + "CHH", // 1 + "CXG", // 2 + "CCG", // 3 + "N" // 4 }; static void -write_missing_sites(const string &name, const string &chrom, - const uint64_t start_pos, const uint64_t end_pos, - bgzf_file &out) { - const string name_tab = name + "\t"; +write_missing_sites(const std::string &name, const std::string &chrom, + const std::uint64_t start_pos, const std::uint64_t end_pos, + bamxx::bgzf_file &out) { + const std::string name_tab = name + "\t"; quick_buf buf; for (auto pos = start_pos; pos < end_pos; ++pos) { const char base = chrom[pos]; if (is_cytosine(base) || is_guanine(base)) { const bool is_c = is_cytosine(base); - const uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos) - : get_tag_from_genome_g(chrom, pos); + const std::uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos) + : get_tag_from_genome_g(chrom, pos); buf.clear(); - buf << name_tab << pos - << (is_c ? "\t+\t" : "\t-\t") - << tag_values[the_tag] - << "\t0\t0\n"; + buf << name_tab << pos << (is_c ? "\t+\t" : "\t-\t") + << tag_values[the_tag] << "\t0\t0\n"; if (!out.write(buf.c_str(), buf.tellp())) - throw dnmt_error("error writing output"); + throw std::runtime_error("error writing output"); } } } static void -write_current_site(const MSite &site, bgzf_file &out) { - quick_buf buf; // keep underlying buffer space? +write_current_site(const MSite &site, bamxx::bgzf_file &out) { + quick_buf buf; // keep underlying buffer space? buf << site << '\n'; if (!out.write(buf.c_str(), buf.tellp())) - throw dnmt_error("error writing site: " + site.tostring()); + throw std::runtime_error("error writing site: " + site.tostring()); } -typedef vector::const_iterator chrom_itr_t; +typedef std::vector::const_iterator chrom_itr_t; static chrom_itr_t -get_chrom(const unordered_map &chrom_lookup, - const string &chrom_name) { +get_chrom(const std::unordered_map &chrom_lookup, + const std::string &chrom_name) { const auto chrom_idx = chrom_lookup.find(chrom_name); if (chrom_idx == cend(chrom_lookup)) - throw dnmt_error("chromosome not found: " + chrom_name); + throw std::runtime_error("chromosome not found: " + chrom_name); return chrom_idx->second; } -static int32_t -get_chrom_idx(const unordered_map &name_to_idx, - const string &chrom_name) { +static std::int32_t +get_chrom_idx(const std::unordered_map &name_to_idx, + const std::string &chrom_name) { const auto chrom_idx = name_to_idx.find(chrom_name); if (chrom_idx == cend(name_to_idx)) - throw dnmt_error("chromosome not found: " + chrom_name); + throw std::runtime_error("chromosome not found: " + chrom_name); return chrom_idx->second; } static void process_sites(const bool verbose, const bool add_missing_chroms, - const bool compress_output, const size_t n_threads, - const string &infile, const string &outfile, - const string &chroms_file) { + const bool compress_output, const std::size_t n_threads, + const std::string &infile, const std::string &outfile, + const std::string &chroms_file) { // first get the chromosome names and sequences from the FASTA file - vector chroms, names; + std::vector chroms, names; read_fasta_file_short_names(chroms_file, names, chroms); for (auto &i : chroms) transform(cbegin(i), cend(i), begin(i), [](const char c) { return std::toupper(c); }); if (verbose) - cerr << "[n chroms in reference: " << chroms.size() << "]" << endl; + std::cerr << "[n chroms in reference: " << chroms.size() << "]\n"; - unordered_map chrom_lookup; - unordered_map name_to_idx; - vector chrom_sizes(size(chroms), 0); - for (size_t i = 0; i < size(chroms); ++i) { + std::unordered_map chrom_lookup; + std::unordered_map name_to_idx; + std::vector chrom_sizes(size(chroms), 0); + for (std::size_t i = 0; i < size(chroms); ++i) { chrom_lookup[names[i]] = cbegin(chroms) + i; name_to_idx[names[i]] = i; chrom_sizes[i] = size(chroms[i]); @@ -255,11 +243,13 @@ process_sites(const bool verbose, const bool add_missing_chroms, bamxx::bam_tpool tp(n_threads); bamxx::bgzf_file in(infile, "r"); - if (!in) throw dnmt_error("failed to open input file"); + if (!in) + throw std::runtime_error("failed to open input file"); - const string output_mode = compress_output ? "w" : "wu"; - bgzf_file out(outfile, output_mode); - if (!out) throw dnmt_error("error opening output file: " + outfile); + const std::string output_mode = compress_output ? "w" : "wu"; + bamxx::bgzf_file out(outfile, output_mode); + if (!out) + throw std::runtime_error("error opening output file: " + outfile); // set the threads for the input file decompression if (n_threads > 1) { @@ -268,14 +258,14 @@ process_sites(const bool verbose, const bool add_missing_chroms, } MSite site; - string chrom_name; - int32_t prev_chrom_idx = -1; - uint64_t pos = num_lim::max(); + std::string chrom_name; + std::int32_t prev_chrom_idx = -1; + std::uint64_t pos = std::numeric_limits::max(); // ADS: this is probably a poor strategy since we already would know // the index of the chrom sequence in the vector. chrom_itr_t chrom_itr; - string line; + std::string line; while (getline(in, line)) { if (is_counts_header_line(line)) { @@ -285,15 +275,15 @@ process_sites(const bool verbose, const bool add_missing_chroms, site.initialize(line.data(), line.data() + size(line)); if (site.chrom != chrom_name) { - if (pos != num_lim::max()) + if (pos != std::numeric_limits::max()) write_missing_sites(chrom_name, *chrom_itr, pos, size(*chrom_itr), out); - const int32_t chrom_idx = get_chrom_idx(name_to_idx, site.chrom); + const std::int32_t chrom_idx = get_chrom_idx(name_to_idx, site.chrom); if (add_missing_chroms) for (auto i = prev_chrom_idx + 1; i < chrom_idx; ++i) { if (verbose) - cerr << "processing: " << names[i] << " (missing)" << endl; + std::cerr << "processing: " << names[i] << " (missing)\n"; write_missing_sites(names[i], chroms[i], 0u, size(chroms[i]), out); } @@ -302,7 +292,7 @@ process_sites(const bool verbose, const bool add_missing_chroms, pos = 0; prev_chrom_idx = chrom_idx; if (verbose) - cerr << "processing: " << chrom_name << endl; + std::cerr << "processing: " << chrom_name << "\n"; } if (pos < site.pos) write_missing_sites(chrom_name, *chrom_itr, pos, site.pos, out); @@ -312,16 +302,15 @@ process_sites(const bool verbose, const bool add_missing_chroms, write_missing_sites(chrom_name, *chrom_itr, pos, size(*chrom_itr), out); if (add_missing_chroms) { - const int32_t chrom_idx = size(chroms); + const std::int32_t chrom_idx = size(chroms); for (auto i = prev_chrom_idx + 1; i < chrom_idx; ++i) { if (verbose) - cerr << "processing: " << names[i] << " (missing)" << endl; + std::cerr << "processing: " << names[i] << " (missing)\n"; write_missing_sites(names[i], chroms[i], 0u, size(chroms[i]), out); } } } - int main_recovered(int argc, char *argv[]) { try { @@ -329,50 +318,51 @@ main_recovered(int argc, char *argv[]) { bool verbose = false; bool add_missing_chroms = false; bool compress_output = false; - size_t n_threads = 1; + std::size_t n_threads = 1; - string outfile; - string chroms_file; - const string description = + std::string outfile; + std::string chroms_file; + const std::string description = "add sites that are missing as non-covered sites"; /****************** COMMAND LINE OPTIONS ********************/ OptionParser opt_parse(strip_path(argv[0]), description, ""); opt_parse.add_opt("output", 'o', "output file (required)", true, outfile); - opt_parse.add_opt("missing", 'm', "add missing chroms", false, add_missing_chroms); + opt_parse.add_opt("missing", 'm', "add missing chroms", false, + add_missing_chroms); opt_parse.add_opt("threads", 't', "number of threads", false, n_threads); opt_parse.add_opt("chrom", 'c', "reference genome file (FASTA format)", - true , chroms_file); + true, chroms_file); opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output); opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); - std::vector leftover_args; + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << "\n" + << opt_parse.about_message() << "\n"; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { - cerr << opt_parse.about_message() << endl; + std::cerr << opt_parse.about_message() << "\n"; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - cerr << opt_parse.option_missing_message() << endl; + std::cerr << opt_parse.option_missing_message() << "\n"; return EXIT_SUCCESS; } if (leftover_args.size() != 1) { - cerr << opt_parse.help_message() << endl; + std::cerr << opt_parse.help_message() << "\n"; return EXIT_SUCCESS; } - const string filename(leftover_args.front()); + const std::string filename(leftover_args.front()); /****************** END COMMAND LINE OPTIONS *****************/ process_sites(verbose, add_missing_chroms, compress_output, n_threads, filename, outfile, chroms_file); } - catch (const std::runtime_error &e) { - cerr << e.what() << endl; + catch (const std::exception &e) { + std::cerr << e.what() << "\n"; return EXIT_FAILURE; } return EXIT_SUCCESS; diff --git a/src/utils/uniq.cpp b/src/utils/uniq.cpp index 107b0da9..44f11ffb 100644 --- a/src/utils/uniq.cpp +++ b/src/utils/uniq.cpp @@ -1,86 +1,79 @@ -/* uniq: remove duplicate reads from a file of mapped reads in the - * dnmtools format (as output from format_reads), based on identical - * mapping location and alignment to the reference. +/* uniq: remove duplicate reads from a file of mapped reads in the dnmtools + * format (as output from format_reads), based on identical mapping location + * and alignment to the reference. * * Copyright (C) 2013-2023 University of Southern California and * Andrew D. Smith * * Author: Andrew D. Smith * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. */ -#include // for [u]int[0-9]+_t -#include -#include -#include -#include -#include - -// generated by autotools -#include - #include "GenomicRegion.hpp" #include "OptionParser.hpp" +#include "bam_record_utils.hpp" #include "bsutils.hpp" #include "dnmt_error.hpp" #include "smithlab_os.hpp" #include "smithlab_utils.hpp" -#include "bam_record_utils.hpp" -using std::cerr; -using std::endl; -using std::ifstream; -using std::ofstream; -using std::runtime_error; -using std::string; -using std::to_string; -using std::vector; +// generated by autotools +#include -using bamxx::bam_rec; +#include // for [u]int[0-9]+_t +#include +#include +#include +#include +#include namespace uniq_random { - // ADS: I made this namespace and functions because different - // implementations of rand() on different OS meant that even with - // the same seed, the results could be different. This meant testing - // didn't work. - // ADS: (TODO) refactor this - bool initialized = false; - std::default_random_engine e; - std::uniform_int_distribution di; - void initialize(const size_t the_seed) { - e = std::default_random_engine(the_seed); - initialized = true; - } - int rand() { - // ADS: should have same range as ordinary rand() by properties of - // std::uniform_int_distribution default constructor. - // assert(initialized); - return di(e); - } +// ADS: I made this namespace and functions because different implementations +// of rand() on different OS meant that even with the same seed, the results +// could be different. This meant testing didn't work. + +// ADS: (TODO) refactor this +bool initialized{false}; +std::default_random_engine e; +std::uniform_int_distribution di; + +void +initialize(const std::size_t the_seed) { + e = std::default_random_engine(the_seed); + initialized = true; +} + +int +rand() { + // ADS: should have same range as ordinary rand() by properties of + // std::uniform_int_distribution default constructor. + // assert(initialized); + return di(e); +} } // namespace uniq_random struct rd_stats { // keep track of good bases/reads in and out - size_t bases{}; - size_t reads{}; - void update(const bam_rec &b) { + std::size_t bases{}; + std::size_t reads{}; + void + update(const bamxx::bam_rec &b) { bases += get_l_qseq(b); ++reads; } }; - struct uniq_summary { uniq_summary(const rd_stats &rs_in, const rd_stats &rs_out, - const size_t reads_duped) { + const std::size_t reads_duped) { total_reads = rs_in.reads; total_bases = rs_in.bases; unique_reads = rs_out.reads; @@ -94,60 +87,58 @@ struct uniq_summary { } // total_reads is the number of input reads - size_t total_reads{}; + std::size_t total_reads{}; // total_bases is the total number of input bases - size_t total_bases{}; + std::size_t total_bases{}; // unique_reads is the number of unique reads - size_t unique_reads{}; + std::size_t unique_reads{}; // unique_read_bases is the total number of bases for the unique reads - size_t unique_read_bases{}; + std::size_t unique_read_bases{}; // non_duplicate_fraction is the ratio of the number of unique reads with // no duplicates to that of the input reads double non_duplicate_fraction{}; // duplicate_reads is the number of unique reads with at least one duplicate - size_t duplicate_reads{}; + std::size_t duplicate_reads{}; // reads_removed is the number of duplicate reads that have been removed - size_t reads_removed{}; + std::size_t reads_removed{}; // duplication_rate is the average number of duplicates for the reads with // at least one duplicate (>1 by definition) double duplication_rate{}; - string tostring() { + std::string + to_string() { std::ostringstream oss; - oss << "total_reads: " << total_reads << endl - << "total_bases: " << total_bases << endl - << "unique_reads: " << unique_reads << endl - << "unique_read_bases: " << unique_read_bases << endl - << "non_duplicate_fraction: " << non_duplicate_fraction << endl - << "duplicate_reads: " << duplicate_reads << endl - << "reads_removed: " << reads_removed << endl + oss << "total_reads: " << total_reads << "\n" + << "total_bases: " << total_bases << "\n" + << "unique_reads: " << unique_reads << "\n" + << "unique_read_bases: " << unique_read_bases << "\n" + << "non_duplicate_fraction: " << non_duplicate_fraction << "\n" + << "duplicate_reads: " << duplicate_reads << "\n" + << "reads_removed: " << reads_removed << "\n" << "duplication_rate: " << duplication_rate; - return oss.str(); } }; - - static void write_stats_output(const rd_stats &rs_in, const rd_stats &rs_out, - const size_t reads_duped, const string &statfile) { - if (!statfile.empty()) { - uniq_summary summary(rs_in, rs_out, reads_duped); - ofstream out_stat(statfile); - if (!out_stat) throw runtime_error("bad stats output file"); - out_stat << summary.tostring() << endl; - } + const std::size_t reads_duped, const std::string &statfile) { + uniq_summary summary(rs_in, rs_out, reads_duped); + std::ofstream out_stat(statfile); + if (!out_stat) + throw std::runtime_error("bad stats output file"); + out_stat << summary.to_string() << "\n"; } static void -write_hist_output(const vector &hist, const string &histfile) { - if (!histfile.empty()) { - ofstream out_hist(histfile); - if (!out_hist) throw runtime_error("bad hist output file"); - for (size_t i = 0; i < hist.size(); ++i) - if (hist[i] > 0) out_hist << i << '\t' << hist[i] << '\n'; - } +write_hist_output(const std::vector &hist, + const std::string &histfile) { + std::ofstream out_hist(histfile); + if (!out_hist) + throw std::runtime_error("bad hist output file"); + for (std::size_t i = 0; i < std::size(hist); ++i) + if (hist[i] > 0) + out_hist << i << '\t' << hist[i] << '\n'; } /* The "inner" buffer corresponds to all reads sharing chrom, start, @@ -155,22 +146,25 @@ write_hist_output(const vector &hist, const string &histfile) { that shares the same end and strand. */ static void process_inner_buffer(const bool add_dup_count, - const vector::iterator it, - const vector::iterator jt, bamxx::bam_header &hdr, - bamxx::bam_out &out, rd_stats &rs_out, size_t &reads_duped, - vector &hist) { + const std::vector::iterator it, + const std::vector::iterator jt, + bamxx::bam_header &hdr, bamxx::bam_out &out, + rd_stats &rs_out, std::size_t &reads_duped, + std::vector &hist) { constexpr char du_tag[2] = {'D', 'U'}; - const size_t n_reads = std::distance(it, jt); - const size_t selected = uniq_random::rand() % n_reads; + const std::size_t n_reads = std::distance(it, jt); + const std::size_t selected = uniq_random::rand() % n_reads; if (add_dup_count) { const int ret = bam_aux_update_int(*(it + selected), du_tag, n_reads); - if (ret < 0) throw dnmt_error("error adding duplicate count aux field"); + if (ret < 0) + throw dnmt_error("error adding duplicate count aux field"); } if (!out.write(hdr, *(it + selected))) - throw runtime_error("failed writing bam record"); - if (hist.size() <= n_reads) hist.resize(n_reads + 1); + throw std::runtime_error("failed writing bam record"); + if (hist.size() <= n_reads) + hist.resize(n_reads + 1); hist[n_reads]++; rs_out.update(*(it + selected)); reads_duped += (n_reads > 1); @@ -179,13 +173,14 @@ process_inner_buffer(const bool add_dup_count, /* The buffer corresponds to reads sharing the same mapping chromosome and start position. These are gathered and then processed together. */ static void -process_buffer(const bool add_dup_count, rd_stats &rs_out, size_t &reads_duped, - vector &hist, vector &buffer, bamxx::bam_header &hdr, +process_buffer(const bool add_dup_count, rd_stats &rs_out, + std::size_t &reads_duped, std::vector &hist, + std::vector &buffer, bamxx::bam_header &hdr, bamxx::bam_out &out) { - sort(begin(buffer), end(buffer), precedes_by_end_and_strand); - auto it(begin(buffer)); + std::sort(std::begin(buffer), std::end(buffer), precedes_by_end_and_strand); + auto it = std::begin(buffer); auto jt = it + 1; - for (; jt != end(buffer); ++jt) + for (; jt != std::end(buffer); ++jt) if (!equivalent_end_and_strand(*it, *jt)) { process_inner_buffer(add_dup_count, it, jt, hdr, out, rs_out, reads_duped, hist); @@ -197,28 +192,33 @@ process_buffer(const bool add_dup_count, rd_stats &rs_out, size_t &reads_duped, } static void -uniq(const bool add_dup_count, const uint32_t max_buffer_size, - const size_t n_threads, const string &cmd, const string &infile, - const string &statfile, const string &histfile, const bool bam_format, - const string &outfile) { +uniq(const bool add_dup_count, const std::uint32_t max_buffer_size, + const std::size_t n_threads, const std::string &cmd, + const std::string &infile, const std::string &statfile, + const std::string &histfile, const bool bam_format, + const std::string &outfile) { // values to tabulate stats; no real cost rd_stats rs_in, rs_out; - size_t reads_duped = 0; - vector hist; + std::size_t reads_duped = 0; + std::vector hist; bamxx::bam_tpool tpool(n_threads); // outer scope: must be destroyed last bamxx::bam_in hts(infile); - if (!hts) throw dnmt_error("failed to open input file: " + infile); + if (!hts) + throw dnmt_error("failed to open input file: " + infile); bamxx::bam_header hdr(hts); - if (!hdr) throw dnmt_error("failed to read header"); + if (!hdr) + throw dnmt_error("failed to read header"); bamxx::bam_out out(outfile, bam_format); { bamxx::bam_header hdr_out(hdr); - if (!hdr_out) throw dnmt_error("failed create header"); + if (!hdr_out) + throw dnmt_error("failed create header"); hdr_out.add_pg_line(cmd, "DNMTOOLS", VERSION); - if (!out.write(hdr_out)) throw dnmt_error("failed to write header"); + if (!out.write(hdr_out)) + throw dnmt_error("failed to write header"); } if (n_threads > 1) { @@ -226,22 +226,22 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size, tpool.set_io(out); } - bam_rec aln; + bamxx::bam_rec aln; bool found_mapped_read{false}; // valid SAM/BAM can have 0 reads - while (!found_mapped_read && hts.read(hdr, aln)) { + while (!found_mapped_read && hts.read(hdr, aln)) // ADS: skip reads that have no tid -- they are not mapped if (get_tid(aln) != -1) found_mapped_read = true; - } if (found_mapped_read) { rs_in.update(aln); // update stats for input we just got - vector buffer(1, aln); // select output from this buffer + // select output from this buffer + std::vector buffer(1, aln); // to check that reads are sorted properly - vector chroms_seen(get_n_targets(hdr), false); - int32_t cur_chrom = get_tid(aln); + std::vector chroms_seen(get_n_targets(hdr), false); + std::int32_t cur_chrom = get_tid(aln); while (hts.read(hdr, aln)) { // ADS: skip reads that have no tid -- they are not mapped @@ -251,12 +251,13 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size, // below works because buffer reset at every new chrom if (precedes_by_start(aln, buffer[0])) - throw runtime_error("not sorted: " + get_qname(buffer[0]) + " " + - get_qname(aln)); + throw std::runtime_error("not sorted: " + get_qname(buffer[0]) + " " + + get_qname(aln)); - const int32_t chrom = get_tid(aln); + const std::int32_t chrom = get_tid(aln); if (chrom != cur_chrom) { - if (chroms_seen[chrom]) throw runtime_error("input not sorted"); + if (chroms_seen[chrom]) + throw std::runtime_error("input not sorted"); chroms_seen[chrom] = true; cur_chrom = chrom; } @@ -271,15 +272,18 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size, } process_buffer(add_dup_count, rs_out, reads_duped, hist, buffer, hdr, out); } + // write any additional output requested - write_stats_output(rs_in, rs_out, reads_duped, statfile); - write_hist_output(hist, histfile); + if (!statfile.empty()) + write_stats_output(rs_in, rs_out, reads_duped, statfile); + if (!histfile.empty()) + write_hist_output(hist, histfile); } int main_uniq(int argc, char *argv[]) { try { - uint32_t max_buffer_size = std::numeric_limits::max(); + std::uint32_t max_buffer_size = std::numeric_limits::max(); bool VERBOSE = false; bool bam_format = false; @@ -288,19 +292,20 @@ main_uniq(int argc, char *argv[]) { // ADS: Not recommended to change this seed. It shouldn't matter // at all, and we want results to behave as deterministic. - size_t the_seed = 408; - string outfile; - string statfile; - string histfile; - size_t n_threads = 1; + std::size_t the_seed = 408; + std::string outfile; + std::string statfile; + std::string histfile; + std::size_t n_threads = 1; /****************** COMMAND LINE OPTIONS ********************/ - OptionParser opt_parse(strip_path(argv[0]), + OptionParser opt_parse("dnmtools uniq", "program to remove duplicate reads from " "sorted mapped reads", " [out-file]", 2); opt_parse.add_opt("threads", 't', "number of threads", false, n_threads); - opt_parse.add_opt("summary", 'S', "statistics output file", false, statfile); + opt_parse.add_opt("summary", 'S', "statistics output file", false, + statfile); opt_parse.add_opt("add-count", 'a', "add duplicate counts to reads", false, add_dup_count); opt_parse.add_opt("hist", '\0', @@ -311,33 +316,33 @@ main_uniq(int argc, char *argv[]) { opt_parse.add_opt("stdout", '\0', "write to standard output", false, use_stdout); opt_parse.add_opt("seed", 's', "random seed", false, the_seed); - opt_parse.add_opt("max", 'm', "max duplicates to consider", - false, max_buffer_size); + opt_parse.add_opt("max", 'm', "max duplicates to consider", false, + max_buffer_size); opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE); opt_parse.set_show_defaults(); - vector leftover_args; + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (opt_parse.about_requested() || opt_parse.help_requested() || leftover_args.empty()) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << std::endl + << opt_parse.about_message() << "\n"; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - cerr << opt_parse.option_missing_message() << endl; + std::cerr << opt_parse.option_missing_message() << "\n"; return EXIT_SUCCESS; } if ((leftover_args.size() == 1 && !use_stdout) || (leftover_args.size() == 2 && use_stdout)) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << std::endl + << opt_parse.about_message() << "\n"; return EXIT_SUCCESS; } - const string infile(leftover_args.front()); + const std::string infile(leftover_args.front()); if (leftover_args.size() == 2 && !use_stdout) outfile = leftover_args.back(); else - outfile = string("-"); // so htslib can write to stdout + outfile = std::string("-"); // so htslib can write to stdout /****************** END COMMAND LINE OPTIONS *****************/ // ADS: Random here is because we choose randomly when keeping one @@ -345,26 +350,26 @@ main_uniq(int argc, char *argv[]) { uniq_random::initialize(the_seed); std::ostringstream cmd; - copy(argv, argv + argc, std::ostream_iterator(cmd, " ")); + std::copy(argv, argv + argc, std::ostream_iterator(cmd, " ")); if (VERBOSE) - cerr << "[output file: " << outfile << "]" << endl - << "[output format: " << (bam_format ? "B" : "S") << "AM]" << endl - << "[stats file: " << (statfile.empty() ? "none" : statfile) << "]" - << endl - << "[hist file: " << (histfile.empty() ? "none" : histfile) << "]" - << endl - << "[add duplicate count: " << (add_dup_count ? "yes" : "no") << "]" - << endl - << "[threads requested: " << n_threads << "]" << endl - << "[command line: \"" << cmd.str() << "\"]" << endl - << "[random number seed: " << the_seed << "]" << endl; + std::cerr << "[output file: " << outfile << "]\n" + << "[output format: " << (bam_format ? "B" : "S") << "AM]\n" + << "[stats file: " << (statfile.empty() ? "none" : statfile) + << "]\n" + << "[hist file: " << (histfile.empty() ? "none" : histfile) + << "]\n" + << "[add duplicate count: " << (add_dup_count ? "yes" : "no") + << "]\n" + << "[threads requested: " << n_threads << "]\n" + << "[command line: \"" << cmd.str() << "\"]\n" + << "[random number seed: " << the_seed << "]\n"; uniq(add_dup_count, max_buffer_size, n_threads, cmd.str(), infile, statfile, histfile, bam_format, outfile); } - catch (const runtime_error &e) { - cerr << e.what() << endl; + catch (const std::exception &e) { + std::cerr << e.what() << "\n"; return EXIT_FAILURE; } return EXIT_SUCCESS; diff --git a/src/utils/unxcounts.cpp b/src/utils/unxcounts.cpp index d3360d87..6a7bd519 100644 --- a/src/utils/unxcounts.cpp +++ b/src/utils/unxcounts.cpp @@ -16,8 +16,17 @@ * General Public License for more details. */ +#include "MSite.hpp" +#include "bsutils.hpp" +#include "counts_header.hpp" + #include +// from smithlab_cpp +#include "OptionParser.hpp" +#include "smithlab_os.hpp" +#include "smithlab_utils.hpp" + #include #include #include @@ -26,91 +35,67 @@ #include #include -// from smithlab_cpp -#include "MSite.hpp" -#include "OptionParser.hpp" -#include "bsutils.hpp" -#include "counts_header.hpp" -#include "dnmt_error.hpp" -#include "smithlab_os.hpp" -#include "smithlab_utils.hpp" - -using std::cbegin; -using std::cend; -using std::cerr; -using std::copy; -using std::copy_n; -using std::cout; -using std::endl; -using std::from_chars; -using std::numeric_limits; -using std::pair; -using std::runtime_error; -using std::string; -using std::to_chars; -using std::to_string; -using std::unordered_map; -using std::unordered_set; -using std::vector; - -using bamxx::bgzf_file; - -template using num_lim = std::numeric_limits; - static void -read_fasta_file_short_names_uppercase(const string &chroms_file, - vector &names, - vector &chroms) { +read_fasta_file_short_names_uppercase(const std::string &chroms_file, + std::vector &names, + std::vector &chroms) { chroms.clear(); names.clear(); read_fasta_file_short_names(chroms_file, names, chroms); for (auto &i : chroms) - transform(cbegin(i), cend(i), begin(i), + transform(std::cbegin(i), std::cend(i), begin(i), [](const char c) { return std::toupper(c); }); } - static void -verify_chrom_orders(const bool verbose, const uint32_t n_threads, - const string &filename, - const unordered_map &chroms_order) { +verify_chrom_orders( + const bool verbose, const std::uint32_t n_threads, + const std::string &filename, + const std::unordered_map &chroms_order) { bamxx::bam_tpool tp(n_threads); - bgzf_file in(filename, "r"); - if (!in) throw runtime_error("bad file: " + filename); + bamxx::bgzf_file in(filename, "r"); + if (!in) + throw std::runtime_error("bad file: " + filename); // set the threads for the input file decompression - if (n_threads > 1 && in.is_bgzf()) tp.set_io(in); + if (n_threads > 1 && in.is_bgzf()) + tp.set_io(in); - unordered_set chroms_seen; - int32_t prev_id = -1; + std::unordered_set chroms_seen; + std::int32_t prev_id = -1; - kstring_t line{0, 0, nullptr}; + kstring_t line = KS_INITIALIZE; const int ret = ks_resize(&line, 1024); - if (ret) throw runtime_error("failed to acquire buffer"); + if (ret) + throw std::runtime_error("failed to acquire buffer"); while (bamxx::getline(in, line)) { - if (std::isdigit(line.s[0])) continue; - if (is_counts_header_line(line.s)) continue; + if (std::isdigit(line.s[0])) + continue; + if (is_counts_header_line(line.s)) + continue; - string chrom{line.s}; - if (verbose) cerr << "verifying: " << chrom << endl; + std::string chrom{line.s}; + if (verbose) + std::cerr << "verifying: " << chrom << "\n"; const auto idx_itr = chroms_order.find(chrom); - if (idx_itr == cend(chroms_order)) - throw runtime_error("chrom not found genome file: " + chrom); + if (idx_itr == std::cend(chroms_order)) + throw std::runtime_error("chrom not found genome file: " + chrom); const auto idx = idx_itr->second; if (chroms_seen.find(idx) != end(chroms_seen)) - throw runtime_error("chroms out of order in: " + filename); + throw std::runtime_error("chroms out of order in: " + filename); chroms_seen.insert(idx); if (idx < prev_id) - throw runtime_error("inconsistent chromosome order at: " + chrom); + throw std::runtime_error("inconsistent chromosome order at: " + chrom); prev_id = idx; } - if (verbose) cerr << "chrom orders are consistent" << endl; + if (verbose) + std::cerr << "chrom orders are consistent" << "\n"; } static const char *tag_values[] = { @@ -126,7 +111,7 @@ static const int tag_sizes[] = {3, 3, 3, 3, 1}; // ADS: the values below allow for things like CHH where the is a N in // the triplet; I'm allowing that for consistency with the weird logic // from earlier versions. -const uint32_t context_codes[] = { +const std::uint32_t context_codes[] = { /*CAA CHH*/ 1, /*CAC CHH*/ 1, /*CAG CXG*/ 2, @@ -154,23 +139,23 @@ const uint32_t context_codes[] = { /*CNN ---*/ 1 // 4 }; -static inline uint32_t -get_tag_from_genome_c(const string &s, const size_t pos) { +static inline std::uint32_t +get_tag_from_genome_c(const std::string &s, const size_t pos) { const auto val = base2int(s[pos + 1]) * 5 + base2int(s[pos + 2]); return context_codes[val]; } -static inline uint32_t -get_tag_from_genome_g(const string &s, const size_t pos) { +static inline std::uint32_t +get_tag_from_genome_g(const std::string &s, const size_t pos) { const auto val = base2int(complement(s[pos - 1])) * 5 + base2int(complement(s[pos - 2])); return context_codes[val]; } static bool -write_missing(const uint32_t name_size, const string &chrom, - const uint64_t start_pos, const uint64_t end_pos, - vector &buf, bgzf_file &out) { +write_missing(const std::uint32_t name_size, const std::string &chrom, + const std::uint64_t start_pos, const std::uint64_t end_pos, + std::vector &buf, bamxx::bgzf_file &out) { static constexpr auto zeros = "\t0\t0\n"; static constexpr auto pos_strand = "\t+\t"; static constexpr auto neg_strand = "\t-\t"; @@ -181,27 +166,28 @@ write_missing(const uint32_t name_size, const string &chrom, const char base = chrom[pos]; if (is_cytosine(base) || is_guanine(base)) { const bool is_c = is_cytosine(base); - const uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos) - : get_tag_from_genome_g(chrom, pos); + const std::uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos) + : get_tag_from_genome_g(chrom, pos); #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wstringop-overflow=0" - auto [ptr, ec] = to_chars(cursor, buf_end, pos); - ptr = copy_n(is_c ? pos_strand : neg_strand, 3, ptr); - ptr = copy_n(tag_values[the_tag], tag_sizes[the_tag], ptr); - ptr = copy_n(zeros, 5, ptr); + auto [ptr, ec] = std::to_chars(cursor, buf_end, pos); + ptr = std::copy_n(is_c ? pos_strand : neg_strand, 3, ptr); + ptr = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], ptr); + ptr = std::copy_n(zeros, 5, ptr); const auto sz = std::distance(buf.data(), ptr); #pragma GCC diagnostic push - if (bgzf_write(out.f, buf.data(), sz) != sz) return false; + if (bgzf_write(out.f, buf.data(), sz) != sz) + return false; } } return true; } static bool -write_missing_cpg(const uint32_t &name_size, const string &chrom, - const uint64_t start_pos, const uint64_t end_pos, - vector &buf, bgzf_file &out) { +write_missing_cpg(const std::uint32_t &name_size, const std::string &chrom, + const std::uint64_t start_pos, const std::uint64_t end_pos, + std::vector &buf, bamxx::bgzf_file &out) { static constexpr auto zeros = "\t0\t0\n"; static constexpr auto pos_strand = "\t+\t"; const auto buf_end = buf.data() + size(buf); @@ -211,25 +197,27 @@ write_missing_cpg(const uint32_t &name_size, const string &chrom, // When this function is called, the "end_pos" is either the chrom // size or the position of a base known to be a C. So we never // have to allow pos+1 to equal end_pos. - if (is_cytosine(chrom[pos]) && is_guanine(chrom[pos+1])) { + if (is_cytosine(chrom[pos]) && is_guanine(chrom[pos + 1])) { #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wstringop-overflow=0" - auto [ptr, ec] = to_chars(cursor, buf_end, pos); - ptr = copy_n(pos_strand, 3, ptr); - ptr = copy_n("CpG", 3, ptr); - ptr = copy_n(zeros, 5, ptr); + auto [ptr, ec] = std::to_chars(cursor, buf_end, pos); + ptr = std::copy_n(pos_strand, 3, ptr); + ptr = std::copy_n("CpG", 3, ptr); + ptr = std::copy_n(zeros, 5, ptr); const auto sz = std::distance(buf.data(), ptr); #pragma GCC diagnostic push - if (bgzf_write(out.f, buf.data(), sz) != sz) return false; + if (bgzf_write(out.f, buf.data(), sz) != sz) + return false; } } return true; } static bool -write_site(const uint32_t name_size, const string &chrom, const uint32_t pos, - const uint32_t n_meth, const uint32_t n_unmeth, vector &buf, - bgzf_file &out) { +write_site(const std::uint32_t name_size, const std::string &chrom, + const std::uint32_t pos, const std::uint32_t n_meth, + const std::uint32_t n_unmeth, std::vector &buf, + bamxx::bgzf_file &out) { static constexpr auto pos_strand = "\t+\t"; static constexpr auto neg_strand = "\t-\t"; static constexpr auto fmt = std::chars_format::general; @@ -238,8 +226,8 @@ write_site(const uint32_t name_size, const string &chrom, const uint32_t pos, const char base = chrom[pos]; assert(is_cytosine(base) || is_guanine(base)); const bool is_c = is_cytosine(base); - const uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos) - : get_tag_from_genome_g(chrom, pos); + const std::uint32_t the_tag = is_c ? get_tag_from_genome_c(chrom, pos) + : get_tag_from_genome_g(chrom, pos); const auto n_reads = n_meth + n_unmeth; const auto meth = static_cast(n_meth) / std::max(n_reads, 1u); @@ -248,20 +236,20 @@ write_site(const uint32_t name_size, const string &chrom, const uint32_t pos, // chrom name is already in the buffer so move past it auto cursor = buf.data() + name_size + 1; { - auto [ptr, ec] = to_chars(cursor, buf_end, pos); + auto [ptr, ec] = std::to_chars(cursor, buf_end, pos); cursor = ptr; } - cursor = copy_n(is_c ? pos_strand : neg_strand, 3, cursor); - cursor = copy_n(tag_values[the_tag], tag_sizes[the_tag], cursor); + cursor = std::copy_n(is_c ? pos_strand : neg_strand, 3, cursor); + cursor = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], cursor); *cursor++ = '\t'; { - // use default precision, 6, same as cout default - auto [ptr, ec] = to_chars(cursor, buf_end, meth, fmt, 6); + // use default precision, 6, same as std::cout default + auto [ptr, ec] = std::to_chars(cursor, buf_end, meth, fmt, 6); cursor = ptr; } *cursor++ = '\t'; { - auto [ptr, ec] = to_chars(cursor, buf_end, n_reads); + auto [ptr, ec] = std::to_chars(cursor, buf_end, n_reads); cursor = ptr; } *cursor++ = '\n'; @@ -271,81 +259,85 @@ write_site(const uint32_t name_size, const string &chrom, const uint32_t pos, return bgzf_write(out.f, buf.data(), sz) == sz; } -typedef vector::const_iterator chrom_itr_t; +typedef std::vector::const_iterator chrom_itr_t; static chrom_itr_t -get_chrom(const unordered_map &chrom_lookup, - const string &chrom_name) { +get_chrom(const std::unordered_map &chrom_lookup, + const std::string &chrom_name) { const auto chr_id = chrom_lookup.find(chrom_name); - if (chr_id == cend(chrom_lookup)) - throw dnmt_error("chromosome not found: " + chrom_name); + if (chr_id == std::cend(chrom_lookup)) + throw std::runtime_error("chromosome not found: " + chrom_name); return chr_id->second; } -static int32_t -get_chrom_id(const unordered_map &name_to_id, - const string &chrom_name) { +static std::int32_t +get_chrom_id(const std::unordered_map &name_to_id, + const std::string &chrom_name) { const auto chr_id = name_to_id.find(chrom_name); - if (chr_id == cend(name_to_id)) - throw dnmt_error("chromosome not found: " + chrom_name); + if (chr_id == std::cend(name_to_id)) + throw std::runtime_error("chromosome not found: " + chrom_name); return chr_id->second; } static bool -verify_chrom(const string &header_line, - const unordered_map &name_to_id, - const vector &chrom_sizes) { - if (is_counts_header_version_line(header_line)) return true; +verify_chrom(const std::string &header_line, + const std::unordered_map &name_to_id, + const std::vector &chrom_sizes) { + if (is_counts_header_version_line(header_line)) + return true; std::istringstream iss(header_line.substr(1)); - string name; - uint64_t chrom_size = 0; - if (!(iss >> name >> chrom_size)) return false; + std::string name; + std::uint64_t chrom_size = 0; + if (!(iss >> name >> chrom_size)) + return false; const auto idx = name_to_id.find(name); - if (idx == cend(name_to_id)) return false; + if (idx == std::cend(name_to_id)) + return false; return chrom_size == chrom_sizes[idx->second]; } static void -get_lookups(const vector &names, const vector &chroms, - unordered_map &chrom_lookup, - unordered_map &name_to_id, - vector &chrom_sizes) { +get_lookups(const std::vector &names, + const std::vector &chroms, + std::unordered_map &chrom_lookup, + std::unordered_map &name_to_id, + std::vector &chrom_sizes) { chrom_lookup.clear(); name_to_id.clear(); - chrom_sizes = vector(size(chroms), 0); + chrom_sizes = std::vector(size(chroms), 0); for (size_t i = 0; i < size(chroms); ++i) { - chrom_lookup[names[i]] = cbegin(chroms) + i; + chrom_lookup[names[i]] = std::cbegin(chroms) + i; name_to_id[names[i]] = i; chrom_sizes[i] = size(chroms[i]); } } static void -process_header_line(const unordered_map &name_to_id, - const vector &chrom_sizes, const kstring_t &line, - bgzf_file &out) { - string hdr_line{line.s}; +process_header_line( + const std::unordered_map &name_to_id, + const std::vector &chrom_sizes, const kstring_t &line, + bamxx::bgzf_file &out) { + std::string hdr_line{line.s}; if (size(hdr_line) > 1 && !verify_chrom(hdr_line, name_to_id, chrom_sizes)) - throw runtime_error{"failed to verify header for: " + hdr_line}; + throw std::runtime_error{"failed to verify header for: " + hdr_line}; if (!write_counts_header_line(hdr_line, out)) - throw runtime_error{"failed to write header line: " + hdr_line}; + throw std::runtime_error{"failed to write header line: " + hdr_line}; } - // write all sites for chroms in the given range static void -write_all_sites(const bool verbose, - const uint32_t prev_chr_id, - const uint32_t chr_id, - const vector &names, - const vector &chroms, - vector &buf, bgzf_file &out) { +write_all_sites(const bool verbose, const std::uint32_t prev_chr_id, + const std::uint32_t chr_id, + const std::vector &names, + const std::vector &chroms, std::vector &buf, + bamxx::bgzf_file &out) { for (auto i = prev_chr_id + 1; i < chr_id; ++i) { if (verbose) - cerr << "processing: " << names[i] << " (missing)" << endl; - auto res = copy(cbegin(names[i]), cend(names[i]), buf.data()); + std::cerr << "processing: " << names[i] << " (missing)" << "\n"; + auto res = + std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data()); *res = '\t'; write_missing(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out); } @@ -354,17 +346,17 @@ write_all_sites(const bool verbose, static void process_sites(const bool verbose, const bool add_missing_chroms, const bool require_covered, const bool compress_output, - const size_t n_threads, const string &infile, - const string &outfile, const string &chroms_file) { + const size_t n_threads, const std::string &infile, + const std::string &outfile, const std::string &chroms_file) { // first get the chromosome names and sequences from the FASTA file - vector chroms, names; + std::vector chroms, names; read_fasta_file_short_names_uppercase(chroms_file, names, chroms); if (verbose) - cerr << "[n chroms in reference: " << chroms.size() << "]" << endl; + std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n"; - unordered_map chrom_lookup; - unordered_map name_to_id; - vector chrom_sizes(size(chroms), 0); + std::unordered_map chrom_lookup; + std::unordered_map name_to_id; + std::vector chrom_sizes(size(chroms), 0); get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes); if (add_missing_chroms) @@ -373,29 +365,33 @@ process_sites(const bool verbose, const bool add_missing_chroms, bamxx::bam_tpool tp(n_threads); bamxx::bgzf_file in(infile, "r"); - if (!in) throw dnmt_error("failed to open input file"); + if (!in) + throw std::runtime_error("failed to open input file"); - const string output_mode = compress_output ? "w" : "wu"; - bgzf_file out(outfile, output_mode); - if (!out) throw dnmt_error("error opening output file: " + outfile); + const std::string output_mode = compress_output ? "w" : "wu"; + bamxx::bgzf_file out(outfile, output_mode); + if (!out) + throw std::runtime_error("error opening output file: " + outfile); // set the threads for the input file decompression if (n_threads > 1) { - if (in.is_bgzf()) tp.set_io(in); + if (in.is_bgzf()) + tp.set_io(in); tp.set_io(out); } - static constexpr uint32_t output_buffer_size = 1024; - vector buf(output_buffer_size, '\0'); + static constexpr std::uint32_t output_buffer_size = 1024; + std::vector buf(output_buffer_size, '\0'); - kstring_t line{0, 0, nullptr}; + kstring_t line = KS_INITIALIZE; const int ret = ks_resize(&line, output_buffer_size); - if (ret) throw runtime_error("failed to acquire buffer"); + if (ret) + throw std::runtime_error("failed to acquire buffer"); - string chrom_name; - uint32_t nm_sz{}; - int32_t prev_chr_id = -1; - uint64_t pos = num_lim::max(); + std::string chrom_name; + std::uint32_t nm_sz{}; + std::int32_t prev_chr_id = -1; + std::uint64_t pos = std::numeric_limits::max(); // ADS: this is probably a poor strategy since we already would know // the index of the chrom sequence in the vector. @@ -409,12 +405,12 @@ process_sites(const bool verbose, const bool add_missing_chroms, if (!std::isdigit(line.s[0])) { // check if we have a chrom line - if (!require_covered && pos != num_lim::max()) + if (!require_covered && pos != std::numeric_limits::max()) write_missing(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out); - chrom_name = string{line.s}; + chrom_name = std::string{line.s}; nm_sz = size(chrom_name); - const int32_t chr_id = get_chrom_id(name_to_id, chrom_name); + const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name); if (add_missing_chroms) write_all_sites(verbose, prev_chr_id, chr_id, names, chroms, buf, out); @@ -422,17 +418,19 @@ process_sites(const bool verbose, const bool add_missing_chroms, ch_itr = get_chrom(chrom_lookup, chrom_name); pos = 0; prev_chr_id = chr_id; - if (verbose) cerr << "processing: " << chrom_name << endl; + if (verbose) + std::cerr << "processing: " << chrom_name << "\n"; - auto res = copy(cbegin(chrom_name), cend(chrom_name), buf.data()); + auto res = + std::copy(std::cbegin(chrom_name), std::cend(chrom_name), buf.data()); *res = '\t'; } else { - uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0; + std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0; const auto end_line = line.s + line.l; - auto res = from_chars(line.s, end_line, pos_step); - res = from_chars(res.ptr + 1, end_line, n_meth); - res = from_chars(res.ptr + 1, end_line, n_unmeth); + auto res = std::from_chars(line.s, end_line, pos_step); + res = std::from_chars(res.ptr + 1, end_line, n_meth); + res = std::from_chars(res.ptr + 1, end_line, n_unmeth); const auto curr_pos = pos + pos_step; if (!require_covered && pos + 1 < curr_pos) @@ -451,16 +449,16 @@ process_sites(const bool verbose, const bool add_missing_chroms, // write all cpg sites for chroms in the given range static void -write_all_cpgs(const bool verbose, - const uint32_t prev_chr_id, - const uint32_t chr_id, - const vector &names, - const vector &chroms, - vector &buf, bgzf_file &out) { +write_all_cpgs(const bool verbose, const std::uint32_t prev_chr_id, + const std::uint32_t chr_id, + const std::vector &names, + const std::vector &chroms, std::vector &buf, + bamxx::bgzf_file &out) { for (auto i = prev_chr_id + 1; i < chr_id; ++i) { if (verbose) - cerr << "processing: " << names[i] << " (missing)" << endl; - auto res = copy(cbegin(names[i]), cend(names[i]), buf.data()); + std::cerr << "processing: " << names[i] << " (missing)" << "\n"; + auto res = + std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data()); *res = '\t'; write_missing_cpg(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out); } @@ -469,17 +467,17 @@ write_all_cpgs(const bool verbose, static void process_cpg_sites(const bool verbose, const bool add_missing_chroms, const bool require_covered, const bool compress_output, - const size_t n_threads, const string &infile, - const string &outfile, const string &chroms_file) { + const size_t n_threads, const std::string &infile, + const std::string &outfile, const std::string &chroms_file) { // first get the chromosome names and sequences from the FASTA file - vector chroms, names; + std::vector chroms, names; read_fasta_file_short_names_uppercase(chroms_file, names, chroms); if (verbose) - cerr << "[n chroms in reference: " << chroms.size() << "]" << endl; + std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n"; - unordered_map chrom_lookup; - unordered_map name_to_id; - vector chrom_sizes(size(chroms), 0); + std::unordered_map chrom_lookup; + std::unordered_map name_to_id; + std::vector chrom_sizes(size(chroms), 0); get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes); if (add_missing_chroms) @@ -488,29 +486,33 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, bamxx::bam_tpool tp(n_threads); bamxx::bgzf_file in(infile, "r"); - if (!in) throw dnmt_error("failed to open input file"); + if (!in) + throw std::runtime_error("failed to open input file"); - const string output_mode = compress_output ? "w" : "wu"; - bgzf_file out(outfile, output_mode); - if (!out) throw dnmt_error("error opening output file: " + outfile); + const std::string output_mode = compress_output ? "w" : "wu"; + bamxx::bgzf_file out(outfile, output_mode); + if (!out) + throw std::runtime_error("error opening output file: " + outfile); // set the threads for the input file decompression if (n_threads > 1) { - if (in.is_bgzf()) tp.set_io(in); + if (in.is_bgzf()) + tp.set_io(in); tp.set_io(out); } - static constexpr uint32_t output_buffer_size = 1024; - vector buf(output_buffer_size, '\0'); + static constexpr std::uint32_t output_buffer_size = 1024; + std::vector buf(output_buffer_size, '\0'); - kstring_t line{0, 0, nullptr}; + kstring_t line = KS_INITIALIZE; const int ret = ks_resize(&line, output_buffer_size); - if (ret) throw runtime_error("failed to acquire buffer"); + if (ret) + throw std::runtime_error("failed to acquire buffer"); - string chrom_name; - uint32_t nm_sz{}; - int32_t prev_chr_id = -1; - uint64_t pos = num_lim::max(); + std::string chrom_name; + std::uint32_t nm_sz{}; + std::int32_t prev_chr_id = -1; + std::uint64_t pos = std::numeric_limits::max(); // ADS: this is probably a poor strategy since we already would know // the index of the chrom sequence in the vector. @@ -524,12 +526,12 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, if (!std::isdigit(line.s[0])) { // check if we have a chrom line - if (!require_covered && pos != num_lim::max()) + if (!require_covered && pos != std::numeric_limits::max()) write_missing_cpg(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out); - chrom_name = string{line.s}; + chrom_name = std::string{line.s}; nm_sz = size(chrom_name); - const int32_t chr_id = get_chrom_id(name_to_id, chrom_name); + const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name); if (add_missing_chroms) write_all_cpgs(verbose, prev_chr_id, chr_id, names, chroms, buf, out); @@ -537,17 +539,19 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, ch_itr = get_chrom(chrom_lookup, chrom_name); pos = 0; prev_chr_id = chr_id; - if (verbose) cerr << "processing: " << chrom_name << endl; + if (verbose) + std::cerr << "processing: " << chrom_name << "\n"; - auto res = copy(cbegin(chrom_name), cend(chrom_name), buf.data()); + auto res = + std::copy(std::cbegin(chrom_name), std::cend(chrom_name), buf.data()); *res = '\t'; } else { - uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0; + std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0; const auto end_line = line.s + line.l; - auto res = from_chars(line.s, end_line, pos_step); - res = from_chars(res.ptr + 1, end_line, n_meth); - res = from_chars(res.ptr + 1, end_line, n_unmeth); + auto res = std::from_chars(line.s, end_line, pos_step); + res = std::from_chars(res.ptr + 1, end_line, n_meth); + res = std::from_chars(res.ptr + 1, end_line, n_unmeth); const auto curr_pos = pos + pos_step; if (!require_covered && pos + 1 < curr_pos) @@ -573,9 +577,9 @@ main_unxcounts(int argc, char *argv[]) { bool assume_cpg_only = false; size_t n_threads = 1; - string outfile; - string chroms_file; - const string description = + std::string outfile; + std::string chroms_file; + const std::string description = "convert compressed counts format back to full counts"; /****************** COMMAND LINE OPTIONS ********************/ @@ -592,34 +596,34 @@ main_unxcounts(int argc, char *argv[]) { true, chroms_file); opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output); opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); - std::vector leftover_args; + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << "\n" + << opt_parse.about_message() << "\n"; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { - cerr << opt_parse.about_message() << endl; + std::cerr << opt_parse.about_message() << "\n"; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - cerr << opt_parse.option_missing_message() << endl; + std::cerr << opt_parse.option_missing_message() << "\n"; return EXIT_SUCCESS; } if (leftover_args.size() != 1) { - cerr << opt_parse.help_message() << endl; + std::cerr << opt_parse.help_message() << "\n"; return EXIT_SUCCESS; } if (require_covered && add_missing_chroms) { - cerr << "options mutually exclusive: reads and missing" << endl; + std::cerr << "options mutually exclusive: reads and missing" << "\n"; return EXIT_FAILURE; } - const string filename(leftover_args.front()); + const std::string filename(leftover_args.front()); /****************** END COMMAND LINE OPTIONS *****************/ if (require_covered && add_missing_chroms) { - cerr << "options mutually exclusive: reads and missing" << endl; + std::cerr << "options mutually exclusive: reads and missing" << "\n"; return EXIT_FAILURE; } @@ -631,8 +635,8 @@ main_unxcounts(int argc, char *argv[]) { process_sites(verbose, add_missing_chroms, require_covered, compress_output, n_threads, filename, outfile, chroms_file); } - catch (const std::runtime_error &e) { - cerr << e.what() << endl; + catch (const std::exception &e) { + std::cerr << e.what() << "\n"; return EXIT_FAILURE; } return EXIT_SUCCESS; From d6e408cf84979389ba39444c5fffcfe05ab011e7 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Wed, 30 Jul 2025 11:13:58 -0700 Subject: [PATCH 2/2] src/utils/kmersites.cpp: adding forgotten header --- src/utils/kmersites.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/kmersites.cpp b/src/utils/kmersites.cpp index 06014369..15318cda 100644 --- a/src/utils/kmersites.cpp +++ b/src/utils/kmersites.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include