From b0babd31a6dadbadb5f72831a4c0a3d75769ae58 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Wed, 11 Jun 2025 15:08:22 -0700 Subject: [PATCH 1/4] src/amrfinder/amrfinder.cpp: converted omp parallel for loops into loops that generate std::thread objects; set n_threads no larger than hardware_concurrency --- src/amrfinder/amrfinder.cpp | 520 +++++++++++++++++++----------------- 1 file changed, 273 insertions(+), 247 deletions(-) diff --git a/src/amrfinder/amrfinder.cpp b/src/amrfinder/amrfinder.cpp index 34d0dac5..436966bf 100644 --- a/src/amrfinder/amrfinder.cpp +++ b/src/amrfinder/amrfinder.cpp @@ -1,7 +1,7 @@ /* amrfinder: program for resolving epialleles in a sliding window * along a chromosome. * - * Copyright (C) 2014-2023 University of Southern California and + * Copyright (C) 2014-2025 University of Southern California and * Andrew D. Smith and Benjamin E. Decato * * Authors: Fang Fang and Benjamin E. Decato and Andrew D. Smith @@ -17,66 +17,49 @@ * General Public License for more details. */ +#include "EpireadStats.hpp" +#include "GenomicRegion.hpp" +#include "OptionParser.hpp" +#include "smithlab_os.hpp" +#include "smithlab_utils.hpp" + #include -#include #include #include #include +#include #include #include +#include #include -#include "EpireadStats.hpp" -#include "GenomicRegion.hpp" -#include "OptionParser.hpp" -#include "smithlab_os.hpp" -#include "smithlab_utils.hpp" - -using std::cerr; -using std::cout; -using std::endl; -using std::max; -using std::min; -using std::move; -using std::ofstream; -using std::ostream_iterator; -using std::pair; -using std::remove_if; -using std::runtime_error; -using std::size; -using std::string; -using std::to_string; -using std::vector; - using bamxx::bgzf_file; -#include - -#include - struct amr_summary { - amr_summary(const vector &amrs) { - amr_count = size(amrs); - amr_total_size = accumulate(cbegin(amrs), cend(amrs), 0ul, - [](const size_t t, const GenomicRegion &p) { - return t + p.get_width(); - }); + amr_summary(const std::vector &amrs) { + amr_count = std::size(amrs); + amr_total_size = + accumulate(std::cbegin(amrs), std::cend(amrs), 0ul, + [](const std::size_t t, const GenomicRegion &p) { + return t + p.get_width(); + }); amr_mean_size = static_cast(amr_total_size) / - std::max(amr_count, static_cast(1)); + std::max(amr_count, static_cast(1)); } // amr_count is the number of identified AMRs, which are the merged // AMRs that are found to be significant when tested as a single // interval - size_t amr_count{}; + std::size_t amr_count{}; // total_amr_size is the sum of the sizes of the identified AMRs - size_t amr_total_size{}; + std::size_t amr_total_size{}; // mean_amr_size is the mean size of the identified AMRs double amr_mean_size{}; - auto tostring() -> string { - static constexpr uint32_t current_precision = 2; + auto + tostring() -> std::string { + static constexpr std::uint32_t current_precision = 2; std::ostringstream oss; oss << std::fixed << std::setprecision(current_precision); oss << "amr_count: " << amr_count << '\n' @@ -89,24 +72,27 @@ struct amr_summary { static inline bamxx::bgzf_file & read_epiread(bamxx::bgzf_file &f, epiread &er) { std::string line; - if (getline(f, line)) er = epiread(line); + if (getline(f, line)) + er = epiread(line); return f; } static inline bool -validate_epiread_bgzf_file(const string &filename) { - constexpr size_t max_lines_to_validate = 10000; +validate_epiread_bgzf_file(const std::string &filename) { + constexpr std::size_t max_lines_to_validate = 10000; bgzf_file in(filename, "r"); - if (!in) throw runtime_error("failed to open file: " + filename); + if (!in) + throw std::runtime_error("failed to open file: " + filename); - string c, s, other; - size_t p = 0; + std::string c, s, other; + std::size_t p = 0; - size_t n_lines = 0; - string line; + std::size_t n_lines = 0; + std::string line; while (getline(in, line) && n_lines++ < max_lines_to_validate) { std::istringstream iss(line); - if (!(iss >> c >> p >> s) || iss >> other) return false; + if (!(iss >> c >> p >> s) || iss >> other) + return false; } return true; } @@ -115,18 +101,18 @@ using epi_r = small_epiread; /* merges amrs within some pre-defined distance */ static void -merge_amrs(const size_t gap_limit, vector &amrs) { - auto j = begin(amrs); +merge_amrs(const std::size_t gap_limit, std::vector &amrs) { + auto j = std::begin(amrs); for (const auto &a : amrs) // check distance between two amrs is greater than gap limit if (j->same_chrom(a) && j->get_end() + gap_limit >= a.get_start()) { j->set_end(a.get_end()); - j->set_score(min(a.get_score(), j->get_score())); + j->set_score(std::min(a.get_score(), j->get_score())); } else *(++j) = a; - amrs.erase(++j, cend(amrs)); + amrs.erase(++j, std::cend(amrs)); } //////////////////////////////////////////////////////////////////////// @@ -138,10 +124,11 @@ merge_amrs(const size_t gap_limit, vector &amrs) { ///// // static void -// set_read_states(vector > &state_counts, vector &reads) { -// for (size_t i = 0; i < reads.size(); ++i) { -// const size_t offset = reads[i].pos; -// for (size_t j = 0; j < reads[i].length(); ++j) { +// set_read_states(std::vector > &state_counts, +// std::vector &reads) { +// for (std::size_t i = 0; i < reads.size(); ++i) { +// const std::size_t offset = reads[i].pos; +// for (std::size_t j = 0; j < reads[i].length(); ++j) { // reads[i].seq[j] = state_counts[offset + j].back(); // state_counts[offset + j].pop_back(); // } @@ -149,96 +136,115 @@ merge_amrs(const size_t gap_limit, vector &amrs) { // } // static void -// get_state_counts(const vector &reads, const size_t total_cpgs, -// vector > &state_counts) { - -// state_counts = vector >(total_cpgs); -// for (size_t i = 0; i < reads.size(); ++i) { -// const size_t offset = reads[i].pos; -// for (size_t j = 0; j < reads[i].length(); ++j) +// get_state_counts(const std::vector &reads, const std::size_t +// total_cpgs, +// std::vector > &state_counts) { + +// state_counts = std::vector >(total_cpgs); +// for (std::size_t i = 0; i < reads.size(); ++i) { +// const std::size_t offset = reads[i].pos; +// for (std::size_t j = 0; j < reads[i].length(); ++j) // state_counts[offset + j].push_back(reads[i].seq[j]); // } -// for (size_t i = 0; i < state_counts.size(); ++i) -// random_shuffle(state_counts[i].begin(), state_counts[i].end()); +// for (std::size_t i = 0; i < state_counts.size(); ++i) +// random_shuffle(state_counts[i].std::begin(), state_counts[i].end()); // } // static void -// randomize_read_states(vector &reads) { +// randomize_read_states(std::vector &reads) { // /**/srand(time(0) + getpid()); -// const size_t total_cpgs = get_n_cpgs(reads); -// vector > state_counts; +// const std::size_t total_cpgs = get_n_cpgs(reads); +// std::vector > state_counts; // get_state_counts(reads, total_cpgs, state_counts); // set_read_states(state_counts, reads); // } // Code for converting between cpg and base pair coordinates below -static inline vector -collect_cpgs(const string &s) { - vector cpgs; - for (uint32_t i = 0; i < size(s) - 1; ++i) - if (s[i] == 'C' && s[i + 1] == 'G') cpgs.push_back(i); +static inline std::vector +collect_cpgs(const std::string &s) { + std::vector cpgs; + for (std::uint32_t i = 0; i < std::size(s) - 1; ++i) + if (s[i] == 'C' && s[i + 1] == 'G') + cpgs.push_back(i); return cpgs; } -template static inline bool -convert_coordinates(const vector &cpgs, GenomicRegion ®ion) { - if (size(cpgs) <= region.get_end()) return false; +template +static inline bool +convert_coordinates(const std::vector &cpgs, GenomicRegion ®ion) { + if (std::size(cpgs) <= region.get_end()) + return false; region.set_start(cpgs[region.get_start()]); region.set_end(cpgs[region.get_end()]); return true; } -static vector> -get_chrom_partition(const vector &r) { - if (r.empty()) return {}; - vector> parts; - string prev_chrom{r.front().get_chrom()}; - uint32_t prev_idx = 0u; - for (auto i = 0u; i < size(r); ++i) +static std::vector> +get_chrom_partition(const std::vector &r) { + if (r.empty()) + return {}; + std::vector> parts; + std::string prev_chrom{r.front().get_chrom()}; + std::uint32_t prev_idx = 0u; + for (auto i = 0u; i < std::size(r); ++i) if (r[i].get_chrom() != prev_chrom) { parts.emplace_back(prev_idx, i); prev_idx = i; prev_chrom = r[i].get_chrom(); } - parts.emplace_back(prev_idx, size(r)); + parts.emplace_back(prev_idx, std::size(r)); return parts; } -static bool -convert_coordinates(const string &genome_file, vector &amrs) { - vector c_name, c_seq; +[[nodiscard]] static bool +convert_coordinates(const std::size_t n_threads, const std::string &genome_file, + std::vector &amrs) { + std::vector c_name, c_seq; read_fasta_file_short_names(genome_file, c_name, c_seq); - const size_t n_chroms = size(c_seq); + const std::size_t n_chroms = std::size(c_seq); for (auto &s : c_seq) - for (auto &c : s) c = std::toupper(c); + for (auto &c : s) + c = std::toupper(c); - std::unordered_map chrom_lookup; + std::unordered_map chrom_lookup; for (auto i = 0u; i < n_chroms; ++i) chrom_lookup.emplace(std::move(c_name[i]), std::move(c_seq[i])); - vector> chrom_parts = get_chrom_partition(amrs); + const auto chrom_parts = get_chrom_partition(amrs); + const auto n_parts = std::size(chrom_parts); + const auto parts_beg = std::cbegin(chrom_parts); + const std::uint32_t n_per = (n_parts + n_threads - 1) / n_threads; + std::atomic_uint32_t conv_failure = 0; -#pragma omp parallel for - for (const auto &p : chrom_parts) { - const string chrom_name = amrs[p.first].get_chrom(); - auto c_itr = chrom_lookup.find(chrom_name); - if (c_itr == end(chrom_lookup)) - conv_failure++; - else { - vector cpgs = collect_cpgs(c_itr->second); - for (uint32_t i = p.first; i < p.second; ++i) - conv_failure += !convert_coordinates(cpgs, amrs[i]); - } + std::vector threads; + for (auto i = 0ul; i < n_threads; ++i) { + const auto p_beg = parts_beg + i * n_per; + const auto p_end = parts_beg + std::min((i + 1) * n_per, n_parts); + threads.emplace_back([&, p_beg, p_end] { + for (auto p = p_beg; p != p_end; ++p) { + const std::string chrom_name = amrs[p->first].get_chrom(); + auto c_itr = chrom_lookup.find(chrom_name); + if (c_itr == std::end(chrom_lookup)) + conv_failure++; + else { + std::vector cpgs = collect_cpgs(c_itr->second); + for (auto j = p->first; j < p->second; ++j) + conv_failure += !convert_coordinates(cpgs, amrs[j]); + } + } + }); } + for (auto &thread : threads) + thread.join(); return conv_failure == 0; } // make sure the current read is shortened to only include positions // within the relevant window static inline epi_r -clip_read(const size_t start_pos, const size_t end_pos, epi_r r) { +clip_read(const std::size_t start_pos, const std::size_t end_pos, epi_r r) { if (r.pos < start_pos) { r.seq = r.seq.substr(start_pos - r.pos); r.pos = start_pos; @@ -248,25 +254,23 @@ clip_read(const size_t start_pos, const size_t end_pos, epi_r r) { return r; } -static vector -get_current_epireads(const vector &epireads, const size_t max_len, - const size_t cpg_window, const size_t start_pos, - size_t &read_id) { - - // assert(is_sorted(cbegin(epireads), cend(epireads), +static std::vector +get_current_epireads(const std::vector &epireads, + const std::size_t max_len, const std::size_t cpg_window, + const std::size_t start_pos, std::size_t &read_id) { + // assert(is_sorted(std::cbegin(epireads), std::cend(epireads), // [](const epi_r &a, const epi_r &b) { // return a.pos < b.pos; // })); + const auto n_epi = std::size(epireads); - const auto n_epi = size(epireads); - - while (read_id < size(epireads) && + while (read_id < std::size(epireads) && epireads[read_id].pos + max_len <= start_pos) ++read_id; const auto end_pos = start_pos + cpg_window; - vector current_epireads; + std::vector current_epireads; for (auto i = read_id; i < n_epi && epireads[i].pos < end_pos; ++i) if (epireads[i].end() > start_pos) current_epireads.push_back(clip_read(start_pos, end_pos, epireads[i])); @@ -274,140 +278,155 @@ get_current_epireads(const vector &epireads, const size_t max_len, return current_epireads; } -static inline size_t -total_states(const vector &epireads) { - size_t tot = 0; - for (auto &e : epireads) tot += e.length(); +static inline std::size_t +total_states(const std::vector &epireads) { + std::size_t tot = 0; + for (auto &e : epireads) + tot += e.length(); return tot; } static inline void -add_amr(const string &chrom_name, const size_t start_cpg, - const size_t cpg_window, const vector &reads, const double score, - vector &amrs) { +add_amr(const std::string &chrom_name, const std::size_t start_cpg, + const std::size_t cpg_window, const std::vector &reads, + const double score, std::vector &amrs) { const auto end_cpg = start_cpg + cpg_window - 1; - const auto rds = to_string(size(reads)); + const auto rds = std::to_string(std::size(reads)); amrs.emplace_back(chrom_name, start_cpg, end_cpg, rds, score, '+'); } -static inline size_t +static inline std::size_t get_n_cpgs(const std::vector &reads) { - size_t n_cpgs = 0; - for (auto &r : reads) n_cpgs = max(n_cpgs, static_cast(r.end())); + std::size_t n_cpgs = 0; + for (auto &r : reads) + n_cpgs = std::max(n_cpgs, static_cast(r.end())); return n_cpgs; } -template static inline vector> +template +static inline std::vector> get_block_bounds(const T start_pos, const T end_pos, const T block_size) { - if (block_size == 0) return {{start_pos, end_pos}}; - vector> blocks; + if (block_size == 0) + return {{start_pos, end_pos}}; + std::vector> blocks; auto block_start = start_pos; while (block_start < end_pos) { - const auto block_end = min({block_start + block_size, end_pos}); + const auto block_end = std::min({block_start + block_size, end_pos}); blocks.emplace_back(block_start, block_end); block_start = block_end; } return blocks; } -static size_t -process_chrom(const bool verbose, const uint32_t n_threads, - const size_t min_obs_per_cpg, const size_t window_size, - const EpireadStats &epistat, const string &chrom_name, - const vector &epireads, vector &amrs) { +static std::size_t +process_chrom(const bool verbose, const std::uint32_t n_threads, + const std::size_t min_obs_per_cpg, const std::size_t window_size, + const EpireadStats &epistat, const std::string &chrom_name, + const std::vector &epireads, + std::vector &amrs) { constexpr auto blocks_per_thread = 8u; auto max_epiread_len = 0u; for (auto &e : epireads) - max_epiread_len = max(max_epiread_len, e.length()); - const size_t min_obs_per_window = window_size*min_obs_per_cpg; + max_epiread_len = std::max(max_epiread_len, e.length()); + const std::size_t min_obs_per_window = window_size * min_obs_per_cpg; - const size_t n_cpgs = get_n_cpgs(epireads); + const std::size_t n_cpgs = get_n_cpgs(epireads); if (verbose) - cerr << "processing " << chrom_name << " " - << "[reads: " << size(epireads) << "] " - << "[cpgs: " << n_cpgs << "]" << endl; + std::cerr << "processing " << chrom_name << " " + << "[reads: " << std::size(epireads) << "] " + << "[cpgs: " << n_cpgs << "]" << std::endl; - const auto n_blocks = n_threads*blocks_per_thread; + const auto n_blocks = n_threads * blocks_per_thread; - const size_t lim = (n_cpgs >= window_size) ? - n_cpgs - window_size + 1 : 0; - ; - const auto blocks = get_block_bounds(static_cast(0), - lim, lim/n_blocks); + const std::uint32_t lim = + n_cpgs >= window_size ? n_cpgs - window_size + 1 : 0; + const auto blocks = get_block_bounds(0u, lim, lim / n_blocks); + const auto blocks_beg = std::cbegin(blocks); + const std::uint32_t n_per = (n_blocks + n_threads - 1) / n_threads; + const auto n_epireads = std::size(epireads); std::atomic_ulong windows_tested = 0; - vector> all_amrs; - -#pragma omp parallel for - for (const auto &b : blocks) { - vector curr_amrs; - size_t start_idx = 0; - size_t windows_tested_block = 0; - for (auto i = b.first; i < b.second && start_idx < size(epireads); ++i) { - // ADS: below, we could do binary search, but this does not seem - // to be a bottleneck - auto curr_epireads = get_current_epireads(epireads, max_epiread_len, - window_size, i, start_idx); - if (total_states(curr_epireads) >= min_obs_per_window) { - bool is_significant = false; - const auto score = epistat.test_asm(curr_epireads, is_significant); - if (is_significant) - add_amr(chrom_name, i, window_size, curr_epireads, score, curr_amrs); - ++windows_tested_block; + std::vector> all_amrs(n_threads); + + std::vector threads; + for (auto i = 0u; i < n_threads; ++i) { + const auto b_beg = blocks_beg + i * n_per; + const auto b_end = blocks_beg + std::min((i + 1) * n_per, n_blocks); + threads.emplace_back([&, i, b_beg, b_end] { + std::vector curr_amrs; + std::size_t windows_tested_thread = 0; + for (auto b = b_beg; b != b_end; ++b) { + std::size_t start_idx = 0; + for (auto j = b->first; j < b->second && start_idx < n_epireads; ++j) { + // ADS: below, we could do binary search, but this does not seem + // to be a bottleneck + auto curr_epireads = get_current_epireads(epireads, max_epiread_len, + window_size, j, start_idx); + if (total_states(curr_epireads) >= min_obs_per_window) { + bool is_significant = false; + const auto score = epistat.test_asm(curr_epireads, is_significant); + if (is_significant) + add_amr(chrom_name, j, window_size, curr_epireads, score, + curr_amrs); + ++windows_tested_thread; + } + } } - } -#pragma omp critical - { - all_amrs.push_back(std::move(curr_amrs)); - } - windows_tested += windows_tested_block; + all_amrs[i] = std::move(curr_amrs); + windows_tested += windows_tested_thread; + }); } + for (auto &thread : threads) + thread.join(); auto total_amrs = 0u; for (const auto &a : all_amrs) - total_amrs += size(a); + total_amrs += std::size(a); amrs.reserve(total_amrs); for (auto &v : all_amrs) - for (auto &a : v) amrs.push_back(std::move(a)); + for (auto &a : v) + amrs.push_back(std::move(a)); return windows_tested; } struct rename_amr { - void operator()(GenomicRegion &r) { + void + operator()(GenomicRegion &r) { static constexpr auto label = "AMR"; - r.set_name(label + to_string(idx++) + ":" + r.get_name()); + r.set_name(label + std::to_string(idx++) + ":" + r.get_name()); } - uint32_t idx{}; + std::uint32_t idx{}; }; int main_amrfinder(int argc, const char **argv) { try { - const string description = + const std::string description = "identify regions of allele-specific methylation"; bool verbose = false; - uint32_t n_threads = 1; + std::uint32_t n_threads = 1; - string outfile; - string genome_file; - string summary_file; + std::string outfile; + std::string genome_file; + std::string summary_file; - uint32_t max_itr = 10; - uint32_t window_size = 10; - size_t gap_limit = 1000; + std::uint32_t max_itr = 10; + std::uint32_t window_size = 10; + std::size_t gap_limit = 1000; - double high_prob = 0.75, low_prob = 0.25; - uint32_t min_obs_per_cpg = 4; + double high_prob = 0.75; + double low_prob = 0.25; + std::uint32_t min_obs_per_cpg = 4; double critical_value = 0.01; // ADS: below, for when the time comes // auto eng = std::default_random_engine(rng_seed); - // std::shuffle(begin(things), end(things), eng); + // std::shuffle(std::begin(things), std::end(things), eng); // bool RANDOMIZE_READS = false; bool use_bic = false; @@ -432,62 +451,65 @@ main_amrfinder(int argc, const char **argv) { opt_parse.add_opt("pvals", 'h', "adjusts p-values using Hochberg step-up", false, apply_correction); opt_parse.add_opt("nofdr", 'f', "omits FDR procedure", false, use_fdr); - opt_parse.add_opt("nordc", 'r', "turn off read count correction", - false, correct_for_read_count); + opt_parse.add_opt("nordc", 'r', "turn off read count correction", false, + correct_for_read_count); opt_parse.add_opt("bic", 'b', "use BIC to compare models", false, use_bic); opt_parse.add_opt("summary", 'S', "write summary output here", false, summary_file); opt_parse.add_opt("threads", 't', "number of threads", false, n_threads); opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); opt_parse.set_show_defaults(); - vector leftover_args; + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << std::endl + << opt_parse.about_message() << std::endl; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { - cerr << opt_parse.about_message() << endl; + std::cerr << opt_parse.about_message() << std::endl; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - cerr << opt_parse.option_missing_message() << endl; + std::cerr << opt_parse.option_missing_message() << std::endl; return EXIT_SUCCESS; } if (leftover_args.size() != 1) { - cerr << opt_parse.help_message() << endl; + std::cerr << opt_parse.help_message() << std::endl; return EXIT_SUCCESS; } - const string reads_file(leftover_args.front()); + const std::string reads_file(leftover_args.front()); /****************** END COMMAND LINE OPTIONS *****************/ - omp_set_num_threads(n_threads); + + n_threads = std::min(std::thread::hardware_concurrency(), n_threads); if (!validate_epiread_bgzf_file(reads_file)) - throw runtime_error("invalid states file: " + reads_file); + throw std::runtime_error("invalid states file: " + reads_file); if (verbose) - cerr << "AMR TESTING OPTIONS: " - << "[test=" << (use_bic ? "BIC" : "LRT") << "] " - << "[iterations=" << max_itr << "]" << endl; + std::cerr << "AMR TESTING OPTIONS: " + << "[test=" << (use_bic ? "BIC" : "LRT") << "] " + << "[iterations=" << max_itr << "]" << std::endl; const EpireadStats epistat{low_prob, high_prob, critical_value, - max_itr, use_bic, correct_for_read_count}; + max_itr, use_bic, correct_for_read_count}; bamxx::bam_tpool tp(n_threads); bgzf_file in(reads_file, "r"); - if (!in) throw runtime_error("failed to open input file: " + reads_file); - if (n_threads > 1 && in.is_bgzf()) tp.set_io(in); + if (!in) + throw std::runtime_error("failed to open input file: " + reads_file); + if (n_threads > 1 && in.is_bgzf()) + tp.set_io(in); - vector amrs; + std::vector amrs; // windows_tested is the number of sliding windows in the // methylome that were tested for a signal of significant // allele-specific methylation - size_t windows_tested = 0; + std::size_t windows_tested = 0; epiread er; - vector epireads; - string prev_chrom, curr_chrom, tmp_states; + std::vector epireads; + std::string prev_chrom, curr_chrom, tmp_states; while (read_epiread(in, er)) { if (!epireads.empty() && er.chr != prev_chrom) { @@ -505,18 +527,18 @@ main_amrfinder(int argc, const char **argv) { prev_chrom, epireads, amrs); // because the threads might not add these in order - sort(begin(amrs), end(amrs)); + std::sort(std::begin(amrs), std::end(amrs)); // otherwise the names are not in chrom order - for_each(begin(amrs), end(amrs), rename_amr()); + std::for_each(std::begin(amrs), std::end(amrs), rename_amr()); if (verbose) - cerr << "========= POST PROCESSING =========" << endl; + std::cerr << "========= POST PROCESSING =========" << std::endl; // windows_accepted is the number of sliding windows in the // methylome that were found to have a significant signal of // allele-specific methylation - const auto windows_accepted = size(amrs); + const auto windows_accepted = std::size(amrs); if (!amrs.empty()) { /* ADS: there are several "steps" below that are done @@ -541,22 +563,23 @@ main_amrfinder(int argc, const char **argv) { */ // get all the pvals... (but they might be BIC scores) - vector pvals(size(amrs)); - transform(cbegin(amrs), cend(amrs), begin(pvals), - [](const GenomicRegion &r) {return r.get_score();}); + std::vector pvals(std::size(amrs)); + transform(std::cbegin(amrs), std::cend(amrs), std::begin(pvals), + [](const GenomicRegion &r) { return r.get_score(); }); // if the pvalues are not BIC scores, get an FDR cutoff // corresponding to the given critical value const double fdr_cutoff = use_bic - ? 0.0 - : smithlab::get_fdr_cutoff(windows_tested, pvals, critical_value); + ? 0.0 + : smithlab::get_fdr_cutoff(windows_tested, pvals, critical_value); // if we are not using BIC, and if corrected p-values are // requested, then adjust the p-values if (!use_bic && apply_correction) { smithlab::correct_pvals(windows_tested, pvals); - for (auto i = 0u; i < size(pvals); ++i) amrs[i].set_score(pvals[i]); + for (auto i = 0u; i < std::size(pvals); ++i) + amrs[i].set_score(pvals[i]); } // ADS: not sure it's a good idea in this next collapse function @@ -566,10 +589,10 @@ main_amrfinder(int argc, const char **argv) { // collapsed_amrs is the number of intervals of consecutive CpG // sites that are found to reside in a window among those // accepted as significant - const auto n_collapsed_amrs = size(amrs); + const auto n_collapsed_amrs = std::size(amrs); - if (!convert_coordinates(genome_file, amrs)) { - cerr << "failed converting coordinates" << endl; + if (!convert_coordinates(n_threads, genome_file, amrs)) { + std::cerr << "failed converting coordinates" << std::endl; return EXIT_FAILURE; } @@ -581,54 +604,57 @@ main_amrfinder(int argc, const char **argv) { // merged_amrs are the number of intervals that result from // merging any collapsed amrs that have a distance of less than // gap_limit/2 from each other - const auto n_merged_amrs = size(amrs); + const auto n_merged_amrs = std::size(amrs); // if BIC was not requested, then eliminate AMRs based on either // the p-value cutoff, or with the FDR-based cutoff, if it was // requested. if (!use_bic) { - const auto cutoff = (apply_correction || !use_fdr) ? - critical_value : fdr_cutoff; - amrs.erase(remove_if(begin(amrs), end(amrs), - [cutoff](const GenomicRegion &r) { return r.get_score() >= cutoff; }), - cend(amrs)); + const auto cutoff = + (apply_correction || !use_fdr) ? critical_value : fdr_cutoff; + amrs.erase(std::remove_if(std::begin(amrs), std::end(amrs), + [cutoff](const GenomicRegion &r) { + return r.get_score() >= cutoff; + }), + std::cend(amrs)); } - const auto amrs_passing_fdr = size(amrs); + const auto amrs_passing_fdr = std::size(amrs); // ADS: eliminating AMRs based on their size makes sense, but // not if that size is tied to the gap between AMRs we would // merge. There is no symmetry for these. const auto min_size = gap_limit / 2; - amrs.erase(remove_if(begin(amrs), end(amrs), - [min_size](const GenomicRegion &r) { return r.get_width() < min_size; }), - cend(amrs)); + amrs.erase( + std::remove_if(std::begin(amrs), std::end(amrs), + [&](const auto &r) { return r.get_width() < min_size; }), + std::cend(amrs)); if (verbose) { - cerr << "windows_tested: " << windows_tested << '\n' - << "windows_accepted: " << windows_accepted << '\n' - << "collapsed_amrs: " << n_collapsed_amrs << '\n' - << "merged_amrs: " << n_merged_amrs << '\n'; + std::cerr << "windows_tested: " << windows_tested << '\n' + << "windows_accepted: " << windows_accepted << '\n' + << "collapsed_amrs: " << n_collapsed_amrs << '\n' + << "merged_amrs: " << n_merged_amrs << '\n'; if (use_fdr) - cerr << "fdr_cutoff: " << fdr_cutoff << '\n' - << "amrs_passing_fdr: " << amrs_passing_fdr << '\n'; - cerr << amr_summary(amrs).tostring() << '\n'; + std::cerr << "fdr_cutoff: " << fdr_cutoff << '\n' + << "amrs_passing_fdr: " << amrs_passing_fdr << '\n'; + std::cerr << amr_summary(amrs).tostring() << '\n'; } - ofstream of; - if (!outfile.empty()) of.open(outfile); - std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf()); - if (!out) runtime_error("failed to open output file: " + outfile); - copy(cbegin(amrs), cend(amrs), - ostream_iterator(out, "\n")); + std::ofstream out(outfile); + if (!out) + std::runtime_error("failed to open output file: " + outfile); + std::copy(std::cbegin(amrs), std::cend(amrs), + std::ostream_iterator(out, "\n")); } if (!summary_file.empty()) { - ofstream summary_out(summary_file); - if (!summary_out) throw runtime_error("failed to open: " + summary_file); - summary_out << amr_summary(amrs).tostring() << endl; + std::ofstream summary_out(summary_file); + if (!summary_out) + throw std::runtime_error("failed to open: " + summary_file); + summary_out << amr_summary(amrs).tostring() << std::endl; } } - catch (const runtime_error &e) { - cerr << e.what() << endl; + catch (const std::exception &e) { + std::cerr << e.what() << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; From 5131b3201160d7ed14083af25bf5b2416695197d Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Wed, 11 Jun 2025 15:30:09 -0700 Subject: [PATCH 2/4] src/abismal: updated submodule --- src/abismal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/abismal b/src/abismal index a83a9546..3e53fe26 160000 --- a/src/abismal +++ b/src/abismal @@ -1 +1 @@ -Subproject commit a83a95468e9e2d42757db304cd91e0529715e0e0 +Subproject commit 3e53fe269177c38a9be6797157b3454c0a46fa46 From 833127322657f33d5673f84077dbe77f3d780d24 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Wed, 11 Jun 2025 15:33:55 -0700 Subject: [PATCH 3/4] m4/ads_openmp.m4: removing --- m4/ads_openmp.m4 | 159 ----------------------------------------------- 1 file changed, 159 deletions(-) delete mode 100644 m4/ads_openmp.m4 diff --git a/m4/ads_openmp.m4 b/m4/ads_openmp.m4 deleted file mode 100644 index 700c3489..00000000 --- a/m4/ads_openmp.m4 +++ /dev/null @@ -1,159 +0,0 @@ -# -# SYNOPSIS -# -# ADS_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -# -# DESCRIPTION -# -# This macro is copied from the one in autoconf-archive with -# modifications to work with the clang of some macOS versions as of -# 2022. The documentation is mostly copied from the original, with -# the initials ADS where the new stuff appears. -# -# This macro tries to find out how to compile programs that use OpenMP a -# standard API and set of compiler directives for parallel programming -# (see http://www-unix.mcs/) -# -# On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS -# output variable to the flag (e.g. -omp) used both to compile *and* link -# OpenMP programs in the current language. -# -# NOTE: You are assumed to not only compile your program with these flags, -# but also link it with them as well. -# -# If you want to compile everything with OpenMP, you should set: -# -# CFLAGS="$CFLAGS $OPENMP_CFLAGS" -# #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" -# #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" -# -# (depending on the selected language). -# -# The user can override the default choice by setting the corresponding -# environment variable (e.g. OPENMP_CFLAGS). -# -# ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is -# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is -# not found. If ACTION-IF-FOUND is not specified, the default action will -# define HAVE_OPENMP. -# -# (ADS) This macro attempts to take care of both compiling and -# linking, which means you will need an appropriate library in your -# LIBS already. This can be accomplished by using AC_SEARCH_LIBS -# with the function `omp_get_num_threads` and libraries like gomp -# and omp. -# -# (ADS) OpenMP is not enabled unless the OPENMP_FLAGS are set as -# outputs, which can be done with AC_SUBST or by calling AC_OPENMP -# first. The latter might allow for more elegant handling of code -# that should run either as single or multithreaded. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson -# Copyright (c) 2015 John W. Peterson -# Copyright (c) 2016 Nick R. Papior -# Copyright (c) 2022 Andrew D. Smith -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 14 garbage - -AC_DEFUN([ADS_OPENMP], [ -AC_PREREQ([2.69]) dnl for _AC_LANG_PREFIX - -AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, -[ -AC_REQUIRE([_ADS_OPENMP_SAFE_WD]) -save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS -ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown -# Flags to try: -fopenmp (gcc), -mp (SGI & PGI), -# -qopenmp (icc>=15), -openmp (icc), -# -xopenmp (Sun), -omp (Tru64), -# -qsmp=omp (AIX), -# -Wp,-fopenmp (macOS) -# none -ax_openmp_flags="-fopenmp -openmp -qopenmp -mp -xopenmp -omp -qsmp=omp -Wp,-fopenmp none" -if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then - ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" -fi -for ax_openmp_flag in $ax_openmp_flags; do - case $ax_openmp_flag in - none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;; - *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; - esac - AC_LINK_IFELSE([AC_LANG_SOURCE([[ -@%:@include - -static void -parallel_fill(int * data, int n) -{ - int i; -@%:@pragma omp parallel for - for (i = 0; i < n; ++i) - data[i] = i; -} - -int -main() -{ - int arr[100000]; - omp_set_num_threads(2); - parallel_fill(arr, 100000); - return 0; -} -]])],[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break],[]) -done -[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS -rm -rf mp penmp mp.dSYM penmp.dSYM -]) -if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then - m4_default([$2],:) -else - if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then - OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp - fi - m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) -fi -])dnl ADS_OPENMP - -# _ADS_OPENMP_SAFE_WD -# ------------------ -# AC_REQUIREd by ADS_OPENMP. Checks both at autoconf time and at -# configure time for files that ADS_OPENMP clobbers. -AC_DEFUN([_ADS_OPENMP_SAFE_WD], -[m4_syscmd([test ! -e penmp && test ! -e mp && test ! -e penmp.dSYM && test ! -e mp.dSYM])]dnl -[m4_if(sysval, [0], [], [m4_fatal(m4_normalize( - [ADS_OPENMP clobbers files named 'mp' and 'penmp'. - To use ADS_OPENMP you must not have either of these files - at the top level of your *build* tree.]))])]dnl -[if test -e penmp || test -e mp || test -e penmp.dSYM || test -e mp.dSYM ; then - AC_MSG_ERROR(m4_normalize( - [ADS@&t@_OPENMP clobbers files named 'mp' and 'penmp', and - directoreis named 'mp.dSYM' and 'penmp.dSYM'. - Aborting configure because one of these files already exists.])) -fi]) From a856e9f0d9d4bc08fdee26456b958740c7827a04 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Wed, 11 Jun 2025 15:34:53 -0700 Subject: [PATCH 4/4] Removing all things OpenMP --- Makefile.am | 3 +-- configure.ac | 10 ---------- docs/content/abismal.md | 2 +- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/Makefile.am b/Makefile.am index 9e832e16..7cc58d28 100644 --- a/Makefile.am +++ b/Makefile.am @@ -24,8 +24,7 @@ SUBDIRS := src/smithlab_cpp src/abismal install installdirs: SUBDIRS := $(filter-out src/smithlab_cpp src/abismal, $(SUBDIRS)) AM_CPPFLAGS = -I $(top_srcdir)/src/common -I $(top_srcdir)/src/smithlab_cpp -I $(top_srcdir)/src/bamxx -AM_CXXFLAGS = $(OPENMP_CXXFLAGS) -CXXFLAGS = -Wall -Wextra -Wpedantic -Wno-unknown-attributes +AM_CXXFLAGS = -Wall -Wextra -Wpedantic -Wno-unknown-attributes CXXFLAGS += -O3 -DNDEBUG # default has optimization on EXTRA_DIST = \ diff --git a/configure.ac b/configure.ac index b2ff44c0..057daec9 100644 --- a/configure.ac +++ b/configure.ac @@ -26,16 +26,6 @@ AC_PROG_CXX AX_CXX_COMPILE_STDCXX_17([noext], [mandatory]) AC_PROG_RANLIB -dnl OpenMP happens here -AC_OPENMP([C++]) -AS_VAR_IF(OPENMP_CXXFLAGS, [], [ -dnl check for the OpenMP library; can't be later -AC_SEARCH_LIBS([omp_get_num_threads], [gomp omp], [], - [AC_MSG_FAILURE([OpenMP library not found])]) -dnl now we get setup for the right OpenMP flags -ADS_OPENMP([], [AC_MSG_FAILURE([OpenMP must be installed to build dnmtools])]) -])dnl end of OpenMP stuff - dnl recursively configure abismal and smithlab_cpp AC_CONFIG_SUBDIRS([src/abismal]) AC_CONFIG_SUBDIRS([src/smithlab_cpp]) diff --git a/docs/content/abismal.md b/docs/content/abismal.md index 8e863221..312da12a 100644 --- a/docs/content/abismal.md +++ b/docs/content/abismal.md @@ -195,7 +195,7 @@ even longer. It usually take quite a long time to map reads from a single large file with tens of millions of reads. If you have access to a cluster, one strategy is to launch multiple jobs, each working on a subset of reads simultaneously, and finally combine their output. -`abismal` takes advantage of OpenMP to parallelize the process of +`abismal` uses standard c++ threads to parallelize the process of mapping reads using the shared index. If each node can only access its local storage, dividing the set of