diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp index 97ea9ad7..8fe1835d 100644 --- a/src/radmeth/radmeth.cpp +++ b/src/radmeth/radmeth.cpp @@ -26,6 +26,7 @@ #include // GSL header for chisqrd distribution #include +#include #include #include #include @@ -36,6 +37,24 @@ #include #include +[[nodiscard]] static std::string +format_duration(const std::chrono::duration elapsed) { + static constexpr auto s_per_h = 3600; + static constexpr auto s_per_m = 60; + const double tot_s = elapsed.count(); + + // break down into hours, minutes, seconds + const std::uint32_t hours = tot_s / 3600; + const std::uint32_t minutes = (static_cast(tot_s) % s_per_h) / s_per_m; + const double seconds = tot_s - (hours * s_per_h) - (minutes * s_per_m); + + std::ostringstream oss; + oss << std::setfill('0') << std::setw(2) << hours << ":" << std::setfill('0') + << std::setw(2) << minutes << ":" << std::fixed << std::setprecision(2) + << std::setw(5) << seconds; + return oss.str(); +} + struct file_progress { double one_thousand_over_filesize{}; std::size_t prev_offset{}; @@ -190,7 +209,6 @@ radmeth(const bool show_progress, const bool more_na_info, const std::uint32_t n_threads, const std::string &table_filename, const std::string &outfile, const Regression &alt_model, const Regression &null_model, const std::uint32_t test_factor_idx) { - static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t"; static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n"; static constexpr auto buf_size = 1024; static constexpr auto max_lines = 16384; @@ -307,11 +325,8 @@ that the design matrix and the proportion table are correctly formatted. n_bytes[b] = [&] { // clang-format off const int n_prefix_bytes = - std::sprintf(bufs[b].data(), prefix_fmt, - t_alt_model.props.chrom.data(), - t_alt_model.props.position, - t_alt_model.props.strand, - t_alt_model.props.context.data()); + std::sprintf(bufs[b].data(), "%s\t", + t_alt_model.props.rowname.data()); // clang-format on if (n_prefix_bytes < 0) return n_prefix_bytes; @@ -454,8 +469,14 @@ main_radmeth(int argc, char *argv[]) { if (verbose) std::cerr << "Null model:\n" << null_model.design << '\n'; + const auto start_time = std::chrono::steady_clock::now(); radmeth(show_progress, more_na_info, n_threads, table_filename, outfile, alt_model, null_model, test_factor_idx); + const auto stop_time = std::chrono::steady_clock::now(); + + if (verbose) + std::cerr << "[total time: " << format_duration(stop_time - start_time) + << "]\n"; } catch (const std::exception &e) { std::cerr << e.what() << '\n'; diff --git a/src/radmeth/radmeth_model.cpp b/src/radmeth/radmeth_model.cpp index a4b6ef6f..98fb2b84 100644 --- a/src/radmeth/radmeth_model.cpp +++ b/src/radmeth/radmeth_model.cpp @@ -24,8 +24,8 @@ #include #include -double Regression::tolerance = 1e-3; -double Regression::stepsize = 0.001; +double Regression::tolerance = 1e-4; +double Regression::stepsize = 0.01; std::uint32_t Regression::max_iter = 250; void @@ -33,45 +33,12 @@ SiteProp::parse(const std::string &line) { const auto first_ws = line.find_first_of(" \t"); // Parse the row name (must be like: "chr:position:strand:context") - bool failed = false; + bool failed = (first_ws == std::string::npos); - const auto label_end = line.data() + first_ws; auto field_s = line.data(); - auto field_e = std::find(field_s + 1, label_end, ':'); - if (field_e == label_end) - failed = true; - - { - const std::uint32_t d = std::distance(field_s, field_e); - chrom = std::string{field_s, d}; - } - - field_s = field_e + 1; - field_e = std::find(field_s + 1, label_end, ':'); - failed = failed || (field_e == label_end); - - { - const auto [ptr, ec] = std::from_chars(field_s, field_e, position); - failed = failed || (ptr == field_s); - } - - field_s = field_e + 1; - field_e = std::find(field_s + 1, label_end, ':'); - failed = failed || (field_e != field_s + 1 || field_e == label_end); - - strand = *field_s; - failed = failed || (strand != '-' && strand != '+'); - - field_s = field_e + 1; - field_e = std::find_if(field_s + 1, label_end, - [](const auto x) { return x == ' ' || x == '\t'; }); - failed = failed || (field_e != label_end); - - { - const std::uint32_t d = std::distance(field_s, field_e); - context = std::string{field_s, d}; - } - + auto field_e = line.data() + first_ws; + rowname = std::string{field_s, field_e}; + std::replace(std::begin(rowname), std::end(rowname), ':', '\t'); if (failed) throw std::runtime_error("failed to parse label from:\n" + line); diff --git a/src/radmeth/radmeth_model.hpp b/src/radmeth/radmeth_model.hpp index 8775ff5a..56b0dc30 100644 --- a/src/radmeth/radmeth_model.hpp +++ b/src/radmeth/radmeth_model.hpp @@ -75,10 +75,7 @@ operator>>(std::istream &in, mcounts &rm) { } struct SiteProp { - std::string chrom; - std::size_t position{}; - char strand{}; - std::string context; + std::string rowname; std::vector mc; void @@ -97,7 +94,8 @@ struct Regression { // scratch space std::vector cumul; std::vector p_v; - std::vector log1p_fact_v; + std::vector cache; + std::uint32_t max_r_count{}; [[nodiscard]] std::size_t n_factors() const { diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp index 4839c229..3823e540 100644 --- a/src/radmeth/radmeth_optimize.cpp +++ b/src/radmeth/radmeth_optimize.cpp @@ -37,21 +37,34 @@ get_p(const std::vector &v, const gsl_vector *params) { } static inline auto -cache_log1p_factors(Regression ®, const double phi) { +set_max_r_count(Regression ®) { const auto &cumul = reg.cumul; const auto max_itr = std::max_element( std::cbegin(cumul), std::cend(cumul), [](const auto &a, const auto &b) { return std::size(a.r_counts) < std::size(b.r_counts); }); - const std::size_t max_k = std::size(max_itr->r_counts); - auto &cache = reg.log1p_fact_v; + reg.max_r_count = std::size(max_itr->r_counts); // ADS: avoid the realloc that can happen even for resize(smaller_size) - if (max_k > std::size(cache)) - cache.resize(max_k, 0.0); + if (reg.max_r_count > std::size(reg.cache)) + reg.cache.resize(reg.max_r_count); +} + +static inline auto +cache_log1p_factors(Regression ®, const double phi) { + const std::size_t max_k = reg.max_r_count; + auto &cache = reg.cache; for (std::size_t k = 0; k < max_k; ++k) cache[k] = std::log1p(phi * (k - 1.0)); } +static inline auto +cache_dispersion_effect(Regression ®, const double phi) { + const std::size_t max_k = reg.max_r_count; + auto &cache = reg.cache; + for (std::size_t k = 0; k < max_k; ++k) + cache[k] = (k - 1.0) / (1.0 + phi * (k - 1.0)); +} + [[nodiscard]] static double log_likelihood(const gsl_vector *params, Regression ®) { const auto phi = logistic(gsl_vector_get(params, reg.design.n_factors())); @@ -64,7 +77,7 @@ log_likelihood(const gsl_vector *params, Regression ®) { // ADS: precompute the log1p(phi * (k - 1.0)) values, which are reused for // each group. cache_log1p_factors(reg, phi); - const auto &log1p_fact_v = reg.log1p_fact_v; + const auto &log1p_fact_v = reg.cache; double log_lik = 0.0; for (std::size_t g_idx = 0; g_idx < n_groups; ++g_idx) { @@ -102,10 +115,14 @@ gradient(const gsl_vector *params, Regression ®, gsl_vector *output) { for (auto g_idx = 0u; g_idx < n_groups; ++g_idx) p_v[g_idx] = get_p(groups[g_idx], params); + cache_dispersion_effect(reg, phi); + const auto &dispersion_effect = reg.cache; // (k-1)/(1 + phi(k-1)) + // init output to zero for all factors gsl_vector_set_all(output, 0.0); auto &data = output->data; + double disp_deriv = 0.0; for (std::size_t g_idx = 0; g_idx < n_groups; ++g_idx) { const auto p = p_v[g_idx]; const auto one_minus_p = 1.0 - p; @@ -114,13 +131,26 @@ gradient(const gsl_vector *params, Regression ®, gsl_vector *output) { const auto denom_term1 = one_minus_phi * p; const auto &cumul_y = cumul[g_idx].m_counts; - for (auto k = 0u; k < std::size(cumul_y); ++k) - deriv += cumul_y[k] / (denom_term1 + phi * k); + const auto y_lim = std::size(cumul_y); + for (auto k = 0u; k < y_lim; ++k) { + const auto common_factor = cumul_y[k] / (denom_term1 + phi * k); + deriv += common_factor; + disp_deriv += (k - p) * common_factor; + } const auto denom_term2 = one_minus_phi * one_minus_p; const auto &cumul_d = cumul[g_idx].d_counts; - for (auto k = 0u; k < std::size(cumul_d); ++k) - deriv -= cumul_d[k] / (denom_term2 + phi * k); + const auto d_lim = std::size(cumul_d); + for (auto k = 0u; k < d_lim; ++k) { + const auto common_factor = cumul_d[k] / (denom_term2 + phi * k); + deriv -= common_factor; + disp_deriv += (k - one_minus_p) * common_factor; + } + + const auto &cumul_n = cumul[g_idx].r_counts; + const auto n_lim = std::size(cumul_n); + for (auto k = 0u; k < n_lim; ++k) + disp_deriv -= cumul_n[k] * dispersion_effect[k]; const auto &g = groups[g_idx]; const auto denom_term1_one_minus_p = denom_term1 * one_minus_p; @@ -131,31 +161,7 @@ gradient(const gsl_vector *params, Regression ®, gsl_vector *output) { data[fact_idx] += deriv * (denom_term1_one_minus_p * level); } } - - double deriv = 0.0; - auto cumul_itr = std::cbegin(cumul); - for (auto g_idx = 0u; g_idx < n_groups; ++g_idx, ++cumul_itr) { - const auto p = get_p(groups[g_idx], params); - const auto one_minus_p = 1.0 - p; - - const auto term1 = one_minus_phi * p; - const auto &cumul_y = cumul_itr->m_counts; - const auto y_lim = std::size(cumul_y); - for (auto k = 0u; k < y_lim; ++k) - deriv += cumul_y[k] * (k - p) / (term1 + phi * k); - - const auto term2 = one_minus_phi * one_minus_p; - const auto &cumul_d = cumul_itr->d_counts; - const auto d_lim = std::size(cumul_d); - for (auto k = 0u; k < d_lim; ++k) - deriv += cumul_d[k] * (k - one_minus_p) / (term2 + phi * k); - - const auto &cumul_n = cumul_itr->r_counts; - const auto n_lim = std::size(cumul_n); - for (auto k = 0u; k < n_lim; ++k) - deriv -= cumul_n[k] * (k - 1.0) / (1.0 + phi * (k - 1.0)); - } - gsl_vector_set(output, n_factors, deriv * (phi * one_minus_phi)); + gsl_vector_set(output, n_factors, disp_deriv * (phi * one_minus_phi)); } [[nodiscard]] static double @@ -230,6 +236,7 @@ fit_regression_model(Regression &r, std::vector ¶ms_init) { const auto max_iter = Regression::max_iter; get_cumulative(r.design.group_id, r.design.n_groups(), r.props.mc, r.cumul); + set_max_r_count(r); // one more than the number of factors const std::size_t n_params = r.n_factors() + 1;