Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 33 additions & 35 deletions src/radmeth/radmeth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,17 @@ struct file_progress {
one_hundred_over_filesize{100.0 / std::filesystem::file_size(filename)} {}
void
operator()(std::ifstream &in) {
const std::size_t curr_offset = in.tellg() * one_hundred_over_filesize;
const std::size_t curr_offset =
in.eof() ? 100 : in.tellg() * one_hundred_over_filesize;
if (curr_offset <= prev_offset)
return;
std::cerr << "[progress: " << std::setw(3) << curr_offset
<< (curr_offset == 100 ? "%]\n" : "%]\r");
std::cerr << "\r[progress: " << std::setw(3) << curr_offset
<< (curr_offset == 100 ? "%]\n" : "%]");
prev_offset = (curr_offset == 100) ? std::numeric_limits<std::size_t>::max()
: curr_offset;
}
};

static void
remove_factor(Design &design, const std::size_t factor_idx) {
design.factor_names.erase(std::begin(design.factor_names) + factor_idx);
for (std::size_t i = 0; i < design.n_samples(); ++i)
design.matrix[i].erase(std::begin(design.matrix[i]) + factor_idx);
}

static bool
consistent_sample_names(const Regression &reg, const std::string &header) {
std::istringstream iss(header);
Expand Down Expand Up @@ -141,19 +135,14 @@ get_test_factor_idx(const Regression &model, const std::string &test_factor) {
return std::distance(std::cbegin(factors), itr);
}

static void
read_design(const bool verbose, const std::string &design_filename,
Design &design) {
if (verbose)
std::cerr << "design table filename: " << design_filename << '\n';
[[nodiscard]] static Design
read_design(const std::string &design_filename) {
std::ifstream design_file(design_filename);
if (!design_file)
throw std::runtime_error("could not open file: " + design_filename);

// initialize full design matrix from file
Design design;
design_file >> design;
if (verbose)
std::cerr << design << '\n';
return design;
}

enum class row_status : std::uint8_t {
Expand Down Expand Up @@ -219,7 +208,7 @@ that the design matrix and the proportion table are correctly formatted.
throw std::runtime_error("header:\n" + sample_names_header + message);
}

const std::size_t n_samples = alt_model.design.n_samples();
const std::size_t n_samples = alt_model.n_samples();

std::ofstream out(outfile);
if (!out)
Expand Down Expand Up @@ -302,11 +291,12 @@ that the design matrix and the proportion table are correctly formatted.

n_bytes[b] = [&] {
// clang-format off
const int n_prefix_bytes = std::sprintf(bufs[b].data(), prefix_fmt,
t_alt_model.props.chrom.data(),
t_alt_model.props.position,
t_alt_model.props.strand,
t_alt_model.props.context.data());
const int n_prefix_bytes =
std::sprintf(bufs[b].data(), prefix_fmt,
t_alt_model.props.chrom.data(),
t_alt_model.props.position,
t_alt_model.props.strand,
t_alt_model.props.context.data());
// clang-format on
if (n_prefix_bytes < 0)
return n_prefix_bytes;
Expand Down Expand Up @@ -360,6 +350,9 @@ that the design matrix and the proportion table are correctly formatted.
if (n_lines < n_threads)
break;
}

if (show_progress)
progress(table_file);
}

int
Expand Down Expand Up @@ -421,28 +414,33 @@ main_radmeth(int argc, char *argv[]) {
const std::string table_filename(leftover_args.back());
/****************** END COMMAND LINE OPTIONS *****************/

if (verbose)
std::cerr << "design table filename: " << design_filename << '\n';

// initialize full design matrix from file
Regression orig_alt_model;
read_design(verbose, design_filename, orig_alt_model.design);
Regression alt_model;
alt_model.design = read_design(design_filename);
if (verbose)
std::cerr << "Alternate model:\n" << alt_model.design << '\n';

// Check that provided test factor name exists and find its index.
const auto test_factor_idx =
get_test_factor_idx(orig_alt_model, test_factor);
const auto test_factor_idx = get_test_factor_idx(alt_model, test_factor);

// verify that the design includes more than one level for the
// test factor
if (!has_two_values(orig_alt_model, test_factor_idx)) {
const auto first_level = orig_alt_model.design.matrix[0][test_factor_idx];
if (!has_two_values(alt_model, test_factor_idx)) {
const auto first_level = alt_model.design.matrix[0][test_factor_idx];
throw std::runtime_error("test factor only one level: " + test_factor +
", " + std::to_string(first_level));
}

Regression orig_null_model;
orig_null_model.design = orig_alt_model.design;
remove_factor(orig_null_model.design, test_factor_idx);
Regression null_model = alt_model;
null_model.design = alt_model.design.drop_factor(test_factor_idx);
if (verbose)
std::cerr << "Null model:\n" << null_model.design << '\n';

radmeth(show_progress, more_na_info, n_threads, table_filename, outfile,
orig_alt_model, orig_null_model, test_factor_idx);
alt_model, null_model, test_factor_idx);
}
catch (const std::exception &e) {
std::cerr << e.what() << '\n';
Expand Down
49 changes: 47 additions & 2 deletions src/radmeth/radmeth_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ double Regression::stepsize = 0.001;
std::uint32_t Regression::max_iter = 250;

void
SiteProportions::parse(const std::string &line) {
SiteProp::parse(const std::string &line) {
const auto first_ws = line.find_first_of(" \t");

// Parse the row name (must be like: "chr:position:strand:context")
Expand Down Expand Up @@ -105,6 +105,27 @@ SiteProportions::parse(const std::string &line) {
throw std::runtime_error("failed to parse counts from:\n" + line);
}

static void
make_groups(Design &design) {
auto s = design.matrix;
std::sort(std::begin(s), std::end(s));
s.erase(std::unique(std::begin(s), std::end(s)), std::end(s));
design.groups = std::move(s);
}

static void
assign_groups(Design &design) {
const auto &matrix = design.matrix;
const auto &s = design.groups;
const auto n_samples = design.n_samples();
auto &group_id = design.group_id;
group_id.resize(n_samples);
for (auto i = 0u; i < n_samples; ++i) {
const auto x = std::find(std::cbegin(s), std::cend(s), matrix[i]);
group_id[i] = std::distance(std::cbegin(s), x);
}
}

template <typename T>
static void
transpose(const std::vector<std::vector<T>> &mat,
Expand Down Expand Up @@ -152,10 +173,33 @@ operator>>(std::istream &is, Design &design) {
}

transpose(design.matrix, design.tmatrix);
make_groups(design);
assign_groups(design);

return is;
}

[[nodiscard]] Design
Design::drop_factor(const std::size_t factor_idx) {
// clang-format off
Design design{
factor_names,
sample_names,
matrix,
tmatrix,
groups,
group_id,
};
// clang-format on
design.factor_names.erase(std::begin(design.factor_names) + factor_idx);
for (auto i = 0u; i < n_samples(); ++i)
design.matrix[i].erase(std::begin(design.matrix[i]) + factor_idx);
transpose(design.matrix, design.tmatrix);
make_groups(design);
assign_groups(design);
return design;
}

std::ostream &
operator<<(std::ostream &out, const Design &design) {
for (std::size_t factor = 0; factor < design.factor_names.size(); ++factor) {
Expand All @@ -168,8 +212,9 @@ operator<<(std::ostream &out, const Design &design) {
for (std::size_t i = 0; i < design.n_samples(); ++i) {
out << design.sample_names[i];
for (std::size_t j = 0; j < design.n_factors(); ++j)
out << "\t" << design.matrix[i][j];
out << "\t" << static_cast<std::uint32_t>(design.matrix[i][j]);
out << "\n";
}

return out;
}
45 changes: 36 additions & 9 deletions src/radmeth/radmeth_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,37 @@
#include <string>
#include <vector>

struct cumul_counts {
std::vector<std::uint32_t> m_counts;
std::vector<std::uint32_t> r_counts;
std::vector<std::uint32_t> d_counts;
};

struct Design {
std::vector<std::string> factor_names;
std::vector<std::string> sample_names;
std::vector<std::vector<std::uint8_t>> matrix;
std::vector<std::vector<std::uint8_t>> tmatrix;
std::size_t
std::vector<std::vector<std::uint8_t>> groups;
std::vector<std::uint32_t> group_id;

[[nodiscard]] std::size_t
n_factors() const {
return factor_names.size();
return std::size(factor_names);
}

[[nodiscard]] std::size_t
n_groups() const {
return std::size(groups);
}
std::size_t

[[nodiscard]] std::size_t
n_samples() const {
return sample_names.size();
return std::size(sample_names);
}

[[nodiscard]] Design
drop_factor(const std::size_t factor_idx);
};

std::istream &
Expand All @@ -56,7 +74,7 @@ operator>>(std::istream &in, mcounts &rm) {
return in >> rm.n_reads >> rm.n_meth;
}

struct SiteProportions {
struct SiteProp {
std::string chrom;
std::size_t position{};
char strand{};
Expand All @@ -68,20 +86,29 @@ struct SiteProportions {
};

struct Regression {
static double tolerance; // 1e-4;
static double tolerance; // 1e-3;
static double stepsize; // 0.001;
static std::uint32_t max_iter; // 700;
static std::uint32_t max_iter; // 250;

Design design;
SiteProportions props;
SiteProp props;
double max_loglik{};
std::vector<double> p_v; // scratch space

// scratch space
std::vector<cumul_counts> cumul;
std::vector<double> p_v;
std::vector<double> log1p_fact_v;

[[nodiscard]] std::size_t
n_factors() const {
return design.n_factors();
}

[[nodiscard]] std::size_t
n_groups() const {
return design.n_groups();
}

[[nodiscard]] std::size_t
props_size() const {
return std::size(props.mc);
Expand Down
Loading