diff --git a/Makefile.am b/Makefile.am index 7ebf1d31..068ffad6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -236,6 +236,7 @@ dnmtools_SOURCES += src/amrfinder/amrtester.cpp dnmtools_SOURCES += src/radmeth/dmr.cpp dnmtools_SOURCES += src/radmeth/methdiff.cpp dnmtools_SOURCES += src/radmeth/radmeth_optimize.cpp +dnmtools_SOURCES += src/radmeth/radmeth_model.cpp dnmtools_SOURCES += src/radmeth/radmeth.cpp dnmtools_SOURCES += src/radmeth/radmeth-adjust.cpp dnmtools_SOURCES += src/radmeth/radmeth-merge.cpp diff --git a/docs/content/radmeth.md b/docs/content/radmeth.md index 54d03292..d46dc108 100644 --- a/docs/content/radmeth.md +++ b/docs/content/radmeth.md @@ -1,62 +1,65 @@ ## radmeth - Differential CpG methylation ## Synopsis + ```console -$ dnmtools radmeth -factor case design_matrix.txt proportion_table.txt >output.bed +dnmtools radmeth -factor case -o output.txt design_matrix.txt proportion_table.txt ``` ## Description -The DM detection method described in this section is based on the -beta-binomial regression. We recommend using this method when more -than three replicates are available in each group. For rapid -differential methylation analysis the regression-based method should -be run on a computing cluster with a few hundred available nodes, in -which case it takes approximately a few hours to process a dataset -consisting of 30-50 WGBS samples. The analysis can be also performed -on a personal workstation, but it will take substantially longer. Note -that the actual processing time depends on the coverage of each -methylome, the number of sites analyzed, and the number of methylomes -in the dataset. +The differential-methylation detection method described in this section is +based on the beta-binomial regression. We recommend using this method when +more than three replicates are available in each group. For rapid differential +methylation analysis the regression-based method should be run on a large +machine with many cores. 
Note that the actual processing time depends on the +coverage of each methylome, the number of sites analyzed, and the number of +methylomes in the dataset. ### Generating proportion tables -The first step in the differential methylation analysis is to assemble -a proportion table containing read proportions for all target -methylomes. Dnmtools includes a program [merge](../merge) to generate -a proportion table from the given collection of methylomes. Suppose -that we want to compare methylomes named `control-a.meth`, -`control-b.meth`, `control-c.meth` to the methylomes `case-a.meth`, -`case-b.meth`, `case-c.meth`. The proportion table can be created with -the following command: +The first step in the differential methylation analysis is to assemble a +proportion table containing read proportions for all target +methylomes. Dnmtools includes a program [merge](../merge) to generate a +proportion table from the given collection of methylomes. Suppose that we want +to compare methylomes named `control-a.meth`, `control-b.meth`, +`control-c.meth` to the methylomes `case-a.meth`, `case-b.meth`, +`case-c.meth`. The proportion table can be created with the following +command: + ```console $ dnmtools merge -t -radmeth \ control-a.meth control-b.meth control-c.meth \ case-a.meth case-b.meth case-c.meth > proportion-table.txt ``` + This is what `proportion-table.txt` looks like: + ```txt - control_a control_b control_c case_a case_b case_c +control_a control_b control_c case_a case_b case_c chr1:108:+:CpG 9 6 10 8 1 1 2 2 2 1 14 1 chr1:114:+:CpG 17 7 10 0 14 3 5 1 9 1 7 1 chr1:160:+:CpG 12 8 10 5 17 4 15 14 13 6 4 4 chr1:309:+:CpG 1 1 1 0 17 12 12 8 2 1 19 8 ``` -As indicated in the header, this proportion table contains information -about 6 methylomes: 3 controls and 3 cases. Each row of the table -contains information about a CpG site and a proportion of reads -mapping over this site in each methylome. 
For example, the first row -describes a cytosine within a CpG site located on chromosome 1 at -position 108. This site is present in 9 reads in the methylome control -a and is methylated in 6 of them. Note that the dnmtools `merge` -command adds methylomes into the proportion table in the order in -which they are listed on the command line. + +As indicated in the header, this proportion table contains information about 6 +methylomes: 3 controls and 3 cases. Each row of the table contains +information about a CpG site and a proportion of reads mapping over this site +in each methylome. For example, the first row describes a cytosine within a +CpG site located on chromosome 1 at position 108. This site is present in 9 +reads in the methylome control a and is methylated in 6 of them. Note that the +dnmtools `merge` command adds methylomes into the proportion table in the +order in which they are listed on the command line. + +Note: the header conforms to a traditional "data frame" from R, which does not +include a column name for the row names, which are the first column. ### Design matrix -The next step is to specify the design matrix, which describes the -structure of the experiment. For our running example, the design -matrix looks like this: +The next step is to specify the design matrix, which describes the structure +of the experiment. For our running example, the design matrix looks like this: + ```txt base case control_a 1 0 @@ -66,59 +69,67 @@ case_a 1 1 case_b 1 1 case_c 1 1 ``` -The design matrix shows that samples in this dataset are associated -with two factors: base and case. The first column corresponds to the -base factor and will always be present in the design matrix. Think of -it as stating that all samples have the same baseline mean methylation -level. To distinguish cases from controls we add another factor case -(second column). The `1`s in this column correspond to the samples -which belong to the cases group. 
You can use this design matrix as a -template to create design matrices for two-group comparisons involving -arbitrary many methylomes. - -After creating the proportion table and the design matrix, we are now -ready to start the differential methylation analysis. It consists of -(1) regression, (2) combining significance, and (3) multiple testing -adjustment steps. + +The design matrix shows that samples in this dataset are associated with two +factors: base and case. The first column corresponds to the base factor and +will always be present in the design matrix. Think of it as stating that all +samples have the same baseline mean methylation level. To distinguish cases +from controls we add another factor case (second column). The `1`s in this +column correspond to the samples which belong to the cases group. You can use +this design matrix as a template to create design matrices for two-group +comparisons involving arbitrary many methylomes. + +After creating the proportion table and the design matrix, we are now ready to +start the differential methylation analysis. It consists of (1) regression, +(2) combining significance, and (3) multiple testing adjustment steps. ### Regression Suppose that the `proportion-table.txt` and `design-matrix.txt` are as described above. The regression step is run with the command + ```console -$ dnmtools radmeth -factor case design-matrix.txt proportion-table.txt > output.bed +$ dnmtools radmeth -factor case -o output.txt design-matrix.txt proportion-table.txt ``` -The `-factor` parameter specifies the factor with respect to which we -want to test for differential methylation. The test factor is case, -meaning that we are testing for differential methylation between cases -and controls. In the output file `output.bed`, the last four columns -correspond to the total read counts and methylated read counts of the -case group and control group, respectively. The p-value (5-th column) -is given by the log-likelihood ratio test. 
- -Depending on the distribution of methylated and unmethylated CpGs, -some p-values may not be calculated. If there are no reads that cover -a given CpG, the value `NA_LOW_COV` will be printed instead of a -p-value. If all CpGs are fully methylated and fully methylated in both -case and control, we say the CpG has an "extreme distribution". In -these cases, we cannot reject the null and output `NA_EXTREME_CNT`. -Finally, if there are numerical errors in the log-likelihood ratio -test, we simply print `NA`. You can opt out of these additional NA -informations by not using the `-n` flag. - -We do not recommend using the individual p-values of CpGs from -radmeth, as they often do not contain sufficient coverage for -significant biological interpretations. Instead, we recommend merging -p-values of neighboring CpGs using [radadjust](../radadjust). + +The `-factor` parameter specifies the factor with respect to which we want to +test for differential methylation. The test factor is 'case', meaning that we +are testing for differential methylation between cases and controls. In the +output file `output.txt`, the last four columns correspond to the total read +counts and methylated read counts of the case group and control group, +respectively. The p-value (5-th column) is given by the log-likelihood ratio +test. + +Depending on the distribution of methylated and unmethylated CpGs, some +p-values may not be calculated. If there are no reads that cover a given CpG, +the value `NA_LOW_COV` will be printed instead of a p-value. If all CpGs are +fully methylated or fully unmethylated in both case and control, we say the CpG +has an "extreme distribution". In these cases, we cannot reject the null and +output `NA_EXTREME_CNT`. Finally, if there are numerical errors in the +log-likelihood ratio test, we simply print `NA`. You can opt out of these +additional NA information by not using the `-n` flag. 
+ +We do not recommend relying too much on the individual p-values of CpGs from +radmeth, as they often do not contain sufficient coverage for significant +biological interpretations. Instead, we recommend merging p-values of +neighboring CpGs using [radadjust](../radadjust). ## Tutorial -This tutorial aims to answer questions users often have about the -assumptions of radmeth and how to interepret the results. We will -assume a factor of interest, with levels A and B. We will also assume -a potentially confounding factor, sex, with levels M and F. Further, -we have 8 total samples, 2 samples for each of the 4 combinations of -{A,B}x{M,F}. The design matrix should look like this: +The data used here is available at: + +```txt +https://github.com/andrewdavidsmith/radmeth-demo-simulated-data +``` + +And the results below were generated with v1.5.1 on Linux. + +This tutorial should answer questions users often have about the assumptions +of radmeth and how to interpret the results. We will assume a factor of +interest, with levels A and B. We will also assume a potentially confounding +factor, sex, with levels M and F. Further, we have 8 total samples, 2 samples +for each of the 4 combinations of {A,B}x{M,F}. The design matrix should look +like this: ```txt base sex factor @@ -131,28 +142,28 @@ sample_MA2 1 0 1 sample_MB1 1 0 0 sample_MB2 1 0 0 ``` -Note that the 3 headings correspond with the three binary columns, but -need not be aligned over them. The first sample in the design matrix, -`sample_FA1`, has an indicator in the other columns for both the "F" -and the "A", both encoded with a "1" in their respective columns. But -the replicate number 1 in the `_FA1` is just a replicate, and if it -had meaning we would consider encoding it using some factor. -Designating it as a replicate with no meaning is a choice here, -aligning with the idea that the replicates are statistically -identical. 
If all replicates identified with a "2" were done on "day -2", for example, then we might reconsider. - -For our example we will use a genome file `genome.fa` that is very -small and not a real genome. We will also use a file `features.bed` -that will be the starting point for generating data that fit with our -hypotheses. We will be generating data that *should* show us the -differences we want to see -- assuming we use the tools properly. I -will refer to each of the intervals in `features.bed` as a "feature" -but just think of it as a genomic interval with a chromosome name, a -start and an end. We need to first obtain some features that are -associated specifically with each of the levels of our factors: A, B, + +Note that the 3 headings correspond with the three binary columns, but need +not be aligned over them. The first sample in the design matrix, `sample_FA1`, +has an indicator in the other columns for both the "F" and the "A", both +encoded with a "1" in their respective columns. But the replicate number 1 in +the `_FA1` is just a replicate, and if it had meaning we would consider +encoding it using some factor. Designating it as a replicate with no meaning +is a choice here, aligning with the idea that the replicates are statistically +identical. If all replicates identified with a "2" were done on "day 2", for +example, then we might reconsider. + +For our example we will use a genome file `genome.fa` that is very small and +not a real genome. We will also use a file `features.bed` that will be the +starting point for generating data that fit with our hypotheses. We will be +generating data that *should* show us the differences we want to see -- +assuming we use the tools properly. I will refer to each of the intervals in +`features.bed` as a "feature" but just think of it as a genomic interval with +a chromosome name, a start and an end. 
We need to first obtain some features +that are associated specifically with each of the levels of our factors: A, B, M and F. We can do this starting with our features file and using the `bedtools` shuffle command: + ```console $ bedtools shuffle -i features.bed -g genome.chrom.sizes > sim/features_M.bed; $ cat sim/features_*.bed | sort -k 1,1 -k 2,2g -o excl.bed; @@ -162,10 +173,11 @@ $ bedtools shuffle -excl excl.bed -i features.bed -g genome.chrom.sizes > sim/fe $ cat sim/features_*.bed | sort -k 1,1 -k 2,2g -o excl.bed; $ bedtools shuffle -excl excl.bed -i features.bed -g genome.chrom.sizes > sim/features_B.bed; ``` -This ensures none of the "features" are overlapping between A, B, M or -F, and all are similar in number and size distribution. Using each of -these, I then made files of features corresponding to the -*combinations* of factors: + +This ensures none of the "features" are overlapping between A, B, M or F, and +all are similar in number and size distribution. Using each of these, I then +made files of features corresponding to the *combinations* of factors: + ```shell for i in M F; do for j in A B; do @@ -174,28 +186,30 @@ for i in M F; do done; done; ``` + Now I have files named: + ```txt sim/features_MA.bed sim/features_MB.bed sim/features_FA.bed sim/features_FB.bed ``` -The files named with an "M" include all intervals in -`sim/features_M.bed`, and similarly the files named with a "B" include -all intervals in `sim/features_B.bed`. So the file -`sim/features_FA.bed` is all the features for "A" and all the features -for "F". It will differ from `sim/features_FB.bed` less than it will -from `sim/features_MB.bed`. - -Using these files, I simulated methylation levels at every CpG site in -the fake genome `genome.fa` with CpGs outside the intervals receiving -a methylation level of 0.8 and CpGs inside the intervals receiving a -methylation level of 0.15. 
At each CpG site, a number of reads was -sampled (Poisson with mean 10) and then the methylation levels were -sampled with a coin flip weighed as either 0.8 or 0.15, depending on -the CpG site. I did this twice for each combination of {M,F}x{A,B}. -The result is 8 files named as follows: + +The files named with an "M" include all intervals in `sim/features_M.bed`, and +similarly the files named with a "B" include all intervals in +`sim/features_B.bed`. So the file `sim/features_FA.bed` is all the features +for "A" and all the features for "F". It will differ from +`sim/features_FB.bed` less than it will from `sim/features_MB.bed`. + +Using these files, I simulated methylation levels at every CpG site in the +fake genome `genome.fa` with CpGs outside the intervals receiving a +methylation level of 0.8 and CpGs inside the intervals receiving a methylation +level of 0.15. At each CpG site, a number of reads was sampled (Poisson with +mean 10) and then the methylation levels were sampled with a coin flip weighed +as either 0.8 or 0.15, depending on the CpG site. I did this twice for each +combination of {M,F}x{A,B}. The result is 8 files named as follows: + ```console $ ls -1 sim/*.counts sim/sample_FA1.counts @@ -207,12 +221,14 @@ sim/sample_MA2.counts sim/sample_MB1.counts sim/sample_MB2.counts ``` -The program I used for the simulation above is not currently -available. In this simulation, the only difference between replicates -(e.g., `sim/sample_FA1.counts` vs. `sim/sample_FA2.counts`) is random -noise due to sampling. + +The program I used for the simulation above is not currently available. In +this simulation, the only difference between replicates (e.g., +`sim/sample_FA1.counts` vs. `sim/sample_FA2.counts`) is random noise due to +sampling. 
Here is a look inside one of these files: + ```console $ head sim/sample_FA1.counts chr1 163 + CG 0.857143 7 @@ -226,12 +242,16 @@ chr1 324 + CG 0.555556 9 chr1 350 + CG 0.933333 15 chr1 356 + CG 0.923077 13 ``` + Then I used the `merge` command from dnmtools to make a data matrix as follows: + ```console $ dnmtools merge -t -radmeth -v -remove .counts -o sim/table.txt sim/sample_*.counts ``` + And this is what it gave me: + ```console $ head sim/table.txt sample_FA1 sample_FA2 sample_FB1 sample_FB2 sample_MA1 sample_MA2 sample_MB1 sample_MB2 @@ -245,67 +265,75 @@ chr1:322:+:CG 6 5 6 6 9 8 19 14 13 11 13 11 9 6 10 7 chr1:324:+:CG 9 5 10 9 8 5 10 9 7 6 14 9 7 6 6 6 chr1:350:+:CG 15 14 8 6 6 5 9 6 7 6 6 4 9 8 11 10 ``` -With this simulated data, in which each column has the influence of -two different factors, we can use `radmeth` with the design matrix -shown above (in a file named `desgin.txt`) to get p-values for -differential methylation between levels A and B of our factor of -interest -- while controling for the effect of M/F: + +With this simulated data, in which each column has the influence of two +different factors, we can use `radmeth` with the design matrix shown above (in +a file named `desgin.txt`) to get p-values for differential methylation +between levels A and B of our factor of interest -- while controling for the +effect of M/F: + ```console $ dnmtools radmeth -o sim/samples.radmeth -f factor design.txt sim/table.txt ``` -As explained already, the 5th column of the output gives the p-values -for differential methylation between levels for our factor of interest -(named "factor" in the design matrix). Here is a look at part of the -output: + +As explained already, the 5th column of the output gives the p-values for +differential methylation between levels for our factor of interest (named +"factor" in the design matrix). 
Here is a look at part of the output: + ```console $ head sim/samples.radmeth -chr1 163 + CG 0.0864953 35 22 45 36 -chr1 206 + CG 0.444024 37 30 43 38 -chr1 232 + CG 0.756591 28 20 35 26 -chr1 278 + CG 0.780385 43 35 37 31 -chr1 296 + CG 0.64704 33 29 38 32 -chr1 310 + CG 0.640826 37 28 31 22 -chr1 322 + CG 0.0971364 38 33 47 35 -chr1 324 + CG 0.257851 40 29 31 26 -chr1 350 + CG 0.893476 36 30 35 29 -chr1 356 + CG 0.0527032 37 35 42 33 +chr1 163 + CG 0.0877822 35 22 45 36 +chr1 206 + CG 0.442745 37 30 43 38 +chr1 232 + CG 0.756925 28 20 35 26 +chr1 278 + CG 0.794626 43 35 37 31 +chr1 296 + CG 0.635986 33 29 38 32 +chr1 310 + CG 0.641986 37 28 31 22 +chr1 322 + CG 0.0973903 38 33 47 35 +chr1 324 + CG 0.260703 40 29 31 26 +chr1 350 + CG 0.888242 36 30 35 29 +chr1 356 + CG 0.0535286 37 35 42 33 ``` -Focusing on the 5th column, we see values between 0 and 1, but none of -them are very extreme. This particular output file has 17903 rows. We -can use the dnmtools command `selectsites` to pull out the rows in -this file that correspond to each of the original feature sets. Here -is how we would do it for all 4 different original sets of features: + +Focusing on the 5th column, we see values between 0 and 1, but none of them +are very extreme. This particular output file has 17903 rows. We can use the +dnmtools command `selectsites` to pull out the rows in this file that +correspond to each of the original feature sets. 
Here is how we would do it +for all 4 different original sets of features: + ```shell for i in M F A B; do - dnmtools selectsites -o sim/features_${i}.txt sim/features_${i}.bed sim/samples.radmeth; + dnmtools selectsites -relaxed -o sim/features_${i}.txt sim/features_${i}.bed sim/samples.radmeth; done ``` -The result allows us to verify that `radmeth` with the design matrix -given above identifies sites as significant when they differ between -levels A and B of the factor of interest: + +The result allows us to verify that `radmeth` with the design matrix given +above identifies sites as significant when they differ between levels A and B +of the factor of interest: + ```console -$ for i in sim/features_*.counts; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t -sim/features_A.counts 0.255448 430 0.000594064 -sim/features_B.counts 0.114256 372 0.00030714 -sim/features_F.counts 196.093 400 0.490232 -sim/features_M.counts 295.234 588 0.502099 +$ for i in sim/features_[ABFM].txt; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t +sim/features_A.txt 0.256072 430 0.000595517 +sim/features_B.txt 0.114786 372 0.000308564 +sim/features_F.txt 197.332 400 0.493329 +sim/features_M.txt 297.728 588 0.50634 ``` -The p-values in features associated with either M or F are averaging -around 0.5, which is what we want if we are trying to control this -possible confounding variable. In the features associated either with -A or B, the p-values are very small on average. Of course this is -literally by design. Knowing that we simulated just as much difference -between M and F as between A and B, if we had used `-f sex` then -`radmeth` would have correctly assigned low p-values to sites in the -intervals of `features_M.bed` and `features_F.bed`. - -We can use the same simulated data to verify that we do need the -intercept in our design. 
In other words, the column "base" in -`design.txt` needs to be present (you can give it any name you want; -make sure it's all 1s). - -We change the design matrix to make the following, which has the -intercept column removed: + ```console $ cat design_noint.txt sex factor @@ -318,44 +346,56 @@ sample_MA2 0 1 sample_MB1 0 0 sample_MB2 0 0 ``` -Then we re-run all the above commands (I won't exaplain each) to get -results analogous but with this modified design matrix: + +Then we re-run all the above commands (I won't explain each) to get results +analogous but with this modified design matrix: + ```console $ dnmtools radmeth -o sim/samples_noint.radmeth -f factor design_noint.txt sim/table.txt -$ for i in M F A B; do dnmtools selectsites -o sim/features_noint_${i}.txt sim/features_${i}.bed sim/samples_noint.radmeth; done +$ for i in M F A B; do dnmtools selectsites -relaxed -o sim/features_noint_${i}.txt sim/features_${i}.bed sim/samples_noint.radmeth; done $ for i in sim/*_noint_*.txt; do echo $i `awk 'BEGIN{k=0;c=0}{k+=$5;c+=1}END{print k,c,k/c}' $i`; done | column -t -sim/features_noint_A.txt 8.27788 430 0.0192509 
-sim/features_noint_B.txt 15.2391 372 0.0409653 -sim/features_noint_F.txt 77.2438 400 0.193109 -sim/features_noint_M.txt 93.7181 588 0.159385 +sim/features_noint_A.txt 8.27931 430 0.0192542 +sim/features_noint_B.txt 15.2397 372 0.040967 +sim/features_noint_F.txt 77.2656 400 0.193164 +sim/features_noint_M.txt 93.7312 588 0.159407 ``` -Although the sites within features corresponding to A and B are much -lower on average than those for M or F, the distinction has been -reduced. More importantly, we know that the sites in -`sim/features_M.bed` and `sim/features_F.bed` are not influenced by -the level A/B of our factor of interest. The p-values for sites in the -last two rows above should have mean of 0.5. But without the intercept -included in the design matrix, the outcomes to not match our -statistical assumptions. I cannot predict what will happen if you use -the wrong design matrix. + +Although the sites within features corresponding to A and B are much lower on +average than those for M or F, the distinction has been reduced. More +importantly, we know that the sites in `sim/features_M.bed` and +`sim/features_F.bed` are not influenced by the level A/B of our factor of +interest. The p-values for sites in the last two rows above should have mean +of 0.5. But without the intercept included in the design matrix, the outcomes +do not match our statistical assumptions. I cannot predict what will happen if +you use the wrong design matrix. 
In analyzing bisulfite sequencing data for +differential methylation, we become aware of uncertainty through the counts, +and we leverage this to try and be more accurate. So unlike other regression +settings, if you try to normalize the data first, in order to avoid having to +deal with an intercept, you will not arrive at an equivalent procedure -- if +you want to normalize the data yourself (i.e. subtract mu and divide by sigma) +then you are probably better off using a general regression package. ## Options ```txt -o, -out ``` + The name of the output file (default: stdout). +```txt + -t, -threads +``` + +Use multiple threads. This gives very good speedup as long as you do not +exceed the number of available physical cores. Most CPUs support 2 hardware +threads per physical core via SMT/Hyper-Threading, so check your number of +cores if you are unsure. + ```txt -n -na-info ``` + If a p-value is not calculated, print NAs in more detail: low coverage (`NA_LOW_COV`), extreme values (`NA_EXTREME`) or numerical errors in likelihood ratios (`NA`). @@ -363,10 +403,34 @@ likelihood ratios (`NA`). ```txt -v, -verbose ``` + Print more information while the command is running. +```txt + -progress +``` + +Show progress while radmeth runs. + ```txt -f, -factor ``` + The name of the factor on which to test differences. This must be the name of one of the columns in file design matrix. This is required. + +```txt + -tolerance +``` + +The numerical tolerance for convergence when estimating model parameters. +This cutoff is applied to the norm of the gradient of the log-likelihood +function for MLE parameter estimation, and is multiplied by both the square +root of the number of parameters and by the number of methylomes in the +analysis. + +```txt + -max-iter +``` + +The maximum iterations to allow when estimating model parameters. 
diff --git a/src/analysis/nanopore.cpp b/src/analysis/nanopore.cpp index d8886c5b..6eb3ba53 100644 --- a/src/analysis/nanopore.cpp +++ b/src/analysis/nanopore.cpp @@ -760,7 +760,6 @@ consistent_existing_targets( return true; } -int counter{}; struct read_processor { static constexpr auto default_expected_basecall_model = "dna_r10.4.1_e8.2_400bps_hac@v4.3.0"; diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp index 815e599a..0fa8fcbe 100644 --- a/src/radmeth/radmeth.cpp +++ b/src/radmeth/radmeth.cpp @@ -16,15 +16,17 @@ * General Public License for more details. */ -#include // GSL header for chisqrd distribution +#include // GSL header for chisqrd distribution #include #include +#include #include #include #include #include #include +#include #include // smithlab headers @@ -37,175 +39,44 @@ #include "radmeth_optimize.hpp" using std::begin; -using std::cerr; using std::cout; using std::distance; using std::end; -using std::endl; -using std::istringstream; -using std::runtime_error; -using std::sort; -using std::string; -using std::vector; - -static std::istream & -operator>>(std::istream &is, Design &design) { - string header_encoding; - getline(is, header_encoding); - - istringstream header_is(header_encoding); - string header_name; - while (header_is >> header_name) design.factor_names.push_back(header_name); - - string row; - while (getline(is, row)) { - if (row.empty()) continue; - - istringstream row_is(row); - string token; - row_is >> token; - design.sample_names.push_back(token); - - vector matrix_row; - while (row_is >> token) { - if (token.length() == 1 && (token == "0" || token == "1")) - matrix_row.push_back(token == "1"); - else - throw runtime_error("only binary factor levels are allowed:\n" + row); - } - - if (matrix_row.size() != design.n_factors()) - throw runtime_error( - "each row must have as many columns as factors:\n" + - row); - design.matrix.push_back(vector()); - swap(design.matrix.back(), matrix_row); +struct file_progress { + 
double one_hundred_over_filesize{}; + std::size_t prev_offset{}; + explicit file_progress(const std::string &filename) : + one_hundred_over_filesize{100.0 / std::filesystem::file_size(filename)} {} + void + operator()(std::ifstream &in) { + const std::size_t curr_offset = in.tellg() * one_hundred_over_filesize; + if (curr_offset <= prev_offset) + return; + std::cerr << "[progress: " << std::setw(3) << curr_offset + << (curr_offset == 100 ? "%]\n" : "%]\r"); + prev_offset = (curr_offset == 100) ? std::numeric_limits::max() + : curr_offset; } - return is; -} - -static std::ostream & -operator<<(std::ostream &out, const Design &design) { - for (size_t factor = 0; factor < design.factor_names.size(); ++factor) { - out << design.factor_names[factor]; - if (factor + 1 != design.factor_names.size()) out << "\t"; - } - out << endl; - - for (size_t i = 0; i < design.n_samples(); ++i) { - out << design.sample_names[i]; - for (size_t factor = 0; factor < design.factor_names.size(); ++factor) - out << "\t" << design.matrix[i][factor]; - out << "\n"; - } - return out; -} +}; static void -remove_factor(Design &design, const size_t factor) { - design.factor_names.erase(begin(design.factor_names) + factor); - for (size_t i = 0; i < design.n_samples(); ++i) - design.matrix[i].erase(begin(design.matrix[i]) + factor); -} - -// Parses a natural number from its string representation. Throws exception if -// the string does not encode one. 
-static size_t -parse_natural_number(string encoding) { - istringstream iss(encoding); - size_t number; - iss >> number; - if (!iss) - throw runtime_error("The token \"" + encoding + - "\" does not encode a natural number"); - return number; -} - -static std::istream & -operator>>(std::istream &table_encoding, SiteProportions &props) { - props.chrom.clear(); - props.position = 0; - props.strand.clear(); - props.context.clear(); - props.meth.clear(); - props.total.clear(); - - string row_encoding; - getline(table_encoding, row_encoding); - - // Skip lines contining only the newline character (e.g. the last line of the - // proportion table). - if (row_encoding.empty()) return table_encoding; - - // Get the row name (which must be specified like this: "chr:position") and - // parse it. - istringstream row_stream(row_encoding); - string row_name_encoding; - row_stream >> row_name_encoding; - - // Every row must start an identifier consisiting of genomic loci of the - // corresponding site. Here we check this identifier has the correct number - // of colons. - const size_t num_colon = - count(row_name_encoding.begin(), row_name_encoding.end(), ':'); - - if (num_colon != 3) - throw runtime_error( - "Each row in the count table must start with " - "a line chromosome:position:strand:context. Got \"" + - row_name_encoding + "\" instead."); - - // First parse the row identifier. - istringstream name_stream(row_name_encoding); - getline(name_stream, props.chrom, ':'); - - if (props.chrom.empty()) - throw runtime_error("Error parsing " + row_name_encoding + - ": chromosome name is missing."); - - string position_encoding; - - getline(name_stream, position_encoding, ':'); - props.position = parse_natural_number(position_encoding); - getline(name_stream, props.strand, ':'); - getline(name_stream, props.context, ':'); - - // After parsing the row identifier, parse count proportions. 
- size_t total_count, meth_count; - - while (row_stream >> total_count >> meth_count) { - props.total.push_back(total_count); - props.meth.push_back(meth_count); - } - - if (!row_stream.eof()) - throw runtime_error("Some row entries are not natural numbers: " + - row_stream.str()); - - if (props.total.size() != props.meth.size()) - throw runtime_error( - "This row does not encode proportions" - "correctly:\n" + - row_encoding); - return table_encoding; -} - -static bool -fit_regression_model(Regression &r) { - vector initial_params; - return fit_regression_model(r, initial_params); +remove_factor(Design &design, const std::size_t factor_idx) { + design.factor_names.erase(std::begin(design.factor_names) + factor_idx); + for (std::size_t i = 0; i < design.n_samples(); ++i) + design.matrix[i].erase(std::begin(design.matrix[i]) + factor_idx); } /***************** RADMETH ALGORITHM *****************/ static bool -consistent_sample_names(const Regression ®, const string &header) { - istringstream iss(header); - auto nm_itr(begin(reg.design.sample_names)); - const auto nm_end(end(reg.design.sample_names)); - string token; +consistent_sample_names(const Regression ®, const std::string &header) { + std::istringstream iss(header); + auto nm_itr(std::begin(reg.design.sample_names)); + const auto nm_end(std::end(reg.design.sample_names)); + std::string token; while (iss >> token && nm_itr != nm_end) - if (token != *nm_itr++) return false; + if (token != *nm_itr++) + return false; return true; } @@ -213,13 +84,13 @@ consistent_sample_names(const Regression ®, const string &header) { // function outputs the p-value of the log-likelihood ratio. *Note* that it is // assumed that the reduced model has one fewer factor than the reduced model. static double -loglikratio_test(double null_loglik, double full_loglik) { +llr_test(double null_loglik, double full_loglik) { // The log-likelihood ratio statistic. 
const double log_lik_stat = -2 * (null_loglik - full_loglik); // It is assumed that null model has one fewer factor than the full // model. Hence the number of degrees of freedom is 1. - const size_t degrees_of_freedom = 1; + const std::size_t degrees_of_freedom = 1; // Log-likelihood ratio statistic has a chi-sqare distribution. const double chisq_p = gsl_cdf_chisq_P(log_lik_stat, degrees_of_freedom); @@ -229,46 +100,273 @@ loglikratio_test(double null_loglik, double full_loglik) { } static bool -has_low_coverage(const Regression ®, const size_t test_fac) { +has_low_coverage(const Regression ®, const std::size_t test_fac) { bool cvrd_in_test_fact_smpls = false; - for (size_t i = 0; i < reg.n_samples() && !cvrd_in_test_fact_smpls; ++i) + for (std::size_t i = 0; i < reg.n_samples() && !cvrd_in_test_fact_smpls; ++i) cvrd_in_test_fact_smpls = - (reg.design.matrix[i][test_fac] == 1 && reg.props.total[i] != 0); + (reg.design.matrix[i][test_fac] == 1 && reg.props.mc[i].n_reads != 0); bool cvrd_in_other_smpls = false; - for (size_t i = 0; i < reg.n_samples() && !cvrd_in_other_smpls; ++i) + for (std::size_t i = 0; i < reg.n_samples() && !cvrd_in_other_smpls; ++i) cvrd_in_other_smpls = - (reg.design.matrix[i][test_fac] != 1 && reg.props.total[i] != 0); + (reg.design.matrix[i][test_fac] != 1 && reg.props.mc[i].n_reads != 0); return !cvrd_in_test_fact_smpls || !cvrd_in_other_smpls; } -static bool +[[nodiscard]] static bool has_extreme_counts(const Regression ®) { - bool is_maximally_methylated = true; - for (size_t i = 0; i < reg.n_samples() && is_maximally_methylated; ++i) - is_maximally_methylated = (reg.props.total[i] == reg.props.meth[i]); + const auto &mc = reg.props.mc; + + bool full_meth = true; + for (auto i = std::cbegin(mc); i != std::cend(mc) && full_meth; ++i) + full_meth = (i->n_reads == i->n_meth); - bool is_unmethylated = true; - for (size_t i = 0; i < reg.n_samples() && is_unmethylated; ++i) - is_unmethylated = (reg.props.meth[i] == 0.0); + bool no_meth = 
true; + for (auto i = std::cbegin(mc); i != std::cend(mc) && no_meth; ++i) + no_meth = (i->n_meth == 0.0); - return is_maximally_methylated || is_unmethylated; + return full_meth || no_meth; } +[[nodiscard]] static bool +has_two_values(const Regression ®, const std::size_t test_factor) { + const auto &v = reg.design.tmatrix[test_factor]; + for (auto i = std::cbegin(v); i != std::cend(v); ++i) + if (*i != v[0]) + return true; + return false; +} -static bool -verify_multiple_levels(const Regression &full_regression, - const size_t test_factor) { - const size_t n_samples = full_regression.design.n_samples(); - const auto first_level = full_regression.design.matrix[0][test_factor]; - bool test_fact_mult_levels = false; - for (size_t i = 1; i < n_samples; ++i) - if (full_regression.design.matrix[i][test_factor] != first_level) - test_fact_mult_levels = true; - return test_fact_mult_levels; +[[nodiscard]] static std::uint32_t +get_test_factor_idx(const Regression &model, const std::string &test_factor) { + const auto &factors = model.design.factor_names; + const auto itr = + std::find(std::cbegin(factors), std::cend(factors), test_factor); + + if (itr == std::cend(factors)) + throw std::runtime_error("Factor not part of design: " + test_factor); + + return std::distance(std::cbegin(factors), itr); } +static void +read_design(const bool verbose, const std::string &design_filename, + Design &design) { + if (verbose) + std::cerr << "design table filename: " << design_filename << std::endl; + std::ifstream design_file(design_filename); + if (!design_file) + throw std::runtime_error("could not open file: " + design_filename); + + // initialize full design matrix from file + design_file >> design; + + if (verbose) + std::cerr << design << std::endl; +} + +enum class row_status : std::uint8_t { + ok, + na, + na_low_cov, + na_extreme_cnt, +}; + +[[nodiscard]] static std::vector +drop_idx(const std::vector &v, const std::size_t to_drop) { + std::vector u; + u.reserve(std::size(v) 
- 1); + for (auto i = 0u; i < std::size(v); ++i) + if (i != to_drop) + u.push_back(v[i]); + return u; +} + +static inline void +get_chunk_bounds(const std::uint32_t n_elements, const std::uint32_t n_chunks, + std::vector> &chunks) { + const std::uint32_t q = n_elements / n_chunks; + const std::uint32_t r = n_elements - q * n_chunks; + std::uint32_t block_start{}; + for (std::size_t i = 0; i < n_chunks; ++i) { + const auto sz = (i < r) ? q + 1 : q; + const auto block_end = block_start + sz; + chunks[i] = {block_start, block_end}; + block_start = block_end; + } +} + +static void +radmeth(const bool show_progress, const bool more_na_info, + const std::uint32_t n_threads, const std::string &table_filename, + const std::string &outfile, const Regression &alt_model, + const Regression &null_model, const std::uint32_t test_factor_idx) { + static constexpr auto prefix_fmt = "%s\t%ld\t%c\t%s\t"; + static constexpr auto suffix_fmt = "\t%ld\t%ld\t%ld\t%ld\n"; + static constexpr auto buf_size = 1024; + static constexpr auto max_lines = 16384; + + // ADS: open the data table file + std::ifstream table_file(table_filename); + if (!table_file) + throw std::runtime_error("could not open file: " + table_filename); + + // Make sure that the first line of the proportion table file contains + // names of the samples. Throw an exception if the names or their order + // in the proportion table does not match those in the full design matrix. + std::string sample_names_header; + std::getline(table_file, sample_names_header); + + if (!consistent_sample_names(alt_model, sample_names_header)) + throw std::runtime_error( + "header:\n" + sample_names_header + "\n" + + "does not match factor names or their order in the\n" + "design matrix. 
Check that the design matrix and\n" + "the proportion table are correctly formatted."); + + const std::size_t n_samples = alt_model.design.n_samples(); + + std::ofstream out(outfile); + if (!out) + throw std::runtime_error("failed to open output file: " + outfile); + + file_progress progress{table_filename}; + + std::vector> bufs(max_lines, + std::vector(buf_size, 0)); + std::vector n_bytes(max_lines, 0); + std::vector lines(max_lines); + + std::vector alt_models(n_threads, alt_model); + std::vector null_models(n_threads, null_model); + + std::vector> chunks(n_threads); + + // Iterate over rows in the file and do llr test on proportions from + // each. Do this in sets of rows to avoid having to spawn too many threads. + while (true) { + std::uint32_t n_lines = 0; + while (n_lines < max_lines && std::getline(table_file, lines[n_lines])) + ++n_lines; + if (n_lines == 0) + break; + + get_chunk_bounds(n_lines, n_threads, chunks); + + std::vector threads; + for (auto thread_id = 0u; thread_id < n_threads; ++thread_id) { + threads.emplace_back([&, thread_id] { + const auto &[chunk_beg, chunk_end] = chunks[thread_id]; + auto &t_alt_model = alt_models[thread_id]; + auto &t_null_model = null_models[thread_id]; + for (auto b = chunk_beg; b != chunk_end; ++b) { + t_alt_model.props.parse(lines[b]); + if (t_alt_model.props_size() != n_samples) + throw std::runtime_error("found row with wrong number of columns"); + + const auto [p_val, status] = [&]() -> std::tuple { + // Skip the test if (1) no coverage in all cases or in all controls, + // or (2) the site is completely methylated or completely + // unmethylated across all samples. 
+ if (has_low_coverage(t_alt_model, test_factor_idx)) + return std::tuple{1.0, row_status::na_low_cov}; + + if (has_extreme_counts(t_alt_model)) + return std::tuple{1.0, row_status::na_extreme_cnt}; + + std::vector alternate_params; + fit_regression_model(t_alt_model, alternate_params); + t_null_model.props = t_alt_model.props; + + auto null_params = drop_idx(alternate_params, test_factor_idx); + + fit_regression_model(t_null_model, null_params); + const double p_value = + llr_test(t_null_model.max_loglik, t_alt_model.max_loglik); + + return (p_value != p_value) ? std::tuple{1.0, row_status::na} + : std::tuple{p_value, row_status::ok}; + }(); + + std::size_t n_reads_factor = 0; + std::size_t n_reads_others = 0; + std::size_t n_meth_factor = 0; + std::size_t n_meth_others = 0; + + const auto &mc = t_alt_model.props.mc; + const auto &vec = t_alt_model.design.tmatrix[test_factor_idx]; + for (std::size_t s = 0; s < n_samples; ++s) + if (vec[s] != 0) { + n_reads_factor += mc[s].n_reads; + n_meth_factor += mc[s].n_meth; + } + else { + n_reads_others += mc[s].n_reads; + n_meth_others += mc[s].n_meth; + } + + n_bytes[b] = [&] { + // clang-format off + const int n_prefix_bytes = std::sprintf(bufs[b].data(), prefix_fmt, + t_alt_model.props.chrom.data(), + t_alt_model.props.position, + t_alt_model.props.strand, + t_alt_model.props.context.data()); + // clang-format on + if (n_prefix_bytes < 0) + return n_prefix_bytes; + + auto cursor = bufs[b].data() + n_prefix_bytes; + + const int n_pval_bytes = [&] { + if (status == row_status::ok) + return std::sprintf(cursor, "%.6g", p_val); + if (!more_na_info || status == row_status::na) + return std::sprintf(cursor, "NA"); + if (status == row_status::na_extreme_cnt) + return std::sprintf(cursor, "NA_EXTREME_CNT"); + // if (status == row_status::na_low_cov) + return std::sprintf(cursor, "NA_LOW_COV"); + }(); + + if (n_pval_bytes < 0) + return n_pval_bytes; + + cursor += n_pval_bytes; + + // clang-format off + const int n_suffix_bytes = 
std::sprintf(cursor, suffix_fmt, + n_reads_factor, + n_meth_factor, + n_reads_others, + n_meth_others); + // clang-format on + if (n_suffix_bytes < 0) + return n_suffix_bytes; + + return n_prefix_bytes + n_pval_bytes + n_suffix_bytes; + }(); + } + }); + } + + for (auto &thread : threads) + thread.join(); + + for (auto i = 0u; i < n_lines; ++i) { + if (n_bytes[i] < 0) + throw std::runtime_error("failed to write to output buffer"); + out.write(bufs[i].data(), n_bytes[i]); + } + + if (show_progress) + progress(table_file); + + if (n_lines < n_threads) + break; + } +} /*********************************************************************** * Run beta-binoimial regression using the specified table with @@ -277,171 +375,88 @@ verify_multiple_levels(const Regression &full_regression, int main_radmeth(int argc, char *argv[]) { try { - static const string description = + static const std::string description = "calculate differential methylation scores"; - string outfile; - string test_factor_name; - bool VERBOSE = false; - bool more_na_info = false; + std::string outfile; + std::string test_factor; + bool verbose{false}; + bool show_progress{false}; + bool more_na_info{false}; + std::uint32_t n_threads{1}; /****************** COMMAND LINE OPTIONS ********************/ OptionParser opt_parse(strip_path(argv[0]), description, " "); opt_parse.set_show_defaults(); - opt_parse.add_opt("out", 'o', "output file (default: stdout)", false, - outfile); - opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE); + opt_parse.add_opt("out", 'o', "output file", true, outfile); + opt_parse.add_opt("threads", 't', "number of threads to use", false, + n_threads); + opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); + opt_parse.add_opt("progress", '\0', "show progress", false, show_progress); opt_parse.add_opt( "na-info", 'n', "if a p-value is not calculated, print NAs in more detail: " "low count (NA_LOW_COV) extreme values (NA_EXTREME_CNT) or " 
"numerical errors in likelihood ratios (NA)", false, more_na_info); - opt_parse.add_opt("factor", 'f', "a factor to test", true, - test_factor_name); - - vector leftover_args; + opt_parse.add_opt("factor", 'f', "a factor to test", true, test_factor); + opt_parse.add_opt("tolerance", '\0', + "numerical tolerance to test convergence", false, + Regression::tolerance); + opt_parse.add_opt("max-iter", '\0', + "max iterations when estimating parameters", false, + Regression::max_iter); + + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { - cerr << opt_parse.help_message() << endl - << opt_parse.about_message() << endl; + std::cerr << opt_parse.help_message() << std::endl + << opt_parse.about_message() << std::endl; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { - cerr << opt_parse.about_message() << endl; + std::cerr << opt_parse.about_message() << std::endl; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - cerr << opt_parse.option_missing_message() << endl; + std::cerr << opt_parse.option_missing_message() << std::endl; return EXIT_SUCCESS; } if (leftover_args.size() != 2) { - cerr << opt_parse.help_message() << endl; + std::cerr << opt_parse.help_message() << std::endl; return EXIT_SUCCESS; } - const string design_filename(leftover_args.front()); - const string table_filename(leftover_args.back()); + const std::string design_filename(leftover_args.front()); + const std::string table_filename(leftover_args.back()); /****************** END COMMAND LINE OPTIONS *****************/ - if (VERBOSE) cerr << "design table filename: " << design_filename << endl; - std::ifstream design_file(design_filename); - if (!design_file) - throw runtime_error("could not open file: " + design_filename); - // initialize full design matrix from file - Regression full_regression; - design_file >> full_regression.design; - - if (VERBOSE) cerr << full_regression.design << endl; + Regression orig_alt_model; 
+ read_design(verbose, design_filename, orig_alt_model.design); // Check that provided test factor name exists and find its index. - // Identify test factors with their indexes to simplify naming - auto test_fact_it = - find(begin(full_regression.design.factor_names), - end(full_regression.design.factor_names), test_factor_name); - - if (test_fact_it == end(full_regression.design.factor_names)) - throw runtime_error(test_factor_name + - " factor not part of design specification"); - - const size_t test_factor = - distance(begin(full_regression.design.factor_names), test_fact_it); + const auto test_factor_idx = + get_test_factor_idx(orig_alt_model, test_factor); // verify that the design includes more than one level for the // test factor - if (!verify_multiple_levels(full_regression, test_factor)) { - const auto first_level = full_regression.design.matrix[0][test_factor]; - throw runtime_error("test factor only one level: " + - test_factor_name + ", " + - std::to_string(first_level)); + if (!has_two_values(orig_alt_model, test_factor_idx)) { + const auto first_level = orig_alt_model.design.matrix[0][test_factor_idx]; + throw std::runtime_error("test factor only one level: " + test_factor + + ", " + std::to_string(first_level)); } - Regression null_regression; - null_regression.design = full_regression.design; - remove_factor(null_regression.design, test_factor); - // ADS: done setup for the model - - // ADS: open the data table file - std::ifstream table_file(table_filename); - if (!table_file) - throw runtime_error("could not open file: " + table_filename); - - // Make sure that the first line of the proportion table file contains - // names of the samples. Throw an exception if the names or their order - // in the proportion table does not match those in the full design matrix. 
- string sample_names_header; - getline(table_file, sample_names_header); - - if (!consistent_sample_names(full_regression, sample_names_header)) - throw runtime_error("header:\n" + sample_names_header + "\n" + - "does not match factor names or their order in the\n" - "design matrix. Check that the design matrix and\n" - "the proportion table are correctly formatted."); - - const size_t n_samples = full_regression.design.n_samples(); - - // ADS: now open the output file because each row is processed - // sequentially - std::ofstream of; - if (!outfile.empty()) of.open(outfile); - std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf()); - - // Performing the log-likelihood ratio test on proportions from each row - // of the proportion table. - while (table_file >> full_regression.props) { - if (full_regression.props.total.size() != n_samples) - throw runtime_error("found row with wrong number of columns"); - - size_t coverage_factor = 0, coverage_rest = 0, meth_factor = 0, - meth_rest = 0; - - for (size_t s = 0; s < n_samples; ++s) { - if (full_regression.design.matrix[s][test_factor] != 0) { - coverage_factor += full_regression.props.total[s]; - meth_factor += full_regression.props.meth[s]; - } - else { - coverage_rest += full_regression.props.total[s]; - meth_rest += full_regression.props.meth[s]; - } - } - - out << full_regression.props.chrom << "\t" - << full_regression.props.position << "\t" - << full_regression.props.strand << "\t" - << full_regression.props.context << "\t"; - - // Do not perform the test if there's no coverage in either all - // case or all control samples. Also do not test if the site is - // completely methylated or completely unmethylated across all - // samples. - if (has_low_coverage(full_regression, test_factor)) { - out << ((more_na_info) ? "NA_LOW_COV" : "NA"); - } - else if (has_extreme_counts(full_regression)) { - out << ((more_na_info) ? 
"NA_EXTREME_CNT" : "NA"); - } - else { - fit_regression_model(full_regression); - null_regression.props = full_regression.props; - fit_regression_model(null_regression); - const double p_value = loglikratio_test(null_regression.max_loglik, - full_regression.max_loglik); - - // If error occured in fitting (p-val = nan or -nan). - if (p_value != p_value) - out << "NA"; - else - out << p_value; - } - out << "\t" << coverage_factor << "\t" << meth_factor << "\t" - << coverage_rest << "\t" << meth_rest << endl; - } + Regression orig_null_model; + orig_null_model.design = orig_alt_model.design; + remove_factor(orig_null_model.design, test_factor_idx); + + radmeth(show_progress, more_na_info, n_threads, table_filename, outfile, + orig_alt_model, orig_null_model, test_factor_idx); } catch (const std::exception &e) { - cerr << "ERROR: " << e.what() << endl; - exit(EXIT_FAILURE); + std::cerr << "ERROR: " << e.what() << std::endl; + return EXIT_FAILURE; } return EXIT_SUCCESS; } diff --git a/src/radmeth/radmeth_model.cpp b/src/radmeth/radmeth_model.cpp new file mode 100644 index 00000000..faa4a8bb --- /dev/null +++ b/src/radmeth/radmeth_model.cpp @@ -0,0 +1,175 @@ +/* Copyright (C) 2025 Andrew D Smith + * + * Authors: Andrew D. Smith + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include "radmeth_model.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +double Regression::tolerance = 1e-3; +double Regression::stepsize = 0.001; +std::uint32_t Regression::max_iter = 250; + +void +SiteProportions::parse(const std::string &line) { + const auto first_ws = line.find_first_of(" \t"); + + // Parse the row name (must be like: "chr:position:strand:context") + bool failed = false; + + const auto label_end = line.data() + first_ws; + auto field_s = line.data(); + auto field_e = std::find(field_s + 1, label_end, ':'); + if (field_e == label_end) + failed = true; + + { + const std::uint32_t d = std::distance(field_s, field_e); + chrom = std::string{field_s, d}; + } + + field_s = field_e + 1; + field_e = std::find(field_s + 1, label_end, ':'); + failed = failed || (field_e == label_end); + + { + const auto [ptr, ec] = std::from_chars(field_s, field_e, position); + failed = failed || (ptr == field_s); + } + + field_s = field_e + 1; + field_e = std::find(field_s + 1, label_end, ':'); + failed = failed || (field_e != field_s + 1 || field_e == label_end); + + strand = *field_s; + failed = failed || (strand != '-' && strand != '+'); + + field_s = field_e + 1; + field_e = std::find_if(field_s + 1, label_end, + [](const auto x) { return x == ' ' || x == '\t'; }); + failed = failed || (field_e != label_end); + + { + const std::uint32_t d = std::distance(field_s, field_e); + context = std::string{field_s, d}; + } + + if (failed) + throw std::runtime_error("failed to parse label from:\n" + line); + + // Parse the counts of total reads and methylated reads + const auto is_sep = [](const auto x) { return std::isspace(x); }; + const auto not_sep = [](const auto x) { return std::isdigit(x); }; + + const auto line_end = line.data() + std::size(line); + mcounts mc1{}; + mc.clear(); + while (field_e != line_end) { + // get the total count + field_s = std::find_if(field_e + 1, line_end, not_sep); + field_e = 
std::find_if(field_s + 1, line_end, is_sep); + { + const auto [ptr, ec] = std::from_chars(field_s, field_e, mc1.n_reads); + failed = failed || (ptr != field_e); + } + + field_s = std::find_if(field_e + 1, line_end, not_sep); + field_e = std::find_if(field_s + 1, line_end, is_sep); + { + const auto [ptr, ec] = std::from_chars(field_s, field_e, mc1.n_meth); + failed = failed || (ptr != field_e); + } + + mc.push_back(mc1); + } + + if (failed) + throw std::runtime_error("failed to parse counts from:\n" + line); +} + +template +static void +transpose(const std::vector> &mat, + std::vector> &tmat) { + const auto n_row = std::size(mat); + const auto n_col = std::size(mat.front()); + tmat.resize(n_col, std::vector(n_row, 0.0)); + for (auto row_idx = 0u; row_idx < n_row; ++row_idx) + for (auto col_idx = 0u; col_idx < n_col; ++col_idx) + tmat[col_idx][row_idx] = mat[row_idx][col_idx]; +} + +std::istream & +operator>>(std::istream &is, Design &design) { + std::string header_encoding; + std::getline(is, header_encoding); + + std::istringstream header_is(header_encoding); + std::string header_name; + while (header_is >> header_name) + design.factor_names.push_back(header_name); + + std::string row; + while (std::getline(is, row)) { + if (row.empty()) + continue; + + std::istringstream row_is(row); + std::string token; + row_is >> token; + design.sample_names.push_back(token); + + std::vector matrix_row; + while (row_is >> token) { + if (std::size(token) != 1 || (token != "0" && token != "1")) + throw std::runtime_error("Must use binary factor levels:\n" + row); + matrix_row.push_back(token == "1"); + } + + if (std::size(matrix_row) != design.n_factors()) + throw std::runtime_error( + "each row must have as many columns as factors:\n" + row); + + design.matrix.push_back(matrix_row); + } + + transpose(design.matrix, design.tmatrix); + + return is; +} + +std::ostream & +operator<<(std::ostream &out, const Design &design) { + for (std::size_t factor = 0; factor < 
design.factor_names.size(); ++factor) { + if (factor != 0) + out << '\t'; + out << design.factor_names[factor]; + } + out << '\n'; + + for (std::size_t i = 0; i < design.n_samples(); ++i) { + out << design.sample_names[i]; + for (std::size_t j = 0; j < design.n_factors(); ++j) + out << "\t" << design.matrix[i][j]; + out << "\n"; + } + return out; +} diff --git a/src/radmeth/radmeth_model.hpp b/src/radmeth/radmeth_model.hpp index 0402a269..d7a01d63 100644 --- a/src/radmeth/radmeth_model.hpp +++ b/src/radmeth/radmeth_model.hpp @@ -19,32 +19,78 @@ #ifndef RADMETH_MODEL_HPP #define RADMETH_MODEL_HPP +#include +#include +#include #include #include struct Design { std::vector factor_names; std::vector sample_names; - std::vector > matrix; - size_t n_factors() const {return factor_names.size();} - size_t n_samples() const {return sample_names.size();} + std::vector> matrix; + std::vector> tmatrix; + std::size_t + n_factors() const { + return factor_names.size(); + } + std::size_t + n_samples() const { + return sample_names.size(); + } }; +std::istream & +operator>>(std::istream &is, Design &design); + +std::ostream & +operator<<(std::ostream &os, const Design &design); + +struct mcounts { + std::uint32_t n_reads{}; + std::uint32_t n_meth{}; +}; + +[[nodiscard]] inline std::istream & +operator>>(std::istream &in, mcounts &rm) { + return in >> rm.n_reads >> rm.n_meth; +} + struct SiteProportions { std::string chrom; - size_t position; - std::string strand; + std::size_t position{}; + char strand{}; std::string context; - std::vector total; - std::vector meth; + std::vector mc; + + void + parse(const std::string &line); }; struct Regression { + static double tolerance; // 1e-4; + static double stepsize; // 0.001; + static std::uint32_t max_iter; // 700; + Design design; SiteProportions props; - double max_loglik; - size_t n_factors() const {return design.n_factors();} - size_t n_samples() const {return design.n_samples();} + double max_loglik{}; + std::vector p_v; // scratch 
space + + [[nodiscard]] std::size_t + n_factors() const { + return design.n_factors(); + } + + [[nodiscard]] std::size_t + props_size() const { + return std::size(props.mc); + } + + [[nodiscard]] std::size_t + n_samples() const { + return design.n_samples(); + } }; #endif diff --git a/src/radmeth/radmeth_optimize.cpp b/src/radmeth/radmeth_optimize.cpp index b1942328..8affe87e 100644 --- a/src/radmeth/radmeth_optimize.cpp +++ b/src/radmeth/radmeth_optimize.cpp @@ -16,184 +16,221 @@ * General Public License for more details. */ +#include "radmeth_optimize.hpp" + #include "radmeth_model.hpp" #include #include +#include +#include +#include #include #include -using std::vector; -using std::runtime_error; - -static double -pi(Regression *reg, size_t sample, const gsl_vector *params) { +template +[[nodiscard]] static double +pi(const std::vector &v, const gsl_vector *params) { // ADS: this function doesn't have a very helpful name - double dot_prod = 0; - for (size_t fact = 0; fact < reg->design.n_factors(); ++fact) - dot_prod += reg->design.matrix[sample][fact]*gsl_vector_get(params, fact); - - double p = exp(dot_prod)/(1 + exp(dot_prod)); - - return p; + const auto a = v.data(); + const double dot = std::inner_product(a, a + std::size(v), params->data, 0.0); + // const double p = std::exp(dot) / (1.0 + std::exp(dot)); + return 1.0 / (1.0 / std::exp(dot) + 1.0); } - -static double +[[nodiscard]] static double neg_loglik(const gsl_vector *params, void *object) { Regression *reg = (Regression *)(object); - const size_t n_params = reg->design.factor_names.size() + 1; - - double log_lik = 0; // ADS: the dispersion parameter phi is the last element of // parameter vector - const double disp_param = gsl_vector_get(params, n_params - 1); - const double phi = exp(disp_param)/(1.0 + exp(disp_param)); + const double disp_param = gsl_vector_get(params, reg->design.n_factors()); + // const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param)); + const double phi = 1.0 / 
(1.0 / std::exp(disp_param) + 1.0); + const double one_minus_phi = 1.0 - phi; - for(size_t s = 0; s < reg->design.n_samples(); ++s) { - const double n_s = reg->props.total[s]; - const double y_s = reg->props.meth[s]; - const double p_s = pi(reg, s, params); + const auto &mc = reg->props.mc; + const auto &mat = reg->design.matrix; - for (int k = 0; k < y_s; ++k) - log_lik += log((1 - phi)*p_s + phi*k); - - for (int k = 0; k < n_s - y_s; ++k) - log_lik += log((1 - phi)*(1 - p_s) + phi*k); - - for (int k = 0; k < n_s; ++k) - log_lik -= log(1.0 + phi*(k - 1)); + double log_lik = 0; + const std::size_t n_samples = reg->design.n_samples(); + for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) { + const auto n = mc[col_idx].n_reads; + const auto y = mc[col_idx].n_meth; + const double p = pi(mat[col_idx], params); + const double one_minus_p = 1.0 - p; + + const double term1 = one_minus_phi * p; + for (auto k = 0u; k < y; ++k) + log_lik += std::log(term1 + phi * k); + + const double term2 = one_minus_phi * one_minus_p; + for (auto k = 0u; k < n - y; ++k) + log_lik += std::log(term2 + phi * k); + + for (auto k = 0u; k < n; ++k) + // log_lik -= std::log(1.0 + phi * (k - 1.0)); + log_lik -= std::log1p(phi * (k - 1.0)); } - return -log_lik; } - static void -neg_gradient(const gsl_vector *params, void *object, - gsl_vector *output) { - +neg_gradient(const gsl_vector *params, void *object, gsl_vector *output) { Regression *reg = (Regression *)(object); - const size_t n_params = reg->design.n_factors() + 1; + const std::size_t n_samples = reg->design.n_samples(); + const std::size_t n_factors = reg->design.n_factors(); - const double disp_param = gsl_vector_get(params, n_params - 1); + /// ADS: using scratch space held in reg instead of allocating here + // std::vector p_v(n_samples, 0.0); + auto &p_v = reg->p_v; + for (auto col_idx = 0u; col_idx < n_samples; ++col_idx) + p_v[col_idx] = pi(reg->design.matrix[col_idx], params); - const double phi = exp(disp_param)/(1 + 
exp(disp_param)); + const double disp_param = gsl_vector_get(params, n_factors); - for(size_t f = 0; f < n_params; ++f) { + const auto &tmat = reg->design.tmatrix; + // const double phi = std::exp(disp_param) / (1.0 + std::exp(disp_param)); + const double phi = 1.0 / (1.0 / std::exp(disp_param) + 1.0); + const double one_minus_phi = 1.0 - phi; - double deriv = 0; + const auto &mc = reg->props.mc; - for(size_t s = 0; s < reg->design.n_samples(); ++s) { - int n_s = reg->props.total[s]; - int y_s = reg->props.meth[s]; - double p_s = pi(reg, s, params); + for (std::size_t factor_idx = 0; factor_idx < n_factors; ++factor_idx) { + double deriv = 0; + const auto &vec = tmat[factor_idx]; + for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) { + const auto n = mc[col_idx].n_reads; + const auto y = mc[col_idx].n_meth; + const double p = p_v[col_idx]; + const double one_minus_p = 1.0 - p; + + const double denom_term1 = one_minus_phi * p; + const double factor = denom_term1 * one_minus_p * vec[col_idx]; + if (factor == 0) + continue; double term = 0; - //a parameter linked to p - if(f < reg->design.factor_names.size()) { - double factor = (1 - phi)*p_s*(1 - p_s)*reg->design.matrix[s][f]; - if (factor == 0) continue; - - for(int k = 0; k < y_s; ++k) - term += 1/((1 - phi)*p_s + phi*k); - - for(int k = 0; k < n_s - y_s; ++k) - term -= 1/((1 - phi)*(1 - p_s) + phi*k); + const auto accum_term = [&](auto k, const auto lim, + const double denom_base) { + for (; k < lim; ++k) + term += 1.0 / (denom_base + phi * k); + }; - deriv += term*factor; - } else { // the parameter linked to phi - for(int k = 0; k < y_s; ++k) - term += (k - p_s)/((1 - phi)*p_s + phi*k); + const auto accum_subtract_term = [&](auto k, const auto lim, + const double denom_base) { + for (; k < lim; ++k) + term -= 1.0 / (denom_base + phi * k); + }; - for(int k = 0; k < n_s - y_s; ++k) - term += (k - (1 - p_s))/((1 - phi)*(1 - p_s) + phi*k); + accum_term(0u, y, denom_term1); + accum_subtract_term(0u, n - y, 
one_minus_phi * one_minus_p); - for(int k = 0; k < n_s; ++k) { - term -= (k - 1)/(1 + phi*(k - 1)); - } - - deriv += term * phi * (1 - phi); - } + deriv += term * factor; } - - gsl_vector_set(output, f, deriv); + gsl_vector_set(output, factor_idx, deriv); } + double deriv = 0; + for (std::size_t col_idx = 0; col_idx < n_samples; ++col_idx) { + const auto n = mc[col_idx].n_reads; + const auto y = mc[col_idx].n_meth; + const double p = p_v[col_idx]; + const double one_minus_p = 1.0 - p; + + double term = 0; + const auto accum_term = [&](auto k, const auto lim, const double num_shift, + const double denom_base) { + for (; k < lim; ++k) + term += (k - num_shift) / (denom_base + phi * k); + }; + const auto accum_subtract_term = [&](auto k, const auto lim) { + for (; k < lim; ++k) + term -= (k - 1.0) / (1.0 + phi * (k - 1.0)); + }; + + accum_term(0u, y, p, one_minus_phi * p); + accum_term(0u, n - y, one_minus_p, one_minus_phi * one_minus_p); + accum_subtract_term(0u, n); + + deriv += term * phi * one_minus_phi; + } + gsl_vector_set(output, n_factors, deriv); gsl_vector_scale(output, -1.0); } static void -neg_loglik_and_grad(const gsl_vector *params, - void *object, - double *loglik_val, +neg_loglik_and_grad(const gsl_vector *params, void *object, double *loglik_val, gsl_vector *d_loglik_val) { - *loglik_val = neg_loglik(params, object); neg_gradient(params, object, d_loglik_val); } - bool -fit_regression_model(Regression &r, vector &initial_params) { +fit_regression_model(Regression &r, std::vector ¶ms_init) { + const auto stepsize = Regression::stepsize; + const auto max_iter = Regression::max_iter; // one more than the number of factors - const size_t n_params = r.n_factors() + 1; - if (initial_params.empty()) { - initial_params.resize(n_params, 0.0); - initial_params.back() = -2.5; + const std::size_t n_params = r.n_factors() + 1; + if (params_init.empty()) { + params_init.resize(n_params, 0.0); + params_init.back() = -2.5; } - if (initial_params.size() != n_params) - 
throw runtime_error("Wrong number of initial parameters."); + r.p_v.resize(r.n_samples()); - int status = 0; + const double tolerance = + std::sqrt(n_params) * r.n_samples() * Regression::tolerance; - size_t iter = 0; + if (params_init.size() != n_params) + throw std::runtime_error("Wrong number of initial parameters."); - { - gsl_multimin_function_fdf loglik_bundle; - loglik_bundle.f = &neg_loglik; - loglik_bundle.df = &neg_gradient; - loglik_bundle.fdf = &neg_loglik_and_grad; - loglik_bundle.n = n_params; - loglik_bundle.params = (void *)&r; + // clang-format off + gsl_multimin_function_fdf loglik_bundle = { + &neg_loglik, // objective function + &neg_gradient, // gradient + &neg_loglik_and_grad, // combined obj and grad + n_params, // number of model params + static_cast<void *>(&r) // parameters for objective and gradient functions + }; + // clang-format on - gsl_vector *params = gsl_vector_alloc(n_params); + // Alternatives: + // - gsl_multimin_fdfminimizer_conjugate_pr + // - gsl_multimin_fdfminimizer_conjugate_fr + // - gsl_multimin_fdfminimizer_vector_bfgs2 + // - gsl_multimin_fdfminimizer_steepest_descent + const gsl_multimin_fdfminimizer_type *minimizer = + gsl_multimin_fdfminimizer_conjugate_fr; - for (size_t param = 0; param < initial_params.size(); ++param) - gsl_vector_set(params, param, initial_params[param]); + gsl_multimin_fdfminimizer *s = + gsl_multimin_fdfminimizer_alloc(minimizer, n_params); - //can also try gsl_multimin_fdfminimizer_conjugate_pr; - const gsl_multimin_fdfminimizer_type *T = gsl_multimin_fdfminimizer_conjugate_fr; + gsl_vector *params = gsl_vector_alloc(n_params); + for (std::size_t i = 0; i < n_params; ++i) + gsl_vector_set(params, i, params_init[i]); - gsl_multimin_fdfminimizer *s = gsl_multimin_fdfminimizer_alloc (T, n_params); + gsl_multimin_fdfminimizer_set(s, &loglik_bundle, params, stepsize, tolerance); - // ADS: 0.001 is the step size and 1e-4 is the tolerance - // get the minimizer - gsl_multimin_fdfminimizer_set(s, 
&loglik_bundle, params, 0.001, 1e-4); + int status = 0; + std::size_t iter = 0; - do { - iter++; - // do one iteration and get the status - status = gsl_multimin_fdfminimizer_iterate(s); - if (status) - break; - // check the status based on gradient - status = gsl_multimin_test_gradient (s->gradient, 1e-4); - } - while (status == GSL_CONTINUE && iter < 700); - // It it reasonable to reduce the number of iterations to 500? - // ADS: 700 vs. 500? what's the difference? + do { + status = gsl_multimin_fdfminimizer_iterate(s); // one iter and get status + if (status) + break; + // check status from gradient + status = gsl_multimin_test_gradient(s->gradient, tolerance); + } while (status == GSL_CONTINUE && ++iter < max_iter); + // ADS: What is a reasonable number of iterations? - r.max_loglik = (-1)*neg_loglik(s->x, &r); + r.max_loglik = -1.0 * neg_loglik(s->x, &r); - gsl_multimin_fdfminimizer_free(s); - gsl_vector_free(params); - } + gsl_multimin_fdfminimizer_free(s); + gsl_vector_free(params); return status == GSL_SUCCESS; } diff --git a/src/radmeth/radmeth_optimize.hpp b/src/radmeth/radmeth_optimize.hpp index 2bf115de..16f4a061 100644 --- a/src/radmeth/radmeth_optimize.hpp +++ b/src/radmeth/radmeth_optimize.hpp @@ -19,10 +19,11 @@ #ifndef RADMETH_OPTIMIZE_HPP #define RADMETH_OPTIMIZE_HPP +#include <vector> + struct Regression; bool fit_regression_model(Regression &r, std::vector<double> &initial_params); - #endif diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp index 43b288a7..b0389ac4 100644 --- a/src/utils/selectsites.cpp +++ b/src/utils/selectsites.cpp @@ -294,6 +294,7 @@ main_selectsites(int argc, char *argv[]) { bool VERBOSE = false; bool keep_file_on_disk = false; bool compress_output = false; + bool allow_extra_fields = false; std::string outfile("-"); std::string summary_file; @@ -315,6 +316,8 @@ main_selectsites(int argc, char *argv[]) { opt_parse.add_opt("summary", 'S', "write summary to this file", false, summary_file); opt_parse.add_opt("zip", 'z', 
"output gzip format", false, compress_output); + opt_parse.add_opt("relaxed", '\0', "input has extra fields", false, + allow_extra_fields); opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE); opt_parse.set_show_defaults(); std::vector leftover_args; @@ -340,6 +343,8 @@ main_selectsites(int argc, char *argv[]) { const std::string sites_file = leftover_args.back(); /****************** END COMMAND LINE OPTIONS *****************/ + MSite::no_extra_fields = (allow_extra_fields == false); + selectsites_summary summary; summary.command_line = get_command_line(argc, argv);