aboutsummaryrefslogtreecommitdiff
path: root/usr.bin/ministat
diff options
context:
space:
mode:
author: Colin Percival <cperciva@FreeBSD.org> 2016-11-05 06:33:39 +0000
committer: Colin Percival <cperciva@FreeBSD.org> 2016-11-05 06:33:39 +0000
commit: a304ad90e9ae16b7b4e135e84fc347d8bdd844af (patch)
tree: ecbd0e75459d9d58df2f03595ddbf5f4cc912d79 /usr.bin/ministat
parent: b780b03cbefa2bcbc9a39005e8544aeb304554a1 (diff)
download: src-a304ad90e9ae16b7b4e135e84fc347d8bdd844af.tar.gz
download: src-a304ad90e9ae16b7b4e135e84fc347d8bdd844af.zip
Reduce the bogosity of ministat's % difference calculations.
The previous calculation used an approximation which was only valid in cases where the means being compared were similar; this resulted in very odd claims being made, e.g. that 0 +/- 0 is a difference of -100% +/- 1% from 100 +/- 1. The new calculation scales sample standard deviations by the means, and yields approximately correct percentage difference bounds provided that the reference population is bounded away from zero. (In the case where the values being compared are not sufficiently bounded away from zero, the distribution of ratios becomes much harder to calculate, and is not likely to be useful anyway.) Note that when ministat is used for its intended purpose of determining whether two samples are statistically different, this change is unlikely to have any noticeable effect; in such cases the means will be similar enough that the correction applied here will be minimal.
Notes
Notes: svn path=/head/; revision=308329
Diffstat (limited to 'usr.bin/ministat')
-rw-r--r-- usr.bin/ministat/ministat.c 8
1 files changed, 7 insertions, 1 deletions
diff --git a/usr.bin/ministat/ministat.c b/usr.bin/ministat/ministat.c
index a7f7182ce420..02b5025a436c 100644
--- a/usr.bin/ministat/ministat.c
+++ b/usr.bin/ministat/ministat.c
@@ -232,6 +232,7 @@ static void
Relative(struct dataset *ds, struct dataset *rs, int confidx)
{
double spool, s, d, e, t;
+ double re;
int i;
i = ds->n + rs->n - 2;
@@ -246,11 +247,16 @@ Relative(struct dataset *ds, struct dataset *rs, int confidx)
d = Avg(ds) - Avg(rs);
e = t * s;
+ re = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs) *
+ (Avg(ds) * Avg(ds)) / (Avg(rs) * Avg(rs));
+ re *= (ds->n + rs->n) / (ds->n * rs->n * (ds->n + rs->n - 2.0));
+ re = t * sqrt(re);
+
if (fabs(d) > e) {
printf("Difference at %.1f%% confidence\n", studentpct[confidx]);
printf(" %g +/- %g\n", d, e);
- printf(" %g%% +/- %g%%\n", d * 100 / Avg(rs), e * 100 / Avg(rs));
+ printf(" %g%% +/- %g%%\n", d * 100 / Avg(rs), re * 100 / Avg(rs));
printf(" (Student's t, pooled s = %g)\n", spool);
} else {
printf("No difference proven at %.1f%% confidence\n",