Next: , Previous: , Up: Running Statistics   [Index]


22.5 Examples

Here is a basic example of how to use the statistical functions:

#include <stdio.h>
#include <gsl/gsl_rstat.h>

int
main(void)
{
  double data[5] = {17.2, 18.1, 16.5, 18.3, 12.6};
  double mean, variance, largest, smallest;
  gsl_rstat_workspace *rstat_p = gsl_rstat_alloc();
  size_t i;

  /* add data to rstat accumulator */
  for (i = 0; i < 5; ++i)
    gsl_rstat_add(data[i], rstat_p);

  mean     = gsl_rstat_mean(rstat_p);
  variance = gsl_rstat_variance(rstat_p);
  largest  = gsl_rstat_max(rstat_p);
  smallest = gsl_rstat_min(rstat_p);

  printf ("The dataset is %g, %g, %g, %g, %g\n",
         data[0], data[1], data[2], data[3], data[4]);

  printf ("The sample mean is %g\n", mean);
  printf ("The estimated variance is %g\n", variance);
  printf ("The largest value is %g\n", largest);
  printf ("The smallest value is %g\n", smallest);

  gsl_rstat_free(rstat_p);

  return 0;
}

The program should produce the following output,

The dataset is 17.2, 18.1, 16.5, 18.3, 12.6
The sample mean is 16.54
The estimated variance is 5.373
The largest value is 18.3
The smallest value is 12.6

This next program estimates the lower quartile, median and upper quartile from 10,000 samples of a random Rayleigh distribution, using the P^2 algorithm of Jain and Chlamtec. For comparison, the exact values are also computed from the sorted dataset.

#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_rstat.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sort.h>

int
main(void)
{
  const size_t N = 10000;
  double *data = malloc(N * sizeof(double));
  gsl_rstat_quantile_workspace *work_25 = gsl_rstat_quantile_alloc(0.25);
  gsl_rstat_quantile_workspace *work_50 = gsl_rstat_quantile_alloc(0.5);
  gsl_rstat_quantile_workspace *work_75 = gsl_rstat_quantile_alloc(0.75);
  gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
  double exact_p25, exact_p50, exact_p75;
  double val_p25, val_p50, val_p75;
  size_t i;

  /* add data to quantile accumulators; also store data for exact
   * comparisons */
  for (i = 0; i < N; ++i)
    {
      data[i] = gsl_ran_rayleigh(r, 1.0);
      gsl_rstat_quantile_add(data[i], work_25);
      gsl_rstat_quantile_add(data[i], work_50);
      gsl_rstat_quantile_add(data[i], work_75);
    }

  /* exact values */
  gsl_sort(data, 1, N);
  exact_p25 = gsl_stats_quantile_from_sorted_data(data, 1, N, 0.25);
  exact_p50 = gsl_stats_quantile_from_sorted_data(data, 1, N, 0.5);
  exact_p75 = gsl_stats_quantile_from_sorted_data(data, 1, N, 0.75);

  /* estimated values */
  val_p25 = gsl_rstat_quantile_get(work_25);
  val_p50 = gsl_rstat_quantile_get(work_50);
  val_p75 = gsl_rstat_quantile_get(work_75);

  printf ("The dataset is %g, %g, %g, %g, %g, ...\n",
         data[0], data[1], data[2], data[3], data[4]);

  printf ("0.25 quartile: exact = %.5f, estimated = %.5f, error = %.6e\n",
          exact_p25, val_p25, (val_p25 - exact_p25) / exact_p25);
  printf ("0.50 quartile: exact = %.5f, estimated = %.5f, error = %.6e\n",
          exact_p50, val_p50, (val_p50 - exact_p50) / exact_p50);
  printf ("0.75 quartile: exact = %.5f, estimated = %.5f, error = %.6e\n",
          exact_p75, val_p75, (val_p75 - exact_p75) / exact_p75);

  gsl_rstat_quantile_free(work_25);
  gsl_rstat_quantile_free(work_50);
  gsl_rstat_quantile_free(work_75);
  gsl_rng_free(r);
  free(data);

  return 0;
}

The program should produce the following output,

The dataset is 0.00645272, 0.0074002, 0.0120706, 0.0207256, 0.0227282, ...
0.25 quartile: exact = 0.75766, estimated = 0.75580, error = -2.450209e-03
0.50 quartile: exact = 1.17508, estimated = 1.17438, error = -5.995912e-04
0.75 quartile: exact = 1.65347, estimated = 1.65696, error = 2.110571e-03

Next: , Previous: , Up: Running Statistics   [Index]