use std::ptr::Unique;
use std::{cmp, mem};
use cast;
use floaty::Floaty;
use num_cpus;
use thread_scoped as thread;
use tuple::{Tuple, TupledDistributions};
use univariate::Percentiles;
use univariate::resamples::Resamples;
pub struct Sample<A>([A]);
impl<A> Sample<A> {
pub fn as_slice(&self) -> &[A] {
unsafe {
mem::transmute(self)
}
}
}
impl<A> Sample<A> where A: Floaty {
pub fn new(slice: &[A]) -> &Sample<A> {
assert!(
slice.len() > 1 &&
slice.iter().all(|x| !x.is_nan())
);
unsafe {
mem::transmute(slice)
}
}
pub fn max(&self) -> A {
let mut elems = self.as_slice().iter();
match elems.next() {
Some(&head) => elems.fold(head, |a, &b| a.max(b)),
None => unreachable!(),
}
}
pub fn mean(&self) -> A {
let n = self.as_slice().len();
self.sum() / A::cast(n)
}
pub fn median_abs_dev(&self, median: Option<A>) -> A where
usize: cast::From<A, Output=Result<usize, cast::Error>>,
{
let median = median.unwrap_or_else(|| self.percentiles().median());
let abs_devs = self.as_slice().iter().map(|&x| (x - median).abs()).collect::<Vec<_>>();
let abs_devs: &Sample<A> = unsafe {
mem::transmute(&*abs_devs)
};
abs_devs.percentiles().median() * A::cast(1.4826)
}
pub fn median_abs_dev_pct(&self) -> A where
usize: cast::From<A, Output=Result<usize, cast::Error>>,
{
let _100 = A::cast(100);
let median = self.percentiles().median();
let mad = self.median_abs_dev(Some(median));
(mad / median) * _100
}
pub fn min(&self) -> A {
let mut elems = self.as_slice().iter();
match elems.next() {
Some(&elem) => elems.fold(elem, |a, &b| a.min(b)),
None => unreachable!(),
}
}
pub fn percentiles(&self) -> Percentiles<A> where
usize: cast::From<A, Output=Result<usize, cast::Error>>,
{
use std::cmp::Ordering;
fn cmp<T>(a: &T, b: &T) -> Ordering where T: PartialOrd {
if a < b {
Ordering::Less
} else if a == b {
Ordering::Equal
} else {
Ordering::Greater
}
}
let mut v = self.as_slice().to_vec().into_boxed_slice();
v.sort_by(cmp);
unsafe {
mem::transmute(v)
}
}
pub fn std_dev(&self, mean: Option<A>) -> A {
self.var(mean).sqrt()
}
pub fn std_dev_pct(&self) -> A {
let _100 = A::cast(100);
let mean = self.mean();
let std_dev = self.std_dev(Some(mean));
(std_dev / mean) * _100
}
pub fn sum(&self) -> A {
::sum(self.as_slice())
}
pub fn t(&self, other: &Sample<A>) -> A {
let (x_bar, y_bar) = (self.mean(), other.mean());
let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar)));
let n_x = A::cast(self.as_slice().len());
let n_y = A::cast(other.as_slice().len());
let num = x_bar - y_bar;
let den = (s2_x / n_x + s2_y / n_y).sqrt();
num / den
}
pub fn var(&self, mean: Option<A>) -> A {
use std::ops::Add;
let mean = mean.unwrap_or_else(|| self.mean());
let slice = self.as_slice();
let sum = slice.iter().map(|&x| (x - mean).powi(2)).fold(A::cast(0), Add::add);
sum / A::cast(slice.len() - 1)
}
pub fn bootstrap<T, S>(
&self,
nresamples: usize,
statistic: S,
) -> T::Distributions where
S: Fn(&Sample<A>) -> T,
S: Sync,
T: Tuple,
T::Distributions: Send,
{
let ncpus = num_cpus::get();
unsafe {
if ncpus > 1 && nresamples > self.as_slice().len() {
let granularity = nresamples / ncpus + 1;
let statistic = &statistic;
let mut distributions: T::Distributions =
TupledDistributions::uninitialized(nresamples);
let _ = (0..ncpus).map(|i| {
let mut ptr = Unique::new_unchecked(&mut distributions);
let mut resamples = Resamples::new(self);
let offset = i * granularity;
thread::scoped(move || {
let distributions: &mut T::Distributions = ptr.as_mut();
for i in offset..cmp::min(offset + granularity, nresamples) {
distributions.set_unchecked(i, statistic(resamples.next()))
}
})
}).collect::<Vec<_>>();
distributions
} else {
let mut distributions: T::Distributions =
TupledDistributions::uninitialized(nresamples);
let mut resamples = Resamples::new(self);
for i in 0..nresamples {
distributions.set_unchecked(i, statistic(resamples.next()));
}
distributions
}
}
}
#[cfg(test)]
pub fn iqr(&self) -> A where
usize: cast::From<A, Output=Result<usize, cast::Error>>,
{
self.percentiles().iqr()
}
#[cfg(test)]
pub fn median(&self) -> A where
usize: cast::From<A, Output=Result<usize, cast::Error>>,
{
self.percentiles().median()
}
}
#[cfg(test)]
mod test {
macro_rules! stat {
($ty:ident <- $($stat:ident),+) => {
$(
#[quickcheck]
fn $stat(size: usize, start: usize) -> TestResult {
if let Some(v) = ::test::vec::<$ty>(size, start) {
let slice = &v[start..];
let lhs = ::univariate::Sample::new(slice).$stat();
let rhs = slice.$stat();
TestResult::from_bool(relative_eq!(lhs, rhs, max_relative = 1e-5))
} else {
TestResult::discard()
}
}
)+
}
}
macro_rules! stat_none {
($ty:ident <- $($stat:ident),+) => {
$(
#[quickcheck]
fn $stat(size: usize, start: usize) -> TestResult {
if let Some(v) = ::test::vec::<$ty>(size, start) {
let slice = &v[start..];
let lhs = ::univariate::Sample::new(slice).$stat(None);
let rhs = slice.$stat();
TestResult::from_bool(relative_eq!(lhs, rhs, max_relative = 1e-5))
} else {
TestResult::discard()
}
}
)+
}
}
macro_rules! fast_stat {
($ty:ident <- $(($stat:ident, $aux_stat:ident)),+) => {
$(
#[quickcheck]
fn $stat(size: usize, start: usize) -> TestResult {
if let Some(v) = ::test::vec::<$ty>(size, start) {
let slice = &v[start..];
let lhs = {
let s = ::univariate::Sample::new(slice);
s.$stat(Some(s.$aux_stat()))
};
let rhs = slice.$stat();
TestResult::from_bool(relative_eq!(lhs, rhs, max_relative = 1e-5))
} else {
TestResult::discard()
}
}
)+
}
}
macro_rules! test {
($ty:ident) => {
mod $ty {
use stdtest::stats::Stats;
use quickcheck::TestResult;
stat!($ty <- iqr, max, mean, median, median_abs_dev_pct, min,
std_dev_pct, sum);
stat_none!($ty <- median_abs_dev, std_dev, var);
mod fast {
use stdtest::stats::Stats;
use quickcheck::TestResult;
fast_stat!($ty <- (median_abs_dev, median), (std_dev, mean), (var, mean));
}
}
}
}
test!(f64);
}
#[cfg(test)]
mod bench {
macro_rules! std_stat {
($ty:ident <- $($stat:ident),+) => {
$(
#[bench]
fn $stat(b: &mut Bencher) {
let v = ::bench::vec::<$ty>();
b.iter(|| v.$stat());
}
)+
}
}
macro_rules! stat {
($ty:ident <- $($stat:ident),+) => {
$(
#[bench]
fn $stat(b: &mut Bencher) {
let v = ::bench::vec::<$ty>();
let s = ::univariate::Sample::new(&v);
b.iter(|| s.$stat());
}
)+
}
}
macro_rules! stat_none {
($ty:ident <- $($stat:ident),+) => {
$(
#[bench]
fn $stat(b: &mut Bencher) {
let v = ::bench::vec::<$ty>();
let s = ::univariate::Sample::new(&v);
b.iter(|| s.$stat(None));
}
)+
}
}
macro_rules! fast_stat {
($ty:ident <- $(($stat:ident, $aux_stat:ident)),+) => {
$(
#[bench]
fn $stat(b: &mut Bencher) {
let v = ::bench::vec::<$ty>();
let s = ::univariate::Sample::new(&v);
let aux = Some(s.$aux_stat());
b.iter(|| s.$stat(aux));
}
)+
}
}
macro_rules! bench {
($ty:ident) => {
mod $ty {
use stdtest::Bencher;
stat!($ty <- iqr, max, mean, median, median_abs_dev_pct, min, std_dev_pct, sum);
stat_none!($ty <- median_abs_dev, std_dev, var);
mod fast {
use stdtest::Bencher;
fast_stat!($ty <- (median_abs_dev, median), (std_dev, mean), (var, mean));
}
mod std {
use stdtest::Bencher;
use stdtest::stats::Stats;
std_stat!($ty <- iqr, max, mean, median, median_abs_dev, median_abs_dev_pct,
min, std_dev, std_dev_pct, sum, var);
}
}
}
}
bench!(f64);
}