* $data1 = array (1,2,1,1,1,1,3,3,4.1,3,2,2,4.1,1,1,2,3,3,2,2,1,1,2,2); *
* $data2 = array('1'=>9, '2'=>8, '3'=>5, '4.1'=>2); *
* include_once 'Math/Stats.php'; * $s = new Math_Stats(); * $s->setData($data1); * // or * // $s->setData($data2, STATS_DATA_CUMMULATIVE); * $stats = $s->calcBasic(); * echo 'Mean: '.$stats['mean'].' StDev: '.$stats['stdev']." \n"; * * // using data with nulls * // first ignoring them: * $data3 = array(1.2, 'foo', 2.4, 3.1, 4.2, 3.2, null, 5.1, 6.2); * $s->setNullOption(STATS_IGNORE_NULL); * $s->setData($data3); * $stats3 = $s->calcFull(); * * // and then assuming nulls == 0 * $s->setNullOption(STATS_USE_NULL_AS_ZERO); * $s->setData($data3); * $stats3 = $s->calcFull(); *
)
     *
     * where: P = percentile
     *
     * Only values lying in the closed interval [P(25), P(75)] contribute
     * to the mean. The result is cached in $this->_calculatedValues.
     *
     * @todo need to double check the equation
     * @access public
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     *               (quartiles could not be computed, or no value falls
     *               inside the interquartile interval)
     * @see quartiles()
     */
    function interquartileMean() {/*{{{*/
        if (!array_key_exists('interquartileMean', $this->_calculatedValues)) {
            $quart = $this->quartiles();
            if (PEAR::isError($quart)) {
                return $quart;
            }
            $q3 = $quart['75'];
            $q1 = $quart['25'];
            $sum = 0;
            $n = 0;
            foreach ($this->getData(true) as $val) {
                if ($val >= $q1 && $val <= $q3) {
                    $sum += $val;
                    ++$n;
                }
            }
            if ($n == 0) {
                return PEAR::raiseError('error calculating interquartile mean, '.
                                        'empty interquartile range of values.');
            }
            $this->_calculatedValues['interquartileMean'] = $sum / $n;
        }
        return $this->_calculatedValues['interquartileMean'];
    }/*}}}*/

    /**
     * The interquartile range is the distance between the 75th and 25th
     * percentiles. Basically the range of the middle 50% of the data set,
     * and thus is not affected by outliers or extreme values.
     *
     *  interquart range = P(75) - P(25)
     *
     *  where: P = percentile
     *
     * The result is cached in $this->_calculatedValues.
     *
     * @access public
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     * @see quartiles()
     */
    function interquartileRange() {/*{{{*/
        if (!array_key_exists('interquartileRange', $this->_calculatedValues)) {
            $quart = $this->quartiles();
            if (PEAR::isError($quart)) {
                return $quart;
            }
            $q3 = $quart['75'];
            $q1 = $quart['25'];
            $this->_calculatedValues['interquartileRange'] = $q3 - $q1;
        }
        return $this->_calculatedValues['interquartileRange'];
    }/*}}}*/

    /**
     * The quartile deviation is half of the interquartile range value
     *
     *  quart dev = (P(75) - P(25)) / 2
     *
     *  where: P = percentile
     *
     * The result is cached in $this->_calculatedValues.
     *
     * @access public
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     * @see quartiles()
     * @see interquartileRange()
     */
    function quartileDeviation() {/*{{{*/
        if (!array_key_exists('quartileDeviation', $this->_calculatedValues)) {
            $iqr = $this->interquartileRange();
            if (PEAR::isError($iqr)) {
                return $iqr;
            }
            $this->_calculatedValues['quartileDeviation'] = $iqr / 2;
        }
        return $this->_calculatedValues['quartileDeviation'];
    }/*}}}*/

    /**
     * The quartile variation coefficient is defined as follows:
     *
     *  quart var coeff = 100 * (P(75) - P(25)) / (P(75) + P(25))
     *
     *  where: P = percentile
     *
     * The coefficient is undefined when P(75) + P(25) is zero; a PEAR_Error
     * is returned in that case instead of dividing by zero. The result is
     * cached in $this->_calculatedValues.
     *
     * @todo need to double check the equation
     * @access public
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     * @see quartiles()
     */
    function quartileVariationCoefficient() {/*{{{*/
        if (!array_key_exists('quartileVariationCoefficient', $this->_calculatedValues)) {
            $quart = $this->quartiles();
            if (PEAR::isError($quart)) {
                return $quart;
            }
            $q3 = $quart['75'];
            $q1 = $quart['25'];
            $d = $q3 - $q1;
            $s = $q3 + $q1;
            // guard the denominator: e.g. quartiles symmetric around zero
            if ($s == 0) {
                return PEAR::raiseError('error calculating quartile variation coefficient, '.
                                        'sum of the first and third quartiles is zero.');
            }
            $this->_calculatedValues['quartileVariationCoefficient'] = 100 * $d / $s;
        }
        return $this->_calculatedValues['quartileVariationCoefficient'];
    }/*}}}*/

    /**
     * The quartile skewness coefficient (also known as Bowley Skewness),
     * is defined as follows:
     *
     *  quart skewness coeff = (P(25) - 2*P(50) + P(75)) / (P(75) - P(25))
     *
     *  where: P = percentile
     *
     * The coefficient is undefined when the interquartile range
     * P(75) - P(25) is zero; a PEAR_Error is returned in that case instead
     * of dividing by zero. The result is cached in $this->_calculatedValues.
     *
     * @todo need to double check the equation
     * @access public
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     * @see quartiles()
     */
    function quartileSkewnessCoefficient() {/*{{{*/
        if (!array_key_exists('quartileSkewnessCoefficient', $this->_calculatedValues)) {
            $quart = $this->quartiles();
            if (PEAR::isError($quart)) {
                return $quart;
            }
            $q3 = $quart['75'];
            $q2 = $quart['50'];
            $q1 = $quart['25'];
            $d = $q3 - 2*$q2 + $q1;
            $s = $q3 - $q1;
            // guard the denominator: the middle half of the data is constant
            if ($s == 0) {
                return PEAR::raiseError('error calculating quartile skewness coefficient, '.
                                        'interquartile range is zero.');
            }
            $this->_calculatedValues['quartileSkewnessCoefficient'] = $d / $s;
        }
        return $this->_calculatedValues['quartileSkewnessCoefficient'];
    }/*}}}*/

    /**
     * The pth percentile is the value such that p% of a sorted data set
     * is smaller than it, and (100 - p)% of the data is larger.
*
     * A quick algorithm to pick the appropriate value from a sorted data
     * set is as follows:
     *
     * - Count the number of values: n
     * - Calculate the position of the value in the data list:
     *   i = p * (n + 1) / 100
     * - if i < 1, return the minimum of the data set
     * - if i > n, return the maximum of the data set
     * - if i is an integer, return the data at that position
     * - otherwise, average the entries at adjacent positions to i
     *
     * The bounds are tested before the integer case so that an integral
     * position outside 1..n (e.g. p = 0 or p = 100) is clamped to the
     * minimum/maximum instead of indexing outside the data list.
     *
     * The median is the 50th percentile value.
     *
     * @todo need to double check generality of the algorithm
     *
     * @access public
     * @param numeric $p the percentile to estimate, e.g. 25 for 25th percentile
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     * @see quartiles()
     * @see median()
     */
    function percentile($p) {/*{{{*/
        $count = $this->count();
        if (PEAR::isError($count)) {
            return $count;
        }
        // cumulative data is looked up through its expanded, sorted form
        if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
            $data =& $this->_dataExpanded;
        } else {
            $data =& $this->_data;
        }
        // 1-based position of the requested percentile in the sorted list
        $obsidx = $p * ($count + 1) / 100;
        if ($obsidx < 1) {
            return $data[0];
        } elseif ($obsidx > $count) {
            return $data[($count - 1)];
        } elseif (intval($obsidx) == $obsidx) {
            return $data[(intval($obsidx) - 1)];
        } else {
            // floor()/ceil() return floats; cast so the array keys are integers
            $left = intval(floor($obsidx - 1));
            $right = intval(ceil($obsidx - 1));
            return ($data[$left] + $data[$right]) / 2;
        }
    }/*}}}*/

    // private methods

    /**
     * Utility function to calculate: SUM { (xi - mean)^n }
     *
     * For cumulative data each (value - mean)^n term is weighted by the
     * frequency of the value.
     *
     * @access private
     * @param numeric $power the exponent
     * @param optional double $mean the data set mean value; when null the
     *                              mean is obtained from mean()
     * @return mixed the sum on success, a PEAR_Error object otherwise
     *
     * @see stDev()
     * @see varianceWithMean();
     * @see skewness();
     * @see kurtosis();
     */
    function __sumdiff($power, $mean=null) {/*{{{*/
        if ($this->_data == null) {
            return PEAR::raiseError('data has not been set');
        }
        if (is_null($mean)) {
            $mean = $this->mean();
            if (PEAR::isError($mean)) {
                return $mean;
            }
        }
        $sdiff = 0;
        if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
            foreach ($this->_data as $val=>$freq) {
                $sdiff += $freq * pow((double)($val - $mean), (double)$power);
            }
        } else {
            foreach ($this->_data as $val) {
                $sdiff += pow((double)($val - $mean), (double)$power);
            }
        }
        return $sdiff;
    }/*}}}*/

    /**
     * Utility function to calculate the variance with or without
     * a fixed mean
     *
     * The sum of squared differences is divided by (count - 1).
     *
     * @access private
     * @param $mean the fixed mean to use, null as default
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     *               (no data, a single data point, or a failure in a helper)
     * @see variance()
     * @see varianceWithMean()
     */
    function __calcVariance($mean = null) {/*{{{*/
        if ($this->_data == null) {
            return PEAR::raiseError('data has not been set');
        }
        $sumdiff2 = $this->__sumdiff(2, $mean);
        if (PEAR::isError($sumdiff2)) {
            return $sumdiff2;
        }
        $count = $this->count();
        if (PEAR::isError($count)) {
            return $count;
        }
        // count - 1 below would be zero
        if ($count == 1) {
            return PEAR::raiseError('cannot calculate variance of a singe data point');
        }
        return ($sumdiff2 / ($count - 1));
    }/*}}}*/

    /**
     * Utility function to calculate the absolute deviation with or without
     * a fixed mean
     *
     * The sum of absolute differences is divided by the number of values.
     *
     * @access private
     * @param $mean the fixed mean to use, null as default
     * @return mixed a numeric value on success, a PEAR_Error otherwise
     * @see absDev()
     * @see absDevWithMean()
     */
    function __calcAbsoluteDeviation($mean = null) {/*{{{*/
        if ($this->_data == null) {
            return PEAR::raiseError('data has not been set');
        }
        $count = $this->count();
        if (PEAR::isError($count)) {
            return $count;
        }
        $sumabsdev = $this->__sumabsdev($mean);
        if (PEAR::isError($sumabsdev)) {
            return $sumabsdev;
        }
        return $sumabsdev / $count;
    }/*}}}*/

    /**
     * Utility function to calculate: SUM { | xi - mean | }
     *
     * For cumulative data each |value - mean| term is weighted by the
     * frequency of the value.
     *
     * @access private
     * @param optional double $mean the mean value for the set or population;
     *                              when null the mean is obtained from mean()
     * @return mixed the sum on success, a PEAR_Error object otherwise
     *
     * @see absDev()
     * @see absDevWithMean()
     */
    function __sumabsdev($mean=null) {/*{{{*/
        if ($this->_data == null) {
            return PEAR::raiseError('data has not been set');
        }
        if (is_null($mean)) {
            $mean = $this->mean();
            // propagate the failure instead of doing arithmetic on an error object
            if (PEAR::isError($mean)) {
                return $mean;
            }
        }
        $sdev = 0;
        if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
            foreach ($this->_data as $val=>$freq) {
                $sdev += $freq * abs($val - $mean);
            }
        } else {
            foreach ($this->_data as $val) {
                $sdev += abs($val - $mean);
            }
        }
        return $sdev;
    }/*}}}*/

    /**
     * Utility function to format a PEAR_Error to be used by calc(),
     * calcBasic() and calcFull()
     *
     * @access private
     * @param mixed $v value to be formatted
     * @param boolean $useErrorObject whether the raw PEAR_Error (when true, default),
     *                  or only the error message will be returned (when false)
     * @return mixed if the value is a PEAR_Error object, and $useErrorObject
     *              is false, then a string with the error message will be returned,
     *              otherwise the value will not be modified and returned as passed.
     */
    function __format($v, $useErrorObject=true) {/*{{{*/
        if (PEAR::isError($v) && $useErrorObject == false) {
            return $v->getMessage();
        } else {
            return $v;
        }
    }/*}}}*/

    /**
     * Utility function to validate the data and modify it
     * according to the current null handling option
     *
     * In cumulative mode the array keys are the data values and the array
     * values their frequencies; otherwise the array values are the data.
     * Any non-numeric data value is handled per the null option:
     *
     * - STATS_IGNORE_NULL: the entry is removed
     * - STATS_USE_NULL_AS_ZERO: the entry counts as zero (in cumulative
     *   mode its frequency is added to the frequency of the value 0)
     * - STATS_REJECT_NULL (or anything else): validation fails
     *
     * On success the data is left sorted; in cumulative mode
     * $this->_dataExpanded is also rebuilt as the sorted list in which each
     * value is repeated according to its frequency.
     *
     * @access private
     * @return mixed true on success, a PEAR_Error object otherwise
     *
     * @see setData()
     */
    function _validate() {/*{{{*/
        $flag = ($this->_dataOption == STATS_DATA_CUMMULATIVE);
        foreach ($this->_data as $key=>$value) {
            // $d is the data value, $v its frequency (cumulative) or its key
            $d = ($flag) ? $key : $value;
            $v = ($flag) ? $value : $key;
            if (!is_numeric($d)) {
                switch ($this->_nullOption) {
                    case STATS_IGNORE_NULL :
                        unset($this->_data["$key"]);
                        break;
                    case STATS_USE_NULL_AS_ZERO:
                        if ($flag) {
                            unset($this->_data["$key"]);
                            // make sure the zero bucket exists before adding to it
                            if (!isset($this->_data[0])) {
                                $this->_data[0] = 0;
                            }
                            $this->_data[0] += $v;
                        } else {
                            $this->_data[$key] = 0;
                        }
                        break;
                    case STATS_REJECT_NULL :
                    default:
                        return PEAR::raiseError('data rejected, contains NULL values');
                }
            }
        }
        if ($flag) {
            ksort($this->_data);
            $this->_dataExpanded = array();
            foreach ($this->_data as $val=>$freq) {
                // append $freq copies of $val
                $this->_dataExpanded = array_pad($this->_dataExpanded, count($this->_dataExpanded) + $freq, $val);
            }
            sort($this->_dataExpanded);
        } else {
            sort($this->_data);
        }
        return true;
    }/*}}}*/

}/*}}}*/

// vim: ts=4:sw=4:et:
// vim6: fdl=1: fdm=marker:

?>