下面列出了怎么用org.apache.commons.math3.distribution.FDistribution的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Fits the regression for the supplied response vector and design matrix and
 * stores the resulting summary statistics on this model.
 *
 * <p>Callers are expected to pass data that has already been transformed, where
 * needed, so that the Gauss Markov assumptions hold. Column 0 of the matrix
 * returned by {@code computeBeta} is treated as the coefficient estimates and
 * column 1 as their variances.</p>
 *
 * <p>NOTE(review): {@code rss} is read here but not assigned in this method;
 * presumably {@code computeBeta} populates it - confirm.</p>
 *
 * @param y the dependent variable, possibly a transformed version of the raw data
 * @param x the independent variable(s), possibly a transformed version of the raw data
 */
protected void compute(RealVector y, RealMatrix x) {
    final int observations = frame.rows().count();
    final int regressorCount = regressors.size();
    final int interceptTerms = hasIntercept() ? 1 : 0;
    final int parameterCount = regressorCount + interceptTerms;
    final int residualDf = observations - parameterCount;

    final RealMatrix beta = computeBeta(y, x);
    final RealVector coefficients = beta.getColumnVector(0);
    final RealVector variances = beta.getColumnVector(1);

    // Sums of squares and the overall F test of the model.
    this.tss = computeTSS(y);
    this.ess = tss - rss;
    this.fValue = (ess / regressorCount) / (rss / residualDf);
    this.fValueProbability =
        1d - new FDistribution(regressorCount, residualDf).cumulativeProbability(fValue);

    // Goodness of fit, plain and adjusted for the number of parameters.
    this.rSquared = 1d - (rss / tss);
    this.rSquaredAdj = 1d - (rss * (observations - interceptTerms)) / (tss * residualDf);

    this.computeParameterStdErrors(variances);
    this.computeParameterSignificance(coefficients);
}
/**
 * Compares the averages of two performance criteria and returns the upper-tail
 * probability of the resulting statistic under an F(1, n1 + n2 - 2) distribution.
 *
 * <p>The statistic is the squared difference of the averages, scaled by
 * {@code 1 / (1/n1 + 1/n2)} and divided by the pooled variance.</p>
 *
 * @param pc1 the first criterion
 * @param pc2 the second criterion
 * @return {@code 1 - cdf(statistic)}, or 1 when the second degree of freedom is
 *         not positive and the F distribution is therefore undefined
 */
private double getProbability(PerformanceCriterion pc1, PerformanceCriterion pc2) {
    final int firstCount = pc1.getAverageCount();
    final int secondCount = pc2.getAverageCount();
    final int pooledDegreesOfFreedom = firstCount + secondCount - 2;

    final double pooledVariance =
        ((firstCount - 1) * pc1.getVariance() + (secondCount - 1) * pc2.getVariance())
            / pooledDegreesOfFreedom;
    final double scale = 1.0d / (1.0d / firstCount + 1.0d / secondCount);
    final double meanDifference = pc1.getAverage() - pc2.getAverage();
    final double statistic = scale * meanDifference * meanDifference / pooledVariance;

    if (pooledDegreesOfFreedom <= 0) {
        // The F distribution is not defined here; report "not significant".
        return 1;
    }
    return 1 - new FDistribution(1, pooledDegreesOfFreedom).cumulativeProbability(statistic);
}
/**
 * {@inheritDoc}
 *
 * <p>The bounds are obtained from quantiles of the F distribution. Each
 * distribution is only constructed when both of its degrees of freedom are
 * strictly positive:</p>
 * <ul>
 * <li>with zero successes the lower bound is exactly 0 and only the upper
 *     bound is computed;</li>
 * <li>with as many successes as trials the upper bound is exactly 1 and only
 *     the lower bound is computed.</li>
 * </ul>
 * <p>Previously both distributions were always constructed, so these two edge
 * cases failed inside the {@code FDistribution} constructor instead of
 * producing an interval.</p>
 */
public ConfidenceInterval createInterval(int numberOfTrials, int numberOfSuccesses,
                                         double confidenceLevel) {
    IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
    final double alpha = (1.0 - confidenceLevel) / 2.0;
    final int numberOfFailures = numberOfTrials - numberOfSuccesses;

    // Lower bound: F(2 * (failures + 1), 2 * successes) needs at least one success.
    double lowerBound = 0;
    if (numberOfSuccesses > 0) {
        final FDistribution distributionLowerBound =
            new FDistribution(2 * (numberOfFailures + 1), 2 * numberOfSuccesses);
        final double fValueLowerBound =
            distributionLowerBound.inverseCumulativeProbability(1 - alpha);
        lowerBound = numberOfSuccesses /
                     (numberOfSuccesses + (numberOfFailures + 1) * fValueLowerBound);
    }

    // Upper bound: F(2 * (successes + 1), 2 * failures) needs at least one failure.
    double upperBound = 1;
    if (numberOfFailures > 0) {
        final FDistribution distributionUpperBound =
            new FDistribution(2 * (numberOfSuccesses + 1), 2 * numberOfFailures);
        final double fValueUpperBound =
            distributionUpperBound.inverseCumulativeProbability(1 - alpha);
        upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
                     (numberOfFailures + (numberOfSuccesses + 1) * fValueUpperBound);
    }

    return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
}
/**
 * {@inheritDoc}
 *
 * <p>Both bounds come from F-distribution quantiles. A distribution is built
 * only when its two degrees of freedom are strictly positive, which gives the
 * exact limits at the edges:</p>
 * <ul>
 * <li>{@code numberOfSuccesses == 0}: the lower bound is 0;</li>
 * <li>{@code numberOfSuccesses == numberOfTrials}: the upper bound is 1.</li>
 * </ul>
 * <p>The earlier code constructed both distributions unconditionally, so
 * either edge case was rejected by the {@code FDistribution} constructor.</p>
 */
public ConfidenceInterval createInterval(int numberOfTrials, int numberOfSuccesses,
                                         double confidenceLevel) {
    IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
    final double alpha = (1.0 - confidenceLevel) / 2.0;
    final int numberOfFailures = numberOfTrials - numberOfSuccesses;

    double lowerBound = 0;
    if (numberOfSuccesses > 0) {
        // The denominator degrees of freedom, 2 * successes, are positive here.
        final double fValueLowerBound =
            new FDistribution(2 * (numberOfFailures + 1), 2 * numberOfSuccesses)
                .inverseCumulativeProbability(1 - alpha);
        lowerBound = numberOfSuccesses /
                     (numberOfSuccesses + (numberOfFailures + 1) * fValueLowerBound);
    }

    double upperBound = 1;
    if (numberOfFailures > 0) {
        // The denominator degrees of freedom, 2 * failures, are positive here.
        final double fValueUpperBound =
            new FDistribution(2 * (numberOfSuccesses + 1), 2 * numberOfFailures)
                .inverseCumulativeProbability(1 - alpha);
        upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
                     (numberOfFailures + (numberOfSuccesses + 1) * fValueUpperBound);
    }

    return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
}
/**
 * Evaluates the supplied F distribution at the squared t statistic of the
 * attributeIndex-th attribute's coefficient.
 *
 * <p>The standard error is derived from the attribute's tolerance, the general
 * correlation, the label standard deviation and the attribute's standard
 * deviation; the t statistic is the coefficient divided by that standard error.</p>
 *
 * <p>NOTE(review): the value returned is {@code cumulativeProbability(t * t)},
 * i.e. the lower tail rather than its complement - presumably callers take
 * {@code 1 - result} where a significance level is needed; confirm.</p>
 *
 * @throws UndefinedParameterError
 * @throws ProcessStoppedException
 */
protected double getPValue(double coefficient, int attributeIndex, LinearRegression regression, boolean useBias,
        double ridge, ExampleSet exampleSet, boolean[] isUsedAttribute, double[] standardDeviations,
        double labelStandardDeviation, FDistribution fdistribution, double generalCorrelation)
        throws UndefinedParameterError, ProcessStoppedException {
    final double tolerance =
        regression.getTolerance(exampleSet, isUsedAttribute, attributeIndex, ridge, useBias);
    final double residualDegreesOfFreedom =
        exampleSet.size() - exampleSet.getAttributes().size() - 1.0d;

    final double standardError =
        Math.sqrt((1.0d - generalCorrelation) / (tolerance * residualDegreesOfFreedom))
            * labelStandardDeviation / standardDeviations[attributeIndex];

    final double t = coefficient / standardError;
    return fdistribution.cumulativeProbability(t * t);
}
/**
 * Stores the inputs of an ANOVA significance test and derives the mean
 * squares, the F value and its upper-tail probability from them.
 *
 * @param sumSquaresBetween   sum of squares between the groups
 * @param sumSquaresResiduals sum of squares of the residuals
 * @param df1                 degrees of freedom used for the between-groups mean square
 * @param df2                 degrees of freedom used for the residual mean square
 * @param alpha               significance level, stored unchanged
 */
public AnovaSignificanceTestResult(double sumSquaresBetween, double sumSquaresResiduals, int df1, int df2,
        double alpha) {
    // Raw inputs.
    this.sumSquaresBetween = sumSquaresBetween;
    this.sumSquaresResiduals = sumSquaresResiduals;
    this.df1 = df1;
    this.df2 = df2;
    this.alpha = alpha;

    // Derived statistics.
    this.meanSquaresBetween = sumSquaresBetween / df1;
    this.meanSquaresResiduals = sumSquaresResiduals / df2;
    this.fValue = this.meanSquaresBetween / this.meanSquaresResiduals;
    this.prob = 1.0d - new FDistribution(df1, df2).cumulativeProbability(this.fValue);
}
/**
 * Draws 1000 values from {@code randomData.nextF(12, 5)} after reseeding with
 * 1000, bins them by the quartiles of {@code FDistribution(12, 5)} and asserts
 * a chi-square fit against {@code expected} (defined outside this method) at
 * the 0.001 level.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Checks that {@code randomData.nextF(12, 5)} samples fall into the quartile
 * bins of {@code FDistribution(12, 5)} with the frequencies in {@code expected}
 * (defined outside this method), using a chi-square test at the 0.001 level
 * over 1000 draws from a generator reseeded with 1000.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    for (int draw = 1; draw <= 1000; draw++) {
        final double sample = randomData.nextF(12, 5);
        TestUtils.updateCounts(sample, observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Samples {@code randomData.nextF(12, 5)} 1000 times (seed 1000), counts the
 * samples per quartile bin of {@code FDistribution(12, 5)} and asserts that a
 * chi-square test against {@code expected} (defined outside this method)
 * accepts at the 0.001 level.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Verifies the F-distributed sampler: 1000 draws of
 * {@code randomData.nextF(12, 5)} (seed 1000) are binned by the quartiles of
 * {@code FDistribution(12, 5)} and compared with {@code expected} (defined
 * outside this method) via a chi-square test at the 0.001 level.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    for (int draw = 1; draw <= 1000; draw++) {
        final double sample = randomData.nextF(12, 5);
        TestUtils.updateCounts(sample, observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Draws 1000 values from {@code randomData.nextF(12, 5)} after reseeding with
 * 1000, tallies them per quartile bin of {@code FDistribution(12, 5)} and
 * asserts a chi-square fit against {@code expected} (defined outside this
 * method) at the 0.001 level.
 *
 * @throws Exception if any of the called helpers fails
 */
@Test
public void testNextF() throws Exception {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Checks the distribution of {@code randomData.nextF(12, 5)}: after reseeding
 * with 1000, 1000 samples are counted per quartile bin of
 * {@code FDistribution(12, 5)} and tested against {@code expected} (defined
 * outside this method) with a chi-square test at the 0.001 level.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    for (int draw = 1; draw <= 1000; draw++) {
        final double sample = randomData.nextF(12, 5);
        TestUtils.updateCounts(sample, observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Samples {@code randomData.nextF(12, 5)} 1000 times with seed 1000, bins the
 * samples by the quartiles of {@code FDistribution(12, 5)} and asserts that
 * the bin counts pass a chi-square test against {@code expected} (defined
 * outside this method) at the 0.001 level.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Verifies {@code randomData.nextF(12, 5)} by drawing 1000 samples (seed 1000),
 * counting them per quartile bin of {@code FDistribution(12, 5)} and running a
 * chi-square test against {@code expected} (defined outside this method) at
 * the 0.001 level.
 */
@Test
public void testNextF() {
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];

    randomData.reSeed(1000);
    for (int draw = 1; draw <= 1000; draw++) {
        final double sample = randomData.nextF(12, 5);
        TestUtils.updateCounts(sample, observed, quartileBounds);
    }

    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
/**
 * Runs a one-way ANOVA over the given groups of intensities and returns the
 * upper-tail probability of the resulting F statistic.
 *
 * <p>The between-group sum of squares weights each group's squared deviation
 * from the grand mean by the number of observations in that group. The
 * earlier code multiplied the unweighted sum by {@code numGroups - 1}, which
 * produced a wrong F statistic for almost all inputs.</p>
 *
 * @param intensityGroups one array of observations per group
 * @return {@code 1 - cdf(F)}, or {@code null} when either degree of freedom is
 *         not positive, when the within-group mean square is zero, or when the
 *         F distribution cannot be constructed
 */
@Nullable
private Double oneWayAnova(@Nonnull double[][] intensityGroups) {
  final int numGroups = intensityGroups.length;
  final long numIntensities =
      Arrays.stream(intensityGroups).flatMapToDouble(Arrays::stream).count();

  final long degreesOfFreedomOfTreatment = numGroups - 1;
  final long degreesOfFreedomOfError = numIntensities - numGroups;
  if (degreesOfFreedomOfTreatment <= 0 || degreesOfFreedomOfError <= 0) {
    return null;
  }

  final double[] groupMeans = Arrays.stream(intensityGroups)
      .mapToDouble(intensities -> Arrays.stream(intensities).average().orElse(0.0)).toArray();
  final double overallMean =
      Arrays.stream(intensityGroups).flatMapToDouble(Arrays::stream).average().orElse(0.0);

  // Within-group variation: squared deviations of every value from its own group mean.
  final double sumOfSquaresOfError = IntStream.range(0, numGroups).mapToDouble(
      i -> Arrays.stream(intensityGroups[i]).map(x -> x - groupMeans[i]).map(x -> x * x).sum())
      .sum();

  // Between-group variation: squared deviation of each group mean from the grand
  // mean, weighted by the group's size.
  final double sumOfSquaresOfTreatment = IntStream.range(0, numGroups).mapToDouble(i -> {
    final double deviation = groupMeans[i] - overallMean;
    return intensityGroups[i].length * deviation * deviation;
  }).sum();

  final double meanSquareOfTreatment = sumOfSquaresOfTreatment / degreesOfFreedomOfTreatment;
  final double meanSquareOfError = sumOfSquaresOfError / degreesOfFreedomOfError;
  if (meanSquareOfError == 0.0) {
    // The F statistic would be a division by zero.
    return null;
  }

  final double anovaStatistics = meanSquareOfTreatment / meanSquareOfError;
  Double pValue = null;
  try {
    FDistribution distribution =
        new FDistribution(degreesOfFreedomOfTreatment, degreesOfFreedomOfError);
    pValue = 1.0 - distribution.cumulativeProbability(anovaStatistics);
  } catch (MathIllegalArgumentException ex) {
    logger.warning("Error during F-distribution calculation: " + ex.getMessage());
  }
  return pValue;
}
/**
 * Fits one or more molecules to the given sub-image.
 *
 * <p>When {@code maxN > 1}, models with 1..maxN molecules are fitted in turn,
 * each one seeded with the parameters fitted for the previous model. A model
 * with more molecules replaces the current best one only if the F-test
 * p-value computed from the two chi-squared values is below {@code pValueThr}.
 * The selected model is then fitted again by a new {@code LSQFitter}, the
 * background value is copied to every detection when the result holds more
 * than one molecule, and the result is passed through {@code eliminateBadFits}.</p>
 *
 * @param subimage the sub-image to fit
 * @return the value returned by {@code eliminateBadFits} for the fitted molecule
 * @throws StoppedDueToErrorException if the F distribution cannot be created
 *         because one of its degrees of freedom is not strictly positive
 */
@Override
public Molecule fit(SubImage subimage) {
double[] fittedParams = null;
MultiPSF model, modelBest = null;
double chi2, chi2Best = 0.0, pValue;
if(maxN > 1) {
// model selection - how many molecules?
for(int n = 1; n <= maxN; n++) {
GUI.checkIJEscapePressed();
// fittedParams is null for n == 1 and holds the previous model's fit afterwards
model = new MultiPSF(n, defaultSigma, basePsfModel, fittedParams);
model.setIntensityRange(expectedIntensity);
model.setFixedIntensities(sameI);
LSQFitter fitter = new LSQFitter(model, weightedLSQ, MODEL_SELECTION_ITERATIONS, -1);
fitter.fit(subimage);
fittedParams = fitter.fittedParameters;
chi2 = model.getChiSquared(subimage.xgrid, subimage.ygrid, subimage.values, fittedParams, weightedLSQ);
if(n > 1) {
try {
// F-test of the current model against the best model so far:
// F = ((chi2Best - chi2) / (DoF - DoFBest)) / (chi2 / (numberOfValues - DoF)),
// evaluated on F(DoF - DoFBest, numberOfValues - DoF); pValue is the upper tail
pValue = 1.0 - new FDistribution(model.getDoF() - modelBest.getDoF(), subimage.values.length - model.getDoF()).cumulativeProbability(((chi2Best - chi2) / (model.getDoF() - modelBest.getDoF())) / (chi2 / (subimage.values.length - model.getDoF())));
// accept the larger model only when the improvement is significant; a NaN p-value never is
if(!Double.isNaN(pValue) && (pValue < pValueThr) ) {//&& !isOutOfRegion(mol, ((double)subimage.size) / 2.0)
modelBest = model;
chi2Best = chi2;
}
} catch(NotStrictlyPositiveException ex) {
// the FDistribution constructor rejected a non-positive degree of freedom;
// report how many molecules the current settings allow
int maxMol = (subimage.values.length - 2) / (int)(basePsfModel.getDoF()-2); // both intensity and offset are estimated for all molecules as a single parameter
throw new StoppedDueToErrorException(
"F-distribution `F(" + (int)(model.getDoF() - modelBest.getDoF()) + "," +
(int)(subimage.values.length - model.getDoF()) + ")` could not be created! " +
"There is too many molecules (degrees of freedom) in the model!\n The maximum number of " +
"molecules with the current settings (PSF model and fitting radius) is " + maxMol + ".", ex);
}
} else {
// the single-molecule model is the baseline every larger model is compared with
modelBest = model;
chi2Best = chi2;
}
}
} else {
// no model selection: use a single-molecule model directly
modelBest = new MultiPSF(1, defaultSigma, basePsfModel, null);
modelBest.setIntensityRange(expectedIntensity);
modelBest.setFixedIntensities(sameI);
}
// fitting with the selected model
lastFitter = new LSQFitter(modelBest, weightedLSQ, Params.BACKGROUND);
Molecule mol = lastFitter.fit(subimage);
assert (mol != null); // this is implication of `assert(maxN >= 1)`
if(!mol.isSingleMolecule()) {
// copy background value to all molecules
double bkg = mol.getParam(PSFModel.Params.LABEL_BACKGROUND);
for(Molecule m : mol.getDetections()) {
m.setParam(PSFModel.Params.LABEL_BACKGROUND, bkg);
}
}
// the two limits passed on are half the sub-image size minus half of defaultSigma, per axis
return eliminateBadFits(mol, ((double) subimage.size_x) / 2.0 - defaultSigma / 2.0, ((double) subimage.size_y) / 2.0 - defaultSigma / 2.0);
}
/**
 * Performs a one-way ANOVA on the given groups of intensities and returns the
 * upper-tail probability of the F statistic.
 *
 * <p>Each group's squared deviation from the grand mean is weighted by the
 * group's size when the between-group sum of squares is formed. The earlier
 * code scaled the unweighted sum by {@code numGroups - 1} instead, which gave
 * a wrong F statistic for almost all inputs.</p>
 *
 * @param intensityGroups one array of observations per group
 * @return {@code 1 - cdf(F)}, or {@code null} when either degree of freedom is
 *         not positive, when the within-group mean square is zero, or when the
 *         F distribution cannot be constructed
 */
@Nullable
private Double oneWayAnova(@Nonnull double[][] intensityGroups) {
  final int numGroups = intensityGroups.length;
  final long numIntensities = Arrays.stream(intensityGroups)
      .flatMapToDouble(Arrays::stream)
      .count();

  final long degreesOfFreedomOfTreatment = numGroups - 1;
  final long degreesOfFreedomOfError = numIntensities - numGroups;
  if (degreesOfFreedomOfTreatment <= 0 || degreesOfFreedomOfError <= 0) {
    return null;
  }

  final double[] groupMeans = Arrays.stream(intensityGroups)
      .mapToDouble(intensities -> Arrays.stream(intensities).average().orElse(0.0))
      .toArray();
  final double overallMean = Arrays.stream(intensityGroups)
      .flatMapToDouble(Arrays::stream)
      .average()
      .orElse(0.0);

  // Within-group sum of squares.
  final double sumOfSquaresOfError = IntStream.range(0, numGroups)
      .mapToDouble(i -> Arrays
          .stream(intensityGroups[i])
          .map(x -> x - groupMeans[i])
          .map(x -> x * x)
          .sum())
      .sum();

  // Between-group sum of squares, weighted by the size of each group.
  final double sumOfSquaresOfTreatment = IntStream.range(0, numGroups)
      .mapToDouble(i -> {
        final double deviation = groupMeans[i] - overallMean;
        return intensityGroups[i].length * deviation * deviation;
      })
      .sum();

  final double meanSquareOfTreatment = sumOfSquaresOfTreatment / degreesOfFreedomOfTreatment;
  final double meanSquareOfError = sumOfSquaresOfError / degreesOfFreedomOfError;
  if (meanSquareOfError == 0.0) {
    // The F statistic would be a division by zero.
    return null;
  }

  final double anovaStatistics = meanSquareOfTreatment / meanSquareOfError;
  Double pValue = null;
  try {
    FDistribution distribution = new FDistribution(
        degreesOfFreedomOfTreatment, degreesOfFreedomOfError);
    pValue = 1.0 - distribution.cumulativeProbability(anovaStatistics);
  }
  catch (MathIllegalArgumentException ex) {
    logger.warning("Error during F-distribution calculation: " + ex.getMessage());
  }
  return pValue;
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The constructor call is not guarded: the degrees of freedom coming out of
    // anovaStats are taken to be valid. The rng argument is null because this
    // distribution is only evaluated, never sampled.
    return 1.0 - new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@link SummaryStatistics}.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData, allowOneElementData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@link SummaryStatistics}.</li>
 * <li>Each of these statistics contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@link SummaryStatistics}, one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two statistics or one of the statistics holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // The rng argument is null because this distribution is only evaluated,
    // never sampled.
    return 1.0 - new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // Upper tail of F(dfbg, dfwg) at the observed F value.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // Upper tail of F(dfbg, dfwg) at the observed F value.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The constructor call is not guarded and its exception is not advertised:
    // the degrees of freedom coming out of anovaStats are taken to be valid.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@link SummaryStatistics}.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData, allowOneElementData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@link SummaryStatistics}.</li>
 * <li>Each of these statistics contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@link SummaryStatistics}, one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two statistics or one of the statistics holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // Upper tail of F(dfbg, dfwg) at the observed F value.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // Upper tail of F(dfbg, dfwg) at the observed F value.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The constructor call is not guarded and its exception is not advertised:
    // the degrees of freedom coming out of anovaStats are taken to be valid.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@link SummaryStatistics}.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData, allowOneElementData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@link SummaryStatistics}.</li>
 * <li>Each of these statistics contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@link SummaryStatistics}, one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two statistics or one of the statistics holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // Upper tail of F(dfbg, dfwg) at the observed F value.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Calculates the one-way ANOVA p-value for a set of categories, each supplied
 * as a {@code double[]} of observations.
 *
 * <p>The p-value is taken from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F distribution} as {@code 1 - cumulativeProbability(F)}, where
 * {@code F} and both degrees of freedom are those produced by
 * {@code anovaStats(categoryData)}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} contains at least two {@code double[]} arrays.</li>
 * <li>Each of these arrays contains at least two values.</li>
 * </ul>
 *
 * @param categoryData {@code Collection} of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} holds fewer than
 * two arrays or one of the arrays holds fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The constructor call is not guarded and its exception is not advertised:
    // the degrees of freedom coming out of anovaStats are taken to be valid.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
}
/**
 * Returns the ANOVA p-value for categories supplied as
 * {@link SummaryStatistics} instances.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Every element of the <code>categoryData</code> <code>Collection</code>
 * is a {@link SummaryStatistics}.</li>
 * <li>The collection holds at least two {@link SummaryStatistics}, and each
 * of them holds at least two values.</li>
 * </ul>
 *
 * <p>The p-value is obtained from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation} as</p><pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>with <code>F</code> being the F value and
 * <code>cumulativeProbability</code> the commons-math cumulative
 * distribution function of the F distribution.</p>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> contains
 * fewer than 2 entries, or a contained {@link SummaryStatistics} has fewer
 * than two values
 * @throws ConvergenceException if a convergence error prevents the p-value
 * from being computed
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {
    final AnovaStats anova = anovaStats(categoryData, allowOneElementData);
    final FDistribution fDistribution = new FDistribution(anova.dfbg, anova.dfwg);
    // Complement of the cumulative probability at the observed F value.
    final double pValue = 1.0 - fDistribution.cumulativeProbability(anova.F);
    return pValue;
}
/**
 * Returns the ANOVA p-value for categories supplied as <code>double[]</code>
 * arrays.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Every element of the <code>categoryData</code> <code>Collection</code>
 * is a <code>double[]</code> array.</li>
 * <li>The collection holds at least two arrays, and each array holds at
 * least two values.</li>
 * </ul>
 *
 * <p>The p-value is obtained from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation} as</p><pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>with <code>F</code> being the F value and
 * <code>cumulativeProbability</code> the commons-math cumulative
 * distribution function of the F distribution.</p>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> contains
 * fewer than 2 arrays, or one of the arrays has fewer than two values
 * @throws ConvergenceException if a convergence error prevents the p-value
 * from being computed
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {
    final AnovaStats stats = anovaStats(categoryData);
    // The arguments are valid at this point, so no try-catch or additional
    // advertised exception is required. The distribution is only evaluated,
    // never sampled, so a null rng is supplied to skip unneeded overhead.
    final double cdfAtF =
        new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cdfAtF;
}
/**
 * Returns the ANOVA p-value for categories supplied as
 * {@link SummaryStatistics} instances.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Every element of the <code>categoryData</code> <code>Collection</code>
 * is a {@link SummaryStatistics}.</li>
 * <li>The collection holds at least two {@link SummaryStatistics}, and each
 * of them holds at least two values.</li>
 * </ul>
 *
 * <p>The p-value is obtained from the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation} as</p><pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>with <code>F</code> being the F value and
 * <code>cumulativeProbability</code> the commons-math cumulative
 * distribution function of the F distribution.</p>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> contains
 * fewer than 2 entries, or a contained {@link SummaryStatistics} has fewer
 * than two values
 * @throws ConvergenceException if a convergence error prevents the p-value
 * from being computed
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {
    final AnovaStats anova = anovaStats(categoryData, allowOneElementData);
    // No sampling is done on this distribution, so a null rng is passed to
    // avoid unneeded overhead.
    final FDistribution fDistribution = new FDistribution(null, anova.dfbg, anova.dfwg);
    final double cdfAtF = fDistribution.cumulativeProbability(anova.F);
    return 1.0 - cdfAtF;
}