下面列出了 weka.core.ContingencyTables 类的 API 用法示例代码,也可以点击链接到 GitHub 查看源代码。
/**
 * Searches for the best binary split on a nominal attribute when the class
 * is nominal as well.
 *
 * <p>For every value of the attribute, the instances are divided into those
 * having that value and those having any other (non-missing) value; instances
 * whose attribute value is missing are tallied in a third row. The candidate
 * with the lowest {@code ContingencyTables.entropyConditionedOnRows} wins.
 * As a side effect the index of the winning value is stored in
 * {@code m_SplitPoint} and the corresponding class distribution replaces
 * {@code m_Distribution}. If no instance has a missing value for the
 * attribute, the third row of the stored distribution is set to the overall
 * class counts.
 *
 * @param index attribute index
 * @return value of the criterion for the best split, or
 *         {@code Double.MAX_VALUE} if no candidate improved on it
 * @throws Exception if something goes wrong
 */
protected double findSplitNominalNominal(int index) throws Exception {
  final int valueCount = m_Instances.attribute(index).numValues();
  final int classCount = m_Instances.numClasses();

  // Row valueCount (the extra, last row) collects instances with a missing value.
  final double[][] tally = new double[valueCount + 1][classCount];
  final double[] classTotals = new double[classCount];
  final double[][] winner = new double[3][classCount];
  boolean anyMissing = false;

  // Weighted class counts per attribute value.
  final int instanceCount = m_Instances.numInstances();
  for (int n = 0; n < instanceCount; n++) {
    final Instance current = m_Instances.instance(n);
    final boolean missing = current.isMissing(index);
    anyMissing |= missing;
    final int row = missing ? valueCount : (int) current.value(index);
    tally[row][(int) current.classValue()] += current.weight();
  }

  // Class totals over all non-missing attribute values.
  for (int v = 0; v < valueCount; v++) {
    final double[] row = tally[v];
    for (int c = 0; c < classCount; c++) {
      classTotals[c] += row[c];
    }
  }

  // The missing-value row is identical for every candidate split.
  System.arraycopy(tally[valueCount], 0, m_Distribution[2], 0, classCount);

  // Try "value v" versus "every other value" and keep the lowest entropy.
  double lowest = Double.MAX_VALUE;
  for (int v = 0; v < valueCount; v++) {
    for (int c = 0; c < classCount; c++) {
      m_Distribution[0][c] = tally[v][c];
      m_Distribution[1][c] = classTotals[c] - tally[v][c];
    }
    final double candidate =
        ContingencyTables.entropyConditionedOnRows(m_Distribution);
    if (candidate < lowest) {
      lowest = candidate;
      m_SplitPoint = (double) v;
      for (int r = 0; r < winner.length; r++) {
        System.arraycopy(m_Distribution[r], 0, winner[r], 0, classCount);
      }
    }
  }

  // Without missing values in the training data, use the overall class
  // counts for the third row.
  if (!anyMissing) {
    System.arraycopy(classTotals, 0, winner[2], 0, classCount);
  }

  m_Distribution = winner;
  return lowest;
}
/**
 * Applies Fayyad and Irani's MDL criterion to decide whether a split is
 * worth keeping.
 *
 * <p>The split is accepted when its information gain exceeds
 * {@code (log2(numCutPoints) + delta) / numInstances}, where {@code delta}
 * is derived from the number of classes present, and the entropies, before
 * the split and in each of the two subsets.
 *
 * @param priorCounts class counts before the split
 * @param bestCounts class counts after the split; row 0 is the left subset
 *        and row 1 the right subset
 * @param numInstances value the acceptance threshold is divided by
 * @param numCutPoints number of cut points; its base-2 logarithm enters the
 *        acceptance threshold
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
    double[][] bestCounts,
    double numInstances,
    int numCutPoints) {

  // Entropy before and after the split, and the resulting information gain.
  final double entropyBefore = ContingencyTables.entropy(priorCounts);
  final double entropyAfter =
      ContingencyTables.entropyConditionedOnRows(bestCounts);
  final double infoGain = entropyBefore - entropyAfter;

  // How many classes actually occur overall ...
  int classesOverall = 0;
  for (final double count : priorCounts) {
    if (count > 0) {
      classesOverall++;
    }
  }

  // ... in the left subset ...
  final double[] leftCounts = bestCounts[0];
  int classesLeft = 0;
  for (final double count : leftCounts) {
    if (count > 0) {
      classesLeft++;
    }
  }

  // ... and in the right subset.
  final double[] rightCounts = bestCounts[1];
  int classesRight = 0;
  for (final double count : rightCounts) {
    if (count > 0) {
      classesRight++;
    }
  }

  final double leftEntropy = ContingencyTables.entropy(leftCounts);
  final double rightEntropy = ContingencyTables.entropy(rightCounts);

  // Terms of the MDL formula.
  final double classTerm = Utils.log2(Math.pow(3, classesOverall) - 2);
  final double entropyTerm = ((double) classesOverall * entropyBefore)
      - (classesRight * rightEntropy)
      - (classesLeft * leftEntropy);
  final double delta = classTerm - entropyTerm;

  // Accept the split only if the gain clears the MDL threshold.
  final double threshold = (Utils.log2(numCutPoints) + delta) / numInstances;
  return infoGain > threshold;
}
/**
 * Returns the value of the splitting criterion before any split is made,
 * as computed by {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double columnEntropy = ContingencyTables.entropyOverColumns(dist);
  return columnEntropy;
}
/**
 * Returns the improvement of the splitting criterion achieved by a split:
 * the prior value minus
 * {@code ContingencyTables.entropyConditionedOnRows} of the distributions.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyAfterSplit =
      ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyAfterSplit;
}
/**
 * Evaluates the splitting criterion prior to splitting by delegating to
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double criterion = ContingencyTables.entropyOverColumns(dist);
  return criterion;
}
/**
 * Evaluates the splitting criterion after splitting, expressed as the
 * reduction relative to the prior value: {@code priorVal} minus
 * {@code ContingencyTables.entropyConditionedOnRows(dist)}.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double conditionalEntropy =
      ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - conditionalEntropy;
}
/**
 * Computes the splitting criterion before a split, using
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distributions passed on to the entropy computation
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double entropyBeforeSplit = ContingencyTables.entropyOverColumns(dist);
  return entropyBeforeSplit;
}
/**
 * Computes the splitting criterion after a split as the difference between
 * the prior value and
 * {@code ContingencyTables.entropyConditionedOnRows} of the distributions.
 *
 * @param dist the distributions passed on to the entropy computation
 * @param priorVal the splitting criterion before the split
 * @return the gain after splitting
 */
protected double gain(double[][] dist, double priorVal) {
  final double rowConditionedEntropy =
      ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - rowConditionedEntropy;
}
/**
 * Gives the pre-split value of the splitting criterion, obtained from
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist
 *          the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double preSplitEntropy = ContingencyTables.entropyOverColumns(dist);
  return preSplitEntropy;
}
/**
 * Gives the post-split value of the splitting criterion: the prior value
 * reduced by {@code ContingencyTables.entropyConditionedOnRows} of the
 * distributions.
 *
 * @param dist
 *          the distributions
 * @param priorVal
 *          the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy =
      ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitEntropy;
}
/**
 * Determines the splitting criterion's value before splitting; the result
 * comes from {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist
 *          the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double entropyOverClasses = ContingencyTables.entropyOverColumns(dist);
  return entropyOverClasses;
}
/**
 * Determines the splitting criterion's value after splitting by subtracting
 * {@code ContingencyTables.entropyConditionedOnRows} of the distributions
 * from the prior value.
 *
 * @param dist
 *          the distributions
 * @param priorVal
 *          the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double remainingEntropy =
      ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - remainingEntropy;
}
/**
 * Calculates the value the splitting criterion has before a split, via
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist
 *          the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double baseline = ContingencyTables.entropyOverColumns(dist);
  return baseline;
}
/**
 * Calculates the value the splitting criterion has after a split, i.e. how
 * far {@code ContingencyTables.entropyConditionedOnRows} of the
 * distributions lies below the prior value.
 *
 * @param dist
 *          the distributions
 * @param priorVal
 *          the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double splitEntropy =
      ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - splitEntropy;
}
/**
 * Yields the splitting criterion before the split, taken from
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distribution
 * @return the prior value of the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double prior = ContingencyTables.entropyOverColumns(dist);
  return prior;
}
/**
 * Yields the splitting criterion after the split: the prior value less
 * {@code ContingencyTables.entropyConditionedOnRows} of the distribution.
 *
 * @param dist the distribution
 * @param priorVal the prior value of the splitting criterion
 * @return the gain
 */
protected double gain(double[][] dist, double priorVal) {
  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}