类weka.core.ContingencyTables源码实例Demo

下面列出了 weka.core.ContingencyTables 类的 API 实例代码及用法示例,也可以点击链接跳转到 GitHub 查看源代码。

源代码1 项目: tsml   文件: DecisionStump.java
/**
 * Searches for the best binary split of a nominal attribute against a
 * nominal class, trying each attribute value against the rest, and
 * records the winning class distribution in {@code m_Distribution}.
 *
 * @param index attribute index
 * @return value of the split criterion (conditional entropy) for the best split
 * @throws Exception if something goes wrong
 */
 protected double findSplitNominalNominal(int index) throws Exception {

   final int numValues = m_Instances.attribute(index).numValues();
   final int numClasses = m_Instances.numClasses();

   // One row per attribute value, plus a trailing row that accumulates
   // instances whose attribute value is missing.
   double[][] counts = new double[numValues + 1][numClasses];
   double[] sumCounts = new double[numClasses];
   double[][] bestDist = new double[3][numClasses];
   int numMissing = 0;

   // Tally weighted class counts for every attribute value.
   for (int i = 0; i < m_Instances.numInstances(); i++) {
     Instance inst = m_Instances.instance(i);
     int classIndex = (int) inst.classValue();
     if (inst.isMissing(index)) {
       numMissing++;
       counts[numValues][classIndex] += inst.weight();
     } else {
       counts[(int) inst.value(index)][classIndex] += inst.weight();
     }
   }

   // Class totals over all non-missing rows.
   for (int i = 0; i < numValues; i++) {
     for (int j = 0; j < numClasses; j++) {
       sumCounts[j] += counts[i][j];
     }
   }

   // Evaluate each "value i vs. rest" split; row 2 of m_Distribution
   // holds the missing-value distribution throughout the search.
   System.arraycopy(counts[numValues], 0, m_Distribution[2], 0, numClasses);
   double bestVal = Double.MAX_VALUE;
   for (int i = 0; i < numValues; i++) {
     for (int j = 0; j < numClasses; j++) {
       m_Distribution[0][j] = counts[i][j];
       m_Distribution[1][j] = sumCounts[j] - counts[i][j];
     }
     double currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
     if (currVal < bestVal) {
       bestVal = currVal;
       m_SplitPoint = (double) i;
       for (int j = 0; j < 3; j++) {
         System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses);
       }
     }
   }

   // No missing values in the training data: row 2 of the best
   // distribution carries the overall class totals instead.
   if (numMissing == 0) {
     System.arraycopy(sumCounts, 0, bestDist[2], 0, numClasses);
   }

   m_Distribution = bestDist;
   return bestVal;
 }
 
源代码2 项目: tsml   文件: Discretize.java
/**
 * Applies Fayyad and Irani's MDL stopping criterion to decide whether a
 * candidate split is worth keeping.
 *
 * @param priorCounts class counts before the split
 * @param bestCounts class counts for the two subsets produced by the best
 *                   split (row 0 and row 1)
 * @param numInstances total (weighted) number of instances
 * @param numCutPoints number of candidate cut points that were evaluated
 * @return true if the split is acceptable
 */
 private boolean FayyadAndIranisMDL(double[] priorCounts,
                                    double[][] bestCounts,
                                    double numInstances,
                                    int numCutPoints) {

   // Entropy before the split and the conditional entropy after it.
   final double priorEntropy = ContingencyTables.entropy(priorCounts);
   final double postEntropy =
     ContingencyTables.entropyConditionedOnRows(bestCounts);

   // Information gain achieved by the split.
   final double gain = priorEntropy - postEntropy;

   // Number of classes occurring in the whole set and in each subset.
   final int numClassesTotal = countClassesPresent(priorCounts);
   final int numClassesLeft = countClassesPresent(bestCounts[0]);
   final int numClassesRight = countClassesPresent(bestCounts[1]);

   // Entropies of the two subsets.
   final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
   final double entropyRight = ContingencyTables.entropy(bestCounts[1]);

   // MDL correction term from the Fayyad & Irani formula.
   final double delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
     - (((double) numClassesTotal * priorEntropy)
        - (numClassesRight * entropyRight)
        - (numClassesLeft * entropyLeft));

   // Accept the split only if the gain exceeds the MDL cost.
   return gain > (Utils.log2(numCutPoints) + delta) / (double) numInstances;
 }

/**
 * Counts how many entries of the given class-count vector are strictly
 * positive, i.e. how many classes actually occur.
 */
 private static int countClassesPresent(double[] counts) {
   int present = 0;
   for (double count : counts) {
     if (count > 0) {
       present++;
     }
   }
   return present;
 }
 
源代码3 项目: tsml   文件: LPS.java
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double columnEntropy = ContingencyTables.entropyOverColumns(dist);
  return columnEntropy;
}
 
源代码4 项目: tsml   文件: LPS.java
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitEntropy;
}
 
源代码5 项目: tsml   文件: RandomTree.java
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double entropyBeforeSplit = ContingencyTables.entropyOverColumns(dist);
  return entropyBeforeSplit;
}
 
源代码6 项目: tsml   文件: RandomTree.java
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyAfterSplit = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyAfterSplit;
}
 
源代码7 项目: tsml   文件: REPTree.java
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double columnEntropy = ContingencyTables.entropyOverColumns(dist);
  return columnEntropy;
}
 
源代码8 项目: tsml   文件: REPTree.java
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitEntropy;
}
 
源代码9 项目: KEEL   文件: RandomTree.java
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double entropyBeforeSplit = ContingencyTables.entropyOverColumns(dist);
  return entropyBeforeSplit;
}
 
源代码10 项目: KEEL   文件: RandomTree.java
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyAfterSplit = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyAfterSplit;
}
 
源代码11 项目: KEEL   文件: RandomTree.java
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double columnEntropy = ContingencyTables.entropyOverColumns(dist);
  return columnEntropy;
}
 
源代码12 项目: KEEL   文件: RandomTree.java
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitEntropy;
}
 
源代码13 项目: KEEL   文件: RandomTree.java
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double entropyBeforeSplit = ContingencyTables.entropyOverColumns(dist);
  return entropyBeforeSplit;
}
 
源代码14 项目: KEEL   文件: RandomTree.java
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyAfterSplit = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyAfterSplit;
}
 
/**
 * Computes the value of the splitting criterion before any split is made.
 *
 * @param dist the class distributions
 * @return the entropy over the columns of the distribution
 */
protected double priorVal(double[][] dist) {
  final double columnEntropy = ContingencyTables.entropyOverColumns(dist);
  return columnEntropy;
}
 
/**
 * Computes the information gain achieved by a split.
 *
 * @param dist the class distributions after the split
 * @param priorVal the splitting criterion before the split
 * @return the gain, i.e. the drop in entropy caused by the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitEntropy;
}