下面列出了 weka.core.AttributeStats 类的 API 用法示例代码；也可以点击链接到 GitHub 查看完整源代码。
/**
 * Folds one instance into, or takes it out of, the per-attribute statistics.
 *
 * <p>The statistics array is created lazily on the first call: one
 * {@code AttributeStats} per attribute, carrying an {@code int} count array
 * for nominal attributes and a {@code Stats} object for all others.
 * Attribute values that are missing in the instance are skipped. The running
 * instance total is always adjusted by the instance weight.
 *
 * @param updateInstance the instance whose values are applied
 * @param delete true to subtract the instance's contribution from the
 *          statistics, false to add it
 */
protected void updateStats(Instance updateInstance,
    boolean delete) {
  // lazily allocate one statistics holder per attribute
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int att = 0; att < m_numAttributes; att++) {
      AttributeStats fresh = new AttributeStats();
      m_attStats[att] = fresh;
      if (m_clusterInstances.attribute(att).isNominal()) {
        fresh.nominalCounts =
            new int[m_clusterInstances.attribute(att).numValues()];
      } else {
        fresh.numericStats = new Stats();
      }
    }
  }

  // positive weight when adding, negated weight when removing
  final double weight = updateInstance.weight();
  final double signedWeight = delete ? (-1.0 * weight) : weight;

  for (int att = 0; att < m_numAttributes; att++) {
    if (updateInstance.isMissing(att)) {
      continue;
    }
    final double value = updateInstance.value(att);
    final AttributeStats target = m_attStats[att];
    if (m_clusterInstances.attribute(att).isNominal()) {
      // compound assignment narrows the double result back to the int counter
      target.nominalCounts[(int) value] += signedWeight;
      target.totalCount += signedWeight;
    } else if (delete) {
      target.numericStats.subtract(value, weight);
    } else {
      target.numericStats.add(value, weight);
    }
  }

  m_totalInstances += signedWeight;
}
/**
 * Builds the classifier.
 *
 * <p>First derives the class distribution of the training set (only the
 * first two class values are considered), then computes how many test
 * instances are added per iteration, and finally runs the fold loop for fold
 * indices {@code 0} to {@code getFolds()} inclusive. Each fold rebuilds the
 * training set and the classifier. The loop stops early once the fold index
 * equals a positive cut-off.
 *
 * @throws Exception if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats stats;
  int i;

  // determine class distribution
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++) {
    if (stats.totalCount > 0) {
      // both operands are ints: divide in floating point, otherwise the
      // fraction is truncated to 0 or 1
      m_ClassDistribution[i] = (double) stats.nominalCounts[i] / stats.totalCount;
    } else {
      // no counted class values: avoid a division by zero
      m_ClassDistribution[i] = 0;
    }
  }

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration = (double) m_Testset.numInstances()
      / getFolds();
  if (getDebug()) {
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);
  }

  // build classifier
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose() || getDebug()) {
      if (getCutOff() > 0) {
        System.out.println( "\nFold " + i + "/" + getFolds()
            + " (CutOff at " + getCutOff() + ")");
      } else {
        System.out.println("\nFold " + i + "/" + getFolds());
      }
    }
    buildTrainSet(i);
    buildClassifier();

    // cutoff of folds reached?
    if ( (i > 0) && (i == getCutOff()) ) {
      break;
    }
  }
}
/**
 * Derives the class probabilities from the class attribute of the given data.
 *
 * <p>Each probability is the count of the class value divided by the sum of
 * all class value counts.
 *
 * @param data the data to get the class probabilities from
 */
public void setClassProbabilities(Instances data) {
  final AttributeStats classStats = data.attributeStats(data.classIndex());
  final int[] perClass = classStats.nominalCounts;
  final int counted = Utils.sum(perClass);

  m_ClassProbs = new double[data.classAttribute().numValues()];
  for (int cls = 0; cls < m_ClassProbs.length; cls++) {
    m_ClassProbs[cls] = perClass[cls] / (double) counted;
  }
}
/**
 * Randomly assigns class labels to a range of instances, following the
 * class distribution of the training set.
 *
 * <p>The flip counter is reset and the class values of the affected
 * instances are first set to missing. Each instance then receives the first
 * class value with a probability equal to the share of that value in the
 * training data, and the second class value otherwise.
 *
 * @param train the training instances to retrieve the class
 *          distribution from
 * @param instances the instances to initialize
 * @param from the index of the first instance to initialize
 * @param count the number of instances to initialize
 * @return the initialized instances (the same object as {@code instances})
 * @throws Exception if something goes wrong
 */
public Instances initializeLabels( Instances train, Instances instances,
    int from, int count )
    throws Exception {
  // reset flip count
  m_FlippedLabels = 0;

  // explicitly set labels to "missing"
  for (int idx = from; idx < from + count; idx++) {
    instances.instance(idx).setClassMissing();
  }

  // share of the first class value in the training data
  final AttributeStats trainStats = train.attributeStats(train.classIndex());
  final double firstClassShare =
      (double) trainStats.nominalCounts[0] / (double) trainStats.totalCount;

  // draw a label for every instance in the range
  final Attribute classAttribute = instances.attribute(instances.classIndex());
  for (int idx = from; idx < from + count; idx++) {
    final int labelIndex = (m_Random.nextDouble() < firstClassShare) ? 0 : 1;
    instances.instance(idx).setClassValue(classAttribute.value(labelIndex));
  }

  return instances;
}
/**
 * Adds the supplied instance to, or removes it from, the attribute
 * statistics.
 *
 * <p>On the first call the statistics array is allocated: nominal attributes
 * get an {@code int} count array sized by their number of values, every
 * other attribute gets a {@code Stats} object. Missing values in the
 * instance leave the corresponding attribute untouched. The total instance
 * weight is updated in every call.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are to be
 *          removed from the statistics
 */
protected void updateStats(Instance updateInstance,
    boolean delete) {
  if (m_attStats == null) {
    // first use: set up one holder per attribute
    m_attStats = new AttributeStats[m_numAttributes];
    for (int a = 0; a < m_numAttributes; a++) {
      AttributeStats holder = new AttributeStats();
      m_attStats[a] = holder;
      if (m_clusterInstances.attribute(a).isNominal()) {
        holder.nominalCounts =
            new int[m_clusterInstances.attribute(a).numValues()];
      } else {
        holder.numericStats = new Stats();
      }
    }
  }

  final double w = updateInstance.weight();
  // contribution of this instance: negative when it is being removed
  final double delta = delete ? (-1.0 * w) : w;

  for (int a = 0; a < m_numAttributes; a++) {
    if (!updateInstance.isMissing(a)) {
      final double value = updateInstance.value(a);
      final AttributeStats holder = m_attStats[a];
      if (!m_clusterInstances.attribute(a).isNominal()) {
        if (delete) {
          holder.numericStats.subtract(value, w);
        } else {
          holder.numericStats.add(value, w);
        }
      } else {
        // the int counters absorb the double delta via implicit narrowing
        holder.nominalCounts[(int) value] += delta;
        holder.totalCount += delta;
      }
    }
  }

  m_totalInstances += delta;
}
/**
 * Removes the columns whose values are all missing.
 *
 * <p>While scanning the attributes, and only as long as the upper bound for
 * the minimum support is still {@code 1.0}, the highest count of any single
 * attribute value is tracked. If that count differs from the number of
 * instances, the upper bound is lowered to the fraction
 * {@code maxCount / numInstances}.
 *
 * @param instances the instances
 * @return a new set of instances with all-missing columns removed, or the
 *         given instances unchanged if there is nothing to remove
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances)
    throws Exception {
  final int numInstances = instances.numInstances();
  final StringBuilder toRemove = new StringBuilder();
  int removed = 0;
  int maxCount = 0;

  for (int att = 0; att < instances.numAttributes(); att++) {
    final AttributeStats attStats = instances.attributeStats(att);

    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
      // see if we can decrease this by looking for the most frequent value
      final int[] counts = attStats.nominalCounts;
      final int highest = counts[Utils.maxIndex(counts)];
      if (highest > maxCount) {
        maxCount = highest;
      }
    }

    if (attStats.missingCount == numInstances) {
      // collect 1-based attribute indices as a comma-separated list
      if (toRemove.length() > 0) {
        toRemove.append(",");
      }
      toRemove.append(att + 1);
      removed++;
    }
  }

  if (m_verbose) {
    System.err.println("Removed : " + removed
        + " columns with all missing " + "values.");
  }

  if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
    m_upperBoundMinSupport = (double) maxCount / (double) numInstances;
    if (m_verbose) {
      System.err.println("Setting upper bound min support to : "
          + m_upperBoundMinSupport);
    }
  }

  if (toRemove.length() == 0) {
    return instances;
  }

  final Remove removeFilter = new Remove();
  removeFilter.setAttributeIndices(toRemove.toString());
  removeFilter.setInvertSelection(false);
  removeFilter.setInputFormat(instances);
  return Filter.useFilter(instances, removeFilter);
}
/**
 * Signals that the current batch of input to the filter is finished.
 *
 * <p>If no internal {@code Remove} filter exists yet, the attributes to
 * delete are determined from the buffered input. The class attribute is
 * never deleted. Any other attribute is deleted when all its values are
 * missing, when it has fewer than two distinct values, or when it is nominal
 * and {@code distinctCount / (totalCount - missingCount) * 100} exceeds the
 * configured maximum variance percentage. The buffered instances are then
 * passed through the {@code Remove} filter and queued for output.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input format has been defined
 * @throws Exception may be propagated from the internal filter
 */
public boolean batchFinished() throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_removeFilter == null) {
    // establish attributes to remove from first batch
    final Instances data = getInputFormat();
    final int[] candidates = new int[data.numAttributes()];
    int selected = 0;

    for (int att = 0; att < data.numAttributes(); att++) {
      if (att == data.classIndex()) {
        continue; // skip class
      }
      final AttributeStats attStats = data.attributeStats(att);
      final boolean allMissing = attStats.missingCount == data.numInstances();
      final boolean constant = attStats.distinctCount < 2;

      boolean drop = allMissing || constant;
      if (!drop && data.attribute(att).isNominal()) {
        // nominal attributes that vary too much are removed as well
        final double variancePercent = (double) attStats.distinctCount
            / (double) (attStats.totalCount - attStats.missingCount) * 100.0;
        drop = variancePercent > m_maxVariancePercentage;
      }
      if (drop) {
        candidates[selected++] = att;
      }
    }

    // trim the candidate buffer to the indices actually chosen
    final int[] attsToDelete = new int[selected];
    System.arraycopy(candidates, 0, attsToDelete, 0, selected);

    m_removeFilter = new Remove();
    m_removeFilter.setAttributeIndicesArray(attsToDelete);
    m_removeFilter.setInvertSelection(false);
    m_removeFilter.setInputFormat(data);

    for (int row = 0; row < data.numInstances(); row++) {
      m_removeFilter.input(data.instance(row));
    }
    m_removeFilter.batchFinished();

    final Instances outputDataset = m_removeFilter.getOutputFormat();
    // restore old relation name to hide attribute filter stamp
    outputDataset.setRelationName(data.relationName());
    setOutputFormat(outputDataset);

    for (Instance filtered = m_removeFilter.output();
        filtered != null;
        filtered = m_removeFilter.output()) {
      filtered.setDataset(outputDataset);
      push(filtered);
    }
  }

  flushInput();
  m_NewBatch = true;
  return numPendingOutput() != 0;
}
/**
 * Determines the values to retain; it is always at least 1 and at most the
 * number of labels of the selected attribute.
 *
 * <p>The set of retained values is always reset. If {@code inst} is
 * {@code null}, or the selected attribute has no labels, the set stays empty.
 * Otherwise the label counts are sorted to find the occurrence range
 * (minimum and maximum count) of the {@code count} least or most frequent
 * labels. Labels whose count lies inside that range (borders included) are
 * added in label order until {@code count} labels have been collected.
 *
 * @param inst the Instances to determine the values from which are kept;
 *          may be {@code null}
 */
public void determineValues(Instances inst) {
  // init names; done first so a null input leaves an empty selection
  m_Values = new HashSet();

  // must be checked before inst is dereferenced below
  if (inst == null) {
    return;
  }

  m_AttIndex.setUpper(inst.numAttributes() - 1);
  int attIdx = m_AttIndex.getIndex();

  AttributeStats stats = inst.attributeStats(attIdx);
  int[] counts = stats.nominalCounts;
  int numLabels = counts.length;

  // nothing to select from; also keeps the index arithmetic below in bounds
  if (numLabels == 0) {
    return;
  }

  // number of values to retain
  int count = m_Invert ? (numLabels - m_NumValues) : m_NumValues;

  // out of bounds? -> fix
  if (count < 1) {
    count = 1; // at least one value!
  }
  if (count > numLabels) {
    count = numLabels; // at max the existing values
  }

  // determine min/max occurrences on a sorted copy, so that the per-label
  // counts keep their original order and need not be recomputed
  int[] sorted = counts.clone();
  Arrays.sort(sorted);
  int min;
  int max;
  if (m_LeastValues) {
    min = sorted[0];
    max = sorted[count - 1];
  } else {
    min = sorted[numLabels - count];
    max = sorted[numLabels - 1];
  }

  // add values if they are inside min/max (incl. borders) and not more than count
  for (int i = 0; i < numLabels; i++) {
    if ( (counts[i] >= min) && (counts[i] <= max) && (m_Values.size() < count) ) {
      m_Values.add(inst.attribute(attIdx).value(i));
    }
  }
}
/**
 * Performs initialization and building, including all fold iterations.
 *
 * <p>Steps: determine the number of features to select (falling back to
 * {@code (int) log2(numAttributes) + 1} when the configured number is below
 * 1), derive the class distribution of the training set (only the first two
 * class values are considered), compute the number of test instances added
 * per iteration, build the ranked list of test instances, and run the fold
 * loop for fold indices {@code 0} to {@code getFolds()} inclusive. The loop
 * stops early once the fold index equals a positive cut-off.
 *
 * @throws Exception if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats stats;
  int i;

  // determine number of features to be selected
  m_KValue = getNumFeatures();
  if (m_KValue < 1) {
    m_KValue = (int) Utils.log2(m_Trainset.numAttributes()) + 1;
  }

  // determine class distribution
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++) {
    if (stats.totalCount > 0) {
      // both operands are ints: divide in floating point, otherwise the
      // fraction is truncated to 0 or 1
      m_ClassDistribution[i] = (double) stats.nominalCounts[i] / stats.totalCount;
    } else {
      m_ClassDistribution[i] = 0;
    }
  }

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration = (double) m_Testset.numInstances()
      / getFolds();
  if (getDebug()) {
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);
  }

  // build list of sorted test instances
  m_List = new RankedList(m_Testset, m_ClassDistribution);

  // build classifier
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose()) {
      if (getCutOff() > 0) {
        System.out.println( "\nFold " + i + "/" + getFolds()
            + " (CutOff at " + getCutOff() + ")");
      } else {
        System.out.println("\nFold " + i + "/" + getFolds());
      }
    }
    buildTrainSet(i);
    buildClassifier();

    // cutoff of folds reached?
    if ( (i > 0) && (i == getCutOff()) ) {
      break;
    }
  }
}