// The following are example snippets showing how to use the weka.core.FastVector API; follow the accompanying links to view the full source code on GitHub.
/**
 * Inserts a class index into the tree.
 *
 * Pushes this node's current subtree down one level to become the new
 * right child, and creates a fresh left leaf holding the given class index.
 *
 * @param classIndex the class index to insert
 */
protected void insertClassIndex(int classIndex) {
  // Create new nodes
  NDTree right = new NDTree();
  if (m_left != null) {
    // NOTE(review): only m_left is null-checked but m_right is dereferenced
    // here too — presumably both children are always null or non-null
    // together in this tree; confirm the class invariant.
    m_right.m_parent = right;
    m_left.m_parent = right;
    right.m_right = m_right;
    right.m_left = m_left;
  }
  // The wrapped old subtree becomes the right child and keeps a copy of
  // this node's index set.
  m_right = right;
  m_right.m_indices = (FastVector) m_indices.copy();
  m_right.m_parent = this;
  // The new left child is a leaf containing only the inserted class index.
  m_left = new NDTree();
  m_left.insertClassIndexAtNode(classIndex);
  m_left.m_parent = this;
  // Propagate class Index
  propagateClassIndex(classIndex);
}
/**
 * Spaces a set of nodes out evenly between the left-most and right-most
 * node in the list, keeping those two endpoints fixed.
 *
 * @param nodes list of indexes of nodes to space out
 */
public void spaceHorizontal(FastVector nodes) {
  // Fewer than two nodes: nothing to space out. This also guards the
  // division by (nodes.size() - 1.0) below, which for a single node
  // produced NaN and clobbered its position with 0.
  if (nodes.size() < 2) {
    return;
  }
  // update undo stack
  if (m_bNeedsUndoAction) {
    addUndoAction(new spaceHorizontalAction(nodes));
  }
  // Find the horizontal extent of the selection.
  int nMinX = -1;
  int nMaxX = -1;
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nX = getPositionX((Integer) nodes.elementAt(iNode));
    if (nX < nMinX || iNode == 0) {
      nMinX = nX;
    }
    if (nX > nMaxX || iNode == 0) {
      nMaxX = nX;
    }
  }
  // Distribute the nodes at equal intervals over [nMinX, nMaxX], in list order.
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nNode = (Integer) nodes.elementAt(iNode);
    m_nPositionX.setElementAt((int) (nMinX + iNode * (nMaxX - nMinX) / (nodes.size() - 1.0)), nNode);
  }
}
/**
 * Generates all consequences of length 1 for a class association rule:
 * one ItemSet per class value, with every non-class item unset (-1).
 *
 * @param instances the instances under consideration
 * @return FastVector with consequences of length 1
 */
public static FastVector singleConsequence(Instances instances) {
  FastVector result = new FastVector();
  int numAtts = instances.numAttributes();
  int classIdx = instances.classIndex();
  int numClassValues = instances.classAttribute().numValues();
  for (int value = 0; value < numClassValues; value++) {
    ItemSet candidate = new ItemSet(instances.numInstances());
    candidate.setItem(new int[numAtts]);
    // Clear every slot, then set only the class attribute's value.
    for (int att = 0; att < numAtts; att++) {
      candidate.setItemAt(-1, att);
    }
    candidate.setItemAt(value, classIdx);
    result.addElement(candidate);
  }
  return result;
}
/**
 * Generates consequences of length 1 for an association rule, one per
 * value of the given attribute, and appends them to the supplied vector.
 *
 * @param instances the instances under consideration
 * @param attNum an item (attribute index) that does not occur in the premise
 * @param consequences FastVector that possibly already contains other
 *        consequences of length 1
 * @return FastVector with consequences of length 1
 */
public static FastVector singleConsequence(Instances instances, int attNum, FastVector consequences) {
  // The original scanned every attribute just to find i == attNum; index
  // the attribute directly instead. An out-of-range attNum adds nothing,
  // matching the old behavior.
  if (attNum >= 0 && attNum < instances.numAttributes()) {
    for (int j = 0; j < instances.attribute(attNum).numValues(); j++) {
      ItemSet consequence = new ItemSet(instances.numInstances());
      consequence.m_items = new int[instances.numAttributes()];
      // Mark all items unset, then set only the chosen attribute's value.
      for (int k = 0; k < instances.numAttributes(); k++) {
        consequence.m_items[k] = -1;
      }
      consequence.m_items[attNum] = j;
      consequences.addElement(consequence);
    }
  }
  return consequences;
}
/**
 * Converts the header info of the given set of instances into a set of item
 * sets (singletons). The ordering of values in the header file determines
 * the lexicographic order. Each item set knows its class label.
 *
 * @param instancesNoClass instances without the class attribute
 * @param classes the values of the class attribute sorted according to
 *        instances
 * @return a set of item sets, each containing a single item
 * @exception Exception if singletons can't be generated successfully
 */
public static FastVector singletons(Instances instancesNoClass,
  Instances classes) throws Exception {
  FastVector setOfItemSets = new FastVector();
  int numAtts = instancesNoClass.numAttributes();
  int numLabels = classes.attribute(0).numValues();
  for (int att = 0; att < numAtts; att++) {
    if (instancesNoClass.attribute(att).isNumeric()) {
      throw new Exception("Can't handle numeric attributes!");
    }
    // One singleton per (attribute value, class label) combination.
    for (int value = 0; value < instancesNoClass.attribute(att).numValues(); value++) {
      for (int label = 0; label < numLabels; label++) {
        LabeledItemSet singleton =
            new LabeledItemSet(instancesNoClass.numInstances(), label);
        singleton.m_items = new int[numAtts];
        java.util.Arrays.fill(singleton.m_items, -1);
        singleton.m_items[att] = value;
        setOfItemSets.addElement(singleton);
      }
    }
  }
  return setOfItemSets;
}
/**
 * Tests the ThresholdCurve generation from the command line.
 * The classifier is currently hardcoded (Logistic). Pipe in an ARFF file.
 *
 * @param args currently ignored
 */
public static void main(String[] args) {
  try {
    Instances inst = new Instances(new java.io.InputStreamReader(System.in));
    inst.setClassIndex(inst.numAttributes() - 1);
    ThresholdCurve tc = new ThresholdCurve();
    EvaluationUtils eu = new EvaluationUtils();
    Classifier classifier = new weka.classifiers.functions.Logistic();
    FastVector predictions = new FastVector();
    // Two cross-validation runs with different seeds; all predictions are
    // accumulated into a single vector before building the curve.
    // (A dead "if (false)" branch calling getNPointPrecision was removed.)
    for (int i = 0; i < 2; i++) {
      eu.setSeed(i);
      predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
    }
    Instances result = tc.getCurve(predictions);
    System.out.println(result);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Method that finds all class association rules.
 *
 * @throws Exception if an attribute is numeric
 */
private void findCarRulesQuickly() throws Exception {
  // Generate rules from every labeled item set of every size in m_Ls.
  for (int size = 0; size < m_Ls.size(); size++) {
    FastVector itemSets = (FastVector) m_Ls.elementAt(size);
    for (int i = 0; i < itemSets.size(); i++) {
      LabeledItemSet itemSet = (LabeledItemSet) itemSets.elementAt(i);
      FastVector[] rules = itemSet.generateRules(m_minMetric, false);
      // Copy the three parallel rule components into the global lists.
      for (int k = 0; k < rules[0].size(); k++) {
        m_allTheRules[0].addElement(rules[0].elementAt(k));
        m_allTheRules[1].addElement(rules[1].elementAt(k));
        m_allTheRules[2].addElement(rules[2].elementAt(k));
      }
    }
  }
}
/**
 * Set the output format. Changes the format of the specified date
 * attribute while copying every other attribute unchanged.
 */
private void setOutputFormat() {
  int targetIndex = m_AttIndex.getIndex();
  // Rebuild the attribute list, replacing the target date attribute with
  // one that uses the configured date pattern.
  FastVector newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (j == targetIndex) {
      newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern()));
    } else {
      newAtts.addElement(att.copy());
    }
  }
  // Assemble the empty output header, preserving relation name and class.
  Instances newData =
      new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());
  m_OutputAttribute = newData.attribute(targetIndex);
  setOutputFormat(newData);
}
/**
 * Creates an undo record for deleting value sValue from node nTargetNode:
 * stores a deep copy of the node's conditional probability table and
 * references to the distributions of all of its children.
 *
 * @param nTargetNode index of the node the value is deleted from
 * @param sValue the value being deleted
 */
DelValueAction(int nTargetNode, String sValue) {
  try {
    m_nTargetNode = nTargetNode;
    m_sValue = sValue;
    m_att = m_Instances.attribute(nTargetNode);
    // Deep-copy the target node's distributions via serialization so the
    // undo snapshot is independent of later edits.
    // (A stray empty statement ";" after this assignment was removed.)
    SerializedObject so = new SerializedObject(m_Distributions[nTargetNode]);
    m_CPT = (Estimator[]) so.getObject();
    // Collect all nodes that have nTargetNode as a parent ...
    m_children = new FastVector();
    for (int iNode = 0; iNode < getNrOfNodes(); iNode++) {
      if (m_ParentSets[iNode].contains(nTargetNode)) {
        m_children.addElement(iNode);
      }
    }
    // ... and record references to their distributions.
    m_childAtts = new Estimator[m_children.size()][];
    for (int iChild = 0; iChild < m_children.size(); iChild++) {
      int nChild = (Integer) m_children.elementAt(iChild);
      m_childAtts[iChild] = m_Distributions[nChild];
    }
  } catch (Exception e) {
    // Undo bookkeeping must not abort the edit; log and continue.
    e.printStackTrace();
  }
}
/**
 * Extracts the data sequences out of the original data set according to
 * their sequence id attribute, which is removed after extraction.
 *
 * @param originalDataSet the original data set
 * @param dataSeqID the index of the sequence ID attribute
 * @return set of distinct data sequences
 */
protected FastVector extractDataSequences(Instances originalDataSet, int dataSeqID) {
  FastVector sequences = new FastVector();
  Attribute seqIDAttribute = originalDataSet.attribute(dataSeqID);
  int start = 0;
  int end = 0;
  // Instances sharing a sequence id are assumed to be stored contiguously;
  // walk each run [start, end) and copy it into its own Instances object.
  for (int i = 0; i < seqIDAttribute.numValues(); i++) {
    double currentID = originalDataSet.instance(start).value(dataSeqID);
    while (end < originalDataSet.numInstances()
        && originalDataSet.instance(end).value(dataSeqID) == currentID) {
      end++;
    }
    Instances run = new Instances(originalDataSet, start, end - start);
    // The sequence id itself is not part of the extracted data.
    run.deleteAttributeAt(dataSeqID);
    sequences.addElement(run);
    start = end;
  }
  return sequences;
}
/**
 * Calculates the cumulative margin distribution for the set of
 * predictions, returning the result as a set of Instances. The
 * structure of these Instances is as follows:<p> <ul>
 * <li> <b>Margin</b> contains the margin value (which should be plotted
 * as an x-coordinate)
 * <li> <b>Current</b> contains the count of instances with the current
 * margin (plot as y axis)
 * <li> <b>Cumulative</b> contains the count of instances with margin
 * less than or equal to the current margin (plot as y axis)
 * </ul> <p>
 *
 * @param predictions the predictions to compute the curve from
 * @return datapoints as a set of instances, null if no predictions
 * have been made.
 */
public Instances getCurve(FastVector predictions) {
  if (predictions.size() == 0) {
    return null;
  }
  Instances insts = makeHeader();
  double[] margins = getMargins(predictions);
  int[] sorted = Utils.sort(margins);
  // Prediction weights are fractional; the original accumulated them in
  // int variables, silently truncating on every compound assignment.
  double binMargin = 0;
  double totalMargin = 0;
  // Leading data point before the smallest margin.
  insts.add(makeInstance(-1, binMargin, totalMargin));
  for (int i = 0; i < sorted.length; i++) {
    double current = margins[sorted[i]];
    double weight =
        ((NominalPrediction) predictions.elementAt(sorted[i])).weight();
    totalMargin += weight;
    binMargin += weight;
    // (The original wrapped this in a constant "if (true)" — removed.)
    insts.add(makeInstance(current, binMargin, totalMargin));
    binMargin = 0;
  }
  return insts;
}
/**
 * Updates the support count of a set of Sequence candidates according to a
 * given set of data sequences: each candidate's count is incremented once
 * for every data sequence it is a subsequence of.
 *
 * @param candidates the set of candidates
 * @param dataSequences the set of data sequences
 */
public static void updateSupportCount(FastVector candidates, FastVector dataSequences) {
  for (int c = 0; c < candidates.size(); c++) {
    Sequence candidate = (Sequence) candidates.elementAt(c);
    for (int d = 0; d < dataSequences.size(); d++) {
      Instances dataSequence = (Instances) dataSequences.elementAt(d);
      if (candidate.isSubsequenceOf(dataSequence)) {
        candidate.setSupportCount(candidate.getSupportCount() + 1);
      }
    }
  }
}
/**
 * Set up the header for the PC-&gt;original space dataset.
 *
 * @return the output format
 * @throws Exception if something goes wrong
 */
private Instances setOutputFormatOriginal() throws Exception {
  // One numeric attribute per original training attribute ...
  FastVector attributes = new FastVector();
  for (int i = 0; i < m_numAttribs; i++) {
    attributes.addElement(new Attribute(m_trainInstances.attribute(i).name()));
  }
  // ... plus, when a class is present, a copy of it appended last.
  if (m_hasClass) {
    attributes.addElement(m_trainHeader.classAttribute().copy());
  }
  Instances outputFormat = new Instances(
      m_trainHeader.relationName() + "->PC->original space", attributes, 0);
  // set the class to be the last attribute if necessary
  if (m_hasClass) {
    outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
  }
  return outputFormat;
}
/**
 * Creates the weka data set for clustering of samples.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  boolean needsNameAttribute =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  // One numeric attribute per column of the raw data.
  FastVector attributes = new FastVector();
  for (int col = 0; col < rawData[0].length; col++) {
    attributes.addElement(new Attribute("Var" + col));
  }
  // Hierarchical clustering additionally labels each row with a string name.
  if (needsNameAttribute) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);
  for (int row = 0; row < rawData.length; row++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[row], 0, values, 0, rawData[0].length);
    if (needsNameAttribute) {
      // Store the raw data file name in the trailing string attribute.
      values[data.numAttributes() - 1] =
          data.attribute("name").addStringValue(this.selectedRawDataFiles[row].getName());
    }
    data.add(new SparseInstance(1.0, values));
  }
  return data;
}
/**
 * Sets the format of the input instances.
 *
 * Converts a propositional input format into a multi-instance format with
 * three attributes: the bag index, a relation-valued "bag" attribute
 * holding the remaining attributes, and the class.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set
 * successfully
 */
public boolean setInputFormat(Instances instanceInfo)
  throws Exception {
  // The first attribute is treated as the bag index and must be nominal.
  if (instanceInfo.attribute(0).type() != Attribute.NOMINAL) {
    throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
  }
  super.setInputFormat(instanceInfo);
  /* create a new output format (multi-instance format) */
  Instances newData = instanceInfo.stringFreeStructure();
  // Keep copies of the bag-index and class attributes before removing them.
  Attribute attBagIndex = (Attribute) newData.attribute(0).copy();
  Attribute attClass = (Attribute) newData.classAttribute().copy();
  // remove the bagIndex attribute
  newData.deleteAttributeAt(0);
  // remove the class attribute
  // NOTE(review): deleting the last attribute assumes the class is the
  // last attribute of the input — confirm this precondition on callers.
  newData.setClassIndex(-1);
  newData.deleteAttributeAt(newData.numAttributes() - 1);
  // Assemble the multi-instance header: bag index, relational bag, class.
  FastVector attInfo = new FastVector(3);
  attInfo.addElement(attBagIndex);
  attInfo.addElement(new Attribute("bag", newData)); // relation-valued attribute
  attInfo.addElement(attClass);
  Instances data = new Instances("Multi-Instance-Dataset", attInfo, 0);
  data.setClassIndex(data.numAttributes() - 1);
  super.setOutputFormat(data.stringFreeStructure());
  // Locators for string and relational attributes inside the bag relation.
  m_BagStringAtts = new StringLocator(data.attribute(1).relation());
  m_BagRelAtts = new RelationalLocator(data.attribute(1).relation());
  return true;
}
/**
 * Builds an empty dataset for subsequence-interval features: numeric
 * attributes F0..F(n-1) plus a nominal class attribute copied from the
 * input, pre-populated with numSubSeries blank rows per input instance
 * (each carrying only its class value).
 */
Instances formatIntervalInstances(Instances data) {
  // 3 stats for the whole subseries, start and end point, 3 stats per interval.
  int numFeatures = 3 + 2 + 3 * numIntervals;
  FastVector atts = new FastVector();
  for (int f = 0; f < numFeatures; f++) {
    atts.addElement(new Attribute("F" + f));
  }
  // Nominal class attribute with the same values as the input's class.
  Attribute target = data.attribute(data.classIndex());
  FastVector vals = new FastVector(target.numValues());
  for (int v = 0; v < target.numValues(); v++) {
    vals.addElement(target.value(v));
  }
  atts.addElement(new Attribute(data.attribute(data.classIndex()).name(), vals));
  Instances result = new Instances("SubsequenceIntervals", atts, data.numInstances());
  result.setClassIndex(result.numAttributes() - 1);
  // Blank instances with only the class value set: numSubSeries per input row.
  for (int i = 0; i < data.numInstances(); i++) {
    double cval = data.instance(i).classValue();
    for (int s = 0; s < numSubSeries; s++) {
      DenseInstance blank = new DenseInstance(result.numAttributes());
      blank.setValue(result.numAttributes() - 1, cval);
      result.add(blank);
    }
  }
  return result;
}
/**
 * Generate random connected Bayesian network with discrete nodes
 * having all the same cardinality.
 *
 * @throws Exception if something goes wrong
 */
public void generateRandomNetwork() throws Exception {
  if (m_otherBayesNet == null) {
    // No template network: build structure and distributions from scratch.
    Init(m_nNrOfNodes, m_nCardinality);
    generateRandomNetworkStructure(m_nNrOfNodes, m_nNrOfArcs);
    generateRandomDistributions(m_nNrOfNodes, m_nCardinality);
    return;
  }
  // Template supplied: copy its parent sets and distributions directly.
  m_nNrOfNodes = m_otherBayesNet.getNrOfNodes();
  m_ParentSets = m_otherBayesNet.getParentSets();
  m_Distributions = m_otherBayesNet.getDistributions();
  random = new Random(m_nSeed);
  // Rebuild m_Instances: one nominal attribute per node, using the
  // template's node and value names.
  FastVector attInfo = new FastVector(m_nNrOfNodes);
  for (int iNode = 0; iNode < m_nNrOfNodes; iNode++) {
    int nValues = m_otherBayesNet.getCardinality(iNode);
    FastVector valueNames = new FastVector(nValues + 1);
    for (int iValue = 0; iValue < nValues; iValue++) {
      valueNames.addElement(m_otherBayesNet.getNodeValue(iNode, iValue));
    }
    attInfo.addElement(new Attribute(m_otherBayesNet.getNodeName(iNode), valueNames));
  }
  m_Instances = new Instances(m_otherBayesNet.getName(), attInfo, 100);
  m_Instances.setClassIndex(m_nNrOfNodes - 1);
}
/**
 * Delete nodes with indexes in selection from the network, updating
 * instances, parentsets, distributions. Conditional distributions are
 * condensed by taking the values for the target node to be its first
 * value. Used for manual manipulation of the Bayesian network.
 *
 * @param nodes array of indexes of nodes to delete (sorted in place)
 */
public void deleteSelection(FastVector nodes) {
  // Sort the selection ascending, in place, before proceeding.
  for (int i = 0; i < nodes.size(); i++) {
    int iMin = i;
    for (int j = i + 1; j < nodes.size(); j++) {
      if ((Integer) nodes.elementAt(j) < (Integer) nodes.elementAt(iMin)) {
        iMin = j;
      }
    }
    if (iMin != i) {
      Object tmp = nodes.elementAt(i);
      nodes.setElementAt(nodes.elementAt(iMin), i);
      nodes.setElementAt(tmp, iMin);
    }
  }
  // Record one undo action covering the whole selection.
  if (m_bNeedsUndoAction) {
    addUndoAction(new DeleteSelectionAction(nodes));
  }
  // Suppress per-node undo actions while the individual deletes run.
  boolean bNeedsUndoAction = m_bNeedsUndoAction;
  m_bNeedsUndoAction = false;
  try {
    // Delete from the highest index down so lower indexes stay valid.
    for (int iNode = nodes.size() - 1; iNode >= 0; iNode--) {
      deleteNode((Integer) nodes.elementAt(iNode));
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  m_bNeedsUndoAction = bNeedsUndoAction;
}
/**
 * Checks whether nominal schemes can handle more than two classes.
 * If a scheme is only designed for two-class problems it should
 * throw an appropriate exception for multi-class problems.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param numClasses the number of classes to test
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleNClasses(
  boolean nominalPredictor,
  boolean numericPredictor,
  boolean stringPredictor,
  boolean datePredictor,
  boolean relationalPredictor,
  boolean multiInstance,
  int numClasses) {
  // Announce which check is being run.
  print("more than two class problems");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, Attribute.NOMINAL);
  print("...");
  // Failure messages containing these words count as acceptable.
  FastVector accepts = new FastVector();
  accepts.addElement("number");
  accepts.addElement("class");
  int numTrain = getNumInstances();
  int missingLevel = 0;
  boolean predictorMissing = false;
  boolean classMissing = false;
  return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL,
      missingLevel, predictorMissing, classMissing, numTrain, numClasses,
      accepts);
}
/**
 * Generates a new rule for the decision list
 * and classifies the new example.
 *
 * @param random random number generator
 * @param example the instance to classify
 * @return a list of tests
 * @throws Exception if dataset format not defined
 */
private FastVector generateTestList(Random random, Instance example)
  throws Exception {
  Instances format = getDatasetFormat();
  if (format == null) {
    throw new Exception("Dataset format not defined.");
  }
  // One test per relevant attribute.
  FastVector tests = new FastVector(getNumAttributes() - getNumIrrelevant());
  boolean[] irrelevant = getAttList_Irr();
  for (int att = 0; att < getNumAttributes(); att++) {
    if (irrelevant[att]) {
      continue;
    }
    Test test;
    if (example.attribute(att).isNumeric()) {
      // Random split point; the test is negated when the split falls
      // below the example's value.
      double split = random.nextDouble();
      test = new Test(att, split, format, split < example.value(att));
    } else {
      test = new Test(att, example.value(att), format, false);
    }
    tests.addElement(test);
  }
  return tests;
}
/**
 * Checks whether the scheme can handle zero training instances.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleZeroTraining(
  boolean nominalPredictor,
  boolean numericPredictor,
  boolean stringPredictor,
  boolean datePredictor,
  boolean relationalPredictor,
  boolean multiInstance) {
  // Announce which check is being run.
  print("handle zero training instances");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance);
  print("...");
  // Failure messages containing these words count as acceptable.
  FastVector accepts = new FastVector();
  accepts.addElement("train");
  accepts.addElement("value");
  int numTrain = 0;
  int missingLevel = 0;
  boolean predictorMissing = false;
  return runBasicTest(
      nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor, multiInstance,
      missingLevel, predictorMissing, numTrain, accepts);
}
/**
 * Checks whether the scheme can handle class attributes as Nth attribute.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param classType the class type (NUMERIC, NOMINAL, etc.)
 * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 * @see TestInstances#CLASS_IS_LAST
 */
protected boolean[] canHandleClassAsNthAttribute(
  boolean nominalPredictor,
  boolean numericPredictor,
  boolean stringPredictor,
  boolean datePredictor,
  boolean relationalPredictor,
  boolean multiInstance,
  int classType,
  int classIndex) {
  // Describe which position the class attribute occupies in this test.
  if (classIndex == TestInstances.CLASS_IS_LAST) {
    print("class attribute as last attribute");
  } else {
    print("class attribute as " + (classIndex + 1) + ". attribute");
  }
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, classType);
  print("...");
  // No failure messages are considered acceptable for this check.
  FastVector accepts = new FastVector();
  int numTrain = getNumInstances();
  int numClasses = 2;
  int missingLevel = 0;
  boolean predictorMissing = false;
  boolean classMissing = false;
  return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor, multiInstance, classType, classIndex,
      missingLevel, predictorMissing, classMissing, numTrain, numClasses,
      accepts);
}
/**
 * Collects the classifier predictions using the specified evaluation method.
 *
 * @param instances the set of <code>Instances</code> to generate
 *        predictions for.
 * @param mode the evaluation mode.
 * @param numFolds the number of folds to use if not evaluating on the
 *        full training set.
 * @return a <code>FastVector</code> containing the predictions.
 * @throws Exception if an error occurs generating the predictions.
 */
protected FastVector getPredictions(Instances instances, int mode, int numFolds)
  throws Exception {
  EvaluationUtils eu = new EvaluationUtils();
  eu.setSeed(m_Seed);
  switch (mode) {
    case EVAL_TUNED_SPLIT: {
      Instances data = new Instances(instances);
      Random random = new Random(m_Seed);
      data.randomize(random);
      data.stratify(numFolds);
      Instances trainData = null;
      Instances evalData = null;
      // Use the first train/test split where both subsets contain at least
      // one positive instance; otherwise fall through with the last split.
      for (int fold = 0; fold < numFolds; fold++) {
        trainData = data.trainCV(numFolds, fold, random);
        evalData = data.testCV(numFolds, fold);
        if (checkForInstance(trainData) && checkForInstance(evalData)) {
          break;
        }
      }
      return eu.getTrainTestPredictions(m_Classifier, trainData, evalData);
    }
    case EVAL_TRAINING_SET:
      return eu.getTrainTestPredictions(m_Classifier, instances, instances);
    case EVAL_CROSS_VALIDATION:
      return eu.getCVPredictions(m_Classifier, instances, numFolds);
    default:
      throw new RuntimeException("Unrecognized evaluation mode");
  }
}
/**
 * Returns the subset of item sets whose rule support lies within
 * [minSupport, maxSupport]; all others are dropped.
 *
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 * @param maxSupport the maximum support
 * @return the reduced set of item sets
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport,
  int maxSupport) {
  FastVector kept = new FastVector(itemSets.size());
  for (int i = 0; i < itemSets.size(); i++) {
    LabeledItemSet candidate = (LabeledItemSet) itemSets.elementAt(i);
    boolean inRange = candidate.m_ruleSupCounter >= minSupport
        && candidate.m_ruleSupCounter <= maxSupport;
    if (inRange) {
      kept.addElement(candidate);
    }
  }
  return kept;
}
/**
 * Return a hashtable filled with the given item sets, each mapped to its
 * class label.
 *
 * @param itemSets the set of item sets to be used for filling the hash table
 * @param initialSize the initial size of the hashtable
 * @return the generated hashtable
 */
public static Hashtable getHashtable(FastVector itemSets, int initialSize) {
  Hashtable hashtable = new Hashtable(initialSize);
  for (int i = 0; i < itemSets.size(); i++) {
    LabeledItemSet current = (LabeledItemSet) itemSets.elementAt(i);
    // Integer.valueOf reuses cached boxes; the deprecated "new Integer"
    // allocated a fresh object on every call.
    hashtable.put(current, Integer.valueOf(current.m_classLabel));
  }
  return hashtable;
}
/**
 * Prunes a set of (k)-item sets using the given (k-1)-item sets: a
 * (k)-item set is kept only if every one of its (k-1)-subsets is present
 * in the kMinusOne table with the same class label.
 *
 * @param toPrune the set of (k)-item sets to be pruned
 * @param kMinusOne hashtable of the (k-1)-item sets to be used for pruning
 * @return the pruned set of item sets
 */
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {
  FastVector newVector = new FastVector(toPrune.size());
  int help, j;
  for (int i = 0; i < toPrune.size(); i++) {
    LabeledItemSet current = (LabeledItemSet) toPrune.elementAt(i);
    for (j = 0; j < current.m_items.length; j++) {
      if (current.m_items[j] != -1) {
        // Temporarily blank out item j to form a (k-1)-subset in place.
        // NOTE(review): this relies on the item set's equals/hashCode being
        // computed from m_items — confirm against LabeledItemSet.
        help = current.m_items[j];
        current.m_items[j] = -1;
        if (kMinusOne.get(current) != null
          && (current.m_classLabel == (((Integer) kMinusOne.get(current))
            .intValue())))
          // Subset found with matching label: restore item, keep checking.
          current.m_items[j] = help;
        else {
          // Subset missing (or label mismatch): restore and reject this set.
          current.m_items[j] = help;
          break;
        }
      }
    }
    // Loop ran to completion => every (k-1)-subset survived; keep the set.
    if (j == current.m_items.length)
      newVector.addElement(current);
  }
  return newVector;
}
/**
 * Updates the counter of every item set with every instance.
 *
 * @param itemSets the item sets to update
 * @param instancesNoClass instances without the class attribute
 * @param instancesClass the values of the class attribute sorted according
 *        to instances
 */
public static void upDateCounters(FastVector itemSets,
  Instances instancesNoClass, Instances instancesClass) {
  // Feed each (instance, class-instance) pair to every item set's counter.
  for (int i = 0; i < instancesNoClass.numInstances(); i++) {
    for (int s = 0; s < itemSets.size(); s++) {
      LabeledItemSet itemSet = (LabeledItemSet) itemSets.elementAt(s);
      itemSet.upDateCounter(instancesNoClass.instance(i), instancesClass.instance(i));
    }
  }
}
/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the format couldn't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);
  m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);
  // Build the output header from the selected attributes, remembering
  // where the class attribute lands (-1 if it was not selected).
  FastVector attributes = new FastVector();
  int outputClass = -1;
  m_SelectedAttributes = m_SelectCols.getSelection();
  for (int i = 0; i < m_SelectedAttributes.length; i++) {
    int current = m_SelectedAttributes[i];
    if (instanceInfo.classIndex() == current) {
      outputClass = attributes.size();
    }
    attributes.addElement(instanceInfo.attribute(current).copy());
  }
  initInputLocators(getInputFormat(), m_SelectedAttributes);
  Instances outputFormat =
      new Instances(instanceInfo.relationName(), attributes, 0);
  outputFormat.setClassIndex(outputClass);
  setOutputFormat(outputFormat);
  return true;
}
/**
 * Builds a single rule learner with REP dealing with 2 classes.
 * This rule learner always tries to predict the class with label
 * m_Class.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  // Validate the class attribute and the training data before learning.
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal()) {
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  }
  if (instances.numClasses() != 2) {
    throw new Exception(" Only 2 classes, please.");
  }
  Instances data = new Instances(instances);
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" No training data.");
  }
  data.deleteWithMissingClass();
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" The class labels of all the training data are missing.");
  }
  if (data.numInstances() < m_Folds) {
    throw new Exception(" Not enough data for REP.");
  }
  m_Antds = new FastVector();
  // Split data into a grow set and a prune set via stratified folds.
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
  Instances pruneData = data.testCV(m_Folds, m_Folds - 1);
  grow(growData); // Build this rule
  prune(pruneData); // Prune this rule
}
/**
 * Checks whether the scheme can handle class attributes as Nth attribute.
 * (Duplicate of an identical method earlier in this file.)
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param classType the class type (NUMERIC, NOMINAL, etc.)
 * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 * @see TestInstances#CLASS_IS_LAST
 */
protected boolean[] canHandleClassAsNthAttribute(
  boolean nominalPredictor,
  boolean numericPredictor,
  boolean stringPredictor,
  boolean datePredictor,
  boolean relationalPredictor,
  boolean multiInstance,
  int classType,
  int classIndex) {
  // Describe which position the class attribute occupies in this test.
  if (classIndex == TestInstances.CLASS_IS_LAST) {
    print("class attribute as last attribute");
  } else {
    print("class attribute as " + (classIndex + 1) + ". attribute");
  }
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, classType);
  print("...");
  // No failure messages are considered acceptable for this check.
  FastVector accepts = new FastVector();
  int numTrain = getNumInstances();
  int numClasses = 2;
  int missingLevel = 0;
  boolean predictorMissing = false;
  boolean classMissing = false;
  return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor, multiInstance, classType, classIndex,
      missingLevel, predictorMissing, classMissing, numTrain, numClasses,
      accepts);
}