类weka.core.FastVector源码实例Demo

下面列出了怎么用weka.core.FastVector的API类实例代码及写法,或者点击链接到github查看源代码。

源代码1 项目: tsml   文件: ND.java
/**
 * Inserts a class index into the tree.
 *
 * The existing children of this node (if any) are re-attached beneath a newly
 * created right child, which also receives a copy of this node's index list.
 * A new left child is then created for the inserted class, and finally the
 * class index is handed to propagateClassIndex.
 *
 * @param classIndex the class index to insert
 */
protected void insertClassIndex(int classIndex) {

  // New right child; it adopts the current subtrees when this node has children.
  NDTree pushedDown = new NDTree();
  if (m_left != null) {
    m_right.m_parent = pushedDown;
    m_left.m_parent = pushedDown;
    pushedDown.m_right = m_right;
    pushedDown.m_left = m_left;
  }
  m_right = pushedDown;
  pushedDown.m_indices = (FastVector) m_indices.copy();
  pushedDown.m_parent = this;

  // New left child for the inserted class; the parent link is set after the insert call.
  NDTree inserted = new NDTree();
  m_left = inserted;
  inserted.insertClassIndexAtNode(classIndex);
  inserted.m_parent = this;

  // Propagate class index
  propagateClassIndex(classIndex);
}
 
源代码2 项目: tsml   文件: EditableBayesNet.java
/**
 * Spaces out a set of nodes evenly between the left-most and right-most node
 * in the list.
 *
 * The i-th listed node is moved to minX + i * (maxX - minX) / (n - 1), where
 * minX and maxX are the smallest and largest current x positions of the
 * listed nodes and n is the list size.
 *
 * With fewer than two nodes there is no interval to divide, so positions are
 * left untouched. Without this guard a single node was moved to x = 0: the
 * expression evaluates 0 / 0.0 = NaN, and casting NaN to int yields 0.
 *
 * @param nodes list of indexes of nodes to space out
 */
public void spaceHorizontal(FastVector nodes) {
	// update undo stack
	if (m_bNeedsUndoAction) {
		addUndoAction(new spaceHorizontalAction(nodes));
	}

	int nNodes = nodes.size();
	if (nNodes < 2) {
		// nothing to distribute; also avoids the division by (nNodes - 1.0) == 0
		return;
	}

	// find the horizontal extent of the selection
	int nMinX = getPositionX((Integer) nodes.elementAt(0));
	int nMaxX = nMinX;
	for (int iNode = 1; iNode < nNodes; iNode++) {
		int nX = getPositionX((Integer) nodes.elementAt(iNode));
		if (nX < nMinX) {
			nMinX = nX;
		}
		if (nX > nMaxX) {
			nMaxX = nX;
		}
	}

	// place the nodes at equal steps, in list order
	for (int iNode = 0; iNode < nNodes; iNode++) {
		int nNode = (Integer) nodes.elementAt(iNode);
		m_nPositionX.setElementAt((int) (nMinX + iNode * (nMaxX - nMinX) / (nNodes - 1.0)), nNode);
	}
}
 
源代码3 项目: tsml   文件: CaRuleGeneration.java
/**
 * Generates the consequences of length 1 for a class association rule: one
 * item set per value of the class attribute.
 *
 * Each item set has every position set to -1 except the class index, which
 * holds the class value.
 *
 * @param instances the instances under consideration
 * @return FastVector with consequences of length 1
 */
public static FastVector singleConsequence(Instances instances) {

  FastVector result = new FastVector();
  int numClassValues = instances.classAttribute().numValues();

  for (int classValue = 0; classValue < numClassValues; classValue++) {
    ItemSet itemSet = new ItemSet(instances.numInstances());
    itemSet.setItem(new int[instances.numAttributes()]);

    // blank out every position, then mark the class value
    for (int att = 0; att < instances.numAttributes(); att++) {
      itemSet.setItemAt(-1, att);
    }
    itemSet.setItemAt(classValue, instances.classIndex());

    result.addElement(itemSet);
  }
  return result;
}
 
源代码4 项目: tsml   文件: RuleGeneration.java
/**
 * Generates consequences of length 1 for an association rule: one item set
 * per value of attribute attNum, appended to the given vector.
 *
 * If attNum is not a valid attribute index, nothing is appended.
 *
 * @param instances the instances under consideration
 * @param attNum an item that does not occur in the premise
 * @param consequences FastVector that possibly already contains other consequences of length 1
 * @return FastVector with consequences of length 1 (the same object as consequences)
 */
public static FastVector singleConsequence(Instances instances, int attNum, FastVector consequences) {

  int numAtts = instances.numAttributes();

  // Same effect as scanning all attribute indices and acting only on attNum.
  if (attNum < 0 || attNum >= numAtts) {
    return consequences;
  }

  int numValues = instances.attribute(attNum).numValues();
  for (int value = 0; value < numValues; value++) {
    int[] items = new int[numAtts];
    java.util.Arrays.fill(items, -1);
    items[attNum] = value;

    ItemSet single = new ItemSet(instances.numInstances());
    single.m_items = items;
    consequences.addElement(single);
  }
  return consequences;
}
 
源代码5 项目: tsml   文件: LabeledItemSet.java
/**
 * Converts the header info of the given set of instances into a set of item
 * sets (singletons). The ordering of values in the header file determines the
 * lexicographic order. Each item set knows its class label.
 *
 * One LabeledItemSet is produced for every (attribute, attribute value, class
 * value) combination; its item array is -1 everywhere except at the
 * attribute's position, which holds the value index.
 *
 * @return a set of item sets, each containing a single item
 * @param instancesNoClass instances without the class attribute
 * @param classes the values of the class attribute sorted according to
 *          instances
 * @exception Exception if singletons can't be generated successfully
 *              (a numeric attribute is encountered)
 */
public static FastVector singletons(Instances instancesNoClass,
    Instances classes) throws Exception {

  FastVector setOfItemSets = new FastVector();
  int numAtts = instancesNoClass.numAttributes();

  // make singletons
  for (int att = 0; att < numAtts; att++) {
    if (instancesNoClass.attribute(att).isNumeric()) {
      throw new Exception("Can't handle numeric attributes!");
    }
    int numValues = instancesNoClass.attribute(att).numValues();
    for (int value = 0; value < numValues; value++) {
      for (int label = 0; label < classes.attribute(0).numValues(); label++) {
        int[] items = new int[numAtts];
        java.util.Arrays.fill(items, -1);
        items[att] = value;

        LabeledItemSet current =
            new LabeledItemSet(instancesNoClass.numInstances(), label);
        current.m_items = items;
        setOfItemSets.addElement(current);
      }
    }
  }
  return setOfItemSets;
}
 
源代码6 项目: tsml   文件: ThresholdCurve.java
/**
 * Tests the ThresholdCurve generation from the command line.
 * The classifier is currently hardcoded. Pipe in an arff file.
 *
 * The data set is read from standard input, the last attribute is used as the
 * class, and predictions from two 10-fold cross-validation runs (seeds 0 and 1)
 * are pooled before the curve is printed to standard output.
 *
 * @param args currently ignored
 */
public static void main(String [] args) {

  try {
    Instances inst = new Instances(new java.io.InputStreamReader(System.in));
    inst.setClassIndex(inst.numAttributes() - 1);

    ThresholdCurve tc = new ThresholdCurve();
    EvaluationUtils eu = new EvaluationUtils();
    Classifier classifier = new weka.classifiers.functions.Logistic();

    FastVector predictions = new FastVector();
    for (int run = 0; run < 2; run++) { // Do two runs.
      eu.setSeed(run);
      predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
    }

    Instances result = tc.getCurve(predictions);
    System.out.println(result);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
 
源代码7 项目: tsml   文件: Apriori.java
/**
 * Method that finds all class association rules.
 *
 * Walks every labeled item set stored in m_Ls, asks it to generate rules for
 * the minimum metric, and appends the three parallel result vectors to the
 * corresponding entries of m_allTheRules.
 *
 * @throws Exception if an attribute is numeric
 */
private void findCarRulesQuickly() throws Exception {

  // Build rules
  for (int level = 0; level < m_Ls.size(); level++) {
    FastVector itemSetsAtLevel = (FastVector) m_Ls.elementAt(level);

    for (Enumeration sets = itemSetsAtLevel.elements(); sets.hasMoreElements();) {
      LabeledItemSet itemSet = (LabeledItemSet) sets.nextElement();
      FastVector[] generated = itemSet.generateRules(m_minMetric, false);

      // the first vector determines how many rules were produced
      int numRules = generated[0].size();
      for (int r = 0; r < numRules; r++) {
        for (int part = 0; part < 3; part++) {
          m_allTheRules[part].addElement(generated[part].elementAt(r));
        }
      }
    }
  }
}
 
源代码8 项目: tsml   文件: ChangeDateFormat.java
/**
 * Set the output format. Changes the format of the specified date
 * attribute.
 *
 * All other attributes are copied unchanged; the class index of the input
 * format is carried over to the new header.
 */
private void setOutputFormat() {

  Instances input = getInputFormat();
  int numAtts = input.numAttributes();
  int dateIndex = m_AttIndex.getIndex();

  // Create new attributes
  FastVector newAtts = new FastVector(numAtts);
  for (int j = 0; j < numAtts; j++) {
    Attribute att = input.attribute(j);
    Object outAtt = (j == dateIndex)
        ? new Attribute(att.name(), getDateFormat().toPattern())
        : att.copy();
    newAtts.addElement(outAtt);
  }

  // Create new header
  Instances newData = new Instances(input.relationName(), newAtts, 0);
  newData.setClassIndex(input.classIndex());
  m_OutputAttribute = newData.attribute(dateIndex);
  setOutputFormat(newData);
}
 
源代码9 项目: tsml   文件: EditableBayesNet.java
/**
 * Records the state needed for a delete-value action on a node: the node
 * index, the value, the node's attribute, a copy of its distribution array,
 * the list of nodes that have the target among their parents, and those
 * children's distribution arrays.
 *
 * Any exception raised while capturing the state is printed and otherwise
 * ignored.
 *
 * @param nTargetNode index of the node the value belongs to
 * @param sValue the value concerned
 */
DelValueAction(int nTargetNode, String sValue) {
	try {
		m_nTargetNode = nTargetNode;
		m_sValue = sValue;
		m_att = m_Instances.attribute(nTargetNode);

		// Round-trip through SerializedObject (presumably to obtain an independent copy).
		SerializedObject snapshot = new SerializedObject(m_Distributions[nTargetNode]);
		m_CPT = (Estimator[]) snapshot.getObject();

		// collect every node whose parent set contains the target
		m_children = new FastVector();
		for (int iCandidate = 0; iCandidate < getNrOfNodes(); iCandidate++) {
			if (m_ParentSets[iCandidate].contains(nTargetNode)) {
				m_children.addElement(iCandidate);
			}
		}

		// keep references to the children's distributions
		int nChildren = m_children.size();
		m_childAtts = new Estimator[nChildren][];
		for (int iChild = 0; iChild < nChildren; iChild++) {
			int nChild = (Integer) m_children.elementAt(iChild);
			m_childAtts[iChild] = m_Distributions[nChild];
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
源代码10 项目: tsml   文件: GeneralizedSequentialPatterns.java
/**
 * Extracts the data sequences out of the original data set according to
 * their sequence id attribute, which is removed after extraction.
 *
 * Each sequence is a run of consecutive instances sharing the same sequence
 * ID value. At most as many sequences are extracted as the sequence ID
 * attribute declares values; extraction also stops once all instances have
 * been consumed, so a data set that is empty or uses fewer IDs than declared
 * no longer causes an out-of-range instance access.
 *
 * @param originalDataSet 	the original data set
 * @param dataSeqID		the sequence ID to use
 * @return 			set of distinct data sequences
 */
protected FastVector extractDataSequences (Instances originalDataSet, int dataSeqID) {
  FastVector dataSequences = new FastVector();
  int numInstances = originalDataSet.numInstances();
  int numSequenceIDs = originalDataSet.attribute(dataSeqID).numValues();
  int firstInstance = 0;
  int lastInstance = 0;

  // Stop early when the instances run out before the declared IDs do.
  for (int i = 0; i < numSequenceIDs && firstInstance < numInstances; i++) {
    double sequenceID = originalDataSet.instance(firstInstance).value(dataSeqID);

    // advance to the first instance that carries a different sequence ID
    while (lastInstance < numInstances
        && sequenceID == originalDataSet.instance(lastInstance).value(dataSeqID)) {
      lastInstance++;
    }

    Instances dataSequence =
        new Instances(originalDataSet, firstInstance, lastInstance - firstInstance);
    dataSequence.deleteAttributeAt(dataSeqID);
    dataSequences.addElement(dataSequence);
    firstInstance = lastInstance;
  }
  return dataSequences;
}
 
源代码11 项目: tsml   文件: MarginCurve.java
/**
 * Calculates the cumulative margin distribution for the set of
 * predictions, returning the result as a set of Instances. The
 * structure of these Instances is as follows:
 * <ul>
 * <li> <b>Margin</b> contains the margin value (which should be plotted
 * as an x-coordinate)
 * <li> <b>Current</b> contains the count of instances with the current
 * margin (plot as y axis)
 * <li> <b>Cumulative</b> contains the count of instances with margin
 * less than or equal to the current margin (plot as y axis)
 * </ul>
 *
 * The first data point is (-1, 0, 0); one further point is emitted per
 * prediction, in order of increasing margin. Prediction weights are
 * accumulated into int counters, so fractional parts are truncated.
 *
 * @param predictions the predictions to compute the margins from
 * @return datapoints as a set of instances, null if no predictions
 * have been made.
 */
public Instances getCurve(FastVector predictions) {

  if (predictions.size() == 0) {
    return null;
  }

  Instances curve = makeHeader();
  double[] margins = getMargins(predictions);
  int[] order = Utils.sort(margins);

  int cumulative = 0;
  curve.add(makeInstance(-1, 0, cumulative));

  for (int idx : order) {
    double margin = margins[idx];
    double weight = ((NominalPrediction) predictions.elementAt(idx)).weight();

    // compound assignment narrows the double sum back to int
    cumulative += weight;
    int inBin = 0;
    inBin += weight;

    curve.add(makeInstance(margin, inBin, cumulative));
  }
  return curve;
}
 
源代码12 项目: tsml   文件: Sequence.java
/**
 * Updates the support count of a set of Sequence candidates according to a
 * given set of data sequences.
 *
 * For every candidate, the support count is incremented once for each data
 * sequence the candidate is a subsequence of.
 *
 * @param candidates 		the set of candidates
 * @param dataSequences 	the set of data sequences
 */
public static void updateSupportCount(FastVector candidates, FastVector dataSequences) {
  for (Enumeration cands = candidates.elements(); cands.hasMoreElements();) {
    Sequence candidate = (Sequence) cands.nextElement();

    for (Enumeration seqs = dataSequences.elements(); seqs.hasMoreElements();) {
      Instances dataSequence = (Instances) seqs.nextElement();
      if (!candidate.isSubsequenceOf(dataSequence)) {
        continue;
      }
      candidate.setSupportCount(candidate.getSupportCount() + 1);
    }
  }
}
 
源代码13 项目: tsml   文件: PrincipalComponents.java
/**
 * Set up the header for the PC->original space dataset.
 *
 * The header holds one attribute per training attribute (created from the
 * attribute name only) and, when a class is present, a copy of the class
 * attribute appended as the last attribute.
 *
 * @return            the output format
 * @throws Exception  if something goes wrong
 */
private Instances setOutputFormatOriginal() throws Exception {
  FastVector atts = new FastVector();

  for (int i = 0; i < m_numAttribs; i++) {
    atts.addElement(new Attribute(m_trainInstances.attribute(i).name()));
  }
  if (m_hasClass) {
    atts.addElement(m_trainHeader.classAttribute().copy());
  }

  String relation = m_trainHeader.relationName() + "->PC->original space";
  Instances header = new Instances(relation, atts, 0);

  // set the class to be the last attribute if necessary
  if (m_hasClass) {
    header.setClassIndex(header.numAttributes() - 1);
  }
  return header;
}
 
源代码14 项目: mzmine3   文件: ClusteringTask.java
/**
 * Creates the weka data set for clustering of samples.
 *
 * One attribute "Var&lt;i&gt;" is created per column of the first row. When the
 * configured module is the hierarchical clusterer, an extra "name" attribute
 * is appended and filled with the name of the corresponding raw data file.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  boolean hierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  int numVars = rawData[0].length;

  FastVector attributes = new FastVector();
  for (int v = 0; v < numVars; v++) {
    attributes.addElement(new Attribute("Var" + v));
  }
  if (hierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }

  Instances data = new Instances("Dataset", attributes, 0);
  int width = data.numAttributes();

  for (int row = 0; row < rawData.length; row++) {
    double[] values = new double[width];
    System.arraycopy(rawData[row], 0, values, 0, numVars);
    if (hierarchical) {
      // last slot holds the index of the file name registered with the "name" attribute
      values[width - 1] =
          data.attribute("name").addStringValue(this.selectedRawDataFiles[row].getName());
    }
    data.add(new SparseInstance(1.0, values));
  }
  return data;
}
 
源代码15 项目: tsml   文件: PropositionalToMultiInstance.java
/**
 * Sets the format of the input instances.
 *
 * The output format has three attributes: a copy of the first input
 * attribute (the bag index), a relation-valued attribute "bag" holding the
 * remaining non-class attributes, and a copy of the class attribute.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set
 * successfully
 */
public boolean setInputFormat(Instances instanceInfo)
  throws Exception {

  if (instanceInfo.attribute(0).type() != Attribute.NOMINAL) {
    throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
  }
  super.setInputFormat(instanceInfo);

  /* structure of a single bag: input structure minus bag index and last attribute */
  Instances bagStructure = instanceInfo.stringFreeStructure();
  Attribute bagIdAtt = (Attribute) bagStructure.attribute(0).copy();
  Attribute classAtt = (Attribute) bagStructure.classAttribute().copy();
  bagStructure.deleteAttributeAt(0);
  // the class index must be cleared before the attribute can be deleted
  bagStructure.setClassIndex(-1);
  // drops the last attribute (presumably the class - assumes the class is last; TODO confirm)
  bagStructure.deleteAttributeAt(bagStructure.numAttributes() - 1);

  /* multi-instance header: bag id, relation-valued bag, class */
  FastVector outerAtts = new FastVector(3);
  outerAtts.addElement(bagIdAtt);
  outerAtts.addElement(new Attribute("bag", bagStructure));
  outerAtts.addElement(classAtt);

  Instances multiInstanceHeader = new Instances("Multi-Instance-Dataset", outerAtts, 0);
  multiInstanceHeader.setClassIndex(multiInstanceHeader.numAttributes() - 1);
  super.setOutputFormat(multiInstanceHeader.stringFreeStructure());

  Instances bagRelation = multiInstanceHeader.attribute(1).relation();
  m_BagStringAtts = new StringLocator(bagRelation);
  m_BagRelAtts = new RelationalLocator(bagRelation);

  return true;
}
 
源代码16 项目: tsml   文件: TSBF.java
/**
 * Builds a data set of blank feature instances for the interval features.
 *
 * The header has 3 + 2 + 3 * numIntervals numeric attributes "F0", "F1", ...
 * followed by a nominal class attribute with the same name and values as the
 * class attribute of data. For every instance of data, numSubSeries instances
 * are added whose only assigned value is the class value.
 *
 * @param data the data whose class attribute and class values are reused
 * @return the formatted instances with the class index set to the last attribute
 */
Instances formatIntervalInstances(Instances data){

    // 3 stats for whole subseries, start and end point, 3 stats per interval
    int featureCount = 3 + 2 + 3 * numIntervals;

    FastVector header = new FastVector();
    for (int f = 0; f < featureCount; f++) {
        header.addElement(new Attribute("F" + f));
    }

    // nominal class attribute mirroring the class of the input data
    Attribute target = data.attribute(data.classIndex());
    int labelCount = target.numValues();
    FastVector labels = new FastVector(labelCount);
    for (int c = 0; c < labelCount; c++) {
        labels.addElement(target.value(c));
    }
    header.addElement(new Attribute(target.name(), labels));

    // create blank instances with the correct class value
    Instances result = new Instances("SubsequenceIntervals", header, data.numInstances());
    int classPos = result.numAttributes() - 1;
    result.setClassIndex(classPos);

    for (int i = 0; i < data.numInstances(); i++) {
        double cval = data.instance(i).classValue();
        for (int s = 0; s < numSubSeries; s++) {
            DenseInstance blank = new DenseInstance(result.numAttributes());
            blank.setValue(classPos, cval);
            result.add(blank);
        }
    }
    return result;
}
 
源代码17 项目: tsml   文件: BayesNetGenerator.java
/**
 * Generate random connected Bayesian network with discrete nodes
 * having all the same cardinality.
 *
 * When another network has been supplied (m_otherBayesNet), no random
 * structure is generated; its parent sets and distributions are reused and
 * only the random generator and the instances header are initialised from it.
 *
 * @throws Exception if something goes wrong
 */
public void generateRandomNetwork () throws Exception {
	if (m_otherBayesNet == null) {
		// generate from scratch
		Init(m_nNrOfNodes, m_nCardinality);
		generateRandomNetworkStructure(m_nNrOfNodes, m_nNrOfArcs);
		generateRandomDistributions(m_nNrOfNodes, m_nCardinality);
		return;
	}

	// read from file, just copy parent sets and distributions
	m_nNrOfNodes = m_otherBayesNet.getNrOfNodes();
	m_ParentSets = m_otherBayesNet.getParentSets();
	m_Distributions = m_otherBayesNet.getDistributions();

	random = new Random(m_nSeed);

	// initialize m_Instances: one nominal attribute per node
	FastVector attInfo = new FastVector(m_nNrOfNodes);
	for (int iNode = 0; iNode < m_nNrOfNodes; iNode++) {
		int nValues = m_otherBayesNet.getCardinality(iNode);

		// generate value strings
		FastVector nomStrings = new FastVector(nValues + 1);
		for (int iValue = 0; iValue < nValues; iValue++) {
			nomStrings.addElement(m_otherBayesNet.getNodeValue(iNode, iValue));
		}
		attInfo.addElement(new Attribute(m_otherBayesNet.getNodeName(iNode), nomStrings));
	}

	m_Instances = new Instances(m_otherBayesNet.getName(), attInfo, 100);
	m_Instances.setClassIndex(m_nNrOfNodes - 1);
}
 
源代码18 项目: tsml   文件: EditableBayesNet.java
/**
 * Delete nodes with indexes in selection from the network, updating instances, parentsets,
 * distributions Conditional distributions are condensed by taking the
 * values for the target node to be its first value. Used for manual
 * manipulation of the Bayesian network.
 *
 * The given vector is sorted in place into ascending order, and the nodes are
 * then deleted from the highest index down. Undo recording is switched off
 * while the individual nodes are deleted and restored afterwards. Exceptions
 * thrown by deleteNode are printed, not propagated.
 *
 * @param nodes
 *            array of indexes of nodes to delete.
 */
public void deleteSelection(FastVector nodes) {
	// sort before proceeding (in-place exchange sort, ascending)
	int nSelected = nodes.size();
	for (int i = 0; i < nSelected; i++) {
		for (int j = i + 1; j < nSelected; j++) {
			int nAtI = (Integer) nodes.elementAt(i);
			int nAtJ = (Integer) nodes.elementAt(j);
			if (nAtI > nAtJ) {
				nodes.setElementAt(nodes.elementAt(j), i);
				nodes.setElementAt(nAtI, j);
			}
		}
	}

	// update undo stack
	if (m_bNeedsUndoAction) {
		addUndoAction(new DeleteSelectionAction(nodes));
	}

	// suppress per-node undo actions while deleting
	boolean bSavedUndoFlag = m_bNeedsUndoAction;
	m_bNeedsUndoAction = false;
	try {
		int iNode = nodes.size() - 1;
		while (iNode >= 0) {
			deleteNode((Integer) nodes.elementAt(iNode));
			iNode--;
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
	m_bNeedsUndoAction = bSavedUndoFlag;
}
 
源代码19 项目: tsml   文件: CheckAttributeSelection.java
/**
 * Checks whether nominal schemes can handle more than two classes.
 * If a scheme is only designed for two-class problems it should
 * throw an appropriate exception for multi-class problems.
 *
 * The basic test is run with a nominal class, no missing values and the
 * accepted keywords "number" and "class".
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param numClasses the number of classes to test
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleNClasses(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance,
    int numClasses) {

  print("more than two class problems");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, Attribute.NOMINAL);
  print("...");

  FastVector accepts = new FastVector();
  accepts.addElement("number");
  accepts.addElement("class");

  int numTrain = getNumInstances();
  int missingLevel = 0;
  boolean predictorMissing = false;
  boolean classMissing = false;

  return runBasicTest(
      nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor,
      multiInstance,
      Attribute.NOMINAL,
      missingLevel, predictorMissing, classMissing,
      numTrain, numClasses,
      accepts);
}
 
源代码20 项目: tsml   文件: RDG1.java
/**
 * Generates the list of tests for a new rule of the decision list, based on
 * the given example.
 *
 * One test is created for every attribute not flagged as irrelevant. For a
 * numeric attribute the split point is drawn from the random generator and
 * the test is negated when the split lies below the example's value; for any
 * other attribute the test uses the example's own value, not negated.
 *
 * @param random random number generator
 * @param example the instance to classify
 * @return a list of tests
 * @throws Exception if dataset format not defined
 */
private FastVector generateTestList(Random random, Instance example)
 throws Exception {

  Instances format = getDatasetFormat();
  if (format == null) {
    throw new Exception("Dataset format not defined.");
  }

  FastVector tests = new FastVector(getNumAttributes() - getNumIrrelevant());
  boolean[] irrelevant = getAttList_Irr();

  for (int i = 0; i < getNumAttributes(); i++) {
    if (irrelevant[i]) {
      continue;
    }

    Test test;
    if (example.attribute(i).isNumeric()) {
      double split = random.nextDouble();
      boolean negated = split < example.value(i);
      test = new Test(i, split, format, negated);
    } else {
      test = new Test(i, example.value(i), format, false);
    }
    tests.addElement(test);
  }

  return tests;
}
 
源代码21 项目: tsml   文件: CheckClusterer.java
/**
 * Checks whether the scheme can handle zero training instances.
 *
 * The basic test is run with zero training instances, no missing values and
 * the accepted keywords "train" and "value".
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleZeroTraining(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance) {

  print("handle zero training instances");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance);
  print("...");

  FastVector accepts = new FastVector();
  accepts.addElement("train");
  accepts.addElement("value");

  int numTrain = 0;
  int missingLevel = 0;
  boolean predictorMissing = false;

  return runBasicTest(
      nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor,
      multiInstance,
      missingLevel, predictorMissing,
      numTrain,
      accepts);
}
 
源代码22 项目: tsml   文件: CheckAttributeSelection.java
/**
 * Checks whether the scheme can handle class attributes as Nth attribute.
 *
 * The basic test is run with two classes, no missing values and an empty
 * list of accepted keywords.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param classType the class type (NUMERIC, NOMINAL, etc.)
 * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 * @see TestInstances#CLASS_IS_LAST
 */
protected boolean[] canHandleClassAsNthAttribute(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance,
    int classType,
    int classIndex) {

  // the printed position is 1-based
  String position = (classIndex == TestInstances.CLASS_IS_LAST)
      ? "class attribute as last attribute"
      : "class attribute as " + (classIndex + 1) + ". attribute";
  print(position);
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, classType);
  print("...");

  FastVector accepts = new FastVector();
  int numTrain = getNumInstances();
  int numClasses = 2;
  int missingLevel = 0;
  boolean predictorMissing = false;
  boolean classMissing = false;

  return runBasicTest(
      nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor,
      multiInstance,
      classType,
      classIndex,
      missingLevel, predictorMissing, classMissing,
      numTrain, numClasses,
      accepts);
}
 
源代码23 项目: tsml   文件: ThresholdSelector.java
/**
 * Collects the classifier predictions using the specified evaluation method.
 *
 * For EVAL_TUNED_SPLIT a randomized, stratified copy of the data is split
 * into folds and the first fold for which both the training and the
 * evaluation part pass checkForInstance is used; if no fold qualifies, the
 * last fold is used.
 *
 * @param instances the set of <code>Instances</code> to generate
 * predictions for.
 * @param mode the evaluation mode.
 * @param numFolds the number of folds to use if not evaluating on the
 * full training set.
 * @return a <code>FastVector</code> containing the predictions.
 * @throws Exception if an error occurs generating the predictions.
 */
protected FastVector getPredictions(Instances instances, int mode, int numFolds)
  throws Exception {

  EvaluationUtils eu = new EvaluationUtils();
  eu.setSeed(m_Seed);

  if (mode == EVAL_TRAINING_SET) {
    return eu.getTrainTestPredictions(m_Classifier, instances, instances);
  }
  if (mode == EVAL_CROSS_VALIDATION) {
    return eu.getCVPredictions(m_Classifier, instances, numFolds);
  }
  if (mode != EVAL_TUNED_SPLIT) {
    throw new RuntimeException("Unrecognized evaluation mode");
  }

  // EVAL_TUNED_SPLIT: pick a train/eval split from a stratified shuffle
  Instances shuffled = new Instances(instances);
  Random random = new Random(m_Seed);
  shuffled.randomize(random);
  shuffled.stratify(numFolds);

  Instances trainData = null;
  Instances evalData = null;
  // Make sure that both subsets contain at least one positive instance
  for (int fold = 0; fold < numFolds; fold++) {
    trainData = shuffled.trainCV(numFolds, fold, random);
    evalData = shuffled.testCV(numFolds, fold);
    boolean bothUsable = checkForInstance(trainData) && checkForInstance(evalData);
    if (bothUsable) {
      break;
    }
  }
  return eu.getTrainTestPredictions(m_Classifier, trainData, evalData);
}
 
源代码24 项目: tsml   文件: LabeledItemSet.java
/**
 * Deletes all item sets that don't have minimum support or have more than
 * maximum support. The input vector is not modified; the item sets whose
 * rule support counter lies within [minSupport, maxSupport] are returned in
 * a new vector, in their original order.
 *
 * @return the reduced set of item sets
 * @param maxSupport the maximum support
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport,
    int maxSupport) {

  int total = itemSets.size();
  FastVector kept = new FastVector(total);

  for (int i = 0; i < total; i++) {
    LabeledItemSet candidate = (LabeledItemSet) itemSets.elementAt(i);
    if (candidate.m_ruleSupCounter < minSupport
        || candidate.m_ruleSupCounter > maxSupport) {
      continue; // outside the allowed support range
    }
    kept.addElement(candidate);
  }
  return kept;
}
 
源代码25 项目: tsml   文件: LabeledItemSet.java
/**
 * Return a hashtable filled with the given item sets.
 *
 * Each item set is used as a key and mapped to its class label, boxed as an
 * Integer. Boxing goes through Integer.valueOf rather than the deprecated
 * Integer constructor.
 *
 * @param itemSets the set of item sets to be used for filling the hash table
 * @param initialSize the initial size of the hashtable
 * @return the generated hashtable
 */
public static Hashtable getHashtable(FastVector itemSets, int initialSize) {

  Hashtable hashtable = new Hashtable(initialSize);
  for (int i = 0; i < itemSets.size(); i++) {
    LabeledItemSet current = (LabeledItemSet) itemSets.elementAt(i);
    hashtable.put(current, Integer.valueOf(current.m_classLabel));
  }

  return hashtable;
}
 
源代码26 项目: tsml   文件: LabeledItemSet.java
/**
 * Prunes a set of (k)-item sets using the given (k-1)-item sets.
 *
 * A candidate is kept only if, for each of its items, the candidate with
 * that single item removed is present in kMinusOne and stored there with the
 * same class label as the candidate. Each item is blanked (set to -1) only
 * for the duration of the lookup and restored immediately afterwards.
 *
 * @param toPrune the set of (k)-item sets to be pruned
 * @param kMinusOne the (k-1)-item sets to be used for pruning
 * @return the pruned set of item sets
 */
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {

  FastVector survivors = new FastVector(toPrune.size());

  for (int i = 0; i < toPrune.size(); i++) {
    LabeledItemSet candidate = (LabeledItemSet) toPrune.elementAt(i);
    int[] items = candidate.m_items;

    boolean allSubsetsKnown = true;
    for (int pos = 0; pos < items.length && allSubsetsKnown; pos++) {
      int saved = items[pos];
      if (saved == -1) {
        continue;
      }
      // look the candidate up with this item blanked, then restore it
      items[pos] = -1;
      Object storedLabel = kMinusOne.get(candidate);
      items[pos] = saved;

      allSubsetsKnown = storedLabel != null
          && candidate.m_classLabel == ((Integer) storedLabel).intValue();
    }

    if (allSubsetsKnown) {
      survivors.addElement(candidate);
    }
  }
  return survivors;
}
 
源代码27 项目: tsml   文件: LabeledItemSet.java
/**
 * Updates the counters of all given item sets.
 *
 * For every instance index, each item set's upDateCounter is called with the
 * instance without class and the class instance at that same index.
 *
 * @param itemSets the item sets whose counters are updated
 * @param instancesNoClass instances without the class attribute
 * @param instancesClass the values of the class attribute sorted according to
 *          instances
 */
public static void upDateCounters(FastVector itemSets,
    Instances instancesNoClass, Instances instancesClass) {

  for (int row = 0; row < instancesNoClass.numInstances(); row++) {
    for (Enumeration sets = itemSets.elements(); sets.hasMoreElements();) {
      LabeledItemSet itemSet = (LabeledItemSet) sets.nextElement();
      itemSet.upDateCounter(
          instancesNoClass.instance(row), instancesClass.instance(row));
    }
  }
}
 
源代码28 项目: tsml   文件: Remove.java
/**
 * Sets the format of the input instances.
 *
 * The output format consists of copies of the selected attributes, in
 * selection order. If the input class attribute is among them, its position
 * in the output becomes the output class index; otherwise the output has no
 * class set (-1).
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the format couldn't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  super.setInputFormat(instanceInfo);

  m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);
  m_SelectedAttributes = m_SelectCols.getSelection();

  // Create the output buffer
  FastVector kept = new FastVector();
  int outputClass = -1;
  for (int selected : m_SelectedAttributes) {
    if (selected == instanceInfo.classIndex()) {
      outputClass = kept.size();
    }
    kept.addElement((Attribute) instanceInfo.attribute(selected).copy());
  }

  // locators are initialised from getInputFormat(), not from the raw argument
  initInputLocators(getInputFormat(), m_SelectedAttributes);

  Instances outputFormat = new Instances(instanceInfo.relationName(), kept, 0);
  outputFormat.setClassIndex(outputClass);
  setOutputFormat(outputFormat);
  return true;
}
 
源代码29 项目: tsml   文件: Ridor.java
/**
 * Builds a single rule learner with REP dealing with 2 classes.
 * This rule learner always tries to predict the class with label
 * m_Class.
 *
 * The data is copied, instances with a missing class are removed, and the
 * remainder is randomized, stratified and split into a grow set (training
 * part of the last fold) and a prune set (test part of the last fold).
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal()) {
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  }
  if (instances.numClasses() != 2) {
    throw new Exception(" Only 2 classes, please.");
  }

  Instances data = new Instances(instances);
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" No training data.");
  }

  data.deleteWithMissingClass();
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" The class labels of all the training data are missing.");
  }

  if (data.numInstances() < m_Folds) {
    throw new Exception(" Not enough data for REP.");
  }

  m_Antds = new FastVector();

  /* Split data into Grow and Prune */
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  int lastFold = m_Folds - 1;
  Instances growData = data.trainCV(m_Folds, lastFold, m_Random);
  Instances pruneData = data.testCV(m_Folds, lastFold);

  grow(growData);      // Build this rule

  prune(pruneData);    // Prune this rule
}
 
源代码30 项目: tsml   文件: CheckAssociator.java
/**
 * Checks whether the scheme can handle class attributes as Nth attribute.
 *
 * The basic test is run with two classes, no missing values and an empty
 * list of accepted keywords.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param classType the class type (NUMERIC, NOMINAL, etc.)
 * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 * @see TestInstances#CLASS_IS_LAST
 */
protected boolean[] canHandleClassAsNthAttribute(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance,
    int classType,
    int classIndex) {

  // the printed position is 1-based
  String position = (classIndex == TestInstances.CLASS_IS_LAST)
      ? "class attribute as last attribute"
      : "class attribute as " + (classIndex + 1) + ". attribute";
  print(position);
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, classType);
  print("...");

  FastVector accepts = new FastVector();
  int numTrain = getNumInstances();
  int numClasses = 2;
  int missingLevel = 0;
  boolean predictorMissing = false;
  boolean classMissing = false;

  return runBasicTest(
      nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor,
      multiInstance,
      classType,
      classIndex,
      missingLevel, predictorMissing, classMissing,
      numTrain, numClasses,
      accepts);
}