类weka.core.SparseInstance源码实例Demo

下面列出了使用 weka.core.SparseInstance API 的示例代码及写法,也可以点击链接到 GitHub 查看完整源代码。

源代码1 项目: tsml   文件: CLOPE.java
/**
 * Adds an instance to this cluster, updating the per-item occurrence
 * table, the cluster width W (number of distinct items) and the
 * instance count N.
 *
 * @param inst the instance to add
 */
public void AddInstance(Instance inst) {
  if (inst instanceof SparseInstance) {
    // Sparse: every stored value is an item; its attribute index is the key.
    int stored = inst.numValues();
    for (int pos = 0; pos < stored; pos++) {
      AddItem(inst.index(pos));
    }
  } else {
    // Dense: each non-missing attribute becomes the item "<index><value>".
    int numAtts = inst.numAttributes();
    for (int att = 0; att < numAtts; att++) {
      if (!inst.isMissing(att)) {
        AddItem(att + inst.toString(att));
      }
    }
  }
  this.W = this.occ.size();
  this.N++;
}
 
源代码2 项目: tsml   文件: CLOPE.java
/**
 * Removes an instance from this cluster, updating the per-item occurrence
 * table, the cluster width W (number of distinct items) and the
 * instance count N.
 *
 * @param inst the instance to remove
 */
public void DeleteInstance(Instance inst) {
  if (inst instanceof SparseInstance) {
    // Sparse: each stored value was added under its attribute index.
    for (int i = 0; i < inst.numValues(); i++) {
      DeleteItem(inst.index(i));
    }
  } else {
    // Dense: items were added under the key "<index><value>".
    // Idiomatic "i < n" bound (matches AddInstance) instead of "i <= n - 1".
    for (int i = 0; i < inst.numAttributes(); i++) {
      if (!inst.isMissing(i)) {
        DeleteItem(i + inst.toString(i));
      }
    }
  }
  this.W = this.occ.size();
  this.N--;
}
 
源代码3 项目: tsml   文件: FPGrowth.java
/**
 * Increments the frequency of every singleton item that is "set" in the
 * given transaction (instance).
 *
 * @param current the instance whose set items are counted
 * @param singletons one BinaryItem per attribute, indexed by attribute index
 * @throws Exception if frequency updating fails
 */
private void processSingleton(Instance current, 
    ArrayList<BinaryItem> singletons) throws Exception {
  
  if (current instanceof SparseInstance) {
    // Sparse storage only keeps non-zero values; each one is a set item.
    int stored = current.numValues();
    for (int pos = 0; pos < stored; pos++) {
      singletons.get(current.index(pos)).increaseFrequency();
    }
  } else {
    for (int att = 0; att < current.numAttributes(); att++) {
      if (current.isMissing(att)) {
        continue;
      }
      // Unary attributes are always set; binary ones only at the positive value.
      boolean itemIsSet = current.attribute(att).numValues() == 1 
          || current.value(att) == m_positiveIndex - 1;
      if (itemIsSet) {
        singletons.get(att).increaseFrequency();
      }
    }
  }
}
 
源代码4 项目: tsml   文件: PartitionMembership.java
/**
 * Convert a single instance over. The converted instance is added to 
 * the end of the output queue.
 *
 * @param instance the instance to convert
 * @throws Exception if something goes wrong
 */
protected void convertInstance(Instance instance) throws Exception {
  
  // Work on a unit-weight copy so the partition generator sees weight 1.
  Instance copy = (Instance) instance.copy();
  copy.setWeight(1.0);
  
  // Membership values fill the leading attributes of the output format.
  double[] outputVals = new double[outputFormatPeek().numAttributes()];
  double[] membership = m_partitionGenerator.getMembershipValues(copy);
  System.arraycopy(membership, 0, outputVals, 0, membership.length);
  
  // Carry the class value over as the last attribute, when present.
  if (instance.classIndex() >= 0) {
    outputVals[outputVals.length - 1] = instance.classValue();
  }
  
  push(new SparseInstance(instance.weight(), outputVals));
}
 
源代码5 项目: sentiment-analysis   文件: SentimentAnalyser.java
/**Decides upon a "disagreed" document by applying the learned model based on the last 1,000 "agreed" documents.*/
private String clarifyOnSlidingWindow(String tweet){
	String out = "";
	// Append the tweet as the last instance of the sliding-window data set.
	double[] instanceValues = new double[train.numAttributes()];
	instanceValues[0] = train.attribute(0).addStringValue(tweet);
	train.add(new SparseInstance(1.0, instanceValues));
	try {
		// Vectorise the text, then split: all but the last row train the
		// model, the last row (the new tweet) is classified.
		stwv.setInputFormat(train);
		Instances vectorised = Filter.useFilter(train, stwv);
		Instances trainingSet = new Instances(vectorised, 0, train.size() - 1);
		Instances testSet = new Instances(vectorised, train.size() - 1, 1);
		Classifier mnb = (Classifier) new NaiveBayesMultinomial();
		mnb.buildClassifier(trainingSet);
		double[] preds = mnb.distributionForInstance(testSet.get(0));
		out = preds[0] > 0.5 ? "positive" : "negative";
	} catch (Exception e) {
		e.printStackTrace();
	}
	// Drop the temporary instance so the window is left unchanged.
	train.remove(train.numInstances() - 1);
	return out;
}
 
源代码6 项目: sentiment-analysis   文件: TweetPreprocessor.java
/**Instantiates the text-based Instances*/
private String getTextInstances(){
	// Two attributes: nominal sentiment class followed by free-text content.
	ArrayList<String> classVal = new ArrayList<String>();
	classVal.add("positive");
	classVal.add("negative");
	ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
	atts.add(new Attribute("sentimentClassAttribute", classVal));
	atts.add(new Attribute("text", (ArrayList<String>) null));

	Instances textRaw = new Instances("TextInstances", atts, 0);
	String tmp_txt = tp.getProcessed(tweet);
	// Single row whose text attribute holds the preprocessed tweet.
	double[] rowValues = new double[textRaw.numAttributes()];
	rowValues[1] = textRaw.attribute(1).addStringValue(tmp_txt);
	textRaw.add(new SparseInstance(1.0, rowValues));
	text_instances = new Instances(textRaw);
	return tmp_txt;
}
 
源代码7 项目: sentiment-analysis   文件: TweetPreprocessor.java
/**Instantiates the complex-based Instances*/
private String getComplexInstances(String processed_text){
	// Two attributes: nominal sentiment class followed by free-text content.
	ArrayList<String> classVal = new ArrayList<String>();
	classVal.add("positive");
	classVal.add("negative");
	ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
	atts.add(new Attribute("sentimentClassAttribute", classVal));
	atts.add(new Attribute("text", (ArrayList<String>) null));

	Instances textRaw = new Instances("TextInstances", atts, 0);
	String tmp_cmplx = cp.getProcessed(processed_text, tagger);
	// Single row whose text attribute holds the complex representation.
	double[] rowValues = new double[textRaw.numAttributes()];
	rowValues[1] = textRaw.attribute(1).addStringValue(tmp_cmplx);
	textRaw.add(new SparseInstance(1.0, rowValues));
	complex_instances = new Instances(textRaw);
	return tmp_cmplx;
}
 
源代码8 项目: sentiment-analysis   文件: TweetPreprocessor.java
/** Instantiates the lexicon-based Instances. */
private void setLexiconInstances(){
	ArrayList<String> classVal = new ArrayList<String>();
	classVal.add("positive");
	classVal.add("negative");

	// Six numeric lexicon scores followed by the nominal class attribute.
	ArrayList<Attribute> atts = new ArrayList<Attribute>(6);
	for (String attName : new String[] {"verb", "noun", "adj", "adv", "wordnet", "polarity"}) {
		atts.add(new Attribute(attName));
	}
	atts.add(new Attribute("sentimentClassAttribute", classVal));

	Instances textRaw = new Instances("TextInstances", atts, 0);
	// One row holding the lexicon scores computed from the tweet.
	double[] vals = lp.getProcessed(tweet, tagger);
	textRaw.add(new SparseInstance(1.0, vals));
	lexicon_instances = new Instances(textRaw);
}
 
源代码9 项目: AIDR   文件: Model.java
/**
 * Builds a bag-of-words instance: every word from the set that exists in
 * the attribute specification becomes a 1-valued attribute; the remaining
 * attributes are filled in via replaceMissingValues.
 */
Instance wordsToInstance(WordSet words) {
    Instance item = new SparseInstance(
            attributeSpecification.numAttributes());
    item.setDataset(attributeSpecification);
    // Mark every in-vocabulary word.
    for (String word : words.getWords()) {
        Attribute attribute = attributeSpecification.attribute(word);
        if (attribute == null) {
            continue; // out-of-vocabulary word, ignore
        }
        item.setValue(attribute, 1);
    }
    item.replaceMissingValues(missingVal);
    return item;
}
 
源代码10 项目: mzmine3   文件: ClusteringTask.java
/**
 * Creates the weka data set for clustering of samples
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();

  for (int i = 0; i < rawData[0].length; i++) {
    attributes.addElement(new Attribute("Var" + i));
  }

  // Hoist the loop-invariant module check: hierarchical clustering needs an
  // extra string attribute carrying each sample's name.
  boolean isHierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  if (isHierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);

  for (int i = 0; i < rawData.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);
    if (isHierarchical) {
      // Last attribute holds the raw data file name for labelling.
      values[data.numAttributes() - 1] =
          data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName());
    }
    data.add(new SparseInstance(1.0, values));
  }
  return data;
}
 
源代码11 项目: mzmine3   文件: ClusteringTask.java
/**
 * Creates the weka data set for clustering of variables (metabolites)
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();

  for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
    attributes.addElement(new Attribute("Var" + i));
  }

  // Hoist the loop-invariant module check: hierarchical clustering needs an
  // extra string attribute carrying each row's label.
  boolean isHierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  if (isHierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);

  // DecimalFormat is also loop-invariant; create it once.
  DecimalFormat twoDForm = new DecimalFormat("#.##");
  for (int i = 0; i < selectedRows.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);

    if (isHierarchical) {
      // Label each variable row by its rounded average m/z and RT.
      double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
      double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
      String rowName = "MZ->" + MZ + "/RT->" + RT;
      values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
    }
    data.add(new SparseInstance(1.0, values));
  }
  return data;
}
 
源代码12 项目: tsml   文件: BagOfPatterns.java
@Override
public double classifyInstance(Instance instance) throws Exception {
    // Transform the series into its bag-of-patterns histogram.
    double[] histogram = bop.bagToArray(bop.buildBag(instance));
    
    // Wrap the histogram in an Instances sharing the training attribute data.
    Instances wrapper = new Instances(matrix, 1);
    wrapper.add(new SparseInstance(1.0, histogram));
    
    // Delegate the prediction to the nearest-neighbour classifier.
    return knn.classifyInstance(wrapper.firstInstance());
}
 
源代码13 项目: tsml   文件: BagOfPatterns.java
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    // Transform the series into its bag-of-patterns histogram.
    double[] histogram = bop.bagToArray(bop.buildBag(instance));
    
    // Wrap the histogram in an Instances sharing the training attribute data.
    Instances wrapper = new Instances(matrix, 1);
    wrapper.add(new SparseInstance(1.0, histogram));
    
    // Delegate the distribution estimate to the nearest-neighbour classifier.
    return knn.distributionForInstance(wrapper.firstInstance());
}
 
源代码14 项目: tsml   文件: FPGrowth.java
/**
 * Inserts a single instance into the FPTree.
 * 
 * @param current the instance to insert
 * @param singletons the singleton item sets
 * @param tree the tree to insert into
 * @param minSupport the minimum support threshold
 */
private void insertInstance(Instance current, ArrayList<BinaryItem> singletons, 
    FPTreeRoot tree, int minSupport) {
  ArrayList<BinaryItem> transaction = new ArrayList<BinaryItem>();
  if (current instanceof SparseInstance) {
    // Sparse: every stored value is a set item.
    for (int j = 0; j < current.numValues(); j++) {
      int attIndex = current.index(j);
      if (singletons.get(attIndex).getFrequency() >= minSupport) {
        transaction.add(singletons.get(attIndex));
      }
    }
  } else {
    for (int j = 0; j < current.numAttributes(); j++) {
      if (!current.isMissing(j)) {
        // Unary attributes are always set; binary ones only at the positive value.
        if (current.attribute(j).numValues() == 1 
            || current.value(j) == m_positiveIndex - 1) {
          if (singletons.get(j).getFrequency() >= minSupport) {
            transaction.add(singletons.get(j));
          }
        }
      }
    }
  }
  // Deduplicated from both branches: sort the transaction and insert it once.
  Collections.sort(transaction);
  tree.addItemSet(transaction, 1);
}
 
源代码15 项目: tsml   文件: PrincipalComponents.java
/**
 * Convert a pc transformed instance back to the original space
 * 
 * @param inst        the instance to convert
 * @return            the processed instance (sparse if the input was sparse)
 * @throws Exception  if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst)
  throws Exception {
  double[] newVals = null;

  // Allocate one extra slot when a class attribute must be appended.
  if (m_hasClass) {
    newVals = new double[m_numAttribs+1];
  } else {
    newVals = new double[m_numAttribs];
  }

  if (m_hasClass) {
    // class is always appended as the last attribute
    newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
  }

  // Back-project through the transposed eigenvector matrix, then undo the
  // standardize (scale by std dev) or center (mean only) preprocessing.
  // NOTE(review): row 0 of m_eTranspose is skipped (j starts at 1) --
  // presumably it holds headers/labels rather than coefficients; confirm
  // against where m_eTranspose is built.
  for (int i = 0; i < m_eTranspose[0].length; i++) {
    double tempval = 0.0;
    for (int j = 1; j < m_eTranspose.length; j++) {
      tempval += (m_eTranspose[j][i] * 
                  inst.value(j - 1));
     }
    newVals[i] = tempval;
    if (!m_center) {
      // Standardized inputs: restore the original scale.
      newVals[i] *= m_stdDevs[i];
    } 
    newVals[i] += m_means[i];
  }
  
  // Preserve the sparseness of the input representation.
  if (inst instanceof SparseInstance) {
    return new SparseInstance(inst.weight(), newVals);
  } else {
    return new DenseInstance(inst.weight(), newVals);
  }      
}
 
源代码16 项目: Java-Data-Analysis   文件: KMeans.java
/**
 * Wraps raw (x, y) pairs in a two-attribute weka data set.
 *
 * @param data one {x, y} pair per row
 * @return the resulting data set
 */
private static Instances load(double[][] data) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("X"));
    attributes.add(new Attribute("Y"));
    Instances dataset = new Instances("Dataset", attributes, M);
    for (int i = 0; i < data.length; i++) {
        Instance point = new SparseInstance(2);
        point.setValue(0, data[i][0]);
        point.setValue(1, data[i][1]);
        dataset.add(point);
    }
    return dataset;
}
 
// Loads raw (x, y) pairs into a two-attribute weka data set.
// NOTE(review): this listing repeats the same load(double[][]) helper
// verbatim; the code is kept unchanged here.
private static Instances load(double[][] data) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("X"));
    attributes.add(new Attribute("Y"));
    Instances dataset = new Instances("Dataset", attributes, M);
    for (double[] datum : data) {
        Instance instance = new SparseInstance(2);
        instance.setValue(0, datum[0]);
        instance.setValue(1, datum[1]);
        dataset.add(instance);
    }
    return dataset;
}
 
// Loads raw (x, y) pairs into a two-attribute weka data set.
// NOTE(review): this listing repeats the same load(double[][]) helper
// verbatim; the code is kept unchanged here.
private static Instances load(double[][] data) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("X"));
    attributes.add(new Attribute("Y"));
    Instances dataset = new Instances("Dataset", attributes, M);
    for (double[] datum : data) {
        Instance instance = new SparseInstance(2);
        instance.setValue(0, datum[0]);
        instance.setValue(1, datum[1]);
        dataset.add(instance);
    }
    return dataset;
}
 
源代码19 项目: mzmine2   文件: ClusteringTask.java
/**
 * Creates the weka data set for clustering of samples
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();

  for (int i = 0; i < rawData[0].length; i++) {
    attributes.addElement(new Attribute("Var" + i));
  }

  // Hoist the loop-invariant module check: hierarchical clustering needs an
  // extra string attribute carrying each sample's name.
  boolean isHierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  if (isHierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);

  for (int i = 0; i < rawData.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);
    if (isHierarchical) {
      // Last attribute holds the raw data file name for labelling.
      values[data.numAttributes() - 1] =
          data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName());
    }
    data.add(new SparseInstance(1.0, values));
  }
  return data;
}
 
源代码20 项目: mzmine2   文件: ClusteringTask.java
/**
 * Creates the weka data set for clustering of variables (metabolites)
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();

  for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
    attributes.addElement(new Attribute("Var" + i));
  }

  // Hoist the loop-invariant module check: hierarchical clustering needs an
  // extra string attribute carrying each row's label.
  boolean isHierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  if (isHierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);

  // DecimalFormat is also loop-invariant; create it once.
  DecimalFormat twoDForm = new DecimalFormat("#.##");
  for (int i = 0; i < selectedRows.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);

    if (isHierarchical) {
      // Label each variable row by its rounded average m/z and RT.
      double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
      double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
      String rowName = "MZ->" + MZ + "/RT->" + RT;
      values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
    }
    data.add(new SparseInstance(1.0, values));
  }
  return data;
}
 
源代码21 项目: anthelion   文件: NutchOnlineClassifier.java
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} when the input is null.
 */
private static Instance convert(AnthURL url) {
	// Guard clause instead of wrapping the whole body in if/else.
	if (url == null) {
		System.out.println("Input AnthURL for convertion into instance was null.");
		return null;
	}

	Instance inst = new SparseInstance(dimension);
	inst.replaceMissingValues(replaceMissingValues);

	inst.setDataset(instances);
	inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
	inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
	inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
	// NOTE(review): the original set "nonsempar" a second time with the same
	// value; that redundant call was removed. It probably intended a
	// "nonsemsib" feature mirroring "semsib" -- confirm against the
	// attribute-index setup.
	inst.setValue(attributesIndex.get("domain"), url.uri.getHost());

	// Hash the path/query/fragment tokens into the sparse feature space.
	Set<String> tokens = new HashSet<String>();
	tokens.addAll(tokenizer(url.uri.getPath()));
	tokens.addAll(tokenizer(url.uri.getQuery()));
	tokens.addAll(tokenizer(url.uri.getFragment()));
	for (String tok : tokens) {
		inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
	}
	return inst;
}
 
源代码22 项目: anthelion   文件: NutchOnlineClassifier.java
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} when the input is null.
 */
private static Instance convert(AnthURL url) {
	// Guard clause instead of wrapping the whole body in if/else.
	if (url == null) {
		System.out.println("Input AnthURL for convertion into instance was null.");
		return null;
	}

	Instance inst = new SparseInstance(dimension);
	inst.replaceMissingValues(replaceMissingValues);

	inst.setDataset(instances);
	inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
	inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
	inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
	// NOTE(review): the original set "nonsempar" a second time with the same
	// value; that redundant call was removed. It probably intended a
	// "nonsemsib" feature mirroring "semsib" -- confirm against the
	// attribute-index setup.
	inst.setValue(attributesIndex.get("domain"), url.uri.getHost());

	// Hash the path/query/fragment tokens into the sparse feature space.
	Set<String> tokens = new HashSet<String>();
	tokens.addAll(tokenizer(url.uri.getPath()));
	tokens.addAll(tokenizer(url.uri.getQuery()));
	tokens.addAll(tokenizer(url.uri.getFragment()));
	for (String tok : tokens) {
		inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
	}
	return inst;
}
 
源代码23 项目: sentiment-analysis   文件: SentimentAnalyser.java
/**Decides upon a "disagreed" document by applying the learned model based on the previously build model.*/
private String clarifyOnModel(String tweet){
	String out = "";
	
	// Append the document as a temporary instance of the test set.
	double[] instanceValues = new double[2];
	instanceValues[0] = test.attribute(0).addStringValue(tweet);
	test.add(new SparseInstance(1.0, instanceValues));
	try {
		stwv.setInputFormat(test);
		Instances vectorised = Filter.useFilter(test, stwv);
		
		// re-order attributes so that they are compatible with the training set's ones
		Instances test_instance = reformatText(vectorised);
		
		// find the polarity of the document based on the previously built model
		test_instance.setClassIndex(0);
		double[] preds = multiNB.distributionForInstance(test_instance.get(0));
		out = preds[0] > 0.5 ? "light positive" : "light negative";
	} catch (Exception e) {
		e.printStackTrace();
	}
	// Remove the temporary instance again.
	test.remove(0);
	return out;
}
 
源代码24 项目: sentiment-analysis   文件: TweetPreprocessor.java
/**Initializes the feature-based Instances*/
private void getFeatureInstances(){
	// Two attributes: nominal sentiment class followed by free-text content.
	ArrayList<String> classVal = new ArrayList<String>();
	classVal.add("positive");
	classVal.add("negative");
	ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
	atts.add(new Attribute("sentimentClassAttribute", classVal));
	atts.add(new Attribute("text", (ArrayList<String>) null));

	Instances textRaw = new Instances("TextInstances", atts, 0);
	// Single row whose text attribute holds the feature representation.
	double[] rowValues = new double[textRaw.numAttributes()];
	rowValues[1] = textRaw.attribute(1).addStringValue(fp.getProcessed(tweet));
	textRaw.add(new SparseInstance(1.0, rowValues));
	feature_instances = new Instances(textRaw);
}
 
源代码25 项目: tsml   文件: SAXVSM.java
/**
 * Builds the tf-x-idf weighted class corpus from bag-of-patterns data.
 *
 * If skip = one of <0 ... numInstances-1>, will not include instance at that index into the corpus
 * Part of leave one out cv, while avoiding unnecessary repeats of the BoP transformation 
 *
 * @param bopData bag-of-patterns data with the class as the last attribute
 * @param skip index of the instance to leave out, or a negative value for none
 * @return one weighted (sparse) instance per class value
 */
private Instances tfxidf(Instances bopData, int skip) {
    int numClasses = bopData.numClasses();
    int numInstances = bopData.numInstances();
    int numTerms = bopData.numAttributes()-1; //minus class attribute
    
    //initialise class weights
    double[][] classWeights = new double[numClasses][numTerms];

    //build class bags: sum each term's counts per class (term frequency)
    int inst = 0;
    for (Instance in : bopData) {
        if (inst++ == skip) //skip 'this' one, for leave-one-out cv
            continue;

        int classVal = (int)in.classValue();
        for (int j = 0; j < numTerms; ++j) {
            classWeights[classVal][j] += in.value(j);
        }
    }
        
    //apply tf x idf
    for (int i = 0; i < numTerms; ++i) { //for each term
        double df = 0; //document frequency
        for (int j = 0; j < numClasses; ++j) //find how many classes (documents) this term appears in
            if (classWeights[j][i] != 0)
                ++df;
        
        if (df != 0) { //if it appears
            if (df != numClasses) { //but not in all, apply weighting
                for (int j = 0; j < numClasses; ++j) 
                    if (classWeights[j][i] != 0) 
                        classWeights[j][i] = Math.log(1 + classWeights[j][i]) * Math.log(numClasses / df);                
            }
            else { //appears in all
                //avoid log calculations
                //if df == num classes -> idf = log(N/df) = log(1) = 0
                for (int j = 0; j < numClasses; ++j) 
                    classWeights[j][i] = 0;
            }      
        }
    }
    
    // One sparse instance per class holding its weighted term vector.
    Instances tfxidfCorpus = new Instances(bopData, numClasses);
    for (int i = 0; i < numClasses; ++i)
        tfxidfCorpus.add(new SparseInstance(1.0, classWeights[i]));
    
    return tfxidfCorpus;
}
 
源代码26 项目: tsml   文件: CLOPE.java
/**
     * Calculate Delta: the change in the CLOPE profit function that would
     * result from adding the given instance to this cluster.
     *
     * @param inst the candidate instance
     * @param r the repulsion parameter
     * @return the profit delta (positive means adding improves the profit)
     */
     public double DeltaAdd(Instance inst, double r) {
//System.out.println("DeltaAdd");
// S_new: cluster size after the add; W_new: distinct-item width after the add.
int S_new;
int W_new;
double profit;
double profit_new;
double deltaprofit;
S_new = 0;
W_new = occ.size();

if (inst instanceof SparseInstance) {
  //System.out.println("DeltaAddSparceInstance");
  for (int i = 0; i < inst.numValues(); i++) {
    S_new++;

    // Item not yet in the occurrence table -> the width grows by one.
    // NOTE(review): the (Integer) cast is redundant for a null check and
    // occ's key here is the attribute index (an int), unlike the dense
    // branch's String key -- presumably both key styles coexist in occ.
    if ((Integer) this.occ.get(inst.index(i)) == null) {
      W_new++;
    }
  }
} else {
  for (int i = 0; i < inst.numAttributes(); i++) {
    if (!inst.isMissing(i)) {
      S_new++;
      if ((Integer) this.occ.get(i + inst.toString(i)) == null) {
	W_new++;
      }
    }
  }
}
// Add the current cluster size to get the post-add size.
S_new += S;


if (N == 0) {
  // Empty cluster: the delta is simply the profit of the singleton cluster.
  deltaprofit = S_new / Math.pow(W_new, r);
} else {
  profit = S * N / Math.pow(W, r);
  profit_new = S_new * (N + 1) / Math.pow(W_new, r);
  deltaprofit = profit_new - profit;
}
return deltaprofit;
     }
 
源代码27 项目: tsml   文件: CLOPE.java
/**
  * Moves an instance to the cluster that maximises the CLOPE profit delta,
  * opening a new cluster when no existing one beats a singleton cluster.
  *
  * @param inst the instance to (re)assign
  * @return the index of the cluster the instance was moved into
  */
 public int MoveInstanceToBestCluster(Instance inst) {

   // Detach the instance from its current cluster.
   clusters.get(m_clusterAssignments.get(m_processed_InstanceID)).DeleteInstance(inst);
   m_clusterAssignments.set(m_processed_InstanceID, -1);

   // Size (tempS) and width (tempW) the instance would have as its own
   // cluster. In the sparse case every stored value is one item, so the
   // original count-by-loop collapses to numValues(); in the dense case
   // each non-missing attribute is one item. S == W for a single instance.
   int tempS = 0;
   int tempW = 0;
   if (inst instanceof SparseInstance) {
     tempS = inst.numValues();
     tempW = inst.numValues();
   } else {
     for (int i = 0; i < inst.numAttributes(); i++) {
       if (!inst.isMissing(i)) {
         tempS++;
         tempW++;
       }
     }
   }

   // Profit of a brand-new singleton cluster is the baseline to beat.
   double deltamax = tempS / Math.pow(tempW, m_Repulsion);
   int clustermax = -1;
   for (int i = 0; i < clusters.size(); i++) {
     CLOPECluster tempcluster = clusters.get(i);
     double delta = tempcluster.DeltaAdd(inst, m_Repulsion);
     if (delta > deltamax) {
       deltamax = delta;
       clustermax = i;
     }
   }
   if (clustermax == -1) {
     // No existing cluster improves the profit: open a new one.
     CLOPECluster newcluster = new CLOPECluster();
     clusters.add(newcluster);
     newcluster.AddInstance(inst);
     return clusters.size() - 1;
   }
   clusters.get(clustermax).AddInstance(inst);
   return clustermax;
 }
 
源代码28 项目: tsml   文件: PrincipalComponents.java
/**
 * Transform an instance in original (unormalized) format. Convert back
 * to the original space if requested.
 * @param instance an instance in the original (unormalized) format
 * @return a transformed instance
 * @throws Exception if instance cant be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {

  if (m_eigenvalues == null) {
    throw new Exception("convertInstance: Principal components not "
                        +"built yet");
  }

  double[] newVals = new double[m_outputNumAtts];
  Instance tempInst = (Instance)instance.copy();
  if (!instance.dataset().equalHeaders(m_trainHeader)) {
    throw new Exception("Can't convert instance: header's don't match: "
                        +"PrincipalComponents\n"
                        + instance.dataset().equalHeadersMsg(m_trainHeader));
  }

  // Run the instance through the same preprocessing pipeline that was
  // applied to the training data, in the same order.
  m_replaceMissingFilter.input(tempInst);
  m_replaceMissingFilter.batchFinished();
  tempInst = m_replaceMissingFilter.output();

  /*if (m_normalize) {
    m_normalizeFilter.input(tempInst);
    m_normalizeFilter.batchFinished();
    tempInst = m_normalizeFilter.output();
  }*/

  m_nominalToBinFilter.input(tempInst);
  m_nominalToBinFilter.batchFinished();
  tempInst = m_nominalToBinFilter.output();

  // Optional attribute removal (e.g. useless attributes dropped at training).
  if (m_attributeFilter != null) {
    m_attributeFilter.input(tempInst);
    m_attributeFilter.batchFinished();
    tempInst = m_attributeFilter.output();
  }
  
  // Either standardize (center + scale) or center only, mirroring training.
  if (!m_center) {
    m_standardizeFilter.input(tempInst);
    m_standardizeFilter.batchFinished();
    tempInst = m_standardizeFilter.output();
  } else {
    m_centerFilter.input(tempInst);
    m_centerFilter.batchFinished();
    tempInst = m_centerFilter.output();
  }

  // Class value (when present) is copied unchanged into the last slot.
  if (m_hasClass) {
     newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
  }

  // Project onto the principal components in decreasing eigenvalue order,
  // stopping once the requested variance coverage is reached.
  double cumulative = 0;
  for (int i = m_numAttribs - 1; i >= 0; i--) {
    double tempval = 0.0;
    for (int j = 0; j < m_numAttribs; j++) {
      tempval += (m_eigenvectors[j][m_sortedEigens[i]] * 
                  tempInst.value(j));
     }
    newVals[m_numAttribs - i - 1] = tempval;
    cumulative+=m_eigenvalues[m_sortedEigens[i]];
    if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
      break;
    }
  }
  
  // Preserve sparseness; optionally map the result back to the original space.
  if (!m_transBackToOriginal) {
    if (instance instanceof SparseInstance) {
    return new SparseInstance(instance.weight(), newVals);
    } else {
      return new DenseInstance(instance.weight(), newVals);
    }      
  } else {
    if (instance instanceof SparseInstance) {
      return convertInstanceToOriginal(new SparseInstance(instance.weight(), 
                                                          newVals));
    } else {
      return convertInstanceToOriginal(new DenseInstance(instance.weight(),
                                                    newVals));
    }
  }
}
 
源代码29 项目: tsml   文件: LatentSemanticAnalysis.java
/**
 * Transform an instance in original (unnormalized) format
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
  if (m_s == null) {
    throw new Exception("convertInstance: Latent Semantic Analysis not " +
                         "performed yet.");
  }
  
  // array to hold new attribute values
  double [] newValues = new double[m_outputNumAttributes];
  
  // apply filters so new instance is in same format as training instances
  Instance tempInstance = (Instance)instance.copy();
  if (!instance.dataset().equalHeaders(m_trainHeader)) {
    throw new Exception("Can't convert instance: headers don't match: " +
    "LatentSemanticAnalysis");
  }
  // replace missing values
  m_replaceMissingFilter.input(tempInstance);
  m_replaceMissingFilter.batchFinished();
  tempInstance = m_replaceMissingFilter.output();
  // normalize
  if (m_normalize) {
    m_normalizeFilter.input(tempInstance);
    m_normalizeFilter.batchFinished();
    tempInstance = m_normalizeFilter.output();
  }
  // convert nominal attributes to binary
  m_nominalToBinaryFilter.input(tempInstance);
  m_nominalToBinaryFilter.batchFinished();
  tempInstance = m_nominalToBinaryFilter.output();
  // remove class/other attributes
  if (m_attributeFilter != null) {
    m_attributeFilter.input(tempInstance);
    m_attributeFilter.batchFinished();
    tempInstance = m_attributeFilter.output();
  }
  
  // record new attribute values
  if (m_hasClass) { // copy class value
    newValues[m_outputNumAttributes - 1] = instance.classValue();
  }
  // Project the filtered attribute vector into the latent space by
  // right-multiplying with the LSA transformation matrix; keep only the
  // first m_actualRank components.
  double [][] oldInstanceValues = new double[1][m_numAttributes];
  oldInstanceValues[0] = tempInstance.toDoubleArray();
  Matrix instanceVector = new Matrix(oldInstanceValues); // old attribute values
  instanceVector = instanceVector.times(m_transformationMatrix); // new attribute values
  for (int i = 0; i < m_actualRank; i++) {
    newValues[i] = instanceVector.get(0, i);
  }
  
  // return newly transformed instance (preserving sparseness of the input)
  if (instance instanceof SparseInstance) {
    return new SparseInstance(instance.weight(), newValues);
  } else {
    return new DenseInstance(instance.weight(), newValues);
  }
}
 
源代码30 项目: tsml   文件: XMLInstances.java
/**
  * adds the instance to the XML structure
  * 
  * @param parent	the parent node to add the instance node as child
  * @param inst	the instance to add
  */
 protected void addInstance(Element parent, Instance inst) {
   Element		node;
   Element		value;
   Element		child;
   boolean		sparse;
   int			i;
   int			n;
   int			index;
   
   node = m_Document.createElement(TAG_INSTANCE);
   parent.appendChild(node);
   
   // sparse?
   sparse = (inst instanceof SparseInstance);
   if (sparse)
     node.setAttribute(ATT_TYPE, VAL_SPARSE);
   
   // weight (only serialised when it differs from the default of 1.0)
   if (inst.weight() != 1.0)
     node.setAttribute(ATT_WEIGHT, Utils.doubleToString(inst.weight(), m_Precision));
   
   // values: iterate over the stored values; for sparse instances the
   // stored position i maps to attribute index via inst.index(i).
   for (i = 0; i < inst.numValues(); i++) {
     index = inst.index(i);
     
     value = m_Document.createElement(TAG_VALUE);
     node.appendChild(value);

     if (inst.isMissing(index)) {
value.setAttribute(ATT_MISSING, VAL_YES);
     }
     else {
if (inst.attribute(index).isRelationValued()) {
  child = m_Document.createElement(TAG_INSTANCES);
  value.appendChild(child);
  // NOTE(review): relationalValue is called with the stored position i
  // while the attribute checks above use index -- for a sparse instance
  // these differ; confirm which one relationalValue expects.
  for (n = 0; n < inst.relationalValue(i).numInstances(); n++)
    addInstance(child, inst.relationalValue(i).instance(n));
}
else {
  if (inst.attribute(index).type() == Attribute.NUMERIC)
    value.appendChild(m_Document.createTextNode(Utils.doubleToString(inst.value(index), m_Precision)));
  else
    value.appendChild(m_Document.createTextNode(validContent(inst.stringValue(index))));
}
     }
     
     // sparse format records the 1-based attribute index on each value
     if (sparse)
value.setAttribute(ATT_INDEX, "" + (index+1));
   }
 }