Listed below are example snippets showing how the weka.core.SparseInstance API class is used in real code; follow each link to view the full source on GitHub.
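Before the examples, here is a minimal self-contained sketch (assuming Weka 3.7+; not taken from the projects below) of the two SparseInstance constructors that recur throughout this page: SparseInstance(double weight, double[] values), which drops zeros from the given array, and SparseInstance(int numAttributes), which starts with every value missing.
import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.SparseInstance;

public class SparseInstanceDemo {
    public static void main(String[] args) {
        // Two numeric attributes and an empty dataset.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("y"));
        Instances data = new Instances("demo", atts, 0);
        // Weight-plus-values constructor: the zero in position 0
        // is not stored in the sparse representation.
        data.add(new SparseInstance(1.0, new double[] {0.0, 3.5}));
        System.out.println(data);
    }
}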
/**
 * Add instance to cluster
 */
public void AddInstance(Instance inst) {
    if (inst instanceof SparseInstance) {
        // System.out.println("AddSparseInstance");
        for (int i = 0; i < inst.numValues(); i++) {
            AddItem(inst.index(i));
        }
    } else {
        for (int i = 0; i < inst.numAttributes(); i++) {
            if (!inst.isMissing(i)) {
                AddItem(i + inst.toString(i));
            }
        }
    }
    this.W = this.occ.size();
    this.N++;
}
/**
 * Delete instance from cluster
 */
public void DeleteInstance(Instance inst) {
    if (inst instanceof SparseInstance) {
        // System.out.println("DeleteSparseInstance");
        for (int i = 0; i < inst.numValues(); i++) {
            DeleteItem(inst.index(i));
        }
    } else {
        for (int i = 0; i < inst.numAttributes(); i++) {
            if (!inst.isMissing(i)) {
                DeleteItem(i + inst.toString(i));
            }
        }
    }
    this.W = this.occ.size();
    this.N--;
}
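The sparse/dense branch in both methods is the idiom to remember: for a SparseInstance, numValues() covers only the stored (non-zero) values and index(i) maps each position back to its attribute index, whereas a dense instance has to be scanned attribute by attribute. A small hypothetical helper making the distinction explicit:
static void printStoredValues(weka.core.Instance inst) {
    if (inst instanceof weka.core.SparseInstance) {
        // Only stored values are visited; valueSparse(i) reads by position.
        for (int i = 0; i < inst.numValues(); i++) {
            System.out.println("attribute " + inst.index(i) + " = " + inst.valueSparse(i));
        }
    } else {
        // Dense case: walk every attribute, skipping missing ones.
        for (int i = 0; i < inst.numAttributes(); i++) {
            if (!inst.isMissing(i)) {
                System.out.println("attribute " + i + " = " + inst.value(i));
            }
        }
    }
}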
private void processSingleton(Instance current,
        ArrayList<BinaryItem> singletons) throws Exception {
    if (current instanceof SparseInstance) {
        for (int j = 0; j < current.numValues(); j++) {
            int attIndex = current.index(j);
            singletons.get(attIndex).increaseFrequency();
        }
    } else {
        for (int j = 0; j < current.numAttributes(); j++) {
            if (!current.isMissing(j)) {
                if (current.attribute(j).numValues() == 1
                        || current.value(j) == m_positiveIndex - 1) {
                    singletons.get(j).increaseFrequency();
                }
            }
        }
    }
}
/**
 * Convert a single instance over. The converted instance is added to
 * the end of the output queue.
 *
 * @param instance the instance to convert
 * @throws Exception if something goes wrong
 */
protected void convertInstance(Instance instance) throws Exception {
    // Make copy and set weight to one
    Instance cp = (Instance) instance.copy();
    cp.setWeight(1.0);
    // Set up values
    double[] instanceVals = new double[outputFormatPeek().numAttributes()];
    double[] vals = m_partitionGenerator.getMembershipValues(cp);
    System.arraycopy(vals, 0, instanceVals, 0, vals.length);
    if (instance.classIndex() >= 0) {
        instanceVals[instanceVals.length - 1] = instance.classValue();
    }
    push(new SparseInstance(instance.weight(), instanceVals));
}
/** Decides upon a "disagreed" document by applying the learned model based on the last 1,000 "agreed" documents. */
private String clarifyOnSlidingWindow(String tweet) {
    String out = "";
    double[] instanceValues = new double[train.numAttributes()];
    instanceValues[0] = train.attribute(0).addStringValue(tweet);
    train.add(new SparseInstance(1.0, instanceValues));
    try {
        stwv.setInputFormat(train);
        Instances newData = Filter.useFilter(train, stwv);
        Instances train_ins = new Instances(newData, 0, train.size() - 1);
        Instances test_ins = new Instances(newData, train.size() - 1, 1);
        Classifier mnb = new NaiveBayesMultinomial();
        mnb.buildClassifier(train_ins);
        double[] preds = mnb.distributionForInstance(test_ins.get(0));
        if (preds[0] > 0.5)
            out = "positive";
        else
            out = "negative";
    } catch (Exception e) {
        e.printStackTrace();
    }
    train.remove(train.numInstances() - 1);
    return out;
}
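The flow here is worth spelling out: the new tweet is appended to the training corpus, StringToWordVector is run over the whole set so that the training and test rows share one dictionary, a NaiveBayesMultinomial model is built on everything but the last row, that last row is classified, and the tweet is finally removed again so the window's contents are left unchanged.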
/** Instantiates the text-based Instances */
private String getTextInstances() {
    ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("positive");
    classVal.add("negative");
    atts.add(new Attribute("sentimentClassAttribute", classVal));
    atts.add(new Attribute("text", (ArrayList<String>) null));
    Instances textRaw = new Instances("TextInstances", atts, 0);
    double[] instanceValue1 = new double[textRaw.numAttributes()];
    String tmp_txt = tp.getProcessed(tweet);
    instanceValue1[1] = textRaw.attribute(1).addStringValue(tmp_txt);
    textRaw.add(new SparseInstance(1.0, instanceValue1));
    text_instances = new Instances(textRaw);
    return tmp_txt;
}
/** Instantiates the complex-based Instances */
private String getComplexInstances(String processed_text) {
    ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("positive");
    classVal.add("negative");
    atts.add(new Attribute("sentimentClassAttribute", classVal));
    atts.add(new Attribute("text", (ArrayList<String>) null));
    Instances textRaw = new Instances("TextInstances", atts, 0);
    double[] instanceValue1 = new double[textRaw.numAttributes()];
    String tmp_cmplx = cp.getProcessed(processed_text, tagger);
    instanceValue1[1] = textRaw.attribute(1).addStringValue(tmp_cmplx);
    textRaw.add(new SparseInstance(1.0, instanceValue1));
    complex_instances = new Instances(textRaw);
    return tmp_cmplx;
}
private void setLexiconInstances() {
    ArrayList<Attribute> atts = new ArrayList<Attribute>(6);
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("positive");
    classVal.add("negative");
    atts.add(new Attribute("verb"));
    atts.add(new Attribute("noun"));
    atts.add(new Attribute("adj"));
    atts.add(new Attribute("adv"));
    atts.add(new Attribute("wordnet"));
    atts.add(new Attribute("polarity"));
    atts.add(new Attribute("sentimentClassAttribute", classVal));
    Instances textRaw = new Instances("TextInstances", atts, 0);
    double[] vals = lp.getProcessed(tweet, tagger);
    textRaw.add(new SparseInstance(1.0, vals));
    lexicon_instances = new Instances(textRaw);
}
Instance wordsToInstance(WordSet words) {
    Instance item = new SparseInstance(attributeSpecification.numAttributes());
    item.setDataset(attributeSpecification);
    // Words
    for (String word : words.getWords()) {
        Attribute attribute = attributeSpecification.attribute(word);
        if (attribute != null) {
            item.setValue(attribute, 1);
        }
    }
    item.replaceMissingValues(missingVal);
    return item;
}
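The final replaceMissingValues call matters more than it looks: new SparseInstance(numAttributes) starts with every value missing, so after the word attributes are set to 1, the remaining slots are substituted from the missingVal array (typically zeros) rather than left missing. A hypothetical two-attribute illustration:
static weka.core.Instance paddedSparse() {
    weka.core.Instance item = new weka.core.SparseInstance(2); // all values start missing
    item.setValue(1, 1.0);                                     // mark one "word" attribute
    item.replaceMissingValues(new double[] {0.0, 0.0});        // unset slots become 0
    return item;                                               // zeros are dropped from the sparse form
}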
/**
 * Creates the weka data set for clustering of samples
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
    FastVector attributes = new FastVector();
    for (int i = 0; i < rawData[0].length; i++) {
        String varName = "Var" + i;
        Attribute var = new Attribute(varName);
        attributes.addElement(var);
    }
    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
        Attribute name = new Attribute("name", (FastVector) null);
        attributes.addElement(name);
    }
    Instances data = new Instances("Dataset", attributes, 0);
    for (int i = 0; i < rawData.length; i++) {
        double[] values = new double[data.numAttributes()];
        System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);
        if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
            values[data.numAttributes() - 1] =
                data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName());
        }
        Instance inst = new SparseInstance(1.0, values);
        data.add(inst);
    }
    return data;
}
/**
 * Creates the weka data set for clustering of variables (metabolites)
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
    FastVector attributes = new FastVector();
    for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
        String varName = "Var" + i;
        Attribute var = new Attribute(varName);
        attributes.addElement(var);
    }
    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
        Attribute name = new Attribute("name", (FastVector) null);
        attributes.addElement(name);
    }
    Instances data = new Instances("Dataset", attributes, 0);
    for (int i = 0; i < selectedRows.length; i++) {
        double[] values = new double[data.numAttributes()];
        System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);
        if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
            DecimalFormat twoDForm = new DecimalFormat("#.##");
            double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
            double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
            String rowName = "MZ->" + MZ + "/RT->" + RT;
            values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
        }
        Instance inst = new SparseInstance(1.0, values);
        data.add(inst);
    }
    return data;
}
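Both methods use FastVector, which Weka deprecated in 3.7 in favour of the standard collections. On current Weka the attribute setup would look like this sketch (method name hypothetical; usual weka.core imports assumed):
private Instances createDatasetModern(double[][] rawData, boolean withNameAttribute) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 0; i < rawData[0].length; i++) {
        attributes.add(new Attribute("Var" + i));
    }
    if (withNameAttribute) {
        // A null value list declares a string attribute, as above.
        attributes.add(new Attribute("name", (ArrayList<String>) null));
    }
    return new Instances("Dataset", attributes, 0);
}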
@Override
public double classifyInstance(Instance instance) throws Exception {
    // convert to BOP form
    double[] hist = bop.bagToArray(bop.buildBag(instance));
    // stuff into Instance
    Instances newInsts = new Instances(matrix, 1); // copy attribute data
    newInsts.add(new SparseInstance(1.0, hist));
    return knn.classifyInstance(newInsts.firstInstance());
}
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    // convert to BOP form
    double[] hist = bop.bagToArray(bop.buildBag(instance));
    // stuff into Instance
    Instances newInsts = new Instances(matrix, 1); // copy attribute data
    newInsts.add(new SparseInstance(1.0, hist));
    return knn.distributionForInstance(newInsts.firstInstance());
}
/**
 * Inserts a single instance into the FPTree.
 *
 * @param current the instance to insert
 * @param singletons the singleton item sets
 * @param tree the tree to insert into
 * @param minSupport the minimum support threshold
 */
private void insertInstance(Instance current, ArrayList<BinaryItem> singletons,
        FPTreeRoot tree, int minSupport) {
    ArrayList<BinaryItem> transaction = new ArrayList<BinaryItem>();
    if (current instanceof SparseInstance) {
        for (int j = 0; j < current.numValues(); j++) {
            int attIndex = current.index(j);
            if (singletons.get(attIndex).getFrequency() >= minSupport) {
                transaction.add(singletons.get(attIndex));
            }
        }
        Collections.sort(transaction);
        tree.addItemSet(transaction, 1);
    } else {
        for (int j = 0; j < current.numAttributes(); j++) {
            if (!current.isMissing(j)) {
                if (current.attribute(j).numValues() == 1
                        || current.value(j) == m_positiveIndex - 1) {
                    if (singletons.get(j).getFrequency() >= minSupport) {
                        transaction.add(singletons.get(j));
                    }
                }
            }
        }
        Collections.sort(transaction);
        tree.addItemSet(transaction, 1);
    }
}
/**
 * Convert a PC-transformed instance back to the original space
 *
 * @param inst the instance to convert
 * @return the processed instance
 * @throws Exception if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst)
        throws Exception {
    double[] newVals = null;
    if (m_hasClass) {
        newVals = new double[m_numAttribs + 1];
    } else {
        newVals = new double[m_numAttribs];
    }
    if (m_hasClass) {
        // class is always appended as the last attribute
        newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }
    for (int i = 0; i < m_eTranspose[0].length; i++) {
        double tempval = 0.0;
        for (int j = 1; j < m_eTranspose.length; j++) {
            tempval += (m_eTranspose[j][i] * inst.value(j - 1));
        }
        newVals[i] = tempval;
        if (!m_center) {
            newVals[i] *= m_stdDevs[i];
        }
        newVals[i] += m_means[i];
    }
    if (inst instanceof SparseInstance) {
        return new SparseInstance(inst.weight(), newVals);
    } else {
        return new DenseInstance(inst.weight(), newVals);
    }
}
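In effect this is the inverse PCA transform: each original attribute value is reconstructed as a dot product of the instance with a column of the transposed eigenvector matrix, and the standardization (or centering) applied during training is then undone by multiplying back the standard deviation and adding the mean.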
private static Instances load(double[][] data) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("X"));
    attributes.add(new Attribute("Y"));
    Instances dataset = new Instances("Dataset", attributes, M);
    for (double[] datum : data) {
        Instance instance = new SparseInstance(2);
        instance.setValue(0, datum[0]);
        instance.setValue(1, datum[1]);
        dataset.add(instance);
    }
    return dataset;
}
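A hypothetical call site (M is a capacity constant defined elsewhere in the enclosing class):
// Sketch of how load(...) might be invoked.
double[][] points = { {1.0, 2.0}, {3.0, 4.0} };
Instances dataset = load(points);
System.out.println(dataset.numInstances()); // prints 2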
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 *
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}.
 */
private static Instance convert(AnthURL url) {
    if (url != null) {
        Instance inst = new SparseInstance(dimension);
        inst.replaceMissingValues(replaceMissingValues);
        inst.setDataset(instances);
        inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
        inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
        // Note: the original source sets "nonsempar" a second time here; the call
        // is redundant and was presumably meant for the non-semantic sibling feature.
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("domain"), url.uri.getHost());
        Set<String> tokens = new HashSet<String>();
        tokens.addAll(tokenizer(url.uri.getPath()));
        tokens.addAll(tokenizer(url.uri.getQuery()));
        tokens.addAll(tokenizer(url.uri.getFragment()));
        for (String tok : tokens) {
            inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
        }
        return inst;
    } else {
        System.out.println("Input AnthURL for conversion into instance was null.");
        return null;
    }
}
/** Decides upon a "disagreed" document by applying the previously built model. */
private String clarifyOnModel(String tweet) {
    String out = "";
    // get the text-based representation of the document
    double[] instanceValues = new double[2];
    instanceValues[0] = test.attribute(0).addStringValue(tweet);
    test.add(new SparseInstance(1.0, instanceValues));
    try {
        stwv.setInputFormat(test);
        Instances newData = Filter.useFilter(test, stwv);
        // re-order attributes so that they are compatible with those of the training set
        Instances test_instance = reformatText(newData);
        // find the polarity of the document based on the previously built model
        test_instance.setClassIndex(0);
        double[] preds = multiNB.distributionForInstance(test_instance.get(0));
        if (preds[0] > 0.5)
            out = "light positive";
        else
            out = "light negative";
    } catch (Exception e) {
        e.printStackTrace();
    }
    test.remove(0);
    return out;
}
/** Initializes the feature-based Instances */
private void getFeatureInstances() {
    ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("positive");
    classVal.add("negative");
    atts.add(new Attribute("sentimentClassAttribute", classVal));
    atts.add(new Attribute("text", (ArrayList<String>) null));
    Instances textRaw = new Instances("TextInstances", atts, 0);
    double[] instanceValue1 = new double[textRaw.numAttributes()];
    instanceValue1[1] = textRaw.attribute(1).addStringValue(fp.getProcessed(tweet));
    textRaw.add(new SparseInstance(1.0, instanceValue1));
    feature_instances = new Instances(textRaw);
}
/**
 * If skip is in the range 0 ... numInstances-1, the instance at that index is
 * not included in the corpus. Part of leave-one-out CV, while avoiding
 * unnecessary repeats of the BoP transformation.
 */
private Instances tfxidf(Instances bopData, int skip) {
    int numClasses = bopData.numClasses();
    int numInstances = bopData.numInstances();
    int numTerms = bopData.numAttributes() - 1; // minus class attribute
    // initialise class weights
    double[][] classWeights = new double[numClasses][numTerms];
    // build class bags
    int inst = 0;
    for (Instance in : bopData) {
        if (inst++ == skip) // skip 'this' one, for leave-one-out cv
            continue;
        int classVal = (int) in.classValue();
        for (int j = 0; j < numTerms; ++j) {
            classWeights[classVal][j] += in.value(j);
        }
    }
    // apply tf x idf
    for (int i = 0; i < numTerms; ++i) { // for each term
        double df = 0; // document frequency
        for (int j = 0; j < numClasses; ++j) // find how many classes (documents) this term appears in
            if (classWeights[j][i] != 0)
                ++df;
        if (df != 0) { // if it appears
            if (df != numClasses) { // but not in all, apply weighting
                for (int j = 0; j < numClasses; ++j)
                    if (classWeights[j][i] != 0)
                        classWeights[j][i] = Math.log(1 + classWeights[j][i]) * Math.log(numClasses / df);
            } else { // appears in all
                // avoid log calculations
                // if df == num classes -> idf = log(N/df) = log(1) = 0
                for (int j = 0; j < numClasses; ++j)
                    classWeights[j][i] = 0;
            }
        }
    }
    Instances tfxidfCorpus = new Instances(bopData, numClasses);
    for (int i = 0; i < numClasses; ++i)
        tfxidfCorpus.add(new SparseInstance(1.0, classWeights[i]));
    return tfxidfCorpus;
}
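The weighting applied above is the usual class-based tf-idf: with one bag per class playing the role of a document, each non-zero entry becomes log(1 + tf) * log(numClasses / df), and terms present in every class are zeroed outright, since their idf factor log(numClasses / df) would be log(1) = 0 anyway.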
/**
 * Calculate Delta
 */
public double DeltaAdd(Instance inst, double r) {
    // System.out.println("DeltaAdd");
    int S_new;
    int W_new;
    double profit;
    double profit_new;
    double deltaprofit;
    S_new = 0;
    W_new = occ.size();
    if (inst instanceof SparseInstance) {
        // System.out.println("DeltaAddSparseInstance");
        for (int i = 0; i < inst.numValues(); i++) {
            S_new++;
            if (this.occ.get(inst.index(i)) == null) {
                W_new++;
            }
        }
    } else {
        for (int i = 0; i < inst.numAttributes(); i++) {
            if (!inst.isMissing(i)) {
                S_new++;
                if (this.occ.get(i + inst.toString(i)) == null) {
                    W_new++;
                }
            }
        }
    }
    S_new += S;
    if (N == 0) {
        deltaprofit = S_new / Math.pow(W_new, r);
    } else {
        profit = S * N / Math.pow(W, r);
        profit_new = S_new * (N + 1) / Math.pow(W_new, r);
        deltaprofit = profit_new - profit;
    }
    return deltaprofit;
}
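For context, this is CLOPE's profit criterion as the code expresses it: a cluster with size S (total item occurrences), width W (distinct items) and N transactions contributes S * N / W^r, where r is the repulsion parameter, and DeltaAdd returns how much that quantity would change if inst were added.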
/**
 * Move instance to best cluster
 */
public int MoveInstanceToBestCluster(Instance inst) {
    clusters.get(m_clusterAssignments.get(m_processed_InstanceID)).DeleteInstance(inst);
    m_clusterAssignments.set(m_processed_InstanceID, -1);
    double delta;
    double deltamax;
    int clustermax = -1;
    int tempS = 0;
    int tempW = 0;
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            tempS++;
            tempW++;
        }
    } else {
        for (int i = 0; i < inst.numAttributes(); i++) {
            if (!inst.isMissing(i)) {
                tempS++;
                tempW++;
            }
        }
    }
    deltamax = tempS / Math.pow(tempW, m_Repulsion);
    for (int i = 0; i < clusters.size(); i++) {
        CLOPECluster tempcluster = clusters.get(i);
        delta = tempcluster.DeltaAdd(inst, m_Repulsion);
        // System.out.println("delta " + delta);
        if (delta > deltamax) {
            deltamax = delta;
            clustermax = i;
        }
    }
    if (clustermax == -1) {
        CLOPECluster newcluster = new CLOPECluster();
        clusters.add(newcluster);
        newcluster.AddInstance(inst);
        return clusters.size() - 1;
    }
    clusters.get(clustermax).AddInstance(inst);
    return clustermax;
}
/**
 * Transform an instance in original (unnormalized) format. Convert back
 * to the original space if requested.
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if the instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not "
            + "built yet");
    }
    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance) instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: headers don't match: "
            + "PrincipalComponents\n"
            + instance.dataset().equalHeadersMsg(m_trainHeader));
    }
    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();
    /* if (m_normalize) {
        m_normalizeFilter.input(tempInst);
        m_normalizeFilter.batchFinished();
        tempInst = m_normalizeFilter.output();
    } */
    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();
    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }
    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }
    if (m_hasClass) {
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }
    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative += m_eigenvalues[m_sortedEigens[i]];
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }
    if (!m_transBackToOriginal) {
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new DenseInstance(instance.weight(), newVals);
        }
    } else {
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new DenseInstance(instance.weight(), newVals));
        }
    }
}
/**
 * Transform an instance in original (unnormalized) format
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
    if (m_s == null) {
        throw new Exception("convertInstance: Latent Semantic Analysis not "
            + "performed yet.");
    }
    // array to hold new attribute values
    double[] newValues = new double[m_outputNumAttributes];
    // apply filters so new instance is in same format as training instances
    Instance tempInstance = (Instance) instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: headers don't match: "
            + "LatentSemanticAnalysis");
    }
    // replace missing values
    m_replaceMissingFilter.input(tempInstance);
    m_replaceMissingFilter.batchFinished();
    tempInstance = m_replaceMissingFilter.output();
    // normalize
    if (m_normalize) {
        m_normalizeFilter.input(tempInstance);
        m_normalizeFilter.batchFinished();
        tempInstance = m_normalizeFilter.output();
    }
    // convert nominal attributes to binary
    m_nominalToBinaryFilter.input(tempInstance);
    m_nominalToBinaryFilter.batchFinished();
    tempInstance = m_nominalToBinaryFilter.output();
    // remove class/other attributes
    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInstance);
        m_attributeFilter.batchFinished();
        tempInstance = m_attributeFilter.output();
    }
    // record new attribute values
    if (m_hasClass) { // copy class value
        newValues[m_outputNumAttributes - 1] = instance.classValue();
    }
    double[][] oldInstanceValues = new double[1][m_numAttributes];
    oldInstanceValues[0] = tempInstance.toDoubleArray();
    Matrix instanceVector = new Matrix(oldInstanceValues); // old attribute values
    instanceVector = instanceVector.times(m_transformationMatrix); // new attribute values
    for (int i = 0; i < m_actualRank; i++) {
        newValues[i] = instanceVector.get(0, i);
    }
    // return newly transformed instance
    if (instance instanceof SparseInstance) {
        return new SparseInstance(instance.weight(), newValues);
    } else {
        return new DenseInstance(instance.weight(), newValues);
    }
}
/**
 * adds the instance to the XML structure
 *
 * @param parent the parent node to add the instance node as child
 * @param inst the instance to add
 */
protected void addInstance(Element parent, Instance inst) {
    Element node;
    Element value;
    Element child;
    boolean sparse;
    int i;
    int n;
    int index;
    node = m_Document.createElement(TAG_INSTANCE);
    parent.appendChild(node);
    // sparse?
    sparse = (inst instanceof SparseInstance);
    if (sparse)
        node.setAttribute(ATT_TYPE, VAL_SPARSE);
    // weight
    if (inst.weight() != 1.0)
        node.setAttribute(ATT_WEIGHT, Utils.doubleToString(inst.weight(), m_Precision));
    // values
    for (i = 0; i < inst.numValues(); i++) {
        index = inst.index(i);
        value = m_Document.createElement(TAG_VALUE);
        node.appendChild(value);
        if (inst.isMissing(index)) {
            value.setAttribute(ATT_MISSING, VAL_YES);
        } else {
            if (inst.attribute(index).isRelationValued()) {
                child = m_Document.createElement(TAG_INSTANCES);
                value.appendChild(child);
                for (n = 0; n < inst.relationalValue(i).numInstances(); n++)
                    addInstance(child, inst.relationalValue(i).instance(n));
            } else {
                if (inst.attribute(index).type() == Attribute.NUMERIC)
                    value.appendChild(m_Document.createTextNode(Utils.doubleToString(inst.value(index), m_Precision)));
                else
                    value.appendChild(m_Document.createTextNode(validContent(inst.stringValue(index))));
            }
        }
        if (sparse)
            value.setAttribute(ATT_INDEX, "" + (index + 1));
    }
}