The following examples show how to use the weka.core.Utils API class; you can also follow the links through to GitHub to view the full source code.
/**
 * Predicts the class memberships for a given instance. If
 * an instance is unclassified, the returned array elements
 * must be all zero. If the class is numeric, the array
 * must consist of only one element, which contains the
 * predicted value.
 *
 * @param instance the instance to be classified
 * @return an array containing the estimated membership
 * probabilities of the test instance in each class
 * or the numeric prediction
 * @throws Exception if distribution could not be
 * computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] result;
  result = m_ActualClassifier.getVotesForInstance(instanceConverter.samoaInstance(instance));
  // ensure that the array has as many elements as there are
  // class values!
  if (result.length < instance.numClasses()) {
    double[] newResult = new double[instance.numClasses()];
    System.arraycopy(result, 0, newResult, 0, result.length);
    result = newResult;
  }
  try {
    Utils.normalize(result);
  } catch (Exception e) {
    result = new double[instance.numClasses()];
  }
  return result;
}
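A note on the Utils.normalize call above: it rescales the array in place so that its elements sum to 1, and it throws an exception when the sum is zero, which is why the example falls back to an all-zero distribution. A minimal standalone sketch (the demo class and values are illustrative, not taken from the sources on this page):

import weka.core.Utils;

public class NormalizeDemo {
  public static void main(String[] args) {
    double[] votes = {2.0, 1.0, 1.0};   // hypothetical raw votes
    Utils.normalize(votes);             // in place: {0.5, 0.25, 0.25}
    System.out.println(java.util.Arrays.toString(votes));

    double[] empty = new double[3];
    try {
      Utils.normalize(empty);           // sum is zero -> IllegalArgumentException
    } catch (IllegalArgumentException e) {
      System.out.println("cannot normalize an all-zero array");
    }
  }
}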
/**
 * Returns a string describing the condition satisfied by instances in a subset.
 */
public final String rightSide(int index, Instances data) {
  StringBuffer text;
  text = new StringBuffer();
  if (data.attribute(m_attIndex).isNominal())
    text.append(" = " + data.attribute(m_attIndex).value(index));
  else if (index == 0)
    text.append(" <= " + Utils.doubleToString(m_splitPoint, 6));
  else
    text.append(" > " + Utils.doubleToString(m_splitPoint, 6));
  return text.toString();
}
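Utils.doubleToString appears throughout these examples in two forms: doubleToString(value, afterDecimalPoint) rounds to a number of decimal places, while doubleToString(value, columns, afterDecimalPoint) additionally right-justifies the result in a fixed column width. A small illustrative sketch (demo class is mine):

import weka.core.Utils;

public class DoubleToStringDemo {
  public static void main(String[] args) {
    // round to 6 decimal places, as in the split-point output above
    System.out.println(Utils.doubleToString(2.3456789, 6));  // "2.345679"
    // width of 6 columns with 3 decimals, as in several toString() methods below
    System.out.println(Utils.doubleToString(0.6, 6, 3));     // " 0.600"
  }
}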
/**
 * Computes estimated errors for tree.
 *
 * @return the estimated errors
 * @throws Exception if error estimate can't be computed
 */
private double errorsForTree() throws Exception {
  double errors = 0;

  if (m_isLeaf)
    return errorsForLeaf();
  else {
    for (int i = 0; i < m_sons.length; i++)
      if (Utils.eq(localModel().distribution().perBag(i), 0)) {
        errors += m_test.perBag(i) -
          m_test.perClassPerBag(i, localModel().distribution().maxClass());
      } else
        errors += son(i).errorsForTree();
    return errors;
  }
}
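The tree code above relies on Utils.eq, which compares doubles within a small tolerance (1e-6) rather than exactly; Utils.gr and Utils.smOrEq, used further down this page, are the analogous tolerant greater-than and less-or-equal tests. A minimal sketch (demo class is mine):

import weka.core.Utils;

public class TolerantCompareDemo {
  public static void main(String[] args) {
    double a = 0.1 + 0.2;                     // 0.30000000000000004
    System.out.println(a == 0.3);             // false: exact comparison fails
    System.out.println(Utils.eq(a, 0.3));     // true: equal within tolerance
    System.out.println(Utils.gr(a, 0.3));     // false: not greater beyond tolerance
    System.out.println(Utils.smOrEq(a, 0.3)); // true
  }
}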
/**
 * Returns a description of the logistic model (i.e., attributes and
 * coefficients).
 *
 * @return the description of the model
 */
public String toString() {
  StringBuffer s = new StringBuffer();

  // get used attributes
  int[][] attributes = getUsedAttributes();

  // get coefficients
  double[][] coefficients = getCoefficients();

  for (int j = 0; j < m_numClasses; j++) {
    s.append("\nClass " + j + " :\n");
    // constant term
    s.append(Utils.doubleToString(coefficients[j][0], 4, 2) + " + \n");
    for (int i = 0; i < attributes[j].length; i++) {
      // attribute/coefficient pairs
      s.append("[" + m_numericDataHeader.attribute(attributes[j][i]).name() + "]");
      s.append(" * " + Utils.doubleToString(coefficients[j][attributes[j][i] + 1], 4, 2));
      if (i != attributes[j].length - 1)
        s.append(" +");
      s.append("\n");
    }
  }
  return s.toString();
}
/**
 * Parses a given list of options. Valid options are:<p>
 *
 * -W classname <br>
 * Specify the full class name of the base associator.<p>
 *
 * Options after -- are passed to the designated associator.<p>
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String tmpStr;

  tmpStr = Utils.getOption('W', options);
  if (tmpStr.length() > 0) {
    // This is just to set the associator in case the option
    // parsing fails.
    setAssociator(AbstractAssociator.forName(tmpStr, null));
    setAssociator(AbstractAssociator.forName(tmpStr, Utils.partitionOptions(options)));
  } else {
    // This is just to set the associator in case the option
    // parsing fails.
    setAssociator(AbstractAssociator.forName(defaultAssociatorString(), null));
    setAssociator(AbstractAssociator.forName(defaultAssociatorString(), Utils.partitionOptions(options)));
  }
}
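Two Utils helpers drive the pattern above: getOption consumes the named option and its value from the array, and partitionOptions extracts everything after the "--" separator for the wrapped scheme, blanking those entries out of the original array. A hypothetical invocation (the associator name is just an example):

import weka.core.Utils;

public class OptionPartitionDemo {
  public static void main(String[] args) throws Exception {
    String[] options = {"-W", "weka.associations.Apriori", "--", "-N", "20"};
    String w = Utils.getOption('W', options);        // "weka.associations.Apriori"
    String[] rest = Utils.partitionOptions(options); // {"-N", "20"} for the base scheme
    System.out.println(w + " gets " + rest.length + " option token(s)");
  }
}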
/**
 * Parses a given list of options. <p/>
 *
 <!-- options-start -->
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String binsString = Utils.getOption('B', options);
  if (binsString.length() != 0) {
    setNumBins(Integer.parseInt(binsString));
  } else {
    setNumBins(10);
  }

  setDeleteEmptyBins(Utils.getFlag('E', options));
  setUseEqualFrequency(Utils.getFlag('F', options));
  setMinimizeAbsoluteError(Utils.getFlag('A', options));

  String tmpStr = Utils.getOption('K', options);
  if (tmpStr.length() != 0)
    setEstimatorType(new SelectedTag(Integer.parseInt(tmpStr), TAGS_ESTIMATOR));
  else
    setEstimatorType(new SelectedTag(ESTIMATOR_HISTOGRAM, TAGS_ESTIMATOR));

  super.setOptions(options);
}
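Utils.getFlag is the boolean counterpart of getOption: it returns true if the flag is present and removes it from the array, so leftover options can be checked afterwards. A minimal sketch mirroring the -B/-E options above (demo class is mine):

import weka.core.Utils;

public class GetFlagDemo {
  public static void main(String[] args) throws Exception {
    String[] options = {"-E", "-B", "5"};
    String bins = Utils.getOption('B', options);       // "5", consumed
    boolean deleteEmpty = Utils.getFlag('E', options); // true, consumed
    boolean equalFreq = Utils.getFlag('F', options);   // false, absent
    System.out.println(bins + " " + deleteEmpty + " " + equalFreq);
    Utils.checkForRemainingOptions(options);           // passes: nothing left unparsed
  }
}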
/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_NumIterations; i++) {
    if (instance.classAttribute().isNumeric()) {
      sums[0] += m_Classifiers[i].classifyInstance(instance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric()) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
/**
 * Computes the target function to minimize in gradient descent.
 * The formula is:<br/>
 * 1/2*sum[i=1..p](f(X, Xi)-var(Y, Yi))^2 <p/>
 * where p is the number of exemplars and Y is the class label.
 * In the case of X=MU, f() is the Euclidean distance between two
 * exemplars together with the related weights, and var() is
 * sqrt(numDimension)*(Y-Yi), where Y-Yi is either 0 (when Y==Yi)
 * or 1 (Y!=Yi).
 *
 * @param x the weights of the exemplar in question
 * @param X the matrix of all exemplars
 * @param rowpos row index of x in X
 * @param Y the observed class labels
 * @return the result of the target function
 */
public double target(double[] x, double[][] X, int rowpos, double[] Y) {
  double y = Y[rowpos], result = 0;

  for (int i = 0; i < X.length; i++) {
    if ((i != rowpos) && (X[i] != null)) {
      double var = (y == Y[i]) ? 0.0 : Math.sqrt((double) m_Dimension - 1);
      double f = 0;
      for (int j = 0; j < m_Dimension; j++)
        if (Utils.gr(m_Variance[rowpos][j], 0.0)) {
          f += x[j] * (X[rowpos][j] - X[i][j]) * (X[rowpos][j] - X[i][j]);
        }
      f = Math.sqrt(f);
      if (Double.isInfinite(f))
        System.exit(1);
      result += 0.5 * (f - var) * (f - var);
    }
  }
  return result;
}
/**
 * Returns a description of the search.
 * @return a description of the search as a String
 */
public String toString() {
  StringBuffer GAString = new StringBuffer();
  GAString.append("\tGenetic search.\n\tStart set: ");

  if (m_starting == null) {
    GAString.append("no attributes\n");
  } else {
    GAString.append(startSetToString() + "\n");
  }
  GAString.append("\tPopulation size: " + m_popSize);
  GAString.append("\n\tNumber of generations: " + m_maxGenerations);
  GAString.append("\n\tProbability of crossover: "
    + Utils.doubleToString(m_pCrossover, 6, 3));
  GAString.append("\n\tProbability of mutation: "
    + Utils.doubleToString(m_pMutation, 6, 3));
  GAString.append("\n\tReport frequency: " + m_reportFrequency);
  GAString.append("\n\tRandom number seed: " + m_seed + "\n");
  GAString.append(m_generationReports.toString());
  return GAString.toString();
}
/**
 * Parses a given list of options. <p/>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -mbc
 * Applies a Markov Blanket correction to the network structure,
 * after a network structure is learned. This ensures that all
 * nodes in the network are part of the Markov blanket of the
 * classifier node.</pre>
 *
 * <pre> -S [BAYES|BDeu|MDL|ENTROPY|AIC]
 * Score type (BAYES, BDeu, MDL, ENTROPY and AIC)</pre>
 *
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  setMarkovBlanketClassifier(Utils.getFlag("mbc", options));

  String sScore = Utils.getOption('S', options);
  if (sScore.compareTo("BAYES") == 0) {
    setScoreType(new SelectedTag(Scoreable.BAYES, TAGS_SCORE_TYPE));
  }
  if (sScore.compareTo("BDeu") == 0) {
    setScoreType(new SelectedTag(Scoreable.BDeu, TAGS_SCORE_TYPE));
  }
  if (sScore.compareTo("MDL") == 0) {
    setScoreType(new SelectedTag(Scoreable.MDL, TAGS_SCORE_TYPE));
  }
  if (sScore.compareTo("ENTROPY") == 0) {
    setScoreType(new SelectedTag(Scoreable.ENTROPY, TAGS_SCORE_TYPE));
  }
  if (sScore.compareTo("AIC") == 0) {
    setScoreType(new SelectedTag(Scoreable.AIC, TAGS_SCORE_TYPE));
  }
}
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if instance could not be classified
 * successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  // default model?
  if (m_ZeroR != null) {
    return m_ZeroR.distributionForInstance(instance);
  }

  if (m_NumIterationsPerformed == 0) {
    throw new Exception("No model built");
  }
  double[] sums = new double[instance.numClasses()];

  if (m_NumIterationsPerformed == 1) {
    return m_Classifiers[0].distributionForInstance(instance);
  } else {
    for (int i = 0; i < m_NumIterationsPerformed; i++) {
      sums[(int) m_Classifiers[i].classifyInstance(instance)] += m_Betas[i];
    }
    return Utils.logs2probs(sums);
  }
}
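Utils.logs2probs, used to turn the weighted votes above into a distribution, converts an array of log-values back into probabilities: it exponentiates each element (shifted by the maximum for numeric stability) and normalizes the result. Illustrative values (demo class is mine):

import weka.core.Utils;

public class Logs2ProbsDemo {
  public static void main(String[] args) {
    double[] logs = {Math.log(0.2), Math.log(0.8)};
    double[] probs = Utils.logs2probs(logs); // {0.2, 0.8} recovered
    System.out.println(java.util.Arrays.toString(probs));
  }
}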
/**
 * Returns index of subset instance is assigned to.
 * Returns -1 if instance is assigned to more than one subset.
 *
 * @param instance the instance to assign
 * @return the subset index, or -1 if the value of the split attribute is missing
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
  if (instance.isMissing(m_attIndex))
    return -1;
  else {
    if (instance.attribute(m_attIndex).isNominal()) {
      if ((int) m_splitPoint == (int) instance.value(m_attIndex))
        return 0;
      else
        return 1;
    } else if (Utils.smOrEq(instance.value(m_attIndex), m_splitPoint))
      return 0;
    else
      return 1;
  }
}
/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probOfClassGivenDoc = new double[m_numClasses];

  // calculate the array of log(Pr[D|C])
  double[] logDocGivenClass = new double[m_numClasses];
  for (int h = 0; h < m_numClasses; h++)
    logDocGivenClass[h] = probOfDocGivenClass(instance, h);

  double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];
  double probOfDoc = 0.0;

  for (int i = 0; i < m_numClasses; i++) {
    probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max) * m_probOfClass[i];
    probOfDoc += probOfClassGivenDoc[i];
  }

  Utils.normalize(probOfClassGivenDoc, probOfDoc);
  return probOfClassGivenDoc;
}
/**
 * Calculates the level of consistency in a dataset using a subset of
 * features. The consistency of a hash table entry is the total number
 * of instances hashed to that location minus the number of instances in
 * the largest class hashed to that location. The total consistency is
 * 1.0 minus the sum of the individual consistencies divided by the
 * total number of instances.
 *
 * @return the consistency of the hash table as a value between 0 and 1
 */
private double consistencyCount() {
  Enumeration e = m_table.keys();
  double[] classDist;
  double count = 0.0;

  while (e.hasMoreElements()) {
    hashKey tt = (hashKey) e.nextElement();
    classDist = (double[]) m_table.get(tt);
    count += Utils.sum(classDist);
    int max = Utils.maxIndex(classDist);
    count -= classDist[max];
  }

  count /= (double) m_numInstances;
  return (1.0 - count);
}
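Utils.sum and Utils.maxIndex, used above to total a class distribution and find its majority class, are simple array helpers. A standalone sketch (demo class and counts are made up):

import weka.core.Utils;

public class ArrayHelpersDemo {
  public static void main(String[] args) {
    double[] classDist = {3.0, 7.0, 2.0};     // hypothetical counts at one hash bucket
    double total = Utils.sum(classDist);      // 12.0
    int majority = Utils.maxIndex(classDist); // 1
    System.out.println((total - classDist[majority]) + " inconsistent instances");
  }
}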
/**
 * This method computes the information gain in the same way
 * C4.5 does.
 *
 * @param bags the distribution
 * @param totalNoInst weight of ALL instances
 * @param oldEnt entropy with respect to the "no-split" model
 * @return the information gain, divided by the total weight of the instances in the bags
 */
public final double splitCritValue(Distribution bags, double totalNoInst,
                                   double oldEnt) {
  double numerator;
  double noUnknown;
  double unknownRate;

  noUnknown = totalNoInst - bags.total();
  unknownRate = noUnknown / totalNoInst;
  numerator = (oldEnt - newEnt(bags));
  numerator = (1 - unknownRate) * numerator;

  // Splits with no gain are useless.
  if (Utils.eq(numerator, 0))
    return 0;

  return numerator / bags.total();
}
/**
 * Returns the fraction of all attributes in the data that are used in the
 * logistic model (in percent).
 * An attribute is used in the model if it is used in any of the models for
 * the different classes.
 *
 * @return the fraction of all attributes that are used
 */
public double percentAttributesUsed() {
  boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];

  double[][] coefficients = getCoefficients();
  for (int j = 0; j < m_numClasses; j++) {
    for (int i = 1; i < m_numericDataHeader.numAttributes() + 1; i++) {
      // attribute used if it is used in any class; note coefficients are
      // shifted by one (because of the constant term)
      if (!Utils.eq(coefficients[j][i], 0))
        attributes[i - 1] = true;
    }
  }

  // count number of used attributes (without the class attribute)
  double count = 0;
  for (int i = 0; i < attributes.length; i++)
    if (attributes[i])
      count++;

  return count / (double) (m_numericDataHeader.numAttributes() - 1) * 100.0;
}
public String toString() {
  StringBuffer text = new StringBuffer();
  text.append("\nsIB\n===\n");
  text.append("\nNumber of clusters: " + m_numCluster + "\n");

  for (int j = 0; j < m_numCluster; j++) {
    text.append("\nCluster: " + j + " Size : " + bestT.size(j) + " Prior probability: "
      + Utils.doubleToString(bestT.Pt[j], 4) + "\n\n");
    for (int i = 0; i < m_numAttributes; i++) {
      text.append("Attribute: " + m_data.attribute(i).name() + "\n");
      text.append("Probability given the cluster = "
        + Utils.doubleToString(bestT.Py_t.get(i, j), 4) + "\n");
    }
  }
  return text.toString();
}
/**
 * Computes the rank matrix: for each row of the value matrix, the
 * measurements are replaced by their ranks (1 = smallest).
 */
public static int[][] rankMatrix(List<EvaluationStatistics> stats, String measurement) {
  double V[][] = valueMatrix(stats, measurement);
  int N = V.length;
  int k = V[0].length;

  int R[][] = new int[N][k];
  for (int i = 0; i < N; i++) {
    int indices[] = Utils.sort(V[i]);
    // add 1 to each
    for (int j = 0; j < k; j++) {
      R[i][indices[j]] = (j + 1);
    }
  }
  return R;
}
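Utils.sort does not reorder the array; it returns the indices that would sort it in ascending order, which is why the loop above can convert sorted positions into ranks. A small sketch (demo class is mine):

import weka.core.Utils;

public class SortDemo {
  public static void main(String[] args) {
    double[] v = {0.7, 0.1, 0.4};
    int[] indices = Utils.sort(v);  // {1, 2, 0}: v[1] <= v[2] <= v[0]
    int[] ranks = new int[v.length];
    for (int j = 0; j < indices.length; j++) {
      ranks[indices[j]] = j + 1;    // rank 1 = smallest, as in rankMatrix above
    }
    System.out.println(java.util.Arrays.toString(ranks)); // [3, 1, 2]
  }
}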
/**
 * Performs the actual initialization of the filters.
 */
@Override
protected void doInitializeFilters() {
  List<String> filters = PluginManager.getPluginNamesOfTypeList(FileBasedEvaluationStatisticsHandler.class.getName());
  m_FileFilters = new ArrayList<>();
  for (String filter : filters) {
    try {
      FileBasedEvaluationStatisticsHandler handler = (FileBasedEvaluationStatisticsHandler) Utils.forName(
        FileBasedEvaluationStatisticsHandler.class, filter, new String[0]);
      m_FileFilters.add(new ExtensionFileFilterWithClass(
        handler.getFormatExtensions(),
        handler.getFormatDescription() + " (" + ObjectUtils.flatten(handler.getFormatExtensions(), ", ") + ")",
        filter));
    } catch (Exception e) {
      System.err.println("Failed to instantiate file filter: " + filter);
      e.printStackTrace();
    }
  }
}
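Utils.forName, as used above, instantiates a class by name, checks it against the expected superclass or interface, and, if the new object is an OptionHandler, applies the given options. A hedged sketch (the classifier name and option are just an example):

import weka.core.Utils;
import weka.classifiers.Classifier;

public class ForNameDemo {
  public static void main(String[] args) throws Exception {
    Classifier c = (Classifier) Utils.forName(
      Classifier.class,             // required type
      "weka.classifiers.trees.J48", // class to instantiate
      new String[]{"-C", "0.2"});   // options passed to setOptions()
    System.out.println(c.getClass().getName());
  }
}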
/**
 * Classifies an instance.
 *
 * @param instance the instance to classify
 * @return the index of the class with the highest estimated probability
 * @exception Exception if instance can't be classified
 */
public double classifyInstance(Instance instance) throws Exception {
  double maxProb = -1;
  double[] sumProbs;
  int maxIndex = 0;

  sumProbs = distributionForInstance(instance);
  for (int j = 0; j < sumProbs.length; j++) {
    if (Utils.gr(sumProbs[j], maxProb)) {
      maxIndex = j;
      maxProb = sumProbs[j];
    }
  }
  return (double) maxIndex;
}
/**
 * Builds the tree structure with hold out set.
 *
 * @param train the data for which the tree structure is to be
 * generated
 * @param test the test data for potential pruning
 * @param keepData is training data to be kept?
 * @throws Exception if something goes wrong
 */
public void buildTree(Instances train, Instances test, boolean keepData)
  throws Exception {

  Instances[] localTrain, localTest;
  int i;

  if (keepData) {
    m_train = train;
  }
  m_isLeaf = false;
  m_isEmpty = false;
  m_sons = null;
  m_localModel = m_toSelectModel.selectModel(train, test);
  m_test = new Distribution(test, m_localModel);
  if (m_localModel.numSubsets() > 1) {
    localTrain = m_localModel.split(train);
    localTest = m_localModel.split(test);
    train = test = null;
    m_sons = new ClassifierTree[m_localModel.numSubsets()];
    for (i = 0; i < m_sons.length; i++) {
      m_sons[i] = getNewTree(localTrain[i], localTest[i]);
      localTrain[i] = null;
      localTest[i] = null;
    }
  } else {
    m_isLeaf = true;
    if (Utils.eq(train.sumOfWeights(), 0))
      m_isEmpty = true;
    train = test = null;
  }
}
/**
 * Parses a given list of options. <p/>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -W <evaluator specification>
 * Full name of base evaluator to use, followed by evaluator options.
 * eg: "weka.attributeSelection.InfoGainAttributeEval -M"</pre>
 *
 * <pre> -F <filter specification>
 * Full class name of filter to use, followed
 * by filter options.
 * eg: "weka.filters.supervised.instance.SpreadSubsample -M 1"</pre>
 *
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String evaluator = Utils.getOption('W', options);
  if (evaluator.length() > 0) {
    String[] evaluatorSpec = Utils.splitOptions(evaluator);
    if (evaluatorSpec.length == 0) {
      throw new IllegalArgumentException("Invalid evaluator specification string");
    }
    String evaluatorName = evaluatorSpec[0];
    evaluatorSpec[0] = "";
    setAttributeEvaluator((ASEvaluation) Utils.forName(AttributeEvaluator.class,
      evaluatorName, evaluatorSpec));
  } else {
    setAttributeEvaluator(new InfoGainAttributeEval());
  }

  // Same for filter
  String filterString = Utils.getOption('F', options);
  if (filterString.length() > 0) {
    String[] filterSpec = Utils.splitOptions(filterString);
    if (filterSpec.length == 0) {
      throw new IllegalArgumentException("Invalid filter specification string");
    }
    String filterName = filterSpec[0];
    filterSpec[0] = "";
    setFilter((Filter) Utils.forName(Filter.class, filterName, filterSpec));
  } else {
    setFilter(new weka.filters.supervised.instance.SpreadSubsample());
  }
}
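Utils.splitOptions tokenizes a specification string the way a shell would, honouring quotes, which is how the -W and -F values above carry nested options; element 0 is the class name and is blanked before the remainder is passed on. For example (demo class is mine):

import weka.core.Utils;

public class SplitOptionsDemo {
  public static void main(String[] args) throws Exception {
    String spec = "weka.filters.supervised.instance.SpreadSubsample -M 1";
    String[] parts = Utils.splitOptions(spec);
    // parts = {"weka.filters.supervised.instance.SpreadSubsample", "-M", "1"}
    String className = parts[0];
    parts[0] = ""; // leave only the options, as in setOptions above
    System.out.println(className + " with " + (parts.length - 1) + " option tokens");
  }
}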
/**
 * Gets the classifier specification string, which contains the
 * class name of the classifier and any options to the classifier.
 *
 * @return the classifier string
 */
protected String getClassifierSpec() {
  Classifier c = getClassifier();
  return c.getClass().getName() + " "
    + Utils.joinOptions(((OptionHandler) c).getOptions());
}
/**
 * Gets the upper and lower boundary for the radius of the clusters.
 *
 * @return the string containing the upper and lower boundary for
 * the radius of the clusters, separated by ".."
 */
protected String getRadiuses() {
  String fromTo = ""
    + Utils.doubleToString(getMinRadius(), 2) + ".."
    + Utils.doubleToString(getMaxRadius(), 2);
  return fromTo;
}
/**
 * Prints out the classifier.
 *
 * @return a description of the classifier as a string
 */
public String toString() {
  if (m_weights == null) {
    return "SPegasos: No model built yet.\n";
  }
  StringBuffer buff = new StringBuffer();
  buff.append("Loss function: ");
  if (m_loss == HINGE) {
    buff.append("Hinge loss (SVM)\n\n");
  } else {
    buff.append("Log loss (logistic regression)\n\n");
  }
  int printed = 0;

  for (int i = 0; i < m_weights.length - 1; i++) {
    if (i != m_data.classIndex()) {
      if (printed > 0) {
        buff.append(" + ");
      } else {
        buff.append(" ");
      }
      buff.append(Utils.doubleToString(m_weights[i], 12, 4) + " "
        + ((m_normalize != null) ? "(normalized) " : "")
        + m_data.attribute(i).name() + "\n");
      printed++;
    }
  }

  if (m_weights[m_weights.length - 1] > 0) {
    buff.append(" + " + Utils.doubleToString(m_weights[m_weights.length - 1], 12, 4));
  } else {
    buff.append(" - " + Utils.doubleToString(-m_weights[m_weights.length - 1], 12, 4));
  }
  return buff.toString();
}
public void setOptions(String[] options) throws Exception {
  String maxLagString = Utils.getOption('L', options);
  if (maxLagString.length() != 0)
    this.maxLag = Integer.parseInt(maxLagString);
  else
    this.maxLag = DEFAULT_MAXLAG;
}
/**
 * Parses a given list of options. Valid options are:<p>
 *
 * -P <br>
 * Set the roa parameter.<p>
 *
 * -K <br>
 * Use ranks-to-class mode.<p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  roa = (Utils.getOptionPos("P", options) >= 0) ? Double.parseDouble(Utils.getOption("P", options)) : roa;
  m_userankstoclass = (Utils.getOptionPos("K", options) >= 0);
  super.setOptions(options);
}
public static double L_RankLoss(int y[], double rpred[]) {
  // works with missing
  double[][] aligned = align(y, rpred);
  y = toIntArray(aligned[0]);
  rpred = aligned[1];

  int r[] = Utils.sort(rpred);
  return L_RankLoss(y, r);
}
/**
 * Calculates the distance between an instance and the point described
 * by the given mean and variance vectors.
 *
 * @param first the instance
 * @param mean the mean values of the dimensions
 * @param var the variances of the dimensions
 * @param pos the row index into the m_Change weights
 * @return the computed distance
 */
private double distance(Instance first, double[] mean, double[] var, int pos) {
  double diff, distance = 0;

  for (int i = 0; i < m_Dimension; i++) {
    // If attribute is numeric
    if (first.attribute(i).isNumeric()) {
      if (!first.isMissing(i)) {
        diff = first.value(i) - mean[i];
        if (Utils.gr(var[i], m_ZERO))
          distance += m_Change[pos][i] * var[i] * diff * diff;
        else
          distance += m_Change[pos][i] * diff * diff;
      } else {
        if (Utils.gr(var[i], m_ZERO))
          distance += m_Change[pos][i] * var[i];
        else
          distance += m_Change[pos][i] * 1.0;
      }
    }
  }
  return distance;
}