The following examples show how to use the weka.core.converters.ArffSaver API class, collected from real projects; you can also follow the GitHub links to view the original source.
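Before the project-sourced examples, here is the core batch-save pattern they all share: construct the saver, hand it the Instances, point it at a File, and call writeBatch. A minimal self-contained sketch (file paths are placeholders):

import java.io.File;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.converters.ConverterUtils.DataSource;

public class ArffSaverDemo {
    public static void main(String[] args) throws Exception {
        // Load any format Weka understands (placeholder path).
        Instances data = DataSource.read("data/input.arff");
        // The three calls every example below repeats: data, file, write.
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File("data/output.arff"));
        saver.writeBatch();
    }
}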
/**
 * Simple utility to save datasets out. Useful for the shapelet transform.
 *
 * @param dataSet  the instances to write
 * @param fileName the target file name; ".arff" is appended if missing
 */
public static void saveDataset(Instances dataSet, String fileName) {
    try {
        ArffSaver saver = new ArffSaver();
        saver.setMaxDecimalPlaces(MAX_DECIMAL_PLACES);
        saver.setInstances(dataSet);
        if (fileName.endsWith(".arff")) {
            saver.setFile(new File(fileName));
        } else {
            saver.setFile(new File(fileName + ".arff"));
        }
        saver.writeBatch();
    } catch (IOException ex) {
        System.out.println("Error saving transformed dataset: " + ex);
    }
}
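Note that setMaxDecimalPlaces, available on ArffSaver in recent Weka versions, only controls the precision of numeric values in the written file; MAX_DECIMAL_PLACES is a constant of the enclosing class here, not part of the Weka API.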
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        DataSource src = new DataSource("/Users/admin/wekafiles/data/weather.numeric.arff");
        Instances dt = src.getDataSet();
        System.out.println(dt.toSummaryString());
        ArffSaver as = new ArffSaver();
        as.setInstances(dt);
        as.setFile(new File("weather.arff"));
        as.writeBatch();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();
        String[] op = new String[]{"-R", "2-4"};
        Remove rmv = new Remove();
        rmv.setOptions(op);
        rmv.setInputFormat(dt);
        Instances nd = Filter.useFilter(dt, rmv);
        ArffSaver s = new ArffSaver();
        s.setInstances(nd);
        s.setFile(new File("fw.arff"));
        s.writeBatch();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
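For reference, Remove also exposes typed setters, so the String[] option array above is not required; a minimal equivalent helper (the method name is ours, imports as in the example above):

// Drop attributes 2-4 using Remove's typed setters instead of setOptions.
private static Instances dropAttributes(Instances dt) throws Exception {
    Remove rmv = new Remove();
    rmv.setAttributeIndices("2-4"); // equivalent to options {"-R", "2-4"}
    rmv.setInputFormat(dt);
    return Filter.useFilter(dt, rmv);
}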
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();
        dt.setClassIndex(dt.numAttributes() - 1); // the supervised AttributeSelection filter needs a class attribute
        AttributeSelection asel = new AttributeSelection();
        CfsSubsetEval evl = new CfsSubsetEval();
        GreedyStepwise sh = new GreedyStepwise();
        asel.setEvaluator(evl);
        asel.setSearch(sh);
        asel.setInputFormat(dt);
        Instances nd = Filter.useFilter(dt, asel);
        ArffSaver as = new ArffSaver();
        as.setInstances(nd);
        as.setFile(new File("weather-sel.arff"));
        as.writeBatch();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
private void convertCSVtoArff(String filename) throws Exception {
    // load CSV
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(filename));
    // the CSV file has no header row
    String[] options = new String[]{"-H"};
    loader.setOptions(options);
    Instances data = loader.getDataSet();
    // save ARFF
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    filename = filename.replace(".csv", ".arff");
    saver.setFile(new File(filename));
    saver.writeBatch();
}
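The reverse conversion follows the same pattern, since weka.core.converters.CSVSaver is a file saver just like ArffSaver; a minimal sketch (the method name is ours):

private void convertArffToCsv(String filename) throws Exception {
    // load ARFF (DataSource picks the loader from the extension)
    Instances data = ConverterUtils.DataSource.read(filename);
    // save CSV
    CSVSaver saver = new CSVSaver();
    saver.setInstances(data);
    saver.setFile(new File(filename.replace(".arff", ".csv")));
    saver.writeBatch();
}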
/**
 * Keep the words we want.
 *
 * @param output  the file to write the filtered instances to
 * @param options filter options (attribute indices in options[1])
 * @param inverse if true, keep the listed attributes instead of removing them
 * @throws Exception
 */
private void removeWords(String output, String[] options, boolean inverse) throws Exception {
    Remove remove = new Remove();
    if (inverse) {
        remove.setAttributeIndices(options[1]);
        remove.setInvertSelection(true);
    } else {
        remove.setOptions(options);
    }
    remove.setInputFormat(m_instances);
    Instances newData = Filter.useFilter(m_instances, remove);
    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    saver.setFile(new File(output));
    saver.writeBatch();
}
/**
 * Main method for testing this class.
 *
 * @param args arguments; should contain the path of the input dataset and
 *             the name of the target file
 */
static public void main(String args[]) {
    if (args.length == 2) {
        TweetCollectionToArff ta = new SemEvalToArff();
        try {
            Instances dataset = ta.createDataset(args[0]);
            ArffSaver saver = new ArffSaver();
            saver.setInstances(dataset);
            saver.setFile(new File(args[1]));
            saver.writeBatch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/**
 * Main method for testing this class.
 *
 * @param args arguments; should contain the path of the input dataset and
 *             the name of the target file
 */
static public void main(String args[]) {
    if (args.length == 2) {
        TweetCollectionToArff ta = new HumanCodedToArff();
        try {
            Instances dataset = ta.createDataset(args[0]);
            ArffSaver saver = new ArffSaver();
            saver.setInstances(dataset);
            saver.setFile(new File(args[1]));
            saver.writeBatch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
public static void main(String[] args) throws Exception {
    if (args.length != 3)
        throw new IllegalArgumentException("Required parameters: <input> <attribute_indices> <output>");
    System.out.println("Loading input data: " + args[0]);
    Instances input = DataSource.read(args[0]);
    System.out.println("Applying filter using indices: " + args[1]);
    MekaClassAttributes filter = new MekaClassAttributes();
    filter.setAttributeIndices(args[1]);
    filter.setInputFormat(input);
    Instances output = Filter.useFilter(input, filter);
    System.out.println("Saving filtered data to: " + args[2]);
    ArffSaver saver = new ArffSaver();
    saver.setFile(new File(args[2]));
    DataSink.write(saver, output);
}
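As an aside, ConverterUtils.DataSink can also choose the saver from the file extension itself, so the explicit ArffSaver above could be dropped; an equivalent shortcut:

// DataSink infers an ArffSaver from the .arff extension of args[2].
DataSink.write(args[2], output);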
/**
 * When the set of score names changes, rewrite the file from scratch.
 * This is really rare in practice, so don't bother optimizing it.
 */
private static void dump_from_scratch(Collection<String> names, Timestamp start_time) throws IOException {
    saved_schema_version = names.size();
    // Answer score names (FastVector/Instance are the pre-3.7 Weka API)
    FastVector attributes = new FastVector();
    for (String name : names)
        attributes.addElement(new Attribute(name));
    Instances data = new Instances("Watsonsim captured question stream", attributes, 0);
    // Save the results to a file, one row at a time
    saver = new ArffSaver();
    saver.setStructure(data);
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(new File("data/weka-log." + start_time + ".arff"));
    for (Score row : dataset)
        saver.writeIncremental(new Instance(1.0, row.getEach(names)));
}
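The snippet above streams rows with Saver.INCREMENTAL but never closes the writer. In current Weka APIs the incremental protocol is finished by passing null to writeIncremental; a minimal sketch of the full pattern (the method name and Iterable parameter are ours):

// Incremental save: set mode and structure, write row by row, then
// pass null to writeIncremental to flush and close the file.
static void saveIncrementally(Instances structure, Iterable<weka.core.Instance> rows, File out) throws Exception {
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setStructure(structure);
    saver.setFile(out);
    for (weka.core.Instance row : rows)
        saver.writeIncremental(row);
    saver.writeIncremental(null); // signals end of writing
}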
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File("/Users/admin/Documents/NetBeansProjects/Arff2CSV/weather.csv"));
    Instances data = loader.getDataSet();
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(new File("weather.arff"));
    saver.writeBatch();
}
/**
 * Save data to the ARFF file at the given path.
 *
 * @param path the target file path
 * @param data the instances to save
 */
public static void saveDataToArffFile(String path, Instances data) throws IOException {
    System.out.println("\nSaving to file " + path + "...");
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(new File(path));
    saver.writeBatch();
}
public void generateArffFile(Instances instances, String path) {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(instances);
    try {
        saver.setFile(new File(path));
        saver.writeBatch();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public static void saveSingleInstances(final Instances dataSet, final String filePath) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataSet);
    File destFile = new File(filePath);
    saver.setFile(destFile);
    saver.writeBatch();
}
@Test
public void test() throws Exception {
    // MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(new File("testrsc/openml/41103.arff"));
    String origDataSrcName = "testrsc/openml/1240.arff";
    if (true) { // one-off: write the stratified train/test split files, then exit
        Instances data = new Instances(new FileReader(new File(origDataSrcName)));
        data.setClassIndex(data.numAttributes() - 1);
        List<Instances> split = WekaUtil.getStratifiedSplit(data, 0, .7f);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(split.get(0));
        saver.setFile(new File(origDataSrcName + ".train"));
        saver.writeBatch();
        saver.setInstances(split.get(1));
        saver.setFile(new File(origDataSrcName + ".test"));
        saver.writeBatch();
        System.exit(0);
    }
    MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(new File(origDataSrcName + ".train"));
    mlplan.setTimeout(new Timeout(5, TimeUnit.MINUTES));
    mlplan.setLoggerName("testedalgorithm");
    long start = System.currentTimeMillis();
    Classifier c = mlplan.call();
    System.out.println("Observed output: " + c + " after " + (System.currentTimeMillis() - start) + "ms. Now validating the model");
    /* check quality */
    Instances testData = new Instances(new FileReader(new File(origDataSrcName + ".test")));
    testData.setClassIndex(testData.numAttributes() - 1);
    Evaluation eval = new Evaluation(testData);
    eval.evaluateModel(c, testData);
    System.out.println(eval.toSummaryString());
    assertNotNull(c);
}
public static void shuffle(String file, int classindex, String outputFile) throws IOException {
    // create the stream to read the data
    ArffFileStream stream = new ArffFileStream(file, classindex);
    InstancesHeader header = stream.getHeader();
    ArrayList<Instance> instanceList = new ArrayList<Instance>();
    System.out.println("Loading data ...");
    int cnt = 0;
    while (stream.hasMoreInstances()) {
        if (++cnt % 10000 == 0) {
            System.out.println("Read " + cnt + " items.");
        }
        instanceList.add(stream.nextInstance());
    }
    System.out.println("Read all items ... shuffling.");
    Collections.shuffle(instanceList);
    ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
    for (int i = 0; i < header.numAttributes(); i++) {
        attributeList.add(header.attribute(i));
    }
    Instances dataSet = new Instances("reduced", attributeList, 2);
    for (Instance inst : instanceList) {
        dataSet.add(inst);
        inst.setDataset(dataSet);
    }
    System.out.println("Writing output ...");
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataSet);
    saver.setFile(new File(outputFile));
    saver.writeBatch();
    System.out.println("Done.");
}
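If the data already fits in memory as weka.core.Instances, the same shuffle-and-save can be done without the MOA stream machinery; a minimal sketch (the method name and seed are ours):

// In-memory alternative: load, shuffle with a fixed seed, save as ARFF.
static void shuffleArff(String file, String outputFile) throws Exception {
    Instances data = ConverterUtils.DataSource.read(file);
    data.randomize(new java.util.Random(42)); // fixed seed for reproducibility
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(new File(outputFile));
    saver.writeBatch();
}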
public void saveFile(Instances dataset, String type) {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    try {
        saver.setFile(new File(folder + "train/" + type + ".arff"));
        saver.writeBatch();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public void ProcessTables(String tableType) {
    DataBase();
    int execCount = 0;
    try {
        String SQL = "SELECT * from ArtTable where HasXML='yes' and specPragmatic='" + tableType + "' order by RAND() limit 200";
        Statement st = conn.createStatement();
        Instances instances = CreateInstances();
        FastVector fvWekaAttributes = new FastVector(128);
        rs = st.executeQuery(SQL);
        while (rs.next()) {
            Instance iExample = processTable(rs.getInt(1));
            instances.add(iExample);
            execCount++;
            if (execCount > 10000) {
                // periodically recycle the database connection
                conn.close();
                DataBase();
                execCount = 0;
            }
        }
        System.out.println(instances.toString());
        ArffSaver saver = new ArffSaver();
        saver.setInstances(instances);
        saver.setFile(new File("spptest.arff"));
        // saver.setDestination(new File("./data/test.arff")); // **not** necessary in 3.5.4 and later
        saver.writeBatch();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
public void ProcessTables(int[] table_array) {
    DataBase();
    int execCount = 0;
    try {
        String SQL = "SELECT * from ArtTable where HasXML='yes' and idTable in " + Arrays.toString(table_array);
        SQL = SQL.replace("[", "(").replace("]", ")"); // turn the array's [..] into SQL's (..)
        Statement st = conn.createStatement();
        Instances instances = CreateInstances();
        FastVector fvWekaAttributes = new FastVector(48);
        rs = st.executeQuery(SQL);
        while (rs.next()) {
            Instance iExample = processTable(rs.getInt(1));
            instances.add(iExample);
            execCount++;
            if (execCount > 10000) {
                // periodically recycle the database connection
                conn.close();
                DataBase();
                execCount = 0;
            }
        }
        System.out.println(instances.toString());
        ArffSaver saver = new ArffSaver();
        saver.setInstances(instances);
        saver.setFile(new File("spptest10.arff"));
        // saver.setDestination(new File("./data/test.arff")); // **not** necessary in 3.5.4 and later
        saver.writeBatch();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
public static void runExperiment(ExperimentalArguments expSettings, Instances train, Instances test, SimpleBatchFilter transformer, String fullWriteLocation, String additionalDataFilePath) throws Exception {
    // this is hacky, but will do; Filter.useFilter is Weka's (weird) way of applying a batch transform
    Instances[] transforms = setContractDataAndProcess(expSettings, train, test, transformer);
    Instances transformed_train = transforms[0];
    Instances transformed_test = transforms[1];
    ArffSaver saver = new ArffSaver();
    String transformed_train_output = fullWriteLocation + expSettings.datasetName + "_TRAIN.arff";
    String transformed_test_output = fullWriteLocation + expSettings.datasetName + "_TEST.arff";
    saver.setInstances(transformed_train);
    saver.setFile(new File(transformed_train_output));
    saver.writeBatch();
    saver.setInstances(transformed_test);
    saver.setFile(new File(transformed_test_output));
    saver.writeBatch();
    writeAdditionalTransformData(expSettings, transformer, additionalDataFilePath);
}