The following are example usages of com.google.common.io.Files#newWriter(), taken from open-source projects on GitHub.
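Before the project-specific snippets, here is a minimal, self-contained sketch of the basic call (the class name, target file, and CSV content are only illustrative): Files.newWriter(File, Charset) returns a BufferedWriter that writes to the given file in the given charset, so a try-with-resources block is enough to ensure the writer is flushed and closed.

import com.google.common.io.Files;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class NewWriterExample {
  public static void main(String[] args) throws IOException {
    // illustrative target file; any writable path works
    File out = File.createTempFile("newWriter-example", ".csv");
    // Files.newWriter wraps the file in a BufferedWriter using the given charset;
    // try-with-resources flushes and closes it when the block exits
    try (BufferedWriter writer = Files.newWriter(out, StandardCharsets.UTF_8)) {
      writer.write("id,username,email");
      writer.newLine();
      writer.write("1,test,test@example.com");
      writer.newLine();
    }
  }
}
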
@BeforeClass
public static void createSourceDataset() throws Exception {
  repoUri = "hdfs://" + getDFS().getUri().getAuthority() + "/tmp/data";
  TestUtil.run("delete", source, "-r", repoUri, "-d", "target/data");
  String csv = "/tmp/users.csv";
  BufferedWriter writer = Files.newWriter(
      new File(csv), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("1,test,test@example.com\n");
  writer.append("2,user,user@example.com\n");
  writer.append("3,user3,user3@example.com\n");
  writer.append("4,user4,user4@example.com\n");
  writer.append("5,user5,user5@example.com\n");
  writer.append("6,user6,user6@example.com\n");
  writer.close();
  TestUtil.run("-v", "csv-schema", csv, "-o", avsc, "--class", "User",
      "--require", "id");
  TestUtil.run("create", source, "-s", avsc,
      "-r", repoUri, "-d", "target/data");
  TestUtil.run("csv-import", csv, source, "-r", repoUri, "-d", "target/data");
}

@BeforeClass
public static void createSourceDataset() throws Exception {
  TestUtil.run("delete", source, "--use-local", "-d", "target/data");
  String csv = "target/users.csv";
  BufferedWriter writer = Files.newWriter(
      new File(csv), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("1,test,test@example.com\n");
  writer.append("2,user,user@example.com\n");
  writer.close();
  TestUtil.run("-v", "csv-schema", csv, "-o", avsc, "--class", "User");
  TestUtil.run("create", source, "-s", avsc,
      "--use-local", "-d", "target/data");
  TestUtil.run("csv-import", csv, source, "--use-local", "-d", "target/data");
}

@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);
  String featureConstructorName = UimaContextHelper.getConfigParameterStringValue(context,
      "feature-constructor");
  featureConstructor = ProviderCache.getProvider(featureConstructorName,
      FeatureConstructorProvider.class);
  String classifierName = UimaContextHelper.getConfigParameterStringValue(context, "classifier");
  classifier = ProviderCache.getProvider(classifierName, ClassifierProvider.class);
  String predictFilename = UimaContextHelper.getConfigParameterStringValue(context,
      "predict-file", null);
  limit = UimaContextHelper.getConfigParameterIntValue(context, "limit", 1);
  if (predictFilename != null) {
    try {
      predictFileWriter = Files.newWriter(new File(predictFilename), Charsets.UTF_8);
    } catch (FileNotFoundException e) {
      throw new ResourceInitializationException(e);
    }
  }
}

@BeforeClass
public static void createDatasetFromCSV() throws Exception {
  String sample = "target/users.csv";
  String avsc = "target/user.avsc";
  BufferedWriter writer = Files.newWriter(
      new File(sample), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("1,test,test@example.com\n");
  writer.append("2,user,user@example.com\n");
  writer.close();
  TestUtil.run("delete", "users", "--use-local", "-d", "target/data");
  TestUtil.run("-v", "csv-schema", sample, "-o", avsc, "--class", "User");
  TestUtil.run("-v", "create", "users",
      "--use-local", "-d", "target/data", "-s", avsc);
  TestUtil.run("-v", "csv-import", sample,
      "--use-local", "-d", "target/data", "users");
}

@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
  super.collectionProcessComplete();
  if (featureFilename != null) {
    try {
      BufferedWriter bw = Files.newWriter(new File(featureFilename), Charsets.UTF_8);
      Set<String> feats = feat2value.columnKeySet();
      bw.write("\t\t" + feats.stream().collect(joining("\t")) + "\n");
      bw.write(feat2value.rowMap().entrySet().stream().map(e -> e.getKey() + "\t" +
          feats.stream().map(feat -> e.getValue().getOrDefault(feat, 0.0))
              .map(String::valueOf).collect(joining("\t"))).collect(joining("\n")));
      bw.close();
    } catch (IOException ex) {
      throw new AnalysisEngineProcessException(ex);
    }
  }
}

@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
  if (LOG.isInfoEnabled()) {
    LOG.info("Total true count: {}", Y.stream().filter("true"::equals).count());
    LOG.info("Total false count: {}", Y.stream().filter("false"::equals).count());
  }
  super.collectionProcessComplete();
  if (cvPredictFile != null) {
    try (BufferedWriter bw = Files.newWriter(new File(cvPredictFile), Charsets.UTF_8)) {
      List<Double> results = classifier.crossTrainInfer(X, Y, resampleType, "true");
      for (int i = 0; i < iduris.size(); i++) {
        bw.write(iduris.get(i) + "\t" + results.get(i) + "\n");
      }
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
  classifier.train(X, Y, resampleType, true);
}

@Test
public void testDirectoryImport() throws Exception {
  new File("target/sample").mkdir();
  BufferedWriter writer = Files.newWriter(
      new File("target/sample/one.csv"), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("1,test,test@example.com\n");
  writer.close();
  writer = Files.newWriter(
      new File("target/sample/two.csv"), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("2,user,user@example.com\n");
  writer.close();
  command.targets = Lists.newArrayList("target/sample", datasetName);
  command.run();
  Assert.assertEquals("Should contain expected records",
      expected, DatasetTestUtilities.materialize(dataset));
  verify(console).trace(contains("repo:file:target/data"));
  verify(console).info("Added {} records to \"{}\"", 2L, datasetName);
  verifyNoMoreInteractions(console);
}

@BeforeClass
public static void setup() throws IOException {
  localfs = LocalFileSystem.getInstance();
  BufferedWriter writer = Files.newWriter(
      new File(userFile.toString()), Charset.forName("UTF-8"));
  for (String line : lines) {
    writer.write(line);
    writer.newLine();
  }
  writer.flush();
  writer.close();
}

@Test
public void rstIndex() throws IOException, TemplateException {
  final File outputFile = new File(this.outputDirectory, "index.rst");
  final String templateName = "rst/index.rst.ftl";
  Template template = configuration.getTemplate(templateName);
  try (Writer writer = Files.newWriter(outputFile, Charsets.UTF_8)) {
    process(writer, template, this.plugin);
  }
}

@Test
public void readmeMD() throws IOException, TemplateException {
  final File outputFile = new File("target", "README.md");
  Template template = configuration.getTemplate("md/README.md.ftl");
  try (Writer writer = Files.newWriter(outputFile, Charsets.UTF_8)) {
    process(writer, template, this.plugin);
  }
}

@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
  super.collectionProcessComplete();
  if (cvPredictFile != null) {
    try (BufferedWriter bw = Files.newWriter(new File(cvPredictFile), Charsets.UTF_8)) {
      Set<Double> f1s = new HashSet<>();
      List<List<String>> results = classifier
          .crossTrainPredictMultiLabel(trainX, trainY, RESAMPLE_TYPE, limit);
      for (int i = 0; i < qids.size(); i++) {
        String qid = qids.get(i);
        List<String> predLabels = results.get(i);
        // calculate f1
        Set<String> gsLabels = qid2labels.get(qid);
        f1s.add(2.0 * Sets.intersection(gsLabels, ImmutableSet.copyOf(predLabels)).size() /
            (gsLabels.size() + predLabels.size()));
        // write to file
        bw.write(qid + "\t" + predLabels.stream().collect(joining(";")) + "\n");
      }
      f1s.stream().mapToDouble(Double::doubleValue).average()
          .ifPresent(f1 -> LOG.info("Micro F1: {}", f1));
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
  LOG.info("Train Classifier");
  // changed CV to false, as a "micro f1" will be calculated if the cvPredictFile is specified
  classifier.trainMultiLabel(trainX, trainY, RESAMPLE_TYPE, false);
}

@BeforeClass
public static void createCSVSchemaAndSample() throws Exception {
  sample = "target/users.csv";
  avsc = "target/user.avsc";
  datasetName = "users";
  BufferedWriter writer = Files.newWriter(
      new File(sample), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("1,test,test@example.com\n");
  writer.append("2,user,user@example.com\n");
  writer.close();
  TestUtil.run("-v", "csv-schema", sample, "-o", avsc, "--class", "User",
      "--require", "id");
  GenericRecordBuilder builder = new GenericRecordBuilder(
      new Schema.Parser().parse(new File(avsc)));
  builder.set("id", 1L);
  builder.set("username", "test");
  builder.set("email", "test@example.com");
  expected.add(builder.build());
  builder.set("id", 2L);
  builder.set("username", "user");
  builder.set("email", "user@example.com");
  expected.add(builder.build());
}

@BeforeClass
public static void createDatasetFromCSV() throws Exception {
  String sample = "target/users.csv";
  String avsc = "target/user.avsc";
  BufferedWriter writer = Files.newWriter(
      new File(sample), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  writer.append("1,test,test@example.com\n");
  writer.append("2,user,user@example.com\n");
  writer.close();
  TestUtil.run("delete", "dataset:file:target/data/users");
  TestUtil.run("-v", "csv-schema", sample, "-o", avsc, "--class", "User");
  TestUtil.run("-v", "create", "dataset:file:target/data/users", "-s", avsc,
      "-f", "parquet");
  TestUtil.run("-v", "csv-import", sample, "dataset:file:target/data/users");
  USER_SCHEMA = Schemas.fromAvsc(new File(avsc));
  FileSystem fs = LocalFileSystem.getInstance();
  FileStatus[] stats = fs.listStatus(new Path("target/data/users"));
  Path parquetFile = null;
  for (FileStatus stat : stats) {
    if (stat.getPath().toString().endsWith(".parquet")) {
      parquetFile = stat.getPath();
      break;
    }
  }
  // make a directory with the Parquet file
  fs.mkdirs(existingDataPath);
  fs.copyFromLocalFile(parquetFile, existingDataPath);
  fs.mkdirs(existingPartitionedPathWithPartition);
  fs.copyFromLocalFile(parquetFile, existingPartitionedPathWithPartition);
}

@Test
public void testCSVSchemaToHDFSFile() throws Exception {
  String csvSample = "target/users.csv";
  BufferedWriter writer = Files.newWriter(
      new File(csvSample), CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id, username, email\n");
  writer.append("1, test, test@example.com\n");
  writer.close();
  Schema schema = SchemaBuilder.record("User").fields()
      .optionalLong("id")
      .optionalString("username")
      .optionalString("email")
      .endRecord();
  String hdfsSchemaPath = "hdfs:/tmp/schemas/csv.avsc";
  CSVSchemaCommand command = new CSVSchemaCommand(console);
  command.setConf(getConfiguration());
  command.samplePaths = Lists.newArrayList(csvSample);
  command.outputPath = hdfsSchemaPath;
  command.recordName = "User";
  int rc = command.run();
  Assert.assertEquals("Should return success code", 0, rc);
  String fileContent = CharStreams.toString(
      new InputStreamReader(getDFS().open(new Path(hdfsSchemaPath)), "utf8"));
  Assert.assertTrue("File should contain pretty printed schema",
      TestUtil.matchesSchema(schema).matches(fileContent));
  verifyNoMoreInteractions(console);
}

@RequiresNonNull("startupLogger")
private static void maskCentralData(File dataDir) throws Exception {
  File maskScriptFile = File.createTempFile("mask-central-data", ".sql");
  PrintWriter out = new PrintWriter(Files.newWriter(maskScriptFile, UTF_8));
  try {
    // mask agent ids and agent rollup ids
    out.println("update trace set headline = left(headline, position(': ', headline) + 1)"
        + " || " + applyHash("substr(headline, position(': ', headline) + 2)")
        + " where transaction_type <> 'Web' and headline like '%: %';");
    // mask query strings
    out.println("update trace set headline = left(headline, position('?', headline))"
        + " || " + applyHash("substr(headline, position('?', headline) + 1)")
        + " where transaction_type = 'Web' and headline like '%?%';");
    // mask usernames
    out.println("update trace set user = " + applyHash("user")
        + " where transaction_type = 'Web'" + " and user is not null;");
  } finally {
    out.close();
  }
  RunScript.main("-url", "jdbc:h2:" + dataDir.getPath() + File.separator + "data", "-user",
      "sa", "-script", maskScriptFile.getPath());
  if (!maskScriptFile.delete()) {
    startupLogger.info("failed to clean-up, cannot delete file: {}",
        maskScriptFile.getPath());
  }
  // re-create data file to eliminate any trace of previous values
  recover(dataDir);
}

/**
 * Stores the scene in a file.
 */
public void writeScene(Scene scene) throws IOException {
  File file = new File(scene.getSceneRoot(), scene.getName());
  File parent = file.getParentFile();
  if (!parent.exists() && !parent.mkdirs()) {
    throw new IllegalStateException("Failed to create new directory: " + parent);
  }
  // close the writer when serialization finishes so the buffered output is flushed to disk
  try (BufferedWriter bufferedWriter = Files.newWriter(
      file, Charset.forName(SceneSerializationConstant.FILE_CHARSET))) {
    SceneSerializer sceneSerializer = new SceneSerializer();
    sceneSerializer.serialize(scene, bufferedWriter);
  }
}

private java.io.File createBackupFile(Object backup) throws IOException {
  java.io.File backupFile = new java.io.File(TEST);
  BufferedWriter writer = Files.newWriter(backupFile, Charset.defaultCharset());
  writer.write(backup.toString());
  writer.flush();
  writer.close();
  return backupFile;
}

@Before
public void createDatasets() throws Exception {
  repoUri = "hdfs://" + getDFS().getUri().getAuthority() + "/tmp/data";
  TestUtil.run("delete", unpartitioned, "-r", repoUri, "-d", "target/data");
  File csvFile = temp.newFile("users.csv");
  csvFile.delete();
  String csv = csvFile.toString();
  BufferedWriter writer = Files.newWriter(
      csvFile, CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  numRecords = 30;
  for (int i = 0; i < numRecords; i++) {
    writer.append(i + ",test" + i + ",test" + i + "@example.com\n");
  }
  writer.close();
  TestUtil.run("-v", "csv-schema", csv, "-o", avsc, "--class", "User");
  TestUtil.run("create", unpartitioned, "-s", avsc,
      "-r", repoUri, "-d", "target/data");
  URI dsUri = URIBuilder.build("repo:" + repoUri, "default", partitioned);
  Datasets.<Object, Dataset<Object>>create(dsUri, new DatasetDescriptor.Builder()
      .partitionStrategy(new PartitionStrategy.Builder()
          .hash("id", 2)
          .build())
      .schema(SchemaBuilder.record("User").fields()
          .requiredLong("id")
          .optionalString("username")
          .optionalString("email")
          .endRecord())
      .build(), Object.class);
  TestUtil.run("csv-import", csv, unpartitioned, "-r", repoUri, "-d", "target/data");
  TestUtil.run("csv-import", csv, partitioned, "-r", repoUri, "-d", "target/data");
}

public static void main(final String[] args) throws IOException {
  // TODO re-run BIDE...
  final int topN = 50;
  final String baseDir = "/afs/inf.ed.ac.uk/user/j/jfowkes/Code/Sequences/";
  final String[] datasets = new String[] { "alice_punc", "GAZELLE1", "jmlr", "SIGN", "aslbu",
      "aslgt", "auslan2", "context", "pioneer", "skating" };
  // Set up logging
  final FileOutputStream outFile = new FileOutputStream(baseDir + "redundancy.txt");
  final TeeOutputStream out = new TeeOutputStream(System.out, outFile);
  final PrintStream ps = new PrintStream(out);
  System.setOut(ps);
  final Writer writer = Files.newWriter(new File(baseDir + "redundancy.tex"), Charsets.UTF_8);
  for (int i = 0; i < datasets.length; i++) {
    System.out.println("===== Dataset: " + datasets[i]);
    // ISM sequences
    final Map<Sequence, Double> intSequences = SequenceMiningCore
        .readISMSequences(new File(baseDir + "Logs/" + datasets[i] + ".log"));
    calculateRedundancyStats("ISM", intSequences, topN, writer);
    // SQS sequences
    final Map<Sequence, Double> sqsSequences = StatisticalSequenceMining
        .readSQSSequences(new File(baseDir + "SQS/" + datasets[i] + ".txt"));
    calculateRedundancyStats("SQS", sqsSequences, topN, writer);
    // GoKrimp sequences
    final Map<Sequence, Double> gokrimpSequences = StatisticalSequenceMining
        .readGoKrimpSequences(new File(baseDir + "GoKrimp/" + datasets[i] + ".txt"));
    calculateRedundancyStats("GoKrimp", gokrimpSequences, topN, writer);
    // BIDE sequences
    final Map<Sequence, Integer> bideSequences = FrequentSequenceMining
        .readFrequentSequences(new File(baseDir + "BIDE/" + datasets[i] + ".txt"));
    calculateRedundancyStats("BIDE", bideSequences, topN, writer);
    System.out.println();
  }
  writer.close();
}

/**
 * Constructs a new {@link XmlReporter}
 *
 * @param client the client
 * @param output the output file
 * @throws IOException if an error occurs
 */
public XmlReporter(LintCliClient client, File output) throws IOException {
  super(client, output);
  mWriter = new BufferedWriter(Files.newWriter(output, Charsets.UTF_8));
}