下面列出了 org.apache.hadoop.mapred.SequenceFileOutputFormat#setOutputCompressionType() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Builds a {@link RecordWriter} for the given path.
 *
 * <p>A fresh {@link JobConf} is derived from the wrapped configuration. When compression is
 * enabled, the intermediate codec and the intermediate compression type are read from that
 * configuration (using the Hive {@code COMPRESSINTERMEDIATECODEC} and
 * {@code COMPRESSINTERMEDIATETYPE} keys) and, if non-blank, applied as the output compressor
 * class and the SequenceFile output compression type.
 *
 * @param path the path passed on to {@code hiveShim.getHiveRecordWriter}
 * @return the writer produced by the Hive shim
 * @throws FlinkHiveException wrapping any exception raised while preparing the configuration
 *     or creating the writer
 */
public RecordWriter createRecordWriter(Path path) {
    try {
        checkInitialize();
        JobConf jobConf = new JobConf(confWrapper.conf());

        if (isCompressed) {
            // Codec: only applied when a non-blank class name is configured.
            String codecName = jobConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
            boolean codecConfigured = !StringUtils.isNullOrWhitespaceOnly(codecName);
            if (codecConfigured) {
                // The class is resolved (and initialized) through the thread's context class loader.
                ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
                //noinspection unchecked
                Class<? extends CompressionCodec> codecClass =
                        (Class<? extends CompressionCodec>)
                                Class.forName(codecName, true, contextLoader);
                FileOutputFormat.setOutputCompressorClass(jobConf, codecClass);
            }

            // Compression type: the configured value must name a SequenceFile.CompressionType constant.
            String compressionTypeName =
                    jobConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
            boolean typeConfigured = !StringUtils.isNullOrWhitespaceOnly(compressionTypeName);
            if (typeConfigured) {
                SequenceFileOutputFormat.setOutputCompressionType(
                        jobConf, SequenceFile.CompressionType.valueOf(compressionTypeName));
            }
        }

        return hiveShim.getHiveRecordWriter(
                jobConf,
                hiveOutputFormatClz,
                recordSerDe.getSerializedClass(),
                isCompressed,
                tableProperties,
                path);
    } catch (Exception cause) {
        throw new FlinkHiveException(cause);
    }
}
/**
 * Assembles the {@link JobConf} for a data join job from positional arguments.
 *
 * <p>Argument layout:
 * <ul>
 *   <li>{@code args[0]} - input directory</li>
 *   <li>{@code args[1]} - output directory (deleted recursively before the job is configured)</li>
 *   <li>{@code args[2]} - {@code "text"} (case-insensitive) selects {@code TextInputFormat};
 *       anything else selects {@code SequenceFileInputFormat}</li>
 *   <li>{@code args[3]} - number of reduce tasks</li>
 *   <li>{@code args[4]} - mapper, resolved through {@code getClassByName}</li>
 *   <li>{@code args[5]} - reducer, resolved through {@code getClassByName}</li>
 *   <li>{@code args[6]} - map output value class, resolved through {@code getClassByName}</li>
 *   <li>{@code args[7]} - {@code "text"} (case-insensitive) selects {@code TextOutputFormat}
 *       with {@code Text} values; anything else selects {@code SequenceFileOutputFormat} and is
 *       resolved through {@code getClassByName} as the output value class</li>
 *   <li>{@code args[8]} - optional maximum number of values per group (defaults to 100)</li>
 *   <li>{@code args[9]} - optional job name suffix (defaults to the empty string)</li>
 * </ul>
 *
 * @param args positional job arguments as described above
 * @return the configured job
 * @throws IOException propagated from the file-system calls or helpers invoked here
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
    final String inputDir = args[0];
    final String outputDir = args[1];

    // Input side: "text" picks the text format, everything else falls back to sequence files.
    final Class inputFormat;
    if (args[2].equalsIgnoreCase("text")) {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    } else {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
        inputFormat = SequenceFileInputFormat.class;
    }

    final int reducerCount = Integer.parseInt(args[3]);
    final Class mapperClass = getClassByName(args[4]);
    final Class reducerClass = getClassByName(args[5]);
    final Class mapOutputValueClass = getClassByName(args[6]);

    // Output side: for non-"text" values the argument doubles as the output value class name.
    final Class outputFormat;
    final Class outputValueClass;
    if (args[7].equalsIgnoreCase("text")) {
        System.out.println("Using TextOutputFormat: " + args[7]);
        outputFormat = TextOutputFormat.class;
        outputValueClass = Text.class;
    } else {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    }

    // Trailing arguments are optional.
    final long valuesPerGroupLimit = args.length > 8 ? Long.parseLong(args[8]) : 100;
    final String jobLabel = args.length > 9 ? args[9] : "";

    Configuration baseConf = new Configuration();
    JobConf jobConf = new JobConf(baseConf, DataJoinJob.class);
    jobConf.setJobName("DataJoinJob: " + jobLabel);

    // Clear any previous output before wiring up the job.
    FileSystem fileSystem = FileSystem.get(baseConf);
    fileSystem.delete(new Path(outputDir), true);

    FileInputFormat.setInputPaths(jobConf, inputDir);
    jobConf.setInputFormat(inputFormat);
    jobConf.setMapperClass(mapperClass);

    FileOutputFormat.setOutputPath(jobConf, new Path(outputDir));
    jobConf.setOutputFormat(outputFormat);
    // Set unconditionally, including when TextOutputFormat was selected above.
    SequenceFileOutputFormat.setOutputCompressionType(
            jobConf, SequenceFile.CompressionType.BLOCK);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(mapOutputValueClass);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(outputValueClass);
    jobConf.setReducerClass(reducerClass);

    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(reducerCount);
    jobConf.setLong("datajoin.maxNumOfValuesPerGroup", valuesPerGroupLimit);
    return jobConf;
}
/**
 * Creates and configures the {@link JobConf} of a data join job.
 *
 * <p>The arguments are positional: input directory, output directory, input format
 * ({@code "text"} or anything else for sequence files), number of reducers, mapper, reducer,
 * map output value class, output format / output value class ({@code "text"} or a name passed
 * to {@code getClassByName}), and optionally the maximum number of values per group
 * (default 100) and a job name suffix (default empty).
 *
 * <p>The output directory is deleted recursively before the job is configured.
 *
 * @param args positional job arguments; at least eight entries are read
 * @return the configured job
 * @throws IOException propagated from the file-system calls or helpers invoked here
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
    String sourceDir = args[0];
    String targetDir = args[1];

    boolean textInput = args[2].equalsIgnoreCase("text");
    Class inputFormat;
    if (!textInput) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
        inputFormat = SequenceFileInputFormat.class;
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }

    int reduceTasks = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapValueClass = getClassByName(args[6]);

    boolean textOutput = args[7].equalsIgnoreCase("text");
    Class outputFormat;
    Class outputValueClass;
    if (!textOutput) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        // The same argument names the value class written to the sequence file.
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
        outputFormat = TextOutputFormat.class;
        outputValueClass = Text.class;
    }

    // Optional trailing arguments with their defaults.
    long maxValuesPerGroup = 100;
    if (args.length > 8) {
        maxValuesPerGroup = Long.parseLong(args[8]);
    }
    String nameSuffix = (args.length > 9) ? args[9] : "";

    Configuration conf = new Configuration();
    JobConf dataJoinJob = new JobConf(conf, DataJoinJob.class);
    dataJoinJob.setJobName("DataJoinJob: " + nameSuffix);

    // Remove stale output (recursively) before configuring paths.
    FileSystem.get(conf).delete(new Path(targetDir), true);

    // Input and map side.
    FileInputFormat.setInputPaths(dataJoinJob, sourceDir);
    dataJoinJob.setInputFormat(inputFormat);
    dataJoinJob.setMapperClass(mapper);

    // Output side; block compression is requested regardless of the chosen output format.
    FileOutputFormat.setOutputPath(dataJoinJob, new Path(targetDir));
    dataJoinJob.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(
            dataJoinJob, SequenceFile.CompressionType.BLOCK);

    // Key/value types and reducer.
    dataJoinJob.setMapOutputKeyClass(Text.class);
    dataJoinJob.setMapOutputValueClass(mapValueClass);
    dataJoinJob.setOutputKeyClass(Text.class);
    dataJoinJob.setOutputValueClass(outputValueClass);
    dataJoinJob.setReducerClass(reducer);

    // Task counts and the per-group value cap.
    dataJoinJob.setNumMapTasks(1);
    dataJoinJob.setNumReduceTasks(reduceTasks);
    dataJoinJob.setLong("datajoin.maxNumOfValuesPerGroup", maxValuesPerGroup);
    return dataJoinJob;
}
/**
 * Assembles the {@link JobConf} for a data join job from positional arguments.
 *
 * <p>Argument layout:
 * <ul>
 *   <li>{@code args[0]} - input directory</li>
 *   <li>{@code args[1]} - output directory (deleted recursively before the job is configured)</li>
 *   <li>{@code args[2]} - {@code "text"} (case-insensitive) selects {@code TextInputFormat};
 *       anything else selects {@code SequenceFileInputFormat}</li>
 *   <li>{@code args[3]} - number of reduce tasks</li>
 *   <li>{@code args[4]} - mapper, resolved through {@code getClassByName}</li>
 *   <li>{@code args[5]} - reducer, resolved through {@code getClassByName}</li>
 *   <li>{@code args[6]} - map output value class, resolved through {@code getClassByName}</li>
 *   <li>{@code args[7]} - {@code "text"} (case-insensitive) selects {@code TextOutputFormat}
 *       with {@code Text} values; anything else selects {@code SequenceFileOutputFormat} and is
 *       resolved through {@code getClassByName} as the output value class</li>
 *   <li>{@code args[8]} - optional maximum number of values per group (defaults to 100)</li>
 *   <li>{@code args[9]} - optional job name suffix (defaults to the empty string)</li>
 * </ul>
 *
 * @param args positional job arguments as described above
 * @return the configured job
 * @throws IOException propagated from the file-system calls or helpers invoked here
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
    String inputDir = args[0];
    String outputDir = args[1];

    Class inputFormat = SequenceFileInputFormat.class;
    if (!args[2].equalsIgnoreCase("text")) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }

    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);

    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (!args[7].equalsIgnoreCase("text")) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        // For sequence-file output the same argument names the output value class.
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }

    // Trailing arguments are optional.
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }

    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    // Clear previous output. The single-argument delete(Path) overload is deprecated, so the
    // recursive flag is passed explicitly.
    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);

    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormat(inputFormat);
    job.setMapperClass(mapper);

    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    // Set unconditionally, including when TextOutputFormat was selected above.
    SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
/**
 * Creates and configures the {@link JobConf} of a data join job.
 *
 * <p>The arguments are positional:
 * <ol start="0">
 *   <li>input directory</li>
 *   <li>output directory (deleted recursively before the job is configured)</li>
 *   <li>input format: {@code "text"} (case-insensitive) for {@code TextInputFormat}, anything
 *       else for {@code SequenceFileInputFormat}</li>
 *   <li>number of reduce tasks</li>
 *   <li>mapper, resolved through {@code getClassByName}</li>
 *   <li>reducer, resolved through {@code getClassByName}</li>
 *   <li>map output value class, resolved through {@code getClassByName}</li>
 *   <li>output: {@code "text"} (case-insensitive) for {@code TextOutputFormat} with
 *       {@code Text} values, otherwise a name resolved through {@code getClassByName} as the
 *       output value class and written with {@code SequenceFileOutputFormat}</li>
 *   <li>optional: maximum number of values per group (default 100)</li>
 *   <li>optional: job name suffix (default empty)</li>
 * </ol>
 *
 * @param args positional job arguments; at least eight entries are read
 * @return the configured job
 * @throws IOException propagated from the file-system calls or helpers invoked here
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
    String inputDir = args[0];
    String outputDir = args[1];

    Class inputFormat = SequenceFileInputFormat.class;
    if (!args[2].equalsIgnoreCase("text")) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }

    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);

    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (!args[7].equalsIgnoreCase("text")) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        // The same argument names the value class written to the sequence file.
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }

    // Optional trailing arguments with their defaults.
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }

    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    // Remove stale output. Use the explicit recursive overload rather than the deprecated
    // single-argument delete(Path).
    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);

    // Input and map side.
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormat(inputFormat);
    job.setMapperClass(mapper);

    // Output side; block compression is requested regardless of the chosen output format.
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.BLOCK);

    // Key/value types and reducer.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    // Task counts and the per-group value cap.
    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}