The following are example usages of org.apache.avro.mapred.AvroJob, collected from open-source projects.
/**
 * Creates a JobConf for a map-reduce job. Loads the input schema from the input files.
 *
 * @param mapperClass AvroMapper subclass for the mapper.
 * @param reducerClass AvroReducer subclass for the reducer.
 * @param mapperOutputSchema Mapper output schema. Must be a Pair schema (see org.apache.avro.mapred.Pair).
 * @param outputSchema Reducer output schema
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Schema mapperOutputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  return conf;
}
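A caller would typically build the Pair map-output schema first and then hand mapper, reducer, and schemas to this overload. The sketch below is illustrative only: WordCountMapper and WordCountReducer are hypothetical AvroMapper/AvroReducer subclasses, and it assumes the enclosing job class exposes the createJobConf(...) overload shown above.

// Illustrative usage sketch (hypothetical mapper/reducer classes).
Schema mapOutputSchema = Pair.getPairSchema(Schema.create(Schema.Type.STRING),
                                            Schema.create(Schema.Type.LONG));
Schema outputSchema = Schema.create(Schema.Type.LONG);       // placeholder reducer output schema
JobConf conf = createJobConf(WordCountMapper.class,           // hypothetical AvroMapper subclass
                             WordCountReducer.class,          // hypothetical AvroReducer subclass
                             mapOutputSchema,
                             outputSchema);
JobClient.runJob(conf);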
/**
 * Creates a JobConf for a map-reduce job that uses a combiner. Loads the input schema from the
 * input files.
 *
 * @param mapperClass AvroMapper subclass for the mapper.
 * @param reducerClass AvroReducer subclass for the reducer.
 * @param combinerClass AvroReducer subclass for the combiner.
 * @param mapperOutputSchema Mapper output schema. Must be a Pair schema (see org.apache.avro.mapred.Pair).
 * @param outputSchema Reducer output schema
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Class<? extends AvroReducer> combinerClass,
                             Schema mapperOutputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  AvroJob.setCombinerClass(conf, combinerClass);
  AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  return conf;
}
/**
 * Creates a JobConf for a map-only job with an explicitly set input Schema.
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param inputSchema Schema of the input data.
 * @param outputSchema Schema of the mapper output
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Schema inputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, AvroReducer.class);
  AvroJob.setInputSchema(conf, inputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  conf.setNumReduceTasks(0);
  return conf;
}
/**
 * Creates a JobConf for a map-reduce job with an explicitly set input schema.
 *
 * @param mapperClass AvroMapper subclass for the mapper.
 * @param reducerClass AvroReducer subclass for the reducer.
 * @param inputSchema Schema of the input data.
 * @param mapperOutputSchema Mapper output schema. Must be a Pair schema (see org.apache.avro.mapred.Pair).
 * @param outputSchema Reducer output schema
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Schema inputSchema,
                             Schema mapperOutputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  AvroJob.setInputSchema(conf, inputSchema);
  AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  return conf;
}
/**
 * Creates a JobConf for a map-reduce job that uses a combiner and has an explicitly set input schema.
 *
 * @param mapperClass AvroMapper subclass for the mapper.
 * @param reducerClass AvroReducer subclass for the reducer.
 * @param combinerClass AvroReducer subclass for the combiner.
 * @param inputSchema Schema of the input data.
 * @param mapperOutputSchema Mapper output schema. Must be a Pair schema (see org.apache.avro.mapred.Pair).
 * @param outputSchema Reducer output schema
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Class<? extends AvroReducer> combinerClass,
                             Schema inputSchema,
                             Schema mapperOutputSchema,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  AvroJob.setCombinerClass(conf, combinerClass);
  AvroJob.setInputSchema(conf, inputSchema);
  AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  return conf;
}
@Override
public void setConf(Configuration conf)
{
  super.setConf(conf);
  if (conf == null)
  {
    return;
  }
  _outputSchema = AvroJob.getOutputSchema(conf);
  AvroDistributedCacheFileReader modelReader =
      new AvroDistributedCacheFileReader(new JobConf(conf));
  try
  {
    modelReader.build(conf.get(MODEL_PATH), _modelConsumer);
    _modelConsumer.done();
  }
  catch (IOException e)
  {
    e.printStackTrace();
  }
  _lambda = conf.getFloat(LAMBDA, 0);
  _ignoreValue = conf.getBoolean(BINARY_FEATURE, false);
  _logger.info("Loaded the model for test, size:" + _modelConsumer.get().size());
}
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
                                        JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                           org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }
  writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
                                    DEFAULT_SYNC_INTERVAL));
  // copy metadata from job
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                     URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
  }
}
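The TEXT_PREFIX/BINARY_PREFIX entries that this method copies onto the DataFileWriter are normally populated on the job-submission side. A minimal sketch, assuming the AvroJob.setOutputMeta helpers from org.apache.avro.mapred are available in the Avro version in use:

// Minimal sketch: metadata recorded on the JobConf here is what the loop above
// copies onto each output Avro container file.
JobConf job = new JobConf();
AvroJob.setOutputMeta(job, "build.version", "1.4.2");                   // text metadata
AvroJob.setOutputMeta(job, "build.fingerprint", new byte[] {1, 2, 3});  // binary metadata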
@Override
public boolean openForRead() throws Exception {
  // Pass the schema to the AvroInputFormat
  AvroJob.setInputSchema(jobConf, schema);
  // The avroWrapper required for the iteration
  avroWrapper = new AvroWrapper<>();
  return super.openForRead();
}
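For context, this is the kind of iteration the wrapper gets reused for. The sketch below is illustrative only; the accessor's real read loop lives in its superclass and is not part of this snippet.

// Illustrative only: reusing an AvroWrapper while reading with the old-API AvroInputFormat.
AvroInputFormat<GenericRecord> inputFormat = new AvroInputFormat<>();
for (InputSplit split : inputFormat.getSplits(jobConf, 1)) {
  RecordReader<AvroWrapper<GenericRecord>, NullWritable> reader =
      inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
  AvroWrapper<GenericRecord> wrapper = new AvroWrapper<>();
  NullWritable ignore = NullWritable.get();
  while (reader.next(wrapper, ignore)) {
    GenericRecord record = wrapper.datum();   // the deserialized Avro record
    // process record ...
  }
  reader.close();
}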
/**
 * Creates a JobConf for a map-only job. Automatically loads the schema from each input file.
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param outputSchema Schema of the mapper output
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Schema outputSchema) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, AvroReducer.class);
  AvroJob.setOutputSchema(conf, outputSchema);
  conf.setNumReduceTasks(0);
  return conf;
}
@Override
public void setConf(Configuration conf)
{
  super.setConf(conf);
  if (conf == null)
  {
    return;
  }
  _outputSchema = AvroJob.getOutputSchema(conf);
  _lambda = conf.getFloat(LAMBDA, 0);
  _ignoreValue = conf.getBoolean(BINARY_FEATURE, false);
  String modelPath = conf.get(MODEL_PATH, "");
  _logger.info("Going to read model files from distributed cache at:" + modelPath);
  int reduceTaskId = conf.getInt("mapred.task.partition", -1);
  _logger.info("The reduce task id=" + reduceTaskId);
  if (reduceTaskId < 0)
  {
    throw new RuntimeException("Can't read reduce task id from mapred.task.partition!");
  }
  int nReducers = conf.getInt(NUM_REDUCERS, -1);
  String lambdaKey = String.valueOf(_lambda) + "#";
  _consumer = new ReadLinearModelConsumer(lambdaKey, reduceTaskId, nReducers);
  AvroDistributedCacheFileReader modelReader =
      new AvroDistributedCacheFileReader(new JobConf(conf));
  try
  {
    modelReader.build(modelPath, _consumer);
    _consumer.done();
  }
  catch (IOException e)
  {
    throw new RuntimeException("Can't load model, error=" + e);
  }
  _logger.info("Loaded linear models, number of models loaded="
      + _consumer.get().size());
}
private JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                              Class<? extends AvroReducer> reducerClass) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  Schema inputSchema = Util.removeUnion(AvroUtils.getAvroInputSchema(conf));
  if (inputSchema == null)
  {
    throw new IllegalStateException("Input does not have schema info and/or input is missing.");
  }
  _logger.info("Input Schema=" + inputSchema.toString());
  List<Schema.Field> inputFields = inputSchema.getFields();
  Schema.Field predField =
      new Schema.Field("pred", Schema.create(Type.FLOAT), "", null);
  List<Schema.Field> outputFields = new LinkedList<Schema.Field>();
  for (Schema.Field field : inputFields)
  {
    outputFields.add(new Schema.Field(field.name(),
                                      field.schema(),
                                      field.doc(),
                                      null));
  }
  outputFields.add(predField);
  Schema outputSchema =
      Schema.createRecord("PerItemTestOutput",
                          "Test output for PerItemTest",
                          "com.linkedin.lab.regression.avro",
                          false);
  outputSchema.setFields(outputFields);
  AvroJob.setOutputSchema(conf, outputSchema);
  AvroJob.setMapOutputSchema(conf,
                             Pair.getPairSchema(Schema.create(Type.STRING), inputSchema));
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  return conf;
}
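For example, if the (hypothetical) input schema carried fields itemId (string) and label (float), the PerItemTestOutput record built here would carry itemId, label, and the appended pred field, while the map output schema would be a Pair of a string key and the full input record.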
private JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                              Class<? extends AvroReducer> reducerClass) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  Schema inputSchema = Util.removeUnion(AvroUtils.getAvroInputSchema(conf));
  if (inputSchema == null)
  {
    throw new IllegalStateException("Input does not have schema info and/or input is missing.");
  }
  _logger.info("Input Schema=" + inputSchema.toString());
  List<Schema.Field> inputFields = inputSchema.getFields();
  Schema.Field predField =
      new Schema.Field("pred", Schema.create(Type.FLOAT), "", null);
  List<Schema.Field> outputFields = new LinkedList<Schema.Field>();
  for (Schema.Field field : inputFields)
  {
    outputFields.add(new Schema.Field(field.name(),
                                      field.schema(),
                                      field.doc(),
                                      null));
  }
  outputFields.add(predField);
  Schema outputSchema =
      Schema.createRecord("AdmmTestOutput",
                          "Test output for AdmmTest",
                          "com.linkedin.lab.regression.avro",
                          false);
  outputSchema.setFields(outputFields);
  AvroJob.setOutputSchema(conf, outputSchema);
  AvroJob.setMapOutputSchema(conf,
                             Pair.getPairSchema(Schema.create(Type.FLOAT), outputSchema));
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  return conf;
}
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }
  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
  Configuration conf = super.getConf();
  JobConf job = new JobConf(conf);
  job.setJarByClass(BloomFilterCreator.class);
  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());
  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);
  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
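Drivers like this run(...) method are normally launched through ToolRunner. A minimal sketch of the usual entry point, assuming BloomFilterCreator implements org.apache.hadoop.util.Tool (which its use of getConf() suggests):

// Minimal ToolRunner entry point for a driver like the one above.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new BloomFilterCreator(), args);
  System.exit(exitCode);
}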
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
                                    JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }
  // Do max as core-default.xml has io.file.buffer.size as 4K
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max(
      job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));
  // copy metadata from job
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                     e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                     URLDecoder.decode(e.getValue(), "ISO-8859-1")
                         .getBytes("ISO-8859-1"));
  }
}
/**
 * Sets up various standard settings in the JobConf. You probably don't want to mess with this.
 *
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
protected JobConf createJobConf() throws IOException, URISyntaxException
{
  JobConf conf = new JobConf();
  conf.setJobName(getJobId());
  conf.setInputFormat(AvroInputFormat.class);
  conf.setOutputFormat(AvroOutputFormat.class);
  AvroOutputFormat.setDeflateLevel(conf, 9);
  String hadoop_ugi = _config.getString("hadoop.job.ugi", null);
  if (hadoop_ugi != null)
  {
    conf.set("hadoop.job.ugi", hadoop_ugi);
  }
  if (_config.getBoolean("is.local", false))
  {
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");
    conf.set("mapred.local.dir", "/tmp/map-red");
    _log.info("Running locally, no hadoop jar set.");
  }
  // set JVM options if present
  if (_config.containsKey("mapred.child.java.opts"))
  {
    conf.set("mapred.child.java.opts", _config.getString("mapred.child.java.opts"));
    _log.info("mapred.child.java.opts set to " + _config.getString("mapred.child.java.opts"));
  }
  if (_config.containsKey(INPUT_PATHS))
  {
    List<String> inputPathnames = _config.getStringList(INPUT_PATHS);
    for (String pathname : inputPathnames)
    {
      AvroUtils.addAllSubPaths(conf, new Path(pathname));
    }
    AvroJob.setInputSchema(conf, AvroUtils.getAvroInputSchema(conf));
  }
  if (_config.containsKey(OUTPUT_PATH))
  {
    Path path = new Path(_config.get(OUTPUT_PATH));
    AvroOutputFormat.setOutputPath(conf, path);
    if (_config.getBoolean("force.output.overwrite", false))
    {
      FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
      fs.delete(FileOutputFormat.getOutputPath(conf), true);
    }
  }
  // set all hadoop configs
  for (String key : _config.keySet())
  {
    String lowerCase = key.toLowerCase();
    if (lowerCase.startsWith(HADOOP_PREFIX))
    {
      String newKey = key.substring(HADOOP_PREFIX.length());
      conf.set(newKey, _config.get(key));
    }
  }
  return conf;
}
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }
  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));
  Configuration conf = super.getConf();
  DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
      "jdbc:mysql://localhost/sqoop_test" +
      "?user=hip_sqoop_user&password=password");
  JobConf job = new JobConf(conf);
  job.setJarByClass(DBImportMapReduce.class);
  job.setInputFormat(DBInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  AvroJob.setOutputSchema(job, Stock.SCHEMA$);
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());
  job.setMapperClass(Map.class);
  job.setNumMapTasks(4);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(AvroWrapper.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputKeyClass(AvroWrapper.class);
  job.setOutputValueClass(NullWritable.class);
  FileOutputFormat.setOutputPath(job, output);
  DBInputFormat.setInput(
      job,
      StockDbWritable.class,
      "select * from stocks",
      "SELECT COUNT(id) FROM stocks");
  RunningJob runningJob = JobClient.runJob(job);
  return runningJob.isSuccessful() ? 0 : 1;
}
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }
  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
  Configuration conf = super.getConf();
  JobConf job = new JobConf(conf);
  job.setJarByClass(AvroMixedMapReduce.class);
  job.set(AvroJob.INPUT_SCHEMA, Stock.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_SCHEMA, StockAvg.SCHEMA$.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());
  job.setInputFormat(AvroInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
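For orientation, here is a sketch of a mapper that would fit this driver's configuration (Avro input, plain Writable intermediate types). It is illustrative only; the getSymbol()/getOpen() accessors are hypothetical stand-ins for whichever fields the real Stock record exposes.

// Illustrative mapper matching the driver above: AvroInputFormat delivers
// AvroWrapper<Stock> keys with NullWritable values; intermediate types are Writables.
public static class Map extends MapReduceBase
    implements Mapper<AvroWrapper<Stock>, NullWritable, Text, DoubleWritable> {
  @Override
  public void map(AvroWrapper<Stock> key, NullWritable value,
                  OutputCollector<Text, DoubleWritable> output, Reporter reporter)
      throws IOException {
    Stock stock = key.datum();
    output.collect(new Text(stock.getSymbol().toString()),      // hypothetical accessor
                   new DoubleWritable(stock.getOpen()));         // hypothetical accessor
  }
}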
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }
  String inputPath = cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT);
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
  Configuration conf = super.getConf();
  JobConf job = new JobConf(conf);
  job.setJarByClass(SmallFilesMapReduce.class);
  job.set(AvroJob.INPUT_SCHEMA, SmallFilesWrite.SCHEMA.toString());
  job.setInputFormat(AvroInputFormat.class);
  job.setOutputFormat(TextOutputFormat.class);
  job.setMapperClass(Map.class);
  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setNumReduceTasks(0);
  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}