The following are example usages of org.apache.hadoop.hbase.HBaseConfiguration#addHbaseResources(), taken from open-source projects; the original source files can be viewed on GitHub.
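Before the individual examples, here is a minimal sketch of the pattern they all share (the class and method names in the sketch are illustrative and do not come from any of the projects below): HBaseConfiguration.addHbaseResources(conf) adds hbase-default.xml and hbase-site.xml to an existing Hadoop Configuration and returns that same Configuration, whereas HBaseConfiguration.create() builds a new one. A tool therefore typically passes in getConf() or job.getConfiguration() so that the HBase client settings are merged into the configuration it already carries.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class AddHbaseResourcesSketch {

    // Merge hbase-default.xml / hbase-site.xml (when present on the classpath)
    // into the supplied Configuration; addHbaseResources returns the same object.
    static Configuration withHbaseResources(Configuration base) {
        return HBaseConfiguration.addHbaseResources(base);
    }

    public static void main(String[] args) {
        Configuration conf = withHbaseResources(new Configuration());
        // Any value from hbase-site.xml is now visible on the merged configuration.
        System.out.println(conf.get("hbase.zookeeper.quorum"));
    }
}
The examples that follow apply this same call inside full MapReduce drivers and Phoenix command-line tools.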
@Override
public int run(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.addHbaseResources(getConf());
CommandLine cmdLine = null;
try {
cmdLine = parseOptions(args);
} catch (IllegalStateException e) {
printHelpAndExit(e.getMessage(), getOptions());
}
Class.forName(DriverManager.class.getName());
Connection conn = DriverManager.getConnection(
getJdbcUrl(cmdLine.getOptionValue(ZK_QUORUM_OPT.getOpt())));
return loadData(conf, cmdLine, conn);
}
@VisibleForTesting
public int executeTool() {
Configuration conf = HBaseConfiguration.addHbaseResources(getConf());
try (Connection conn = getConnection(conf)) {
ConnectionQueryServices queryServices = conn.unwrap(PhoenixConnection.class)
.getQueryServices();
boolean status = extractTablesAndIndexes(conn.unwrap(PhoenixConnection.class));
if (status) {
return executeTool(conn, queryServices, conf);
}
} catch (SQLException e) {
LOGGER.severe("Something went wrong in executing tool "+ e);
}
return -1;
}
@Override
public int run(String[] args) throws Exception {
CommandLine cmdLine;
try {
cmdLine = parseOptions(args);
} catch (IllegalStateException e) {
printHelpAndExit(e.getMessage(), getOptions());
return -1;
}
configuration = HBaseConfiguration.addHbaseResources(getConf());
populateIndexToolAttributes(cmdLine);
if (tenantId != null) {
configuration.set(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
}
try (Connection conn = getConnection(configuration)) {
createIndexToolTables(conn);
if (dataTable != null && indexTable != null) {
setupIndexAndDataTable(conn);
checkIfFeatureApplicable(startTime, endTime, lastVerifyTime, pDataTable, isLocalIndexBuild);
if (shouldDeleteBeforeRebuild) {
deleteBeforeRebuild(conn);
}
}
preSplitIndexTable(cmdLine, conn);
boolean result = submitIndexToolJob(conn, configuration);
if (result) {
return 0;
} else {
LOGGER.error("IndexTool job failed! Check logs for errors..");
return -1;
}
} catch (Exception ex) {
LOGGER.error("An exception occurred while performing the indexing job: "
+ ExceptionUtils.getMessage(ex) + " at:\n" + ExceptionUtils.getStackTrace(ex));
return -1;
}
}
@Override
public TaskRegionObserver.TaskResult checkCurrentResult(Task.TaskRecord taskRecord)
throws Exception {
String jobID = getJobID(taskRecord.getData());
if (jobID != null) {
Configuration conf = HBaseConfiguration.create(env.getConfiguration());
Configuration configuration = HBaseConfiguration.addHbaseResources(conf);
Cluster cluster = new Cluster(configuration);
Job job = cluster.getJob(org.apache.hadoop.mapreduce.JobID.forName(jobID));
if (job == null) {
return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.SKIPPED, "");
}
if (job != null && job.isComplete()) {
if (job.isSuccessful()) {
LOGGER.warn("IndexRebuildTask checkCurrentResult job is successful "
+ taskRecord.getTableName());
return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.SUCCESS, "");
} else {
return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.FAIL,
"Index is DISABLED");
}
}
}
return null;
}
@Override
public int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('s');
String target = cmd.getOptionValue('t');
String targetGraph = cmd.getOptionValue('g');
String graphContext = cmd.getOptionValue('c');
String thresh = cmd.getOptionValue('r');
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
Job job = Job.getInstance(getConf(), "HalyardStats " + source + (target == null ? " update" : " -> " + target));
job.getConfiguration().set(SOURCE, source);
if (target != null) job.getConfiguration().set(TARGET, target);
if (targetGraph != null) job.getConfiguration().set(TARGET_GRAPH, targetGraph);
if (graphContext != null) job.getConfiguration().set(GRAPH_CONTEXT, graphContext);
if (thresh != null) job.getConfiguration().setLong(THRESHOLD, Long.parseLong(thresh));
job.setJarByClass(HalyardStats.class);
TableMapReduceUtil.initCredentials(job);
Scan scan = HalyardTableUtils.scan(null, null);
if (graphContext != null) { //restricting stats to scan given graph context only
List<RowRange> ranges = new ArrayList<>();
byte[] gcHash = HalyardTableUtils.hashKey(SimpleValueFactory.getInstance().createIRI(graphContext));
ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, gcHash));
ranges.add(rowRange(HalyardTableUtils.CPOS_PREFIX, gcHash));
ranges.add(rowRange(HalyardTableUtils.COSP_PREFIX, gcHash));
if (target == null) { //add stats context to the scanned row ranges (when in update mode) to delete the related stats during MapReduce
ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, HalyardTableUtils.hashKey(targetGraph == null ? HALYARD.STATS_GRAPH_CONTEXT : SimpleValueFactory.getInstance().createIRI(targetGraph))));
}
scan.setFilter(new MultiRowRangeFilter(ranges));
}
TableMapReduceUtil.initTableMapperJob(
source,
scan,
StatsMapper.class,
ImmutableBytesWritable.class,
LongWritable.class,
job);
job.setPartitionerClass(StatsPartitioner.class);
job.setReducerClass(StatsReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
if (job.waitForCompletion(true)) {
LOG.info("Stats Generation Completed..");
return 0;
}
return -1;
}
@Override
protected int run(CommandLine cmd) throws Exception {
if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
String source = cmd.getOptionValue('s');
String queryFiles = cmd.getOptionValue('q');
String target = cmd.getOptionValue('t');
if (!target.contains("{0}")) {
throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
}
getConf().set(SOURCE, source);
getConf().set(TARGET, target);
String driver = cmd.getOptionValue('c');
if (driver != null) {
getConf().set(JDBC_DRIVER, driver);
}
String props[] = cmd.getOptionValues('p');
if (props != null) {
for (int i=0; i<props.length; i++) {
props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
}
getConf().setStrings(JDBC_PROPERTIES, props);
}
if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
String cp = cmd.getOptionValue('l');
if (cp != null) {
String jars[] = cp.split(":");
StringBuilder newCp = new StringBuilder();
for (int i=0; i<jars.length; i++) {
if (i > 0) newCp.append(':');
newCp.append(addTmpFile(jars[i])); //append classpath entries to tmpfiles and trim paths from the classpath
}
getConf().set(JDBC_CLASSPATH, newCp.toString());
}
Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
job.setJarByClass(HalyardBulkExport.class);
job.setMaxMapAttempts(1);
job.setMapperClass(BulkExportMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Void.class);
job.setNumReduceTasks(0);
job.setInputFormatClass(QueryInputFormat.class);
QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
job.setOutputFormatClass(NullOutputFormat.class);
TableMapReduceUtil.initCredentials(job);
if (job.waitForCompletion(true)) {
LOG.info("Bulk Export Completed..");
return 0;
}
return -1;
}
@Override
public int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('t');
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
Job job = Job.getInstance(getConf(), "HalyardDelete " + source);
if (cmd.hasOption('s')) {
job.getConfiguration().set(SUBJECT, cmd.getOptionValue('s'));
}
if (cmd.hasOption('p')) {
job.getConfiguration().set(PREDICATE, cmd.getOptionValue('p'));
}
if (cmd.hasOption('o')) {
job.getConfiguration().set(OBJECT, cmd.getOptionValue('o'));
}
if (cmd.hasOption('g')) {
job.getConfiguration().setStrings(CONTEXTS, cmd.getOptionValues('g'));
}
job.setJarByClass(HalyardBulkDelete.class);
TableMapReduceUtil.initCredentials(job);
Scan scan = HalyardTableUtils.scan(null, null);
TableMapReduceUtil.initTableMapperJob(source,
scan,
DeleteMapper.class,
ImmutableBytesWritable.class,
LongWritable.class,
job);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
job.setSpeculativeExecution(false);
job.setMapSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue('f')));
TableMapReduceUtil.addDependencyJars(job);
if (job.waitForCompletion(true)) {
new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(cmd.getOptionValue('f')), hTable);
LOG.info("Bulk Delete Completed..");
return 0;
}
}
return -1;
}
@Override
public int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('s');
String target = cmd.getOptionValue('t');
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
Job job = Job.getInstance(getConf(), "HalyardSummary " + source + (target == null ? " update" : " -> " + target));
job.getConfiguration().set(SOURCE, source);
if (target != null) job.getConfiguration().set(TARGET, target);
if (cmd.hasOption('g')) job.getConfiguration().set(TARGET_GRAPH, cmd.getOptionValue('g'));
if (cmd.hasOption('d')) job.getConfiguration().setInt(DECIMATION_FACTOR, Integer.parseInt(cmd.getOptionValue('d')));
job.setJarByClass(HalyardSummary.class);
TableMapReduceUtil.initCredentials(job);
Scan scan = HalyardTableUtils.scan(new byte[]{HalyardTableUtils.POS_PREFIX}, new byte[]{HalyardTableUtils.POS_PREFIX + 1});
TableMapReduceUtil.initTableMapperJob(source,
scan,
SummaryMapper.class,
ImmutableBytesWritable.class,
LongWritable.class,
job);
job.setNumReduceTasks(1);
job.setCombinerClass(SummaryCombiner.class);
job.setReducerClass(SummaryReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
if (job.waitForCompletion(true)) {
LOG.info("Summary Generation Completed..");
return 0;
}
return -1;
}
@Override
protected int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('s');
String target = cmd.getOptionValue('t');
try (Connection con = ConnectionFactory.createConnection(getConf())) {
try (Admin admin = con.getAdmin()) {
if (admin.tableExists(TableName.valueOf(target))) {
LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
return -1;
}
}
}
getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
TableMapReduceUtil.addDependencyJars(getConf(),
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class);
HBaseConfiguration.addHbaseResources(getConf());
getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
job.getConfiguration().set(TABLE_PROPERTY, target);
job.setJarByClass(HalyardPreSplit.class);
job.setMapperClass(RDFDecimatingMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(LongWritable.class);
job.setInputFormatClass(RioFileInputFormat.class);
FileInputFormat.setInputDirRecursive(job, true);
FileInputFormat.setInputPaths(job, source);
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initCredentials(job);
job.setReducerClass(PreSplitReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(NullOutputFormat.class);
if (job.waitForCompletion(true)) {
LOG.info("PreSplit Calculation Completed..");
return 0;
}
return -1;
}
@Override
protected int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('s');
String workdir = cmd.getOptionValue('w');
String target = cmd.getOptionValue('t');
getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
if (cmd.hasOption('m')) getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
TableMapReduceUtil.addDependencyJars(getConf(),
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class);
HBaseConfiguration.addHbaseResources(getConf());
Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
job.setJarByClass(HalyardBulkLoad.class);
job.setMapperClass(RDFMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
job.setInputFormatClass(RioFileInputFormat.class);
job.setSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
FileInputFormat.setInputDirRecursive(job, true);
FileInputFormat.setInputPaths(job, source);
FileOutputFormat.setOutputPath(job, new Path(workdir));
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initCredentials(job);
if (job.waitForCompletion(true)) {
if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
HalyardTableUtils.truncateTable(hTable).close();
}
new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
LOG.info("Bulk Load Completed..");
return 0;
}
}
return -1;
}
public int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('s');
String queryFiles = cmd.getOptionValue('q');
String workdir = cmd.getOptionValue('w');
getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
if (cmd.hasOption('i')) getConf().set(ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
getConf().setStrings(TABLE_NAME_PROPERTY, source);
getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
int stages = 1;
for (int stage = 0; stage < stages; stage++) {
Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + workdir + " -> " + source + " stage #" + stage);
job.getConfiguration().setInt(STAGE_PROPERTY, stage);
job.setJarByClass(HalyardBulkUpdate.class);
job.setMapperClass(SPARQLUpdateMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
job.setInputFormatClass(QueryInputFormat.class);
job.setSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, true, stage);
Path outPath = new Path(workdir, "stage"+stage);
FileOutputFormat.setOutputPath(job, outPath);
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initCredentials(job);
if (stage == 0) { //count real number of stages
for (InputSplit is : new QueryInputFormat().getSplits(job)) {
QueryInputFormat.QueryInputSplit qis = (QueryInputFormat.QueryInputSplit)is;
int updates = QueryParserUtil.parseUpdate(QueryLanguage.SPARQL, qis.getQuery(), null).getUpdateExprs().size();
if (updates > stages) {
stages = updates;
}
LOG.log(Level.INFO, "{0} contains {1} stages of the update sequence.", new Object[]{qis.getQueryName(), updates});
}
LOG.log(Level.INFO, "Bulk Update will process {0} MapReduce stages.", stages);
}
if (job.waitForCompletion(true)) {
new LoadIncrementalHFiles(getConf()).doBulkLoad(outPath, hTable);
LOG.log(Level.INFO, "Stage #{0} of {1} completed..", new Object[]{stage, stages});
} else {
return -1;
}
}
}
LOG.info("Bulk Update Completed..");
return 0;
}
public static void main (String[] args) throws IOException, InterruptedException, ClassNotFoundException {
if (args.length == 0) {
System.out
.println("ExportHBaseTableToDelimiteredTxt {tableName} {ColumnFamily} {outputPath} {shouldCompressWithGz} {schemaLocationOnHdfs} {delimiter} {rowKeyColumn.Optional}");
return;
}
String table = args[0];
String columnFamily = args[1];
String outputPath = args[2];
String shouldCompression = args[3];
String schemaFilePath = args[4];
String delimiter = args[5];
String rowKeyColumn = "";
if (args.length > 6) {
rowKeyColumn = args[6];
}
Job job = Job.getInstance();
job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
HBaseConfiguration.addHbaseResources(job.getConfiguration());
job.getConfiguration().set(SHOULD_COMPRESSION_CONF, shouldCompression);
job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
job.getConfiguration().set(DELIMITER_CONF, delimiter);
job.setJarByClass(ExportHBaseTableToDelimiteredTxt.class);
job.setJobName("ExportHBaseTableToDelimiteredTxt ");
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for
// MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
scan.addFamily(Bytes.toBytes(columnFamily));
TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper
null, // mapper output key
null, // mapper output value
job);
job.setOutputFormatClass(NullOutputFormat.class); // because we aren't
// emitting anything from
// mapper
job.setNumReduceTasks(0);
boolean b = job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception{
if (args.length == 0) {
System.out.println("CreateTables {tableName} {columnFamilyName} {RegionCount}");
return;
}
String tableName = args[0];
String columnFamilyName = args[1];
String regionCount = args[2];
long regionMaxSize = 107374182400L;
Configuration config = HBaseConfiguration.addHbaseResources(new Configuration());
HBaseAdmin admin = new HBaseAdmin(config);
createTable(tableName, columnFamilyName, Short.parseShort(regionCount), regionMaxSize, admin);
admin.close();
System.out.println("Done");
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
if (args.length == 0) {
System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
return;
}
String table = args[0];
String columnFamily = args[1];
String outputPath = args[2];
String compressionCodec = args[3];
String schemaFilePath = args[4];
String rowKeyColumn = "";
if (args.length > 5) {
rowKeyColumn = args[5];
}
Job job = Job.getInstance();
HBaseConfiguration.addHbaseResources(job.getConfiguration());
job.setJarByClass(ExportHBaseTableToAvro.class);
job.setJobName("ExportHBaseTableToAvro ");
job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for
// MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
scan.addFamily(Bytes.toBytes(columnFamily));
TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper
null, // mapper output key
null, // mapper output value
job);
job.setOutputFormatClass(AvroKeyOutputFormat.class);
AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));
Schema.Parser parser = new Schema.Parser();
FileSystem fs = FileSystem.get(job.getConfiguration());
AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));
if (compressionCodec.equals("snappy")) {
AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
} else if (compressionCodec.equals("gzip")) {
AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
// nothing
}
job.setNumReduceTasks(0);
boolean b = job.waitForCompletion(true);
}
public static void main (String[] args) throws IOException, InterruptedException, ClassNotFoundException {
if (args.length == 0) {
System.out
.println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional");
return;
}
String table = args[0];
String columnFamily = args[1];
String outputPath = args[2];
String compressionCodec = args[3];
String schemaFilePath = args[4];
String delimiter = args[5];
String rowKeyColumn = "";
if (args.length > 6) {
rowKeyColumn = args[6];
}
Job job = Job.getInstance();
job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
HBaseConfiguration.addHbaseResources(job.getConfiguration());
job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
job.getConfiguration().set(DELIMITER_CONF, delimiter);
job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
job.setJobName("ExportHBaseTableToDelimiteredSeq ");
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for
// MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
scan.addFamily(Bytes.toBytes(columnFamily));
TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper
null, // mapper output key
null, // mapper output value
job);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
if (compressionCodec.equals("snappy")) {
SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
} else if (compressionCodec.equals("gzip")) {
SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
//nothing
}
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
boolean b = job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("PopulateSmallTable {numberOfMappers} {numberOfRecords} {tmpOutputPath} {tableName} {columnFamily} {runID}");
return;
}
String numberOfMappers = args[0];
String numberOfRecords = args[1];
String outputPath = args[2];
String tableName = args[3];
String columnFamily = args[4];
String runID = args[5];
// Create job
Job job = Job.getInstance();
HBaseConfiguration.addHbaseResources(job.getConfiguration());
job.setJarByClass(PopulateTable.class);
job.setJobName("PopulateTable: " + runID);
job.getConfiguration().set(NUMBER_OF_RECORDS, numberOfRecords);
job.getConfiguration().set(TABLE_NAME, tableName);
job.getConfiguration().set(COLUMN_FAMILY, columnFamily);
job.getConfiguration().set(RUN_ID, runID);
// Define input format and path
job.setInputFormatClass(NMapInputFormat.class);
NMapInputFormat.setNumMapTasks(job.getConfiguration(), Integer.parseInt(numberOfMappers));
Configuration config = HBaseConfiguration.create();
HTable hTable = new HTable(config, tableName);
// Auto configure partitioner and reducer
HFileOutputFormat.configureIncrementalLoad(job, hTable);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
// Define the mapper and reducer
job.setMapperClass(CustomMapper.class);
// job.setReducerClass(CustomReducer.class);
// Define the key and value format
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
// Exit
job.waitForCompletion(true);
FileSystem hdfs = FileSystem.get(config);
// Must allow HBase to have write access to the HFiles
HFileUtils.changePermissionR(outputPath, hdfs);
LoadIncrementalHFiles load = new LoadIncrementalHFiles(config);
load.doBulkLoad(new Path(outputPath), hTable);
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
if (args.length == 0) {
System.out
.println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
return;
}
String table = args[0];
String columnFamily = args[1];
String outputPath = args[2];
String compressionCodec = args[3];
String schemaFilePath = args[4];
String rowKeyColumn = "";
if (args.length > 5) {
rowKeyColumn = args[5];
}
Job job = Job.getInstance();
job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
HBaseConfiguration.addHbaseResources(job.getConfiguration());
job.setJarByClass(ExportHBaseTableToParquet.class);
job.setJobName("ExportHBaseTableToParquet ");
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for
// MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
scan.addFamily(Bytes.toBytes(columnFamily));
TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper
null, // mapper output key
null, // mapper output value
job);
job.setOutputFormatClass(AvroParquetOutputFormat.class);
AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));
Schema.Parser parser = new Schema.Parser();
FileSystem fs = FileSystem.get(job.getConfiguration());
AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));
if (compressionCodec.equals("snappy")) {
AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
} else if (compressionCodec.equals("gzip")) {
AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
// nothing
}
job.setNumReduceTasks(0);
boolean b = job.waitForCompletion(true);
}