下面列出了 org.slf4j.helpers.BasicMarkerFactory#org.kitesdk.morphline.api.CommandBuilder 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
super(builder, config, parent, child, context);
this.charset = getConfigs().getCharset(config, "charset", null);
this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);
this.xmlReader = new Parser(); // no reuse?
xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
validateArguments();
}
/**
* Using the given <code>builder</code>, constructs a command rooted at the given morphline JSON
* <code>config</code>.
*
* The command will feed records into <code>child</code>. The command will have
* <code>parent</code> as it's parent. Additional parameters can be passed via the morphline
* <code>context</code>.
*/
protected AbstractCommand(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
Preconditions.checkNotNull(builder);
Preconditions.checkNotNull(config);
Preconditions.checkNotNull(parent);
Preconditions.checkNotNull(child);
Preconditions.checkNotNull(context);
this.config = config;
this.parent = parent;
this.child = child;
this.context = context;
Preconditions.checkArgument(builder.getNames().size() > 0);
this.name = "morphline." + builder.getNames().iterator().next();
this.configs = new Configs();
this.numProcessCallsMeter = getMeter(Metrics.NUM_PROCESS_CALLS);
this.numNotifyCallsMeter = getMeter(Metrics.NUM_NOTIFY_CALLS);
}
/**
 * Builds the command from its config: reads the three boolean flags
 * ("throwExceptionIfAllRulesFailed", "catchExceptions", "copyRecords"), then builds one command
 * chain per entry of the optional "rules" list and remembers the head of every non-empty chain
 * in <code>childRules</code>. Each rule chain feeds into <code>child</code>.
 */
public TryRules(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.throwExceptionIfAllRulesFailed = getConfigs().getBoolean(config, "throwExceptionIfAllRulesFailed", true);
  this.catchExceptions = getConfigs().getBoolean(config, "catchExceptions", false);
  this.copyRecords = getConfigs().getBoolean(config, "copyRecords", true);

  // A typed empty list replaces the raw Collections.EMPTY_LIST, so no unchecked-warning
  // suppression is needed on this constructor.
  List<? extends Config> ruleConfigs = getConfigs().getConfigList(config, "rules", Collections.<Config>emptyList());
  for (Config ruleConfig : ruleConfigs) {
    List<Command> commands = buildCommandChain(ruleConfig, "commands", child, true);
    if (!commands.isEmpty()) {
      childRules.add(commands.get(0)); // only the head of the chain is kept
    }
  }

  validateArguments();
  numExceptionsCaught = getMeter("numExceptionsCaught");
}
/**
 * Builds the command from its config. The "conditions" chain is mandatory and is wired into a
 * record-dropping command; the optional "then" and "else" chains are wired into
 * <code>child</code>. For each chain only its head command is stored.
 *
 * @throws MorphlineCompilationException if the "conditions" chain turns out empty
 */
public IfThenElse(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  Command devNull = new DropRecordBuilder().build(null, this, null, context); // pipes into /dev/null

  List<Command> conditionCommands = buildCommandChain(config, "conditions", devNull, true);
  if (conditionCommands.isEmpty()) {
    throw new MorphlineCompilationException("Missing conditions", config);
  }
  conditionChain = conditionCommands.get(0);

  List<Command> thenBranch = buildCommandChain(config, "then", child, true);
  if (!thenBranch.isEmpty()) {
    thenChain = thenBranch.get(0);
  }

  List<Command> elseBranch = buildCommandChain(config, "else", child, true);
  if (!elseBranch.isEmpty()) {
    elseChain = elseBranch.get(0);
  }

  validateArguments();
}
/**
 * Builds the pipe from its config: reads the "id", imports the command builders named by
 * "importCommands" (defaulting to "com.**", "org.**", "net.**") into the context, and builds the
 * "commands" chain. The head of that chain becomes the real child; if the chain is empty the
 * given <code>child</code> is used directly.
 */
public Pipe(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  id = getConfigs().getString(config, "id");

  List<String> defaultImports = Arrays.asList("com.**", "org.**", "net.**");
  List<String> importSpecs = getConfigs().getStringList(config, "importCommands", defaultImports);
  context.importCommandBuilders(importSpecs);

  // The result is deliberately discarded; presumably this only registers "commands" as an
  // accessed setting before validateArguments() runs -- confirm.
  getConfigs().getConfigList(config, "commands", null);

  List<Command> chain = buildCommandChain(config, "commands", child, false);
  realChild = chain.isEmpty() ? child : chain.get(0);

  validateArguments();
}
/**
 * Builds the command from its config: reads the optional "imports" (default
 * <code>DEFAULT_IMPORTS</code>) and the "code" block, then creates a <code>ScriptEvaluator</code>
 * for a Boolean-returning script. The script is given the parameters "record", "config",
 * "parent", "child", "context" and "logger" and is declared to throw <code>Exception</code>.
 * The code block is also passed as the evaluator's last argument.
 *
 * @throws ScriptException declared by this constructor; presumably raised by the evaluator
 */
public Java(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws ScriptException {
  super(builder, config, parent, child, context);

  String importsBlock = getConfigs().getString(config, "imports", DEFAULT_IMPORTS);
  String codeBlock = getConfigs().getString(config, "code");
  validateArguments();

  String[] parameterNames = {"record", "config", "parent", "child", "context", "logger"};
  Class[] parameterTypes = {Record.class, Config.class, Command.class, Command.class, MorphlineContext.class, Logger.class};
  Class[] thrownTypes = {Exception.class};

  script = new ScriptEvaluator<Boolean>(
      importsBlock,
      codeBlock,
      Boolean.class,
      parameterNames,
      parameterTypes,
      thrownTypes,
      codeBlock);
}
/**
 * Builds the command from its config and resolves the local host once, at construction time.
 * Depending on the <code>USE_IP</code> setting (default true) the stored host is either the
 * address string or the canonical host name. If the local host cannot be resolved a warning is
 * logged and the stored host is null.
 */
public AddLocalHost(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  fieldName = getConfigs().getString(config, FIELD_NAME, "host");
  preserveExisting = getConfigs().getBoolean(config, PRESERVE_EXISTING_NAME, true);
  boolean preferAddress = getConfigs().getBoolean(config, USE_IP, true);
  validateArguments();

  InetAddress localAddress;
  try {
    localAddress = InetAddress.getLocalHost();
  } catch (UnknownHostException e) {
    LOG.warn("Cannot get address of local host", e);
    localAddress = null; // best effort: continue without a host value
  }

  host = (localAddress == null)
      ? null
      : (preferAddress ? localAddress.getHostAddress() : localAddress.getCanonicalHostName());
}
/**
 * Builds the command from its config: reads the target field name (default
 * <code>Fields.ID</code>), the preserve-existing flag, the prefix and the "type" setting
 * (default <code>Type.secure</code>). For the secure type no generator is stored; for any other
 * type a <code>Well19937c</code> generator is created, seeded with 624 ints drawn from a
 * <code>SecureRandom</code>.
 */
public GenerateUUID(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  fieldName = getConfigs().getString(config, FIELD_NAME, Fields.ID);
  preserveExisting = getConfigs().getBoolean(config, PRESERVE_EXISTING_NAME, true);
  prefix = getConfigs().getString(config, PREFIX_NAME, "");

  String typeName = getConfigs().getString(config, "type", Type.secure.toString());
  Type type = new Validator<Type>().validateEnum(config, typeName, Type.class);

  if (type != Type.secure) {
    Random seeder = new SecureRandom();
    int[] seed = new int[624];
    for (int k = 0; k < seed.length; k++) {
      seed[k] = seeder.nextInt();
    }
    prng = new Well19937c(seed); // non-secure & fast
  } else {
    prng = null; // secure & slow
  }

  validateArguments();
}
/**
 * Builds the command from its config: reads "inputField", "outputFieldPrefix" (default ""),
 * "separator" (default "="), "isRegex", "addEmptyStrings" and "trim". When "isRegex" is true the
 * separator is compiled through <code>GrokDictionaries</code> into a reusable matcher; otherwise
 * no matcher is stored.
 *
 * @throws MorphlineCompilationException if the separator is the empty string
 */
public SplitKeyValue(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  inputFieldName = getConfigs().getString(config, "inputField");
  outputFieldPrefix = getConfigs().getString(config, "outputFieldPrefix", "");
  separator = getConfigs().getString(config, "separator", "=");
  if (separator.length() == 0) {
    throw new MorphlineCompilationException("separator must not be the empty string", config);
  }

  boolean separatorIsRegex = getConfigs().getBoolean(config, "isRegex", false);
  if (!separatorIsRegex) {
    regex = null;
  } else {
    GrokDictionaries dictionaries = new GrokDictionaries(config, getConfigs());
    regex = dictionaries.compileExpression(separator).pattern().matcher("");
  }

  addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", false);
  trim = getConfigs().getBoolean(config, "trim", true);
  validateArguments();
}
/**
 * Builds the command from its config: reads "replacement", "pattern", "isRegex" and
 * "replaceFirst". In regex mode the pattern is compiled through <code>GrokDictionaries</code>,
 * a reusable matcher is stored, and the replacement string is passed through the compiled
 * pattern's <code>replaceProperties</code>. In literal mode the pattern and replacement are
 * stored as given and no matcher is kept.
 */
public FindReplace(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  GrokDictionaries dictionaries = new GrokDictionaries(config, getConfigs());
  String rawReplacement = getConfigs().getString(config, "replacement");
  String patternText = getConfigs().getString(config, "pattern");

  boolean patternIsRegex = getConfigs().getBoolean(config, "isRegex", false);
  if (!patternIsRegex) {
    matcher = null;
    literalPattern = patternText;
    replacement = rawReplacement;
  } else {
    Pattern compiled = dictionaries.compileExpression(patternText);
    matcher = compiled.pattern().matcher("");
    literalPattern = null;
    replacement = compiled.replaceProperties(rawReplacement);
  }

  replaceFirst = getConfigs().getBoolean(config, "replaceFirst", false);
  validateArguments();
}
/**
 * Builds the command from its config: reads "inputField", "outputField", "solrFieldType" and the
 * "solrLocator" sub-config, looks the field type up in the schema obtained from the locator, and
 * stores that type's index analyzer together with a <code>CharTermAttribute</code> taken from a
 * token stream on field "content".
 *
 * @throws MorphlineCompilationException if the schema has no field type with the configured name
 */
public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  inputFieldName = getConfigs().getString(config, "inputField");
  outputFieldName = getConfigs().getString(config, "outputField");
  String fieldTypeName = getConfigs().getString(config, "solrFieldType");

  Config locatorConfig = getConfigs().getConfig(config, "solrLocator");
  SolrLocator solrLocator = new SolrLocator(locatorConfig, context);
  LOG.debug("solrLocator: {}", solrLocator);

  IndexSchema indexSchema = solrLocator.getIndexSchema();
  FieldType solrType = indexSchema.getFieldTypeByName(fieldTypeName);
  if (solrType == null) {
    throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + fieldTypeName, config);
  }

  analyzer = solrType.getIndexAnalyzer();
  Preconditions.checkNotNull(analyzer);

  // register CharTermAttribute for later (implicit) reuse
  token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
  Preconditions.checkNotNull(token);

  validateArguments();
}
/**
 * Builds the command from its config: creates a <code>SolrLocator</code> from the locator
 * sub-config, obtains a loader from it using the optional "retryPolicy", copies every entry of the
 * optional "boosts" sub-config into the boosts map as a float, creates a rate limiter from
 * "maxRecordsPerSecond" (default <code>Double.MAX_VALUE</code>), and reads the dry-run flag from
 * the context's typed settings.
 */
public LoadSolr(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  Config locatorConfig = getConfigs().getConfig(config, SOLR_LOCATOR_PARAM);
  SolrLocator solrLocator = new SolrLocator(locatorConfig, context);
  LOG.debug("solrLocator: {}", solrLocator);

  Config retryPolicyConfig = getConfigs().getConfig(config, "retryPolicy", null);
  RetryPolicyFactory retryPolicy = parseRetryPolicyFactory(retryPolicyConfig);
  loader = solrLocator.getLoader(retryPolicy, new CodahaleMetricsFacade(context.getMetricRegistry()));

  Config boostSettings = getConfigs().getConfig(config, "boosts", ConfigFactory.empty());
  for (Map.Entry<String, Object> boostEntry : new Configs().getEntrySet(boostSettings)) {
    String boostedField = boostEntry.getKey();
    String boostText = boostEntry.getValue().toString().trim();
    boosts.put(boostedField, Float.parseFloat(boostText));
  }

  double maxRate = getConfigs().getDouble(config, "maxRecordsPerSecond", Double.MAX_VALUE);
  rateLimiter = RateLimiter.create(maxRate);
  isDryRun = context.getTypedSettings().getBoolean(TypedSettings.DRY_RUN_SETTING_NAME, false);

  validateArguments();
  elapsedTime = getTimer(Metrics.ELAPSED_TIME);
}
/**
 * Builds the command from its config and determines the writer schema. An inline
 * "writerSchemaString" is parsed if present; only when it is absent is "writerSchemaFile"
 * consulted and, if present, parsed from that file. With neither setting the writer schema is
 * null. Finally the "isJson" flag is read (default false).
 *
 * @throws MorphlineCompilationException if the external schema file cannot be read
 */
public ReadAvro(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String inlineSchema = getConfigs().getString(config, "writerSchemaString", null);
  if (inlineSchema == null) {
    // "writerSchemaFile" is only looked at when no inline schema was given.
    String schemaFile = getConfigs().getString(config, "writerSchemaFile", null);
    if (schemaFile == null) {
      writerSchema = null;
    } else {
      try {
        writerSchema = new Parser().parse(new File(schemaFile));
      } catch (IOException e) {
        throw new MorphlineCompilationException("Cannot parse external Avro writer schema file: " + schemaFile, config, e);
      }
    }
  } else {
    writerSchema = new Parser().parse(inlineSchema);
  }

  isJson = getConfigs().getBoolean(config, "isJson", false);
  validateArguments();
}
/**
 * Builds the command from its config: validates the "format" setting (default
 * <code>Format.container</code>), creates a codec factory from the optional "codec" name (none
 * when the setting is absent), and copies every entry of the optional "metadata" sub-config into
 * the metadata map as a string.
 */
public WriteAvroToByteArray(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String formatName = getConfigs().getString(config, "format", Format.container.toString());
  format = new Validator<Format>().validateEnum(config, formatName, Format.class);

  String codecName = getConfigs().getString(config, "codec", null);
  codecFactory = (codecName == null) ? null : CodecFactory.fromString(codecName);

  Config metadataSettings = getConfigs().getConfig(config, "metadata", ConfigFactory.empty());
  for (Map.Entry<String, Object> item : new Configs().getEntrySet(metadataSettings)) {
    metadata.put(item.getKey(), item.getValue().toString());
  }

  validateArguments();
}
/**
 * Builds the command from its config and immediately performs the download. Reads the
 * "inputFiles" list, the "outputDir" (default "."), an optional default file system URI ("fs")
 * and a list of extra configuration resources ("conf"), applies them to a new Hadoop
 * <code>Configuration</code>, and then calls <code>download</code>.
 *
 * @throws IOException declared by this constructor; presumably raised by <code>download</code>
 */
public DownloadHdfsFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws IOException {
  super(builder, config, parent, child, context);

  List<String> inputUris = getConfigs().getStringList(config, "inputFiles", Collections.<String>emptyList());
  String outputDirName = getConfigs().getString(config, "outputDir", ".");
  File outputDir = new File(outputDirName);

  Configuration hadoopConf = new Configuration();
  String fsUri = getConfigs().getString(config, "fs", null);
  if (fsUri != null) {
    FileSystem.setDefaultUri(hadoopConf, fsUri); // see Hadoop's GenericOptionsParser
  }
  List<String> resourcePaths = getConfigs().getStringList(config, "conf", Collections.<String>emptyList());
  for (String resourcePath : resourcePaths) {
    hadoopConf.addResource(new Path(resourcePath)); // see Hadoop's GenericOptionsParser
  }

  validateArguments();
  download(inputUris, hadoopConf, outputDir);
}
/**
 * Builds the command from its config: reads "field" (no default supplied) and "locale"
 * (default <code>Locale.ROOT</code>), logs the field name at debug level, then calls
 * <code>validateArguments()</code>.
 */
public ToLowerCase(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  fieldName = getConfigs().getString(config, "field");
  locale = getConfigs().getLocale(config, "locale", Locale.ROOT);

  LOG.debug("fieldName: {}", fieldName);
  validateArguments();
}
/**
 * Builds the command from its config: reads "field", "separator" and "newFields" (none of them
 * with a default), plus the flags "dropUndeclaredField" and "trimSegments" (both default true).
 * Logs the field name at debug level, then calls <code>validateArguments()</code>.
 */
public Split(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  // Settings read without a default.
  fieldName = getConfigs().getString(config, "field");
  separator = getConfigs().getString(config, "separator");
  newFields = getConfigs().getStringList(config, "newFields");

  // Optional flags.
  dropUndeclaredField = getConfigs().getBoolean(config, "dropUndeclaredField", true);
  trimSegments = getConfigs().getBoolean(config, "trimSegments", true);

  LOG.debug("fieldName: {}", fieldName);
  validateArguments();
}
/**
 * Builds the command from its config: reads "field", "operator" and "pattern" (none of them with
 * a default), logs the field name at debug level, then calls <code>validateArguments()</code>.
 */
public DateCheck(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  fieldName = getConfigs().getString(config, "field");
  operator = getConfigs().getString(config, "operator");
  pattern = getConfigs().getString(config, "pattern");

  LOG.debug("fieldName: {}", fieldName);
  validateArguments();
}
/**
 * Builds the command from its config: reads "field" (no default supplied), "locale" (default
 * <code>Locale.ROOT</code>) and the "reverse" flag (default false), logs the field name at debug
 * level, then calls <code>validateArguments()</code>.
 */
public MyToLowerCase(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  fieldName = getConfigs().getString(config, "field");
  locale = getConfigs().getLocale(config, "locale", Locale.ROOT);
  reverse = getConfigs().getBoolean(config, "reverse", false);

  LOG.debug("fieldName: {}", fieldName);
  validateArguments();
}
/**
 * Builds the command from its config: creates one <code>Mapping</code> per entry of the
 * "mappings" config list (read without a default) and appends it to <code>mappings</code>.
 */
public ExtractHBaseCells(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  for (Config mappingConfig : getConfigs().getConfigList(config, "mappings")) {
    Mapping parsedMapping = new Mapping(mappingConfig, context);
    mappings.add(parsedMapping);
  }

  validateArguments();
}
/**
 * Builds the parser base from its config: registers every MIME type listed under
 * <code>SUPPORTED_MIME_TYPES</code> (default: empty list) and obtains the records meter.
 * This constructor does not itself call <code>validateArguments()</code>.
 */
protected AbstractParser(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  for (String supportedType : getConfigs().getStringList(config, SUPPORTED_MIME_TYPES, Collections.<String>emptyList())) {
    addSupportedMimeType(supportedType);
  }

  numRecordsMeter = getMeter(Metrics.NUM_RECORDS);
}
/**
 * Builds the command from its config: reads "charset" (default null), "ignoreFirstLine"
 * (default false) and "commentPrefix" (default ""). An empty comment prefix is stored as null.
 *
 * @throws MorphlineCompilationException if the comment prefix is longer than one character
 */
public ReadLine(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  charset = getConfigs().getCharset(config, "charset", null);
  ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false);

  String prefixSetting = getConfigs().getString(config, "commentPrefix", "");
  if (prefixSetting.length() > 1) {
    throw new MorphlineCompilationException("commentPrefix must be at most one character long: " + prefixSetting, config);
  }
  if (prefixSetting.isEmpty()) {
    commentPrefix = null; // no comment handling configured
  } else {
    commentPrefix = prefixSetting;
  }

  validateArguments();
}
/**
 * Builds the command from its config: reads and checks the separator, quote character and comment
 * prefix, reads the remaining parsing options, and picks the tokenizer. A
 * <code>SimpleCSVTokenizer</code> is used when no quote character is configured, otherwise a
 * <code>QuotedCSVTokenizer</code>.
 *
 * @throws MorphlineCompilationException if the separator is not exactly one character, if the
 *         quote character or comment prefix is longer than one character, or if the quote
 *         character equals the separator
 */
public ReadCSV(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String separatorSetting = getConfigs().getString(config, "separator", ",");
  if (separatorSetting.length() != 1) {
    throw new MorphlineCompilationException("CSV separator must be one character only: " + separatorSetting, config);
  }
  separatorChar = separatorSetting.charAt(0);

  columnNames = getConfigs().getStringList(config, "columns");
  charset = getConfigs().getCharset(config, "charset", null);
  ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false);
  trim = getConfigs().getBoolean(config, "trim", true);
  addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", true);

  quoteChar = getConfigs().getString(config, "quoteChar", "");
  if (quoteChar.length() > 1) {
    throw new MorphlineCompilationException(
        "Quote character must not have a length of more than one character: " + quoteChar, config);
  }
  // The separator setting is exactly one character long here, so comparing against it is the
  // same as comparing against the separator character.
  if (quoteChar.equals(separatorSetting)) {
    throw new MorphlineCompilationException(
        "Quote character must not be the same as separator: " + quoteChar, config);
  }

  commentPrefix = getConfigs().getString(config, "commentPrefix", "");
  if (commentPrefix.length() > 1) {
    throw new MorphlineCompilationException(
        "Comment prefix must not have a length of more than one character: " + commentPrefix, config);
  }

  maxCharactersPerRecord = getConfigs().getInt(config, "maxCharactersPerRecord", 1000 * 1000);

  String overflowSetting =
      getConfigs().getString(config, "onMaxCharactersPerRecord", OnMaxCharactersPerRecord.throwException.toString());
  OnMaxCharactersPerRecord overflowPolicy =
      new Validator<OnMaxCharactersPerRecord>().validateEnum(config, overflowSetting, OnMaxCharactersPerRecord.class);
  ignoreTooLongRecords = (overflowPolicy == OnMaxCharactersPerRecord.ignoreRecord);

  if (quoteChar.length() == 0) {
    tokenizer = new SimpleCSVTokenizer(separatorChar, trim, addEmptyStrings, columnNames);
  } else {
    tokenizer = new QuotedCSVTokenizer(separatorChar, trim, addEmptyStrings, columnNames, maxCharactersPerRecord, ignoreTooLongRecords, quoteChar.charAt(0));
  }

  validateArguments();
}
public ReadMultiLine(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
super(builder, config, parent, child, context);
this.regex = Pattern.compile(getConfigs().getString(config, "regex")).matcher("");
this.negate = getConfigs().getBoolean(config, "negate", false);
this.charset = getConfigs().getCharset(config, "charset", null);
this.what = new Validator<What>().validateEnum(
config,
getConfigs().getString(config, "what", What.previous.toString()),
What.class);
validateArguments();
}
/**
 * Builds the command from its config: takes the config's entry set and replaces every value that
 * is not a <code>Collection</code> with a <code>FieldExpression</code> built from the value's
 * string form. Also stores the rendered root of the config. This constructor does not call
 * <code>validateArguments()</code>.
 */
public Equals(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  entrySet = new Configs().getEntrySet(config);
  for (Map.Entry<String, Object> item : entrySet) {
    Object rawValue = item.getValue();
    if (rawValue instanceof Collection) {
      continue; // collections are kept as they are
    }
    item.setValue(new FieldExpression(rawValue.toString(), getConfig()));
  }

  renderedConfig = config.root().render();
}
/**
 * Builds the command from its config: takes the config's entry set and replaces every value that
 * is not a <code>Collection</code> with a <code>FieldExpression</code> built from the value's
 * string form. This constructor does not call <code>validateArguments()</code>.
 */
public AbstractAddValuesCommand(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  entrySet = new Configs().getEntrySet(config);
  for (Map.Entry<String, Object> item : entrySet) {
    Object rawValue = item.getValue();
    if (rawValue instanceof Collection) {
      continue; // collections are kept as they are
    }
    item.setValue(new FieldExpression(rawValue.toString(), getConfig()));
  }
}
public Head(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
super(builder, config, parent, child, context);
this.limit = getConfigs().getLong(config, "limit", -1);
if (limit < -1) {
throw new MorphlineCompilationException("Illegal limit: " + limit, config);
}
validateArguments();
}
/**
 * Builds the command from its config: compiles every entry of the optional "expressions"
 * sub-config through <code>GrokDictionaries</code> into a <code>Regex</code>, remembers the input
 * field of the first regex (null when there are none), and reads the "extract",
 * "numRequiredMatches", "findSubstrings" and "addEmptyStrings" settings. Also stores the rendered
 * root of the config.
 *
 * The "extract" setting is first read as a string: the value "inplace" enables in-place
 * extraction and implies extract = true; any other value is re-read as a boolean.
 */
public Grok(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  GrokDictionaries dictionaries = new GrokDictionaries(config, getConfigs());
  Config expressions = getConfigs().getConfig(config, "expressions", ConfigFactory.empty());
  for (Map.Entry<String, Object> expression : new Configs().getEntrySet(expressions)) {
    String patternText = expression.getValue().toString();
    regexes.add(new Regex(expression.getKey(), dictionaries.compileExpression(patternText).matcher("")));
  }

  if (regexes.size() == 0) {
    firstKey = null;
  } else {
    firstKey = regexes.iterator().next().getRecordInputField();
  }

  String extractMode = getConfigs().getString(config, "extract", "true");
  extractInPlace = extractMode.equals("inplace");
  // Short-circuit: the boolean read is skipped when the setting holds "inplace".
  extract = extractInPlace || getConfigs().getBoolean(config, "extract", true);

  String requiredMatchesName =
      getConfigs().getString(config, "numRequiredMatches", NumRequiredMatches.atLeastOnce.toString());
  numRequiredMatches =
      new Validator<NumRequiredMatches>().validateEnum(config, requiredMatchesName, NumRequiredMatches.class);

  findSubstrings = getConfigs().getBoolean(config, "findSubstrings", false);
  addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", false);

  validateArguments();
  renderedConfig = config.root().render();
}
/**
 * Builds the command from its config: takes the config's entry set and replaces every value that
 * is not a <code>Collection</code> with a <code>FieldExpression</code> built from the value's
 * string form. Also stores the rendered root of the config. This constructor does not call
 * <code>validateArguments()</code>.
 */
public Contains(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  entrySet = new Configs().getEntrySet(config);
  for (Map.Entry<String, Object> item : entrySet) {
    Object rawValue = item.getValue();
    if (rawValue instanceof Collection) {
      continue; // collections are kept as they are
    }
    item.setValue(new FieldExpression(rawValue.toString(), getConfig()));
  }

  renderedConfig = config.root().render();
}
/**
 * Builds the command from its config: reads "inputField" (no default supplied),
 * "outputFieldPrefix" (default "") and the "failOnInvalidURI" flag (default false), then calls
 * <code>validateArguments()</code>.
 */
public ExtractURIComponents(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  inputFieldName = getConfigs().getString(config, "inputField");
  outputFieldPrefix = getConfigs().getString(config, "outputFieldPrefix", "");
  failOnInvalidURI = getConfigs().getBoolean(config, "failOnInvalidURI", false);

  validateArguments();
}