下面列出了org.slf4j.helpers.BasicMarkerFactory#org.kitesdk.morphline.api.MorphlineContext 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Expects a {@link MorphlineCompilationException} when the mocked compiler is set up to fail
 * every <code>compile</code> call with a generic exception.
 */
@Test (expected = MorphlineCompilationException.class)
public void morphlineCompilationError(
    final @Mocked Compiler compiler
) throws Exception {
  // Any compile request on the mocked compiler raises an exception.
  new Expectations() {{
    compiler.compile((File) any, anyString, (MorphlineContext) any, (Command) any);
    result = new Exception("Compilation exception");
  }};

  // All schema settings share the same key prefix.
  final String schemaPrefix = MorphlineTranslator.SCHEMA_CONFIG + ".";

  Map<String, Object> settings = Maps.newHashMap();
  settings.put(MorphlineTranslator.ENCODING_KEY, "UTF-8");
  settings.put(MorphlineTranslator.ENCODING_MSG, "UTF-8");
  settings.put(MorphlineTranslator.MORPHLINE, getResourcePath(MORPHLINE_FILE));
  settings.put(MorphlineTranslator.MORPHLINE_ID, "compiler-exception");
  settings.put(schemaPrefix + ComponentFactory.TYPE_CONFIG_NAME, "flat");
  settings.put(schemaPrefix + FlatSchema.FIELD_NAMES_CONFIG, Lists.newArrayList("bar"));
  settings.put(schemaPrefix + FlatSchema.FIELD_TYPES_CONFIG, Lists.newArrayList("int"));

  translator = new MorphlineTranslator();
  translator.configure(ConfigFactory.parseMap(settings));

  Row message = TestingMessageFactory.get(
      "The Key", DataTypes.StringType, "The Message", DataTypes.StringType);
  translator.translate(message);
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Reads the optional settings "format" (defaults to <code>Format.container</code>), "codec"
 * (no codec factory when absent) and "metadata" (defaults to an empty config), then validates
 * the arguments.
 */
public WriteAvroToByteArray(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String formatName = getConfigs().getString(config, "format", Format.container.toString());
  this.format = new Validator<Format>().validateEnum(config, formatName, Format.class);

  String codecName = getConfigs().getString(config, "codec", null);
  this.codecFactory = (codecName == null) ? null : CodecFactory.fromString(codecName);

  // Copy every metadata entry, storing the string form of each value.
  Config metadataSettings = getConfigs().getConfig(config, "metadata", ConfigFactory.empty());
  for (Map.Entry<String, Object> setting : new Configs().getEntrySet(metadataSettings)) {
    this.metadata.put(setting.getKey(), setting.getValue().toString());
  }

  validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Requires "replacement" and "pattern". When "isRegex" (default false) is set, the pattern is
 * compiled through the grok dictionaries and a reusable matcher is kept; otherwise the pattern
 * is kept as a literal string. "replaceFirst" defaults to false.
 */
public FindReplace(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  GrokDictionaries dictionaries = new GrokDictionaries(config, getConfigs());
  String replacementText = getConfigs().getString(config, "replacement");
  String patternText = getConfigs().getString(config, "pattern");
  boolean useRegex = getConfigs().getBoolean(config, "isRegex", false);

  if (!useRegex) {
    // Literal mode: no matcher, the raw pattern text is used as-is.
    this.matcher = null;
    this.literalPattern = patternText;
  } else {
    // Regex mode: compile once and let the compiled pattern rewrite the replacement string.
    Pattern compiled = dictionaries.compileExpression(patternText);
    this.matcher = compiled.pattern().matcher("");
    replacementText = compiled.replaceProperties(replacementText);
    this.literalPattern = null;
  }

  this.replacement = replacementText;
  this.replaceFirst = getConfigs().getBoolean(config, "replaceFirst", false);
  validateArguments();
}
/**
* Using the given <code>builder</code>, constructs a command rooted at the given morphline JSON
* <code>config</code>.
*
* The command will feed records into <code>child</code>. The command will have
* <code>parent</code> as it's parent. Additional parameters can be passed via the morphline
* <code>context</code>.
*/
protected AbstractCommand(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
Preconditions.checkNotNull(builder);
Preconditions.checkNotNull(config);
Preconditions.checkNotNull(parent);
Preconditions.checkNotNull(child);
Preconditions.checkNotNull(context);
this.config = config;
this.parent = parent;
this.child = child;
this.context = context;
Preconditions.checkArgument(builder.getNames().size() > 0);
this.name = "morphline." + builder.getNames().iterator().next();
this.configs = new Configs();
this.numProcessCallsMeter = getMeter(Metrics.NUM_PROCESS_CALLS);
this.numNotifyCallsMeter = getMeter(Metrics.NUM_NOTIFY_CALLS);
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * The writer schema is taken from "writerSchemaString" if present, otherwise parsed from the
 * file named by "writerSchemaFile" if present, otherwise left null. "isJson" defaults to false.
 *
 * @throws MorphlineCompilationException if the schema file cannot be read
 */
public ReadAvro(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String inlineSchema = getConfigs().getString(config, "writerSchemaString", null);
  // The file setting is only consulted when no inline schema was given.
  String schemaPath = (inlineSchema != null)
      ? null
      : getConfigs().getString(config, "writerSchemaFile", null);

  if (inlineSchema != null) {
    this.writerSchema = new Parser().parse(inlineSchema);
  } else if (schemaPath != null) {
    try {
      this.writerSchema = new Parser().parse(new File(schemaPath));
    } catch (IOException e) {
      throw new MorphlineCompilationException("Cannot parse external Avro writer schema file: " + schemaPath, config, e);
    }
  } else {
    this.writerSchema = null;
  }

  this.isJson = getConfigs().getBoolean(config, "isJson", false);
  validateArguments();
}
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
super(builder, config, parent, child, context);
this.charset = getConfigs().getCharset(config, "charset", null);
this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);
this.xmlReader = new Parser(); // no reuse?
xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Requires "inputField", "outputField", "solrFieldType" and "solrLocator". The named field type
 * is looked up in the index schema obtained from the Solr locator, and its index analyzer is
 * kept for tokenization.
 *
 * @throws MorphlineCompilationException if the schema has no field type with the given name
 */
public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.inputFieldName = getConfigs().getString(config, "inputField");
  this.outputFieldName = getConfigs().getString(config, "outputField");
  String fieldTypeName = getConfigs().getString(config, "solrFieldType");
  Config locatorConfig = getConfigs().getConfig(config, "solrLocator");

  SolrLocator solrLocator = new SolrLocator(locatorConfig, context);
  LOG.debug("solrLocator: {}", solrLocator);

  FieldType fieldType = solrLocator.getIndexSchema().getFieldTypeByName(fieldTypeName);
  if (fieldType == null) {
    throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + fieldTypeName, config);
  }

  this.analyzer = fieldType.getIndexAnalyzer();
  Preconditions.checkNotNull(analyzer);

  // register CharTermAttribute for later (implicit) reuse
  this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
  Preconditions.checkNotNull(token);

  validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code> and downloads the configured files
 * as part of construction.
 *
 * Optional settings: "inputFiles" (default: none), "outputDir" (default: "."), "fs" (default
 * file system URI) and "conf" (additional Hadoop configuration resources).
 *
 * @throws IOException declared by this constructor; see <code>download</code>
 */
public DownloadHdfsFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context)
    throws IOException {
  super(builder, config, parent, child, context);
  List<String> sourceUris = getConfigs().getStringList(config, "inputFiles", Collections.<String>emptyList());
  File targetRootDir = new File(getConfigs().getString(config, "outputDir", "."));

  Configuration hadoopConf = new Configuration();
  String fsUri = getConfigs().getString(config, "fs", null);
  if (fsUri != null) {
    FileSystem.setDefaultUri(hadoopConf, fsUri); // see Hadoop's GenericOptionsParser
  }

  List<String> resources = getConfigs().getStringList(config, "conf", Collections.<String>emptyList());
  for (String resource : resources) {
    hadoopConf.addResource(new Path(resource)); // see Hadoop's GenericOptionsParser
  }

  validateArguments();
  download(sourceUris, hadoopConf, targetRootDir);
}
public ReadJsonTestTweets(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
super(builder, config, parent, child, context);
this.isLengthDelimited = getConfigs().getBoolean(config, "isLengthDelimited", true);
this.idPrefix = getConfigs().getString(config, "idPrefix", null);
if ("random".equals(idPrefix)) {
idPrefix = String.valueOf(new Random().nextInt());
} else if (idPrefix == null) {
idPrefix = "";
}
validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Requires "field", "separator" and "newFields"; "dropUndeclaredField" and "trimSegments" are
 * optional and both default to true.
 */
public Split(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  // Required settings.
  fieldName = getConfigs().getString(config, "field");
  separator = getConfigs().getString(config, "separator");
  newFields = getConfigs().getStringList(config, "newFields");

  // Optional settings.
  dropUndeclaredField = getConfigs().getBoolean(config, "dropUndeclaredField", true);
  trimSegments = getConfigs().getBoolean(config, "trimSegments", true);

  LOG.debug("fieldName: {}", fieldName);
  validateArguments();
}
/**
 * Creates a <code>DownloadHdfsFile</code> command, converting an I/O failure during
 * construction into a {@link MorphlineCompilationException}.
 */
@Override
public Command build(Config config, Command parent, Command child, MorphlineContext context) {
  final Command command;
  try {
    command = new DownloadHdfsFile(this, config, parent, child, context);
  } catch (IOException ioFailure) {
    throw new MorphlineCompilationException("Cannot compile", config, ioFailure);
  }
  return command;
}
/**
 * Builds the command from its morphline <code>config</code>, which must provide "field",
 * "operator" and "pattern".
 */
public DateCheck(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  fieldName = getConfigs().getString(config, "field");
  operator = getConfigs().getString(config, "operator");
  pattern = getConfigs().getString(config, "pattern");

  LOG.debug("fieldName: {}", fieldName);
  validateArguments();
}
/**
 * Compiles the given morphline config using the given morphline context.
 *
 * The returned command feeds records into <code>finalChild</code>; if <code>finalChild</code>
 * is null, records go to a command built by <code>DropRecordBuilder</code> instead (i.e. into
 * /dev/null).
 */
public Command compile(Config morphlineConfig, MorphlineContext morphlineContext, Command finalChild) {
  Command sink = (finalChild != null)
      ? finalChild
      : new DropRecordBuilder().build(null, null, null, morphlineContext);
  return new PipeBuilder().build(morphlineConfig, null, sink, morphlineContext);
}
/**
 * Registers every MIME type listed under <code>SUPPORTED_MIME_TYPES</code> in the config (none
 * when the setting is absent) and sets up the record meter.
 *
 * @deprecated will be removed in the next release
 */
@Deprecated
protected AbstractParser(Config config, Command parent, Command child, MorphlineContext context) {
  super(config, parent, child, context);
  for (String supportedType : getConfigs().getStringList(config, SUPPORTED_MIME_TYPES, Collections.<String>emptyList())) {
    addSupportedMimeType(supportedType);
  }
  this.numRecordsMeter = getMeter(Metrics.NUM_RECORDS);
}
/**
 * Builds the command from its morphline <code>config</code> and prepares a Hadoop
 * configuration.
 *
 * Optional settings: "fs" sets the default file system URI, and each entry of "conf" is added
 * as a configuration resource.
 */
public OpenHdfsFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.conf = new Configuration();

  String fsUri = getConfigs().getString(config, "fs", null);
  if (fsUri != null) {
    FileSystem.setDefaultUri(conf, fsUri); // see Hadoop's GenericOptionsParser
  }

  List<String> resources = getConfigs().getStringList(config, "conf", Collections.<String>emptyList());
  for (String resource : resources) {
    conf.addResource(new Path(resource)); // see Hadoop's GenericOptionsParser
  }

  validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Every config entry whose value is not a collection is replaced in place by a
 * <code>FieldExpression</code> built from the value's string form; collection values are left
 * untouched. The rendered config is kept as text.
 */
public Equals(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.entrySet = new Configs().getEntrySet(config);
  for (Map.Entry<String, Object> entry : entrySet) {
    if (entry.getValue() instanceof Collection) {
      continue;
    }
    String expression = entry.getValue().toString();
    entry.setValue(new FieldExpression(expression, getConfig()));
  }
  this.renderedConfig = config.root().render();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Reads the input field, output field and hash type (all required), the preserve-existing flag
 * and the charset (default UTF-8), then creates the message digest for the hash type.
 *
 * @throws MorphlineCompilationException if no digest exists for the configured hash type
 */
public HashDigest(CommandBuilder builder, Config config, Command parent,
    Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  inputFieldName = getConfigs().getString(config, INPUT_FIELD);
  outputFieldName = getConfigs().getString(config, OUTPUT_FIELD);
  hashType = getConfigs().getString(config, HASH_TYPE);
  preserveExisting = getConfigs().getBoolean(config, PRESERVE_EXISTING_NAME, PRESERVE_EXISTING_DEFAULT);
  charset = getConfigs().getCharset(config, CHARSET_FIELD, Charsets.UTF_8);

  try {
    digest = MessageDigest.getInstance(hashType);
  } catch (NoSuchAlgorithmException unknownAlgorithm) {
    throw new MorphlineCompilationException("Unable to initialise digest", config, unknownAlgorithm);
  }

  validateArguments();

  if (LOG.isTraceEnabled()) {
    LOG.trace("inputField: {}", inputFieldName);
    LOG.trace("outputField: {}", outputFieldName);
    LOG.trace("hashType: {}", hashType);
    LOG.trace("preserveExisting: {}", preserveExisting);
  }
}
public Decompress(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
super(builder, config, parent, child, context);
if (!config.hasPath(SUPPORTED_MIME_TYPES)) {
// for (MediaType mediaType : new CompressorParser().getSupportedTypes(new ParseContext())) {
for (MediaType mediaType : SUPPORTED_TYPES) {
addSupportedMimeType(mediaType.toString());
}
}
validateArguments();
}
/**
 * Creates a Solr morphline context that loads documents into <code>testServer</code>, uses a
 * <code>FaultTolerance</code> exception handler and a fresh metric registry.
 */
private MorphlineContext createMorphlineContext() {
  FaultTolerance exceptionHandler =
      new FaultTolerance(false, false, SolrServerException.class.getName());
  MetricRegistry metrics = new MetricRegistry();

  // Alternative loader: new CollectingDocumentLoader(100)
  return new SolrMorphlineContext.Builder()
      .setDocumentLoader(testServer)
      .setExceptionHandler(exceptionHandler)
      .setMetricRegistry(metrics)
      .build();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Each entry of "expressions" (default: empty) is compiled through the grok dictionaries into a
 * reusable matcher. "extract" accepts the special value "inplace", which also turns extraction
 * on; any other value is read as a boolean (default true). "numRequiredMatches" defaults to
 * <code>atLeastOnce</code>; "findSubstrings" and "addEmptyStrings" default to false.
 */
public Grok(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  GrokDictionaries dictionaries = new GrokDictionaries(config, getConfigs());
  Config expressions = getConfigs().getConfig(config, "expressions", ConfigFactory.empty());
  for (Map.Entry<String, Object> expression : new Configs().getEntrySet(expressions)) {
    String pattern = expression.getValue().toString();
    this.regexes.add(new Regex(expression.getKey(), dictionaries.compileExpression(pattern).matcher("")));
  }

  // Input field of the first configured expression, or null when there are none.
  if (regexes.isEmpty()) {
    this.firstKey = null;
  } else {
    this.firstKey = regexes.iterator().next().getRecordInputField();
  }

  String extractSetting = getConfigs().getString(config, "extract", "true");
  this.extractInPlace = extractSetting.equals("inplace");
  // The boolean lookup is only performed when "inplace" was not requested.
  this.extract = extractInPlace || getConfigs().getBoolean(config, "extract", true);

  String requiredMatches =
      getConfigs().getString(config, "numRequiredMatches", NumRequiredMatches.atLeastOnce.toString());
  this.numRequiredMatches =
      new Validator<NumRequiredMatches>().validateEnum(config, requiredMatches, NumRequiredMatches.class);

  this.findSubstrings = getConfigs().getBoolean(config, "findSubstrings", false);
  this.addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", false);
  validateArguments();
  this.renderedConfig = config.root().render();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * Each entry of the required "paths" setting maps an output field name to a slash-separated
 * path. One leading and one trailing slash are stripped and the path is split into normalized
 * steps. A step ending in <code>ARRAY_TOKEN</code> is recorded as the step without that suffix,
 * followed by <code>ARRAY_TOKEN</code> as a step of its own. "flatten" defaults to true.
 *
 * @throws MorphlineCompilationException if a path contains "//"
 */
public ExtractAvroPaths(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  ListMultimap<String, String> stepsByField = ArrayListMultimap.create();
  this.flatten = getConfigs().getBoolean(config, "flatten", true);
  Config pathsConfig = getConfigs().getConfig(config, "paths");

  for (Map.Entry<String, Object> pathEntry : new Configs().getEntrySet(pathsConfig)) {
    String outputField = pathEntry.getKey();
    String pathExpr = pathEntry.getValue().toString().trim();
    if (pathExpr.contains("//")) {
      throw new MorphlineCompilationException("No support for descendant axis available yet", config);
    }

    // Strip one leading and one trailing slash, then surrounding whitespace.
    if (pathExpr.startsWith("/")) {
      pathExpr = pathExpr.substring(1);
    }
    if (pathExpr.endsWith("/")) {
      pathExpr = pathExpr.substring(0, pathExpr.length() - 1);
    }
    pathExpr = pathExpr.trim();

    for (String rawStep : pathExpr.split("/")) {
      String stepName = rawStep.trim();
      boolean isArrayStep = stepName.length() > ARRAY_TOKEN.length() && stepName.endsWith(ARRAY_TOKEN);
      if (isArrayStep) {
        stepName = stepName.substring(0, stepName.length() - ARRAY_TOKEN.length());
      }
      stepsByField.put(outputField, normalize(stepName));
      if (isArrayStep) {
        stepsByField.put(outputField, ARRAY_TOKEN);
      }
    }
  }

  this.stepMap = stepsByField.asMap();
  LOG.debug("stepMap: {}", stepMap);
  validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * "inputField" is required; "outputFieldPrefix" defaults to the empty string and
 * "failOnInvalidURI" defaults to false.
 */
public ExtractURIComponents(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  inputFieldName = getConfigs().getString(config, "inputField");
  outputFieldPrefix = getConfigs().getString(config, "outputFieldPrefix", "");
  failOnInvalidURI = getConfigs().getBoolean(config, "failOnInvalidURI", false);

  validateArguments();
}
public Unpack(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
super(builder, config, parent, child, context);
if (!config.hasPath(SUPPORTED_MIME_TYPES)) {
// for (MediaType mediaType : new PackageParser().getSupportedTypes(new ParseContext())) {
for (MediaType mediaType : SUPPORTED_TYPES) {
addSupportedMimeType(mediaType.toString());
}
addSupportedMimeType(GTAR); // apparently not already included in PackageParser.getSupportedTypes()
}
validateArguments();
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * The reader schema is taken from "readerSchemaString" if present, otherwise parsed from the
 * file named by "readerSchemaFile" if present, otherwise left null. The resolver cache is
 * created (capacity "schemaCacheCapacity", default 100) and the arguments are validated only
 * when the runtime class is exactly <code>ReadAvroContainer</code>; for any other runtime class
 * the cache stays null and no validation happens here.
 *
 * @throws MorphlineCompilationException if the schema file cannot be read
 */
public ReadAvroContainer(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String inlineSchema = getConfigs().getString(config, "readerSchemaString", null);
  // The file setting is only consulted when no inline schema was given.
  String schemaPath = (inlineSchema != null)
      ? null
      : getConfigs().getString(config, "readerSchemaFile", null);

  if (inlineSchema != null) {
    this.readerSchema = new Parser().parse(inlineSchema);
  } else if (schemaPath != null) {
    try {
      this.readerSchema = new Parser().parse(new File(schemaPath));
    } catch (IOException e) {
      throw new MorphlineCompilationException("Cannot parse external Avro reader schema file: " + schemaPath, config, e);
    }
  } else {
    this.readerSchema = null;
  }

  // NOTE(review): presumably subclasses validate their own arguments - confirm.
  if (getClass() != ReadAvroContainer.class) {
    resolverCache = null;
  } else {
    int cacheCapacity = getConfigs().getInt(config, "schemaCacheCapacity", 100);
    resolverCache = new BoundedLRUHashMap<ByteArrayKey, ResolvingDecoder>(cacheCapacity);
    validateArguments();
  }
}
/**
 * Builds the command from its morphline <code>config</code> and starts an SLF4J metrics
 * reporter unless one is already registered for the same metric registry and logger name.
 *
 * Optional settings: "defaultDurationUnit" (default milliseconds), "defaultRateUnit" (default
 * seconds), "frequency" (default 10 seconds), "logger" (default "metrics") and "marker".
 */
public StartReportingMetricsToSLF4J(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  MetricFilter metricFilter = PatternMetricFilter.parse(getConfigs(), config);
  TimeUnit durationUnit = getConfigs().getTimeUnit(config, "defaultDurationUnit", TimeUnit.MILLISECONDS);
  TimeUnit rateUnit = getConfigs().getTimeUnit(config, "defaultRateUnit", TimeUnit.SECONDS);
  // 10 secs, also see https://github.com/typesafehub/config/blob/master/HOCON.md#duration-format
  long frequencyNanos = getConfigs().getNanoseconds(config, "frequency", TimeUnit.SECONDS.toNanos(10));
  this.logger = getConfigs().getString(config, "logger", "metrics");
  String markerName = getConfigs().getString(config, "marker", null);
  validateArguments();

  MetricRegistry metricRegistry = context.getMetricRegistry();
  synchronized (REGISTRIES) {
    // Reporters are tracked per registry, keyed by logger name.
    Map<String, Slf4jReporter> reportersByLogger = REGISTRIES.get(metricRegistry);
    if (reportersByLogger == null) {
      reportersByLogger = Maps.newHashMap();
      REGISTRIES.put(metricRegistry, reportersByLogger);
    }

    if (reportersByLogger.get(logger) == null) {
      Builder reporterBuilder = Slf4jReporter.forRegistry(metricRegistry)
          .filter(metricFilter)
          .convertDurationsTo(durationUnit)
          .convertRatesTo(rateUnit)
          .outputTo(LoggerFactory.getLogger(logger));
      if (markerName != null) {
        reporterBuilder = reporterBuilder.markWith(new BasicMarkerFactory().getMarker(markerName));
      }
      Slf4jReporter newReporter = reporterBuilder.build();
      newReporter.start(frequencyNanos, TimeUnit.NANOSECONDS);
      reportersByLogger.put(logger, newReporter);
    }
  }
}
/**
 * Creates a locator from the given <code>config</code>.
 *
 * "collection", "zkHost", "solrHomeDir" and "solrUrl" default to null; "batchSize",
 * "zkClientSessionTimeout" and "zkClientConnectTimeout" keep their current field values when
 * absent from the config. The arguments are validated last.
 */
public SolrLocator(Config config, MorphlineContext context) {
  this(context);
  this.config = config;
  Configs settings = new Configs();

  // String settings, all optional.
  collectionName = settings.getString(config, "collection", null);
  zkHost = settings.getString(config, "zkHost", null);
  solrHomeDir = settings.getString(config, "solrHomeDir", null);
  solrUrl = settings.getString(config, "solrUrl", null);

  // Numeric settings fall back to the values the fields already hold.
  batchSize = settings.getInt(config, "batchSize", batchSize);
  zkClientSessionTimeout = settings.getInt(config, "zkClientSessionTimeout", zkClientSessionTimeout);
  zkClientConnectTimeout = settings.getInt(config, "zkClientConnectTimeout", zkClientConnectTimeout);

  LOG.trace("Constructed solrLocator: {}", this);
  settings.validateArguments(config);
}
/**
 * Builds the command from its morphline <code>config</code>.
 *
 * The name matcher is built from "blacklist" as include patterns (default: the single pattern
 * "*"), "whitelist" as exclude patterns (default: none) and "cacheCapacity" (default 10000).
 */
public RemoveFields(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.nameMatcher = new PatternNameMatcher(
      getConfigs().getStringList(config, "blacklist", Collections.singletonList("*")),
      getConfigs().getStringList(config, "whitelist", Collections.<String>emptyList()),
      getConfigs().getInt(config, "cacheCapacity", 10000));
  validateArguments();
}
/** Creates a <code>HashDigest</code> command for the given configuration, with this builder as its builder. */
@Override
public Command build(Config config, Command parent, Command child,
    MorphlineContext context) {
  Command hashDigest = new HashDigest(this, config, parent, child, context);
  return hashDigest;
}
/** Returns the morphline context held by this object. */
MorphlineContext getMorphlineContext() {
  return this.morphlineContext;
}
/** Creates a <code>FindReplace</code> command for the given configuration, with this builder as its builder. */
@Override
public Command build(Config config, Command parent, Command child, MorphlineContext context) {
  Command findReplace = new FindReplace(this, config, parent, child, context);
  return findReplace;
}