Listed below are example usages of org.apache.flink.table.types.DataType, collected from open-source projects on GitHub.
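Before the collected snippets, here is a minimal sketch of the pattern most of them share: build a DataType with the DataTypes factory, optionally bridge it to an external conversion class, and inspect its LogicalType. The class and field names below are illustrative only and are not taken from any of the projects listed here.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;

public class DataTypeExample {
    public static void main(String[] args) {
        // Build a nested row type with the DataTypes factory methods.
        DataType rowType = DataTypes.ROW(
            DataTypes.FIELD("id", DataTypes.BIGINT()),
            DataTypes.FIELD("name", DataTypes.STRING()),
            DataTypes.FIELD("scores", DataTypes.ARRAY(DataTypes.DOUBLE())));

        // Ask for org.apache.flink.types.Row as the external conversion class.
        DataType bridged = rowType.bridgedTo(Row.class);

        // Every DataType wraps a LogicalType that carries nullability and precision.
        System.out.println(bridged.getLogicalType().asSummaryString());
    }
}
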
@Override
public DataType visit(KeyValueDataType keyValueDataType) {
    DataType newKeyType = keyValueDataType.getKeyDataType().accept(this);
    DataType newValueType = keyValueDataType.getValueDataType().accept(this);
    LogicalType logicalType = keyValueDataType.getLogicalType();
    LogicalType newLogicalType;
    if (logicalType instanceof MapType) {
        newLogicalType = new MapType(
            logicalType.isNullable(),
            newKeyType.getLogicalType(),
            newValueType.getLogicalType());
    } else {
        throw new UnsupportedOperationException("Unsupported logical type : " + logicalType);
    }
    return transformation.transform(new KeyValueDataType(newLogicalType, newKeyType, newValueType));
}

@Test
public void testStrategy() {
    if (testSpec.expectedSignature != null) {
        assertThat(
            generateSignature(),
            equalTo(testSpec.expectedSignature));
    }
    if (testSpec.expectedErrorMessage != null) {
        thrown.expect(ValidationException.class);
        thrown.expectCause(containsCause(new ValidationException(testSpec.expectedErrorMessage)));
    }
    for (List<DataType> actualArgumentTypes : testSpec.actualArgumentTypes) {
        TypeInferenceUtil.Result result = runTypeInference(actualArgumentTypes);
        if (testSpec.expectedArgumentTypes != null) {
            assertThat(result.getExpectedArgumentTypes(), equalTo(testSpec.expectedArgumentTypes));
        }
    }
}

@Test
public void testAddMonths() throws Exception {
    Assume.assumeTrue(HIVE_110_OR_LATER);
    HiveGenericUDF udf = init(
        Class.forName("org.apache.hadoop.hive.ql.udf.generic.GenericUDFAddMonths"),
        new Object[] {
            null,
            1
        },
        new DataType[] {
            DataTypes.STRING(),
            DataTypes.INT()
        }
    );
    assertEquals("2009-09-30", udf.eval("2009-08-31", 1));
    assertEquals("2009-09-30", udf.eval("2009-08-31 11:11:11", 1));
}

public CsvInputFormat(
        Path[] filePaths,
        DataType[] fieldTypes,
        String[] fieldNames,
        CsvSchema csvSchema,
        RowType formatRowType,
        int[] selectFields,
        List<String> partitionKeys,
        String defaultPartValue,
        long limit,
        int[] csvSelectFieldToProjectFieldMapping,
        int[] csvSelectFieldToCsvFieldMapping,
        boolean ignoreParseErrors) {
    super(filePaths, csvSchema);
    this.fieldTypes = fieldTypes;
    this.fieldNames = fieldNames;
    this.formatRowType = formatRowType;
    this.partitionKeys = partitionKeys;
    this.defaultPartValue = defaultPartValue;
    this.selectFields = selectFields;
    this.limit = limit;
    this.emitted = 0;
    this.csvSelectFieldToProjectFieldMapping = csvSelectFieldToProjectFieldMapping;
    this.csvSelectFieldToCsvFieldMapping = csvSelectFieldToCsvFieldMapping;
    this.ignoreParseErrors = ignoreParseErrors;
}

private static AdaptedCallContext inferInputTypes(
        TypeInference typeInference,
        CallContext callContext,
        @Nullable DataType outputType,
        boolean throwOnFailure) {
    final AdaptedCallContext adaptedCallContext = new AdaptedCallContext(callContext, outputType);
    // typed arguments have highest priority
    typeInference.getTypedArguments().ifPresent(adaptedCallContext::setExpectedArguments);
    final List<DataType> inferredDataTypes = typeInference.getInputTypeStrategy()
        .inferInputTypes(adaptedCallContext, throwOnFailure)
        .orElse(null);
    if (inferredDataTypes != null) {
        adaptedCallContext.setExpectedArguments(inferredDataTypes);
    } else if (throwOnFailure) {
        throw new ValidationException("Invalid input arguments.");
    }
    return adaptedCallContext;
}

private void testField(
        DataType fieldType,
        String csvValue,
        Object value,
        Consumer<CsvRowDataSerializationSchema.Builder> serializationConfig,
        Consumer<CsvRowDataDeserializationSchema.Builder> deserializationConfig,
        String fieldDelimiter) throws Exception {
    RowType rowType = (RowType) ROW(
        FIELD("f0", STRING()),
        FIELD("f1", fieldType),
        FIELD("f2", STRING())
    ).getLogicalType();
    String expectedCsv = "BEGIN" + fieldDelimiter + csvValue + fieldDelimiter + "END\n";
    // deserialization
    CsvRowDataDeserializationSchema.Builder deserSchemaBuilder =
        new CsvRowDataDeserializationSchema.Builder(rowType, new RowDataTypeInfo(rowType));
    deserializationConfig.accept(deserSchemaBuilder);
    RowData deserializedRow = deserialize(deserSchemaBuilder, expectedCsv);
    // serialization
    CsvRowDataSerializationSchema.Builder serSchemaBuilder = new CsvRowDataSerializationSchema.Builder(rowType);
    serializationConfig.accept(serSchemaBuilder);
    byte[] serializedRow = serialize(serSchemaBuilder, deserializedRow);
    assertEquals(expectedCsv, new String(serializedRow));
}

public ParquetInputFormat(
        Path[] paths,
        String[] fullFieldNames,
        DataType[] fullFieldTypes,
        int[] selectedFields,
        String partDefaultName,
        long limit,
        Configuration conf,
        boolean utcTimestamp) {
    super.setFilePaths(paths);
    this.limit = limit;
    this.partDefaultName = partDefaultName;
    this.fullFieldNames = fullFieldNames;
    this.fullFieldTypes = fullFieldTypes;
    this.selectedFields = selectedFields;
    this.conf = new SerializableConfiguration(conf);
    this.utcTimestamp = utcTimestamp;
}

@Override
public DataType getProducedDataType() {
    TableSchema fullSchema = getTableSchema();
    DataType type;
    if (projectedFields == null) {
        type = fullSchema.toRowDataType();
    } else {
        String[] fullNames = fullSchema.getFieldNames();
        DataType[] fullTypes = fullSchema.getFieldDataTypes();
        type = TableSchema.builder().fields(
                Arrays.stream(projectedFields).mapToObj(i -> fullNames[i]).toArray(String[]::new),
                Arrays.stream(projectedFields).mapToObj(i -> fullTypes[i]).toArray(DataType[]::new))
            .build().toRowDataType();
    }
    return type.bridgedTo(BaseRow.class);
}

@Test
public void testComplexDataTypes() throws Exception {
    DataType[] types = new DataType[]{
        DataTypes.ARRAY(DataTypes.DOUBLE()),
        DataTypes.MAP(DataTypes.FLOAT(), DataTypes.BIGINT()),
        DataTypes.ROW(
            DataTypes.FIELD("0", DataTypes.BOOLEAN()),
            DataTypes.FIELD("1", DataTypes.BOOLEAN()),
            DataTypes.FIELD("2", DataTypes.DATE())),
        // nested complex types
        DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.INT())),
        DataTypes.MAP(DataTypes.STRING(), DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())),
        DataTypes.ROW(
            DataTypes.FIELD("3", DataTypes.ARRAY(DataTypes.DECIMAL(5, 3))),
            DataTypes.FIELD("4", DataTypes.MAP(DataTypes.TINYINT(), DataTypes.SMALLINT())),
            DataTypes.FIELD("5", DataTypes.ROW(DataTypes.FIELD("3", DataTypes.TIMESTAMP(9))))
        )
    };
    verifyDataTypes(types);
}

private TypeInferenceUtil.Result resolveWithCastedAssignment(
        UnresolvedCallExpression unresolvedCall,
        List<ResolvedExpression> args,
        List<TypeInformation<?>> expectedTypes,
        TypeInformation<?> resultType) {
    final List<PlannerExpression> plannerArgs = unresolvedCall.getChildren()
        .stream()
        .map(e -> e.accept(CONVERTER))
        .collect(Collectors.toList());
    final List<DataType> castedArgs = IntStream.range(0, plannerArgs.size())
        .mapToObj(idx -> castIfNeeded(
            args.get(idx),
            plannerArgs.get(idx),
            expectedTypes.get(idx)))
        .collect(Collectors.toList());
    return new TypeInferenceUtil.Result(
        castedArgs,
        null,
        fromLegacyInfoToDataType(resultType));
}

/**
 * Returns the names of all registered column qualifiers of a specific column family.
 *
 * @param family The name of the column family for which the column qualifier names are returned.
 * @return The names of all registered column qualifiers of a specific column family.
 */
public String[] getQualifierNames(String family) {
    Map<String, DataType> qualifierMap = familyMap.get(family);
    if (qualifierMap == null) {
        throw new IllegalArgumentException("Family " + family + " does not exist in schema.");
    }
    String[] qualifierNames = new String[qualifierMap.size()];
    int i = 0;
    for (String qualifier : qualifierMap.keySet()) {
        qualifierNames[i] = qualifier;
        i++;
    }
    return qualifierNames;
}

private JdbcTableSource(
        JdbcOptions options, JdbcReadOptions readOptions, JdbcLookupOptions lookupOptions,
        TableSchema schema, int[] selectFields) {
    this.options = options;
    this.readOptions = readOptions;
    this.lookupOptions = lookupOptions;
    this.schema = schema;
    this.selectFields = selectFields;
    final DataType[] schemaDataTypes = schema.getFieldDataTypes();
    final String[] schemaFieldNames = schema.getFieldNames();
    if (selectFields != null) {
        DataType[] dataTypes = new DataType[selectFields.length];
        String[] fieldNames = new String[selectFields.length];
        for (int i = 0; i < selectFields.length; i++) {
            dataTypes[i] = schemaDataTypes[selectFields[i]];
            fieldNames[i] = schemaFieldNames[selectFields[i]];
        }
        this.producedDataType =
            TableSchema.builder().fields(fieldNames, dataTypes).build().toRowDataType();
    } else {
        this.producedDataType = schema.toRowDataType();
    }
}

@Override
public DataType getProducedDataType() {
    return TableSchema.builder()
        .fields(config.getSelectedFieldNames(), config.getSelectedFieldDataTypes())
        .build()
        .toRowDataType();
}

private BiFunction<PositionedGetter, Integer, Object> singleValueConverter(DataType dataType, Schema avroType) throws SchemaUtils.IncompatibleSchemaException {
    LogicalTypeRoot tpe = dataType.getLogicalType().getTypeRoot();
    Schema.Type atpe = avroType.getType();
    if (tpe == LogicalTypeRoot.NULL && atpe == Schema.Type.NULL) {
        return (getter, ordinal) -> null;
    } else if ((tpe == LogicalTypeRoot.BOOLEAN && atpe == Schema.Type.BOOLEAN) ||
            (tpe == LogicalTypeRoot.TINYINT && atpe == Schema.Type.INT) ||
            (tpe == LogicalTypeRoot.SMALLINT && atpe == Schema.Type.INT) ||
            (tpe == LogicalTypeRoot.INTEGER && atpe == Schema.Type.INT) ||
            (tpe == LogicalTypeRoot.BIGINT && atpe == Schema.Type.LONG) ||
            (tpe == LogicalTypeRoot.FLOAT && atpe == Schema.Type.FLOAT) ||
            (tpe == LogicalTypeRoot.DOUBLE && atpe == Schema.Type.DOUBLE) ||
            (tpe == LogicalTypeRoot.VARCHAR && atpe == Schema.Type.STRING) ||
            (tpe == LogicalTypeRoot.VARBINARY && atpe == Schema.Type.BYTES) ||
            (tpe == LogicalTypeRoot.DATE && atpe == Schema.Type.INT)) {
        return (getter, ordinal) -> getter.getField(ordinal);
    } else if (tpe == LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE && atpe == Schema.Type.LONG) {
        LogicalType altpe = avroType.getLogicalType();
        if (altpe instanceof LogicalTypes.TimestampMillis || altpe instanceof LogicalTypes.TimestampMicros) {
            return (getter, ordinal) -> getter.getField(ordinal);
        } else {
            throw new SchemaUtils.IncompatibleSchemaException(
                "Cannot convert flink timestamp to avro logical type " + altpe.toString());
        }
    } else {
        throw new SchemaUtils.IncompatibleSchemaException(String.format(
            "Cannot convert flink type %s to avro type %s", dataType.toString(), avroType.toString(true)));
    }
}

private Function<Object, Object> newStructConverter(FieldsDataType dataType, Schema avroStruct) throws SchemaUtils.IncompatibleSchemaException {
    if (avroStruct.getType() != RECORD ||
            avroStruct.getFields().size() != dataType.getFieldDataTypes().size()) {
        throw new SchemaUtils.IncompatibleSchemaException(
            String.format("Cannot convert Flink type %s to Avro type %s.", dataType.toString(), avroStruct.toString(true)));
    }
    Map<String, DataType> fieldsType = dataType.getFieldDataTypes();
    List<RowType.RowField> fields = ((RowType) dataType.getLogicalType()).getFields();
    List<BiFunction<PositionedGetter, Integer, Object>> fieldConverters = new ArrayList<>();
    for (int i = 0; i < fields.size(); i++) {
        RowType.RowField rf = fields.get(i);
        DataType dt = fieldsType.get(rf.getName());
        Schema.Field at = avroStruct.getFields().get(i);
        fieldConverters.add(newConverter(dt, resolveNullableType(at.schema(), dt.getLogicalType().isNullable())));
    }
    int numFields = fieldsType.size();
    return row -> {
        GenericSchema<GenericRecord> pSchema = SchemaUtils.avroSchema2PulsarSchema(avroStruct);
        GenericRecordBuilder builder = pSchema.newRecordBuilder();
        Row rowX = (Row) row;
        for (int i = 0; i < numFields; i++) {
            if (rowX.getField(i) == null) {
                builder.set(pSchema.getFields().get(i), null);
            } else {
                builder.set(pSchema.getFields().get(i), fieldConverters.get(i).apply(new PositionedGetter(rowX), i));
            }
        }
        return (GenericAvroRecord) builder.build();
    };
}

public Result(
        List<DataType> expectedArgumentTypes,
        @Nullable DataType accumulatorDataType,
        DataType outputDataType) {
    this.expectedArgumentTypes = expectedArgumentTypes;
    this.accumulatorDataType = accumulatorDataType;
    this.outputDataType = outputDataType;
}

@Test
public void testIsCompositeTypeSimpleType() {
    DataType dataType = DataTypes.TIMESTAMP();
    boolean isCompositeType = LogicalTypeChecks.isCompositeType(dataType.getLogicalType());
    assertThat(isCompositeType, is(false));
}

public QueryOperation create(
        List<ResolvedExpression> projectList,
        QueryOperation child,
        boolean explicitAlias,
        ExpressionResolver.PostResolverFactory postResolverFactory) {
    final NamingVisitor namingVisitor = new NamingVisitor(postResolverFactory);
    final List<ResolvedExpression> namedExpressions = nameExpressions(namingVisitor, projectList);
    String[] fieldNames = validateAndGetUniqueNames(namedExpressions);
    final List<ResolvedExpression> finalExpression;
    if (explicitAlias) {
        finalExpression = namedExpressions;
    } else {
        finalExpression = namedExpressions.stream()
            .map(expr -> expr.accept(stripAliases))
            .collect(Collectors.toList());
    }
    DataType[] fieldTypes = namedExpressions.stream()
        .map(ResolvedExpression::getOutputDataType)
        .toArray(DataType[]::new);
    TableSchema tableSchema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    return new ProjectQueryOperation(finalExpression, child, tableSchema);
}

@Test
public void testUDFToInteger() {
    HiveSimpleUDF udf = init(
        UDFToInteger.class,
        new DataType[]{
            DataTypes.DECIMAL(5, 3)
        });
    assertEquals(1, udf.eval(BigDecimal.valueOf(1.1d)));
}

@Test
public void testExpandStructuredType() {
    StructuredType logicalType = StructuredType.newBuilder(ObjectIdentifier.of("catalog", "database", "type"))
        .attributes(Arrays.asList(
            new StructuredType.StructuredAttribute("f0", DataTypes.INT().getLogicalType()),
            new StructuredType.StructuredAttribute("f1", DataTypes.STRING().getLogicalType()),
            new StructuredType.StructuredAttribute("f2", DataTypes.TIMESTAMP(5).getLogicalType()),
            new StructuredType.StructuredAttribute("f3", DataTypes.TIMESTAMP(3).getLogicalType())
        ))
        .build();
    List<DataType> dataTypes = Arrays.asList(
        DataTypes.INT(),
        DataTypes.STRING(),
        DataTypes.TIMESTAMP(5).bridgedTo(Timestamp.class),
        DataTypes.TIMESTAMP(3));
    FieldsDataType dataType = new FieldsDataType(logicalType, dataTypes);
    TableSchema schema = DataTypeUtils.expandCompositeTypeToSchema(dataType);
    assertThat(
        schema,
        equalTo(
            TableSchema.builder()
                .field("f0", INT())
                .field("f1", STRING())
                .field("f2", TIMESTAMP(5).bridgedTo(Timestamp.class))
                .field("f3", TIMESTAMP(3).bridgedTo(LocalDateTime.class))
                .build()));
}

public JacksonRecordParser(DataType schema, JSONOptions options) {
    assert schema instanceof FieldsDataType;
    this.schema = schema;
    this.options = options;
    this.rootConverter = makeStructRootConverter((FieldsDataType) schema);
    this.factory = new JsonFactory();
    options.setJacksonOptions(factory);
}

/**
 * Declares a list of fields to be rowtime attributes.
 *
 * @param rowtimeAttributeDescriptors The descriptors of the rowtime attributes.
 */
protected void setRowtimeAttributeDescriptors(List<RowtimeAttributeDescriptor> rowtimeAttributeDescriptors) {
    // validate that all declared fields exist and are of correct type
    for (RowtimeAttributeDescriptor desc : rowtimeAttributeDescriptors) {
        String rowtimeAttribute = desc.getAttributeName();
        Optional<DataType> tpe = schema.getFieldDataType(rowtimeAttribute);
        if (!tpe.isPresent()) {
            throw new ValidationException("Rowtime attribute " + rowtimeAttribute + " is not present in TableSchema.");
        } else if (tpe.get().getLogicalType().getTypeRoot() != LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) {
            throw new ValidationException("Rowtime attribute " + rowtimeAttribute + " is not of type TIMESTAMP.");
        }
    }
    this.rowtimeAttributeDescriptors = rowtimeAttributeDescriptors;
}

/**
 * Convert Hive data type to a Flink data type.
 *
 * @param hiveType a Hive data type
 * @return the corresponding Flink data type
 */
public static DataType toFlinkType(TypeInfo hiveType) {
    checkNotNull(hiveType, "hiveType cannot be null");
    switch (hiveType.getCategory()) {
        case PRIMITIVE:
            return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) hiveType;
            return DataTypes.ARRAY(toFlinkType(listTypeInfo.getListElementTypeInfo()));
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) hiveType;
            return DataTypes.MAP(toFlinkType(mapTypeInfo.getMapKeyTypeInfo()), toFlinkType(mapTypeInfo.getMapValueTypeInfo()));
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType;
            List<String> names = structTypeInfo.getAllStructFieldNames();
            List<TypeInfo> typeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            DataTypes.Field[] fields = new DataTypes.Field[names.size()];
            for (int i = 0; i < fields.length; i++) {
                fields[i] = DataTypes.FIELD(names.get(i), toFlinkType(typeInfos.get(i)));
            }
            return DataTypes.ROW(fields);
        default:
            throw new UnsupportedOperationException(
                String.format("Flink doesn't support Hive data type %s yet.", hiveType));
    }
}

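A rough usage sketch for the conversion above, not taken from the original project; it assumes Hive's TypeInfoFactory is on the classpath and that the call site sits in the same class as toFlinkType. The expected result in the comment is only indicative.

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.flink.table.types.DataType;

// Convert a Hive map<string,int> type into a Flink DataType (roughly MAP<STRING, INT>).
TypeInfo hiveMapType = TypeInfoFactory.getMapTypeInfo(
    TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo);
DataType flinkType = toFlinkType(hiveMapType);
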
static RelDataType invokeGetResultType(
        Object function, Object[] constantArguments, LogicalType[] argTypes,
        FlinkTypeFactory typeFactory) {
    try {
        // See hive HiveFunction
        Method method = function.getClass()
            .getMethod("getHiveResultType", Object[].class, DataType[].class);
        DataType resultType = (DataType) method.invoke(
            function, constantArguments, TypeConversions.fromLogicalToDataType(argTypes));
        return typeFactory.createFieldTypeFromLogicalType(fromDataTypeToLogicalType(resultType));
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException(e);
    }
}

private static RexNode convertThrowException(CallExpression call, ConvertContext context) {
    checkArgumentNumber(call, 2);
    DataType type = ((TypeLiteralExpression) call.getChildren().get(1)).getOutputDataType();
    SqlThrowExceptionFunction function = new SqlThrowExceptionFunction(
        context.getTypeFactory().createFieldTypeFromLogicalType(fromDataTypeToLogicalType(type)));
    return context.getRelBuilder().call(function, context.toRexNode(call.getChildren().get(0)));
}

public CallExpression get(ResolvedExpression composite, ValueLiteralExpression key, DataType dataType) {
    final FunctionLookup.Result lookupOfGet = functionLookup
        .lookupBuiltInFunction(BuiltInFunctionDefinitions.GET);
    return new CallExpression(
        lookupOfGet.getObjectIdentifier(),
        lookupOfGet.getFunctionDefinition(),
        Arrays.asList(composite, key),
        dataType);
}

@Test
public void testConversions() {
    if (testSpec.expectedErrorMessage != null) {
        thrown.expect(TableException.class);
        thrown.expectMessage(equalTo(testSpec.expectedErrorMessage));
    }
    for (Map.Entry<Class<?>, Object> from : testSpec.conversions.entrySet()) {
        final DataType fromDataType = testSpec.dataType.bridgedTo(from.getKey());
        final DataStructureConverter<Object, Object> fromConverter =
            simulateSerialization(DataStructureConverters.getConverter(fromDataType));
        fromConverter.open(DataStructureConvertersTest.class.getClassLoader());
        final Object internalValue = fromConverter.toInternalOrNull(from.getValue());
        final Object anotherValue = testSpec.conversionsWithAnotherValue.get(from.getKey());
        if (anotherValue != null) {
            fromConverter.toInternalOrNull(anotherValue);
        }
        for (Map.Entry<Class<?>, Object> to : testSpec.conversions.entrySet()) {
            final DataType toDataType = testSpec.dataType.bridgedTo(to.getKey());
            final DataStructureConverter<Object, Object> toConverter =
                simulateSerialization(DataStructureConverters.getConverter(toDataType));
            toConverter.open(DataStructureConvertersTest.class.getClassLoader());
            assertArrayEquals(
                new Object[]{to.getValue()},
                new Object[]{toConverter.toExternalOrNull(internalValue)});
        }
    }
}

public CallExpression cast(ResolvedExpression expression, DataType dataType) {
    final FunctionLookup.Result lookupOfCast = functionLookup
        .lookupBuiltInFunction(BuiltInFunctionDefinitions.CAST);
    return new CallExpression(
        lookupOfCast.getObjectIdentifier(),
        lookupOfCast.getFunctionDefinition(),
        Arrays.asList(expression, typeLiteral(dataType)),
        dataType);
}

@Test
public void testUDFConv() {
    HiveSimpleUDF udf = init(
        UDFConv.class,
        new DataType[]{
            DataTypes.STRING(),
            DataTypes.INT(),
            DataTypes.INT()
        });
    assertEquals("1", udf.eval("12", 2, 10));
    assertEquals("-16", udf.eval(-10, 16, -10));
}

private static RowType createJsonRowType(DataType databaseSchema) {
    // Canal JSON contains other information, e.g. "database", "ts"
    // but we don't need them
    return (RowType) DataTypes.ROW(
        DataTypes.FIELD("data", DataTypes.ARRAY(databaseSchema)),
        DataTypes.FIELD("old", DataTypes.ARRAY(databaseSchema)),
        DataTypes.FIELD("type", DataTypes.STRING())).getLogicalType();
}