Listed below are example usages of org.apache.hadoop.hive.serde2.SerDeUtils, collected from several projects; the full source for each snippet is available on GitHub.
/**
 * Creates the Properties needed to initialize a SerDe.
 * @param storageDescriptor the storage descriptor of the table
 * @return the derived SerDe properties
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
    SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
    Map<String, String> parameters = serDeInfo.getParameters();
    Properties properties = new Properties();
    properties.setProperty(
            serdeConstants.SERIALIZATION_FORMAT,
            parameters.get(serdeConstants.SERIALIZATION_FORMAT));
    List<String> colTypes = new ArrayList<>();
    List<String> colNames = new ArrayList<>();
    List<FieldSchema> cols = storageDescriptor.getCols();
    for (FieldSchema col : cols) {
        colTypes.add(col.getType());
        colNames.add(col.getName());
    }
    properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
    // Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in older Hive versions. We use a literal to save on shims.
    properties.setProperty("column.name.delimiter", String.valueOf(SerDeUtils.COMMA));
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
    properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
    properties.putAll(parameters);
    return properties;
}
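A minimal usage sketch, assuming the method above is in scope: a StorageDescriptor is built by hand (rather than fetched from the metastore), and the derived properties are fed into a LazySimpleSerDe via the same SerDeUtils.initializeSerDe call the later snippets use. The column name and serialization format are illustrative values.

// Sketch only: a hand-built StorageDescriptor standing in for one from the metastore.
SerDeInfo serDeInfo = new SerDeInfo();
serDeInfo.setSerializationLib(LazySimpleSerDe.class.getName());
Map<String, String> params = new HashMap<>();
params.put(serdeConstants.SERIALIZATION_FORMAT, "1");
serDeInfo.setParameters(params);

StorageDescriptor sd = new StorageDescriptor();
sd.setSerdeInfo(serDeInfo);
sd.setCols(Collections.singletonList(new FieldSchema("id", "int", null)));

Properties props = createPropertiesFromStorageDescriptor(sd);

// Initialize the SerDe with the derived properties.
LazySimpleSerDe serde = new LazySimpleSerDe();
SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);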
private static void serializeList(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final ListObjectInspector loi) throws SerDeException {
    ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
    List<?> olist = loi.getList(obj);
    if (olist == null) {
        sb.append("null");
    } else {
        sb.append(SerDeUtils.LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
            if (i > 0) {
                sb.append(SerDeUtils.COMMA);
            }
            buildJSONString(sb, olist.get(i), listElementObjectInspector);
        }
        sb.append(SerDeUtils.RBRACKET);
    }
}
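A hedged sketch of what this produces, assuming it is invoked from within the same class (the method is private) with a standard list inspector; the element values are illustrative:

StringBuilder sb = new StringBuilder();
ListObjectInspector loi = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
serializeList(sb, Arrays.asList(1, 2, 3), loi);
// sb.toString() -> "[1,2,3]"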
/**
 * Serializes an object into bytes.
 * @param foi object inspector of the object
 * @param doi declared output object inspector
 * @param obj object to be serialized
 * @param useJsonSerialize true to use JSON serialization
 * @return the object in serialized bytes, or null if the serialized value is null
 * @throws IOException when an error occurs
 */
protected byte[] serializeToBytes(ObjectInspector foi, ObjectInspector doi, Object obj, boolean useJsonSerialize) throws IOException {
    serializeStream.reset();
    boolean isNotNull;
    if (!foi.getCategory().equals(Category.PRIMITIVE)
            && useJsonSerialize) {
        isNotNull = serialize(SerDeUtils.getJSONString(obj, foi),
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, doi, 1);
    } else {
        isNotNull = serialize(obj, foi, doi, 1);
    }
    if (!isNotNull) {
        return null;
    }
    byte[] key = new byte[serializeStream.getCount()];
    System.arraycopy(serializeStream.getData(), 0, key, 0, serializeStream.getCount());
    return key;
}
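The fields around this method are not shown in the snippet. A plausible reading, stated as an assumption: serializeStream is a reusable Hive ByteStream.Output, whose getData() returns the internal buffer and getCount() the number of valid bytes, which is why the result must be copied into a fresh array before the stream is reset for the next value.

// Assumed field declaration matching the calls above (not shown in the original snippet):
private final ByteStream.Output serializeStream = new ByteStream.Output();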
@Override
public void initialize(@Nullable Configuration conf, Properties tbl) throws SerDeException {
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
            ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    deviceId = tbl.getProperty(DEVICE_ID);
    if (columnNameProperty == null || columnNameProperty.isEmpty()
            || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
        columnNames = Collections.emptyList();
        columnTypes = Collections.emptyList();
    } else {
        columnNames = StringInternUtils.internStringsInList(
                Arrays.asList(columnNameProperty.split(columnNameDelimiter)));
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    // Check that the numbers of column names and column types match
    if (columnTypes.size() != columnNames.size()) {
        throw new TsFileSerDeException("len(columnNames) != len(columnTypes)");
    }
    oi = createObjectInspector();
}
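A minimal sketch of the table properties this initialize() consumes; the column names, types, and device id are made-up values, DEVICE_ID is a constant of the enclosing SerDe, and serde is an instance of it:

Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "time_stamp,sensor_1");  // comma-delimited by default
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint:bigint");   // colon-separated type string
tbl.setProperty(DEVICE_ID, "device_1");
serde.initialize(new Configuration(), tbl);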
private static void serializeMap(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final MapObjectInspector moi) throws SerDeException {
    ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
    ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
    Map<?, ?> omap = moi.getMap(obj);
    if (omap == null) {
        sb.append("null");
    } else {
        sb.append(SerDeUtils.LBRACE);
        boolean first = true;
        for (Object entry : omap.entrySet()) {
            if (first) {
                first = false;
            } else {
                sb.append(SerDeUtils.COMMA);
            }
            Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
            StringBuilder keyBuilder = new StringBuilder();
            buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
            String keyString = keyBuilder.toString().trim();
            // JSON keys must be quoted; add quotes unless the serialized key already starts with one.
            if ((!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE)) {
                appendWithQuotes(sb, keyString);
            } else {
                sb.append(keyString);
            }
            sb.append(SerDeUtils.COLON);
            buildJSONString(sb, e.getValue(), mapValueObjectInspector);
        }
        sb.append(SerDeUtils.RBRACE);
    }
}
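For illustration, and assuming serializePrimitive (not shown in this snippet) quotes string values, a standard map inspector would yield output like this:

StringBuilder sb = new StringBuilder();
MapObjectInspector moi = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
serializeMap(sb, Collections.singletonMap("a", 1), moi);
// sb.toString() -> "{\"a\":1}"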
private void checkInitialize() throws Exception {
    if (initialized) {
        return;
    }
    JobConf jobConf = confWrapper.conf();
    Object serdeLib = Class.forName(serDeInfo.getSerializationLib()).newInstance();
    Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
            "Expect a SerDe lib implementing both Serializer and Deserializer, but actually got "
                    + serdeLib.getClass().getName());
    this.recordSerDe = (Serializer) serdeLib;
    ReflectionUtils.setConf(recordSerDe, jobConf);
    // TODO: support partition properties; for now assume they're the same as table properties
    SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);
    this.formatFields = allColumns.length - partitionColumns.length;
    this.hiveConversions = new HiveObjectConversion[formatFields];
    this.converters = new DataFormatConverter[formatFields];
    List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
    for (int i = 0; i < formatFields; i++) {
        DataType type = allTypes[i];
        ObjectInspector objectInspector = HiveInspectors.getObjectInspector(type);
        objectInspectors.add(objectInspector);
        hiveConversions[i] = HiveInspectors.getConversion(
                objectInspector, type.getLogicalType(), hiveShim);
        converters[i] = DataFormatConverters.getConverterForDataType(type);
    }
    this.formatInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList(allColumns).subList(0, formatFields),
            objectInspectors);
    this.initialized = true;
}
@Override
public AbstractSerDe createSerde() throws SerializationError {
    try {
        Properties tableProps = table.getMetadata();
        tableProps.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(",").join(inputColumns));
        tableProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(":").join(inputTypes));
        NiFiRecordSerDe serde = new NiFiRecordSerDe(recordReader, log);
        SerDeUtils.initializeSerDe(serde, conf, tableProps, null);
        this.serde = serde;
        return serde;
    } catch (SerDeException e) {
        throw new SerializationError("Error initializing serde " + NiFiRecordSerDe.class.getName(), e);
    }
}
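For concreteness, with assumed inputs inputColumns = [id, name] and inputTypes = [int, string], the two properties come out in Hive's expected formats:

// columns       -> "id,name"      (serdeConstants.LIST_COLUMNS, comma-delimited)
// columns.types -> "int:string"   (serdeConstants.LIST_COLUMN_TYPES, colon-delimited)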
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Object serdeLib = Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
                "Expect a SerDe lib implementing both Serializer and Deserializer, but actually got " + serdeLib.getClass().getName());
        recordSerDe = (Serializer) serdeLib;
        ReflectionUtils.setConf(recordSerDe, jobConf);
        // TODO: support partition properties; for now assume they're the same as table properties
        SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);
        outputClass = recordSerDe.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);
    }
    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber + "_0");
    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);
    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
    } else {
        dynamicPartitionOffset = fieldNames.length - partitionColumns.size() + hiveTablePartition.getPartitionSpec().size();
    }
    numNonPartitionColumns = isPartitioned ? fieldNames.length - partitionColumns.size() : fieldNames.length;
    hiveConversions = new HiveObjectConversion[numNonPartitionColumns];
    List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
    for (int i = 0; i < numNonPartitionColumns; i++) {
        ObjectInspector objectInspector = HiveInspectors.getObjectInspector(fieldTypes[i]);
        objectInspectors.add(objectInspector);
        hiveConversions[i] = HiveInspectors.getConversion(objectInspector, fieldTypes[i].getLogicalType());
    }
    if (!isPartitioned) {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(fieldNames),
                objectInspectors);
    } else {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(fieldNames).subList(0, fieldNames.length - partitionColumns.size()),
                objectInspectors);
        defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
    }
}
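The padding expression above left-pads taskNumber with zeros to six digits. A clearer equivalent, assuming taskNumber is non-negative and below one million:

TaskAttemptID taskAttemptID = TaskAttemptID.forName(
        "attempt__0000_r_" + String.format("%06d", taskNumber) + "_0");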
private static void buildJSONString(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final ObjectInspector oi) throws SerDeException {
    switch (oi.getCategory()) {
        case PRIMITIVE: {
            PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
            serializePrimitive(sb, obj, poi);
            break;
        }
        case LIST: {
            ListObjectInspector loi = (ListObjectInspector) oi;
            serializeList(sb, obj, loi);
            break;
        }
        case MAP: {
            MapObjectInspector moi = (MapObjectInspector) oi;
            serializeMap(sb, obj, moi);
            break;
        }
        case STRUCT: {
            StructObjectInspector soi = (StructObjectInspector) oi;
            serializeStruct(sb, obj, soi, null);
            break;
        }
        case UNION: {
            UnionObjectInspector uoi = (UnionObjectInspector) oi;
            if (obj == null) {
                sb.append("null");
            } else {
                sb.append(SerDeUtils.LBRACE);
                sb.append(uoi.getTag(obj));
                sb.append(SerDeUtils.COLON);
                buildJSONString(sb, uoi.getField(obj),
                        uoi.getObjectInspectors().get(uoi.getTag(obj)));
                sb.append(SerDeUtils.RBRACE);
            }
            break;
        }
        default:
            throw new SerDeException("Unknown type in ObjectInspector: " + oi.getCategory());
    }
}
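A hedged sketch of the UNION branch using standard inspectors: tag 0 selects the int alternative, so the output pairs the tag with the field value (assuming serializePrimitive renders the int as 42):

StringBuilder sb = new StringBuilder();
UnionObjectInspector uoi = ObjectInspectorFactory.getStandardUnionObjectInspector(
        Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));
buildJSONString(sb, new StandardUnionObjectInspector.StandardUnion((byte) 0, 42), uoi);
// sb.toString() -> "{0:42}"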
@Nonnull
private static StringBuilder appendWithQuotes(@Nonnull final StringBuilder sb,
        @Nonnull final String value) {
    return sb.append(SerDeUtils.QUOTE).append(value).append(SerDeUtils.QUOTE);
}