Listed below are example usages of org.apache.hadoop.hive.metastore.api.StorageDescriptor, collected from open-source projects. You can also follow the links to view the full source code on GitHub.
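Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: build a StorageDescriptor, give it columns and SerDe info, and attach it to a Table. This sketch is illustrative only and is not taken from any of the projects below; the column names and format classes are assumptions.

import java.util.ArrayList;
import java.util.HashMap;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

public class StorageDescriptorSketch {

    // Assemble a StorageDescriptor for a plain text table and attach it to a Table.
    public static Table newTextTable(String dbName, String tableName, String location) {
        StorageDescriptor sd = new StorageDescriptor();
        sd.setLocation(location);
        sd.setCols(new ArrayList<FieldSchema>());
        sd.getCols().add(new FieldSchema("id", "int", null));
        sd.getCols().add(new FieldSchema("name", "string", null));
        sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
        sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");

        SerDeInfo serDeInfo = new SerDeInfo();
        serDeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
        serDeInfo.setParameters(new HashMap<String, String>());
        sd.setSerdeInfo(serDeInfo);

        Table table = new Table();
        table.setDbName(dbName);
        table.setTableName(tableName);
        table.setParameters(new HashMap<String, String>());
        table.setSd(sd);
        return table;
    }
}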
static Table createEmptyTable(String namespace, String name) {
Table table = new Table();
table.setDbName(namespace);
table.setTableName(name);
table.setPartitionKeys(new ArrayList<FieldSchema>());
table.setParameters(new HashMap<String, String>());
StorageDescriptor sd = new StorageDescriptor();
sd.setSerdeInfo(new SerDeInfo());
sd.setNumBuckets(-1);
sd.setBucketCols(new ArrayList<String>());
sd.setCols(new ArrayList<FieldSchema>());
sd.setParameters(new HashMap<String, String>());
sd.setSortCols(new ArrayList<Order>());
sd.getSerdeInfo().setParameters(new HashMap<String, String>());
SkewedInfo skewInfo = new SkewedInfo();
skewInfo.setSkewedColNames(new ArrayList<String>());
skewInfo.setSkewedColValues(new ArrayList<List<String>>());
skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
sd.setSkewedInfo(skewInfo);
table.setSd(sd);
return table;
}
public static com.amazonaws.services.glue.model.StorageDescriptor convertStorageDescriptor(
StorageDescriptor hiveSd) {
com.amazonaws.services.glue.model.StorageDescriptor catalogSd =
new com.amazonaws.services.glue.model.StorageDescriptor();
catalogSd.setNumberOfBuckets(hiveSd.getNumBuckets());
catalogSd.setCompressed(hiveSd.isCompressed());
catalogSd.setParameters(hiveSd.getParameters());
catalogSd.setBucketColumns(hiveSd.getBucketCols());
catalogSd.setColumns(convertFieldSchemaList(hiveSd.getCols()));
catalogSd.setInputFormat(hiveSd.getInputFormat());
catalogSd.setLocation(hiveSd.getLocation());
catalogSd.setOutputFormat(hiveSd.getOutputFormat());
catalogSd.setSerdeInfo(convertSerDeInfo(hiveSd.getSerdeInfo()));
catalogSd.setSkewedInfo(convertSkewedInfo(hiveSd.getSkewedInfo()));
catalogSd.setSortColumns(convertOrderList(hiveSd.getSortCols()));
catalogSd.setStoredAsSubDirectories(hiveSd.isStoredAsSubDirectories());
return catalogSd;
}
@Test
public void testCheckTableSchemaTypeMappingInvalid() throws MetaException {
TableDescription description = getHashRangeTable();
Table table = new Table();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
"col2:dynamo_col2#,hashKey:hashKey");
parameters.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, "col2:NS");
table.setParameters(parameters);
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = Lists.newArrayList();
cols.add(new FieldSchema("col1", "string", ""));
cols.add(new FieldSchema("col2", "bigint", ""));
cols.add(new FieldSchema("hashKey", "string", ""));
sd.setCols(cols);
table.setSd(sd);
exceptionRule.expect(MetaException.class);
exceptionRule.expectMessage("The DynamoDB type NS does not support Hive type bigint");
storageHandler.checkTableSchemaType(description, table);
}
public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, List<FieldSchema> schema)
{
StorageDescriptor storageDescriptor = partition.getSd();
if (storageDescriptor == null) {
throw new PrestoException(HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
}
Partition.Builder partitionBuilder = Partition.builder()
.setDatabaseName(partition.getDbName())
.setTableName(partition.getTableName())
.setValues(partition.getValues())
.setColumns(schema.stream()
.map(ThriftMetastoreUtil::fromMetastoreApiFieldSchema)
.collect(toImmutableList()))
.setParameters(partition.getParameters());
// TODO is bucketing_version set on partition level??
fromMetastoreApiStorageDescriptor(
partition.getParameters(),
storageDescriptor,
partitionBuilder.getStorageBuilder(),
format("%s.%s", partition.getTableName(), partition.getValues()));
return partitionBuilder.build();
}
private static void fromMetastoreApiStorageDescriptor(
Map<String, String> tableParameters,
StorageDescriptor storageDescriptor,
Storage.Builder builder,
String tablePartitionName)
{
SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
if (serdeInfo == null) {
throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
}
builder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLib(), storageDescriptor.getInputFormat(), storageDescriptor.getOutputFormat()))
.setLocation(nullToEmpty(storageDescriptor.getLocation()))
.setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
.setSkewed(storageDescriptor.isSetSkewedInfo() && storageDescriptor.getSkewedInfo().isSetSkewedColNames() && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
.setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}
@Test
public void testCheckTableSchemaTypeValid() throws MetaException {
TableDescription description = getHashRangeTable();
Table table = new Table();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
"col2:dynamo_col2#,hashKey:hashKey");
table.setParameters(parameters);
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = Lists.newArrayList();
cols.add(new FieldSchema("col1", "string", ""));
cols.add(new FieldSchema("col2", "bigint", ""));
cols.add(new FieldSchema("hashKey", "string", ""));
sd.setCols(cols);
table.setSd(sd);
// This check is expected to pass for the given input
storageHandler.checkTableSchemaType(description, table);
}
private Partition instantiateHivePartition(Table hiveTable, CatalogPartitionSpec partitionSpec, CatalogPartition catalogPartition)
throws PartitionSpecInvalidException {
List<String> partCols = getFieldNames(hiveTable.getPartitionKeys());
List<String> partValues = getOrderedFullPartitionValues(
partitionSpec, partCols, new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()));
// validate partition values
for (int i = 0; i < partCols.size(); i++) {
if (StringUtils.isNullOrWhitespaceOnly(partValues.get(i))) {
throw new PartitionSpecInvalidException(getName(), partCols,
new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()), partitionSpec);
}
}
// TODO: handle GenericCatalogPartition
StorageDescriptor sd = hiveTable.getSd().deepCopy();
sd.setLocation(catalogPartition.getProperties().remove(HiveCatalogConfig.PARTITION_LOCATION));
Map<String, String> properties = new HashMap<>(catalogPartition.getProperties());
properties.put(HiveCatalogConfig.COMMENT, catalogPartition.getComment());
return HiveTableUtil.createHivePartition(
hiveTable.getDbName(),
hiveTable.getTableName(),
partValues,
sd,
properties);
}
/**
* Creates the properties needed to initialize a SerDe from a StorageDescriptor.
*
* @param storageDescriptor the StorageDescriptor of the Hive table or partition
* @return the SerDe initialization properties (serialization format, column names and types,
*         null format, and the SerDe parameters)
*/
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
Map<String, String> parameters = serDeInfo.getParameters();
Properties properties = new Properties();
properties.setProperty(
serdeConstants.SERIALIZATION_FORMAT,
parameters.get(serdeConstants.SERIALIZATION_FORMAT));
List<String> colTypes = new ArrayList<>();
List<String> colNames = new ArrayList<>();
List<FieldSchema> cols = storageDescriptor.getCols();
for (FieldSchema col: cols){
colTypes.add(col.getType());
colNames.add(col.getName());
}
properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in older Hive versions, so use the literal value to avoid a shim
properties.setProperty("column.name.delimiter", String.valueOf(SerDeUtils.COMMA));
properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
properties.putAll(parameters);
return properties;
}
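As a rough usage sketch (not part of the snippet above), the returned Properties can be handed to a SerDe's initialize method. The choice of LazySimpleSerDe, the plain Configuration, and the two-argument initialize signature (present in Hive 1.x/2.x) are assumptions here, and createPropertiesFromStorageDescriptor refers to the method shown above.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class SerDeInitSketch {

    // Hypothetical helper: build SerDe properties from a StorageDescriptor and
    // initialize a LazySimpleSerDe with them (the SerDe choice is an assumption).
    public static LazySimpleSerDe initializeSerDe(StorageDescriptor storageDescriptor) throws SerDeException {
        // createPropertiesFromStorageDescriptor is the method defined in the snippet above
        Properties properties = createPropertiesFromStorageDescriptor(storageDescriptor);
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        serDe.initialize(new Configuration(), properties);
        return serDe;
    }
}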
/**
* Updates the Avro schema URL stored in the SerDe parameters of the given StorageDescriptor.
*
* @param entity name of the entity to be changed, e.g. a Hive table or partition
* @param sd StorageDescriptor of the entity
* @param hiveHelper helper providing the source/target file systems and target path resolution
*/
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
if (oldAvroSchemaURL != null) {
Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();
if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
|| (oldAvroSchemaPath.toUri().getScheme().equals(sourceFileSystemURI.getScheme())
&& oldAvroSchemaPath.toUri().getAuthority().equals(sourceFileSystemURI.getAuthority()))) {
String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
Optional.<Partition>absent(), true).toString();
sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
log.info(String.format("For entity %s, change %s from %s to %s", entity,
HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
}
}
}
private static ColumnStatistics createHiveColumnStatistics(
Map<String, CatalogColumnStatisticsDataBase> colStats,
StorageDescriptor sd,
ColumnStatisticsDesc desc) {
List<ColumnStatisticsObj> colStatsList = new ArrayList<>();
for (FieldSchema field : sd.getCols()) {
String hiveColName = field.getName();
String hiveColType = field.getType();
CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
if (null != flinkColStat) {
ColumnStatisticsData statsData =
getColumnStatisticsData(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
colStatsList.add(columnStatisticsObj);
}
}
return new ColumnStatistics(desc, colStatsList);
}
private String getFilePath(Table tbl) throws Exception {
StorageDescriptor descTable = tbl.getSd();
InputFormat<?, ?> fformat = HiveDataFragmenter.makeInputFormat(descTable.getInputFormat(), jobConf);
FileInputFormat.setInputPaths(jobConf, new Path(descTable.getLocation()));
InputSplit[] splits;
try {
splits = fformat.getSplits(jobConf, 1);
} catch (org.apache.hadoop.mapred.InvalidInputException e) {
LOG.debug("getSplits failed on " + e.getMessage());
throw new RuntimeException("Unable to get file path for table.");
}
for (InputSplit split : splits) {
FileSplit fsp = (FileSplit) split;
String[] hosts = fsp.getLocations();
String filepath = fsp.getPath().toString();
return filepath;
}
throw new RuntimeException("Unable to get file path for table.");
}
public Table makeMetastoreTableObject(HiveMetaStoreClient client,
String dbName, String tabName, List<FieldSchema> cols) throws Exception {
Table tbl = new Table();
tbl.setDbName(dbName);
tbl.setTableName(tabName);
StorageDescriptor sd = new StorageDescriptor();
tbl.setSd(sd);
tbl.setParameters(new HashMap<String, String>());
sd.setCols(cols);
sd.setCompressed(false);
sd.setParameters(new HashMap<String, String>());
sd.setSerdeInfo(new SerDeInfo());
sd.getSerdeInfo().setName(tbl.getTableName());
sd.getSerdeInfo().setParameters(new HashMap<String, String>());
sd.getSerdeInfo().getParameters()
.put(serdeConstants.SERIALIZATION_FORMAT, "1");
sd.setSortCols(new ArrayList<Order>());
return tbl;
}
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
final StorageDescriptor descriptor = table.getSd();
if (descriptor == null) {
throw new MetaException("Table is missing storage descriptor");
}
final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
if (serdeInfo == null) {
throw new MetaException(
"Table storage descriptor is missing SerDe info");
}
final String outputFormat = descriptor.getOutputFormat();
final String serializationLib = serdeInfo.getSerializationLib();
for (HiveStorageFormat format : HiveStorageFormat.values()) {
if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
return format;
}
}
throw new MetaException(
String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
@Test
public void testCheckTableSchemaMappingMissingColumn() throws MetaException {
TableDescription description = getHashRangeTable();
Table table = new Table();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$,hashMap:hashMap");
table.setParameters(parameters);
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = Lists.newArrayList();
cols.add(new FieldSchema("col1", "string", ""));
cols.add(new FieldSchema("col2", "tinyint", ""));
cols.add(new FieldSchema("col3", "string", ""));
cols.add(new FieldSchema("hashMap", "map<string,string>", ""));
sd.setCols(cols);
table.setSd(sd);
exceptionRule.expect(MetaException.class);
exceptionRule.expectMessage("Could not find column mapping for column: col2");
storageHandler.checkTableSchemaMapping(description, table);
}
@Test
public void testCheckListTableSchemaTypeValid() throws MetaException {
TableDescription description = getHashRangeTable();
Table table = new Table();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
"col2:dynamo_col2#,col3:dynamo_col3#,col4:dynamo_col4#,col5:dynamo_col5#," +
"col6:dynamo_col6#,col7:dynamo_col7#,hashKey:hashKey");
table.setParameters(parameters);
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = Lists.newArrayList();
cols.add(new FieldSchema("col1", "map<string,bigint>", ""));
cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
cols.add(new FieldSchema("col3", "array<map<string,double>>", ""));
cols.add(new FieldSchema("col4", "array<map<string,string>>", ""));
cols.add(new FieldSchema("col5", "array<bigint>", ""));
cols.add(new FieldSchema("col6", "array<double>", ""));
cols.add(new FieldSchema("col7", "array<string>", ""));
cols.add(new FieldSchema("hashKey", "string", ""));
sd.setCols(cols);
table.setSd(sd);
// This check is expected to pass for the given input
storageHandler.checkTableSchemaType(description, table);
}
private Table createUnpartitionedTable(String databaseName, String tableName, HiveServer2Core server)
throws Exception {
Table table = new Table();
table.setDbName(databaseName);
table.setTableName(tableName);
table.setSd(new StorageDescriptor());
table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
table.getSd().setSerdeInfo(new SerDeInfo());
table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
HiveMetaStoreClient client = server.getCore().newClient();
client.createTable(table);
client.close();
return table;
}
private Table createPartitionedTable(String databaseName, String tableName, HiveServer2Core server) throws Exception {
Table table = new Table();
table.setDbName(databaseName);
table.setTableName(tableName);
table.setPartitionKeys(Arrays.asList(new FieldSchema("partcol", "int", null)));
table.setSd(new StorageDescriptor());
table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
table.getSd().setSerdeInfo(new SerDeInfo());
table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
HiveMetaStoreClient client = server.getCore().newClient();
client.createTable(table);
client.close();
return table;
}
@Test
public void testCheckStructTableSchemaTypeInvalid() throws MetaException {
TableDescription description = getHashRangeTable();
Table table = new Table();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
"col2:dynamo_col2#,hashKey:hashKey");
table.setParameters(parameters);
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = Lists.newArrayList();
cols.add(new FieldSchema("col1", "struct<bignum:bigint,smallnum:tinyint>", ""));
cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
cols.add(new FieldSchema("hashKey", "string", ""));
sd.setCols(cols);
table.setSd(sd);
exceptionRule.expect(MetaException.class);
exceptionRule.expectMessage("The hive type struct<bignum:bigint,smallnum:tinyint> is not " +
"supported in DynamoDB");
storageHandler.checkTableSchemaType(description, table);
}
private void setupHiveTables() throws TException, IOException {
List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("p1"), newFieldSchema("p2"));
File tableLocation = new File("db1", "table1");
StorageDescriptor sd = newStorageDescriptor(tableLocation, "col0");
table1 = newTable("table1", "db1", partitionKeys, sd);
Partition partition1 = newPartition(table1, "value1", "value2");
Partition partition2 = newPartition(table1, "value11", "value22");
table1Partitions = Arrays.asList(partition1, partition2);
table1PartitionNames = Arrays
.asList(Warehouse.makePartName(partitionKeys, partition1.getValues()),
Warehouse.makePartName(partitionKeys, partition2.getValues()));
File tableLocation2 = new File("db2", "table2");
StorageDescriptor sd2 = newStorageDescriptor(tableLocation2, "col0");
table2 = newTable("table2", "db2", partitionKeys, sd2);
}
private Table newTable() {
Table table = new Table();
table.setDbName(DB_NAME);
table.setTableName(TABLE_NAME);
table.setTableType(TableType.EXTERNAL_TABLE.name());
StorageDescriptor sd = new StorageDescriptor();
sd.setLocation(tableLocation);
table.setSd(sd);
HashMap<String, String> parameters = new HashMap<>();
parameters.put(StatsSetupConst.ROW_COUNT, "1");
table.setParameters(parameters);
table.setPartitionKeys(PARTITIONS);
return table;
}
@Before
public void setUp() {
SourceTable sourceTable = new SourceTable();
sourceTable.setDatabaseName(DATABASE);
sourceTable.setTableName(TABLE);
tableReplication.setSourceTable(sourceTable);
ReplicaTable replicaTable = new ReplicaTable();
replicaTable.setDatabaseName(DATABASE);
replicaTable.setTableName(REPLICA_TABLE);
tableReplication.setReplicaTable(replicaTable);
when(replicaMetaStoreClientSupplier.get()).thenReturn(client);
replica = new DestructiveReplica(replicaMetaStoreClientSupplier, cleanupLocationManager, tableReplication);
table = new Table();
table.setDbName(DATABASE);
table.setTableName(REPLICA_TABLE);
table.setPartitionKeys(Lists.newArrayList(new FieldSchema("part1", "string", "")));
Map<String, String> parameters = new HashMap<>();
parameters.put(CircusTrainTableParameter.SOURCE_TABLE.parameterName(), DATABASE + "." + TABLE);
parameters.put(REPLICATION_EVENT.parameterName(), EVENT_ID);
table.setParameters(parameters);
StorageDescriptor sd1 = new StorageDescriptor();
sd1.setLocation(tableLocation.toString());
table.setSd(sd1);
}
private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns,
StorageDescriptor sd) {
String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase();
int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
String[] formatNames = new String[formatFieldCount];
LogicalType[] formatTypes = new LogicalType[formatFieldCount];
for (int i = 0; i < formatFieldCount; i++) {
formatNames[i] = tableSchema.getFieldName(i).get();
formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
}
RowType formatType = RowType.of(formatTypes, formatNames);
Configuration formatConf = new Configuration(jobConf);
sd.getSerdeInfo().getParameters().forEach(formatConf::set);
if (serLib.contains("parquet")) {
return Optional.of(ParquetRowDataBuilder.createWriterFactory(
formatType, formatConf, hiveVersion.startsWith("3.")));
} else if (serLib.contains("orc")) {
TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
return Optional.of(hiveShim.createOrcBulkWriterFactory(
formatConf, typeDescription.toString(), formatTypes));
} else {
return Optional.empty();
}
}
@Test
public void testCheckTableSchemaTypeInvalidType() throws MetaException {
TableDescription description = getHashRangeTable();
Table table = new Table();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
"col2:dynamo_col2#,hashKey:hashKey");
table.setParameters(parameters);
StorageDescriptor sd = new StorageDescriptor();
List<FieldSchema> cols = Lists.newArrayList();
cols.add(new FieldSchema("col1", "string", ""));
cols.add(new FieldSchema("col2", "tinyint", ""));
cols.add(new FieldSchema("hashKey", "string", ""));
sd.setCols(cols);
table.setSd(sd);
exceptionRule.expect(MetaException.class);
exceptionRule.expectMessage("The hive type tinyint is not supported in DynamoDB");
storageHandler.checkTableSchemaType(description, table);
}
public static HiveTablePartition toHiveTablePartition(
List<String> partitionKeys,
String[] fieldNames,
DataType[] fieldTypes,
HiveShim shim,
Properties tableProps,
String defaultPartitionName,
Partition partition) {
StorageDescriptor sd = partition.getSd();
Map<String, Object> partitionColValues = new HashMap<>();
List<String> nameList = Arrays.asList(fieldNames);
for (int i = 0; i < partitionKeys.size(); i++) {
String partitionColName = partitionKeys.get(i);
String partitionValue = partition.getValues().get(i);
DataType type = fieldTypes[nameList.indexOf(partitionColName)];
Object partitionObject;
if (defaultPartitionName.equals(partitionValue)) {
LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
// while this is in line with Hive, it seems it should be null for string columns as well
partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR ? defaultPartitionName : null;
} else {
partitionObject = restorePartitionValueFromFromType(shim, partitionValue, type);
}
partitionColValues.put(partitionColName, partitionObject);
}
return new HiveTablePartition(sd, partitionColValues, tableProps);
}
public static StorageDescriptor create(String location) {
StorageDescriptor storageDescriptor = new StorageDescriptor();
storageDescriptor.setInputFormat(AVRO_INPUT_FORMAT);
storageDescriptor.setOutputFormat(AVRO_OUTPUT_FORMAT);
storageDescriptor.setLocation(location);
storageDescriptor.setCols(emptyList());
SerDeInfo serdeInfo = new SerDeInfo();
serdeInfo.setSerializationLib(AVRO_SERDE);
storageDescriptor.setSerdeInfo(serdeInfo);
return storageDescriptor;
}
@Test
public void newHiveTable() throws URISyntaxException {
when(uriResolver.resolve(schema1, TABLE, 1))
.thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();
Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);
assertThat(result.getDbName(), is(DATABASE));
assertThat(result.getTableName(), is(TABLE));
assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
Map<String, String> parameters = result.getParameters();
assertThat(parameters.get("EXTERNAL"), is("TRUE"));
assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
List<FieldSchema> partitionKeys = result.getPartitionKeys();
assertThat(partitionKeys.size(), is(1));
assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
StorageDescriptor storageDescriptor = result.getSd();
assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
assertThat(storageDescriptor.getLocation(), is(LOCATION));
assertThat(storageDescriptor.getCols().size(), is(0));
SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
@Test
public void alterHiveTable() throws URISyntaxException {
when(uriResolver.resolve(schema1, TABLE, 1))
.thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
when(uriResolver.resolve(schema2, TABLE, 2))
.thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();
Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);
Table result = underTest.alterHiveTable(table, schema2, 2);
assertThat(result.getDbName(), is(DATABASE));
assertThat(result.getTableName(), is(TABLE));
assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
Map<String, String> parameters = result.getParameters();
assertThat(parameters.get("EXTERNAL"), is("TRUE"));
assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
List<FieldSchema> partitionKeys = result.getPartitionKeys();
assertThat(partitionKeys.size(), is(1));
assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
StorageDescriptor storageDescriptor = result.getSd();
assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
assertThat(storageDescriptor.getLocation(), is(LOCATION));
assertThat(storageDescriptor.getCols().size(), is(0));
SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
private static SerDeInfo getSerdeInfo(org.apache.hadoop.hive.metastore.api.Table table)
{
StorageDescriptor storageDescriptor = table.getSd();
if (storageDescriptor == null) {
throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
}
SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
if (serdeInfo == null) {
throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
}
return serdeInfo;
}
private Optional<Table> createEvolvedDestinationTable(String tableName, String dbName, String location,
boolean withComment) {
List<FieldSchema> cols = new ArrayList<>();
// Existing columns that match avroToOrcSchemaEvolutionTest/source_schema_evolution_enabled.ddl
cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldString", "string",
withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldString" : ""));
cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldInt", "int",
withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldInt" : ""));
cols.add(new FieldSchema("parentFieldRecord__nestedFieldString", "string",
withComment ? "from flatten_source parentFieldRecord.nestedFieldString" : ""));
// The following column is skipped (simulating un-evolved schema):
// Column name : parentFieldRecord__nestedFieldInt
// Column type : int
// Column comment: from flatten_source parentFieldRecord.nestedFieldInt
cols.add(new FieldSchema("parentFieldInt", "int",
withComment ? "from flatten_source parentFieldInt" : ""));
// Extra schema
cols.add(new FieldSchema("parentFieldRecord__nestedFieldString2", "string",
withComment ? "from flatten_source parentFieldRecord.nestedFieldString2" : ""));
String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, outputFormat, false, 0,
new SerDeInfo(), null, Lists.<Order>newArrayList(), null);
Table table = new Table(tableName, dbName, "ketl_dev", 0, 0, 0, storageDescriptor,
Lists.<FieldSchema>newArrayList(), Maps.<String,String>newHashMap(), "", "", "");
return Optional.of(table);
}
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage)
{
SerDeInfo serdeInfo = new SerDeInfo();
serdeInfo.setName(tableName);
serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
serdeInfo.setParameters(storage.getSerdeParameters());
StorageDescriptor sd = new StorageDescriptor();
sd.setLocation(emptyToNull(storage.getLocation()));
sd.setCols(columns.stream()
.map(ThriftMetastoreUtil::toMetastoreApiFieldSchema)
.collect(toImmutableList()));
sd.setSerdeInfo(serdeInfo);
sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
sd.setSkewedInfoIsSet(storage.isSkewed());
sd.setParameters(ImmutableMap.of());
Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
if (bucketProperty.isPresent()) {
sd.setNumBuckets(bucketProperty.get().getBucketCount());
sd.setBucketCols(bucketProperty.get().getBucketedBy());
if (!bucketProperty.get().getSortedBy().isEmpty()) {
sd.setSortCols(bucketProperty.get().getSortedBy().stream()
.map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
.collect(toImmutableList()));
}
}
return sd;
}