org.apache.hadoop.hive.metastore.api.StorageDescriptor Source Code Examples

Listed below are example usages of org.apache.hadoop.hive.metastore.api.StorageDescriptor collected from open-source projects. Follow each project link to view the full source on GitHub.

Example 1 (project: kite, file: HiveUtils.java)
static Table createEmptyTable(String namespace, String name) {
  Table table = new Table();
  table.setDbName(namespace);
  table.setTableName(name);
  table.setPartitionKeys(new ArrayList<FieldSchema>());
  table.setParameters(new HashMap<String, String>());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(new SerDeInfo());
  sd.setNumBuckets(-1);
  sd.setBucketCols(new ArrayList<String>());
  sd.setCols(new ArrayList<FieldSchema>());
  sd.setParameters(new HashMap<String, String>());
  sd.setSortCols(new ArrayList<Order>());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  SkewedInfo skewInfo = new SkewedInfo();
  skewInfo.setSkewedColNames(new ArrayList<String>());
  skewInfo.setSkewedColValues(new ArrayList<List<String>>());
  skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
  sd.setSkewedInfo(skewInfo);
  table.setSd(sd);

  return table;
}
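The skeleton above deliberately leaves columns, location, and formats unset. The following usage sketch is not from the kite sources: it assumes the method is callable from the same package, and the table name, columns, formats, and warehouse path are illustrative placeholders.

static void registerExampleTable() throws Exception {
  // Fill in the fields createEmptyTable leaves unset (illustrative values).
  Table table = createEmptyTable("default", "events");
  table.getSd().setCols(Arrays.asList(
      new FieldSchema("id", "bigint", null),
      new FieldSchema("payload", "string", null)));
  table.getSd().setLocation("hdfs:///warehouse/default/events"); // assumed path
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");

  // Register the table with the metastore.
  HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
  try {
    client.createTable(table);
  } finally {
    client.close();
  }
}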
 
Example 2
public static com.amazonaws.services.glue.model.StorageDescriptor convertStorageDescriptor(
        StorageDescriptor hiveSd) {
  com.amazonaws.services.glue.model.StorageDescriptor catalogSd =
          new com.amazonaws.services.glue.model.StorageDescriptor();
  catalogSd.setNumberOfBuckets(hiveSd.getNumBuckets());
  catalogSd.setCompressed(hiveSd.isCompressed());
  catalogSd.setParameters(hiveSd.getParameters());
  catalogSd.setBucketColumns(hiveSd.getBucketCols());
  catalogSd.setColumns(convertFieldSchemaList(hiveSd.getCols()));
  catalogSd.setInputFormat(hiveSd.getInputFormat());
  catalogSd.setLocation(hiveSd.getLocation());
  catalogSd.setOutputFormat(hiveSd.getOutputFormat());
  catalogSd.setSerdeInfo(convertSerDeInfo(hiveSd.getSerdeInfo()));
  catalogSd.setSkewedInfo(convertSkewedInfo(hiveSd.getSkewedInfo()));
  catalogSd.setSortColumns(convertOrderList(hiveSd.getSortCols()));
  catalogSd.setStoredAsSubDirectories(hiveSd.isStoredAsSubDirectories());

  return catalogSd;
}
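The convertFieldSchemaList, convertSerDeInfo, convertSkewedInfo, and convertOrderList helpers are elided here. As a sketch of the pattern (an assumption about the shape, not the verbatim implementation in the AWS Glue Data Catalog client), the SerDe conversion would mirror the same field-for-field copying:

public static com.amazonaws.services.glue.model.SerDeInfo convertSerDeInfo(SerDeInfo hiveSerde) {
  // Copy each Hive SerDeInfo field to its Glue counterpart.
  com.amazonaws.services.glue.model.SerDeInfo catalogSerde =
          new com.amazonaws.services.glue.model.SerDeInfo();
  catalogSerde.setName(hiveSerde.getName());
  catalogSerde.setSerializationLibrary(hiveSerde.getSerializationLib());
  catalogSerde.setParameters(hiveSerde.getParameters());
  return catalogSerde;
}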
 
Example 3
@Test
public void testCheckTableSchemaTypeMappingInvalid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
          "col2:dynamo_col2#,hashKey:hashKey");
  parameters.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, "col2:NS");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "bigint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The DynamoDB type NS does not support Hive type bigint");
  storageHandler.checkTableSchemaType(description, table);
}
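The failure is expected because NS is DynamoDB's number-set type, which cannot hold a scalar Hive bigint. The column-mapping parameter pairs each Hive column with a DynamoDB attribute ("hiveColumn:dynamoAttribute", comma-separated), and the type-mapping parameter overrides the DynamoDB type per column. A hedged sketch of a mapping that should type-check, assuming the connector accepts the scalar number token N for bigint columns:

Map<String, String> parameters = Maps.newHashMap();
parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col2:dynamo_col2#");
parameters.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, "col2:N"); // N = scalar number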
 
Example 4 (project: presto, file: ThriftMetastoreUtil.java)
public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, List<FieldSchema> schema)
{
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(schema.stream()
                    .map(ThriftMetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toImmutableList()))
            .setParameters(partition.getParameters());

    // TODO is bucketing_version set on partition level??
    fromMetastoreApiStorageDescriptor(
            partition.getParameters(),
            storageDescriptor,
            partitionBuilder.getStorageBuilder(),
            format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
}
 
Example 5 (project: presto, file: ThriftMetastoreUtil.java)
private static void fromMetastoreApiStorageDescriptor(
        Map<String, String> tableParameters,
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName)
{
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLib(), storageDescriptor.getInputFormat(), storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
            .setSkewed(storageDescriptor.isSetSkewedInfo() && storageDescriptor.getSkewedInfo().isSetSkewedColNames() && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
            .setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}
 
Example 6
@Test
public void testCheckTableSchemaTypeValid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "bigint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);
  // This check is expected to pass for the given input
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 7 (project: flink, file: HiveCatalog.java)
private Partition instantiateHivePartition(Table hiveTable, CatalogPartitionSpec partitionSpec, CatalogPartition catalogPartition)
		throws PartitionSpecInvalidException {
	List<String> partCols = getFieldNames(hiveTable.getPartitionKeys());
	List<String> partValues = getOrderedFullPartitionValues(
		partitionSpec, partCols, new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()));
	// validate partition values
	for (int i = 0; i < partCols.size(); i++) {
		if (StringUtils.isNullOrWhitespaceOnly(partValues.get(i))) {
			throw new PartitionSpecInvalidException(getName(), partCols,
				new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()), partitionSpec);
		}
	}
	// TODO: handle GenericCatalogPartition
	StorageDescriptor sd = hiveTable.getSd().deepCopy();
	sd.setLocation(catalogPartition.getProperties().remove(HiveCatalogConfig.PARTITION_LOCATION));

	Map<String, String> properties = new HashMap<>(catalogPartition.getProperties());
	properties.put(HiveCatalogConfig.COMMENT, catalogPartition.getComment());

	return HiveTableUtil.createHivePartition(
			hiveTable.getDbName(),
			hiveTable.getTableName(),
			partValues,
			sd,
			properties);
}
 
Example 8 (project: flink, file: HiveTableUtil.java)
/**
 * Creates the properties needed to initialize a SerDe from a storage descriptor.
 *
 * @param storageDescriptor the storage descriptor of the Hive table or partition
 * @return the SerDe initialization properties, including column names and types
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	properties.setProperty(
			serdeConstants.SERIALIZATION_FORMAT,
			parameters.get(serdeConstants.SERIALIZATION_FORMAT));
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col: cols){
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in older Hive versions, so we use the literal to avoid a shim
	properties.setProperty("column.name.delimiter", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	properties.putAll(parameters);
	return properties;
}
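A short usage sketch with assumed inputs (not from the Flink sources): build a minimal StorageDescriptor carrying one column and an explicit serialization format, then derive the SerDe initialization properties.

StorageDescriptor sd = new StorageDescriptor();
SerDeInfo serDeInfo = new SerDeInfo();
serDeInfo.setParameters(new HashMap<>());
serDeInfo.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
sd.setSerdeInfo(serDeInfo);
sd.setCols(Arrays.asList(new FieldSchema("id", "int", null)));

Properties props = HiveTableUtil.createPropertiesFromStorageDescriptor(sd);
// props now maps serdeConstants.LIST_COLUMNS ("columns") to "id" and
// serdeConstants.LIST_COLUMN_TYPES ("columns.types") to "int".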
 
Example 9
/**
 * Rewrites the Avro schema URL of an entity so that it points at the target file system.
 *
 * @param entity name of the entity being changed, e.g. a Hive table or partition
 * @param sd the StorageDescriptor of the entity
 * @param hiveHelper helper providing the source dataset and target path mapping
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL != null) {

    Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
    URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

    if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
        || (oldAvroSchemaPath.toUri().getScheme().equals(sourceFileSystemURI.getScheme())
        && oldAvroSchemaPath.toUri().getAuthority().equals(sourceFileSystemURI.getAuthority()))) {

      String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
          Optional.<Partition>absent(), true).toString();

      sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
      log.info(String.format("For entity %s, change %s from %s to %s", entity,
          HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
    }
  }
}
 
Example 10 (project: flink, file: HiveStatsUtil.java)
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData =
					getColumnStatisticsData(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}
 
Example 11
private String getFilePath(Table tbl) throws Exception {

        StorageDescriptor descTable = tbl.getSd();

        InputFormat<?, ?> fformat = HiveDataFragmenter.makeInputFormat(descTable.getInputFormat(), jobConf);

        FileInputFormat.setInputPaths(jobConf, new Path(descTable.getLocation()));

        InputSplit[] splits;
        try {
            splits = fformat.getSplits(jobConf, 1);
        } catch (org.apache.hadoop.mapred.InvalidInputException e) {
            LOG.debug("getSplits failed on " + e.getMessage());
            throw new RuntimeException("Unable to get file path for table.", e);
        }

        // Return the path of the first split's file.
        for (InputSplit split : splits) {
            FileSplit fsp = (FileSplit) split;
            return fsp.getPath().toString();
        }
        throw new RuntimeException("Unable to get file path for table.");
    }
 
Example 12
public Table makeMetastoreTableObject(HiveMetaStoreClient client,
    String dbName, String tabName, List<FieldSchema> cols) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(dbName);
  tbl.setTableName(tabName);
  StorageDescriptor sd = new StorageDescriptor();
  tbl.setSd(sd);
  tbl.setParameters(new HashMap<String, String>());
  sd.setCols(cols);
  sd.setCompressed(false);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().getParameters()
      .put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.setSortCols(new ArrayList<Order>());
  return tbl;
}
 
Example 13 (project: metacat, file: HiveConnectorTableService.java)
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new MetaException(
            "Table storage descriptor is missing SerDe info");
    }
    final String outputFormat = descriptor.getOutputFormat();
    final String serializationLib = serdeInfo.getSerializationLib();

    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
            return format;
        }
    }
    throw new MetaException(
        String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
 
Example 14
@Test
public void testCheckTableSchemaMappingMissingColumn() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$,hashMap:hashMap");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "tinyint", ""));
  cols.add(new FieldSchema("col3", "string", ""));
  cols.add(new FieldSchema("hashMap", "map<string,string>", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("Could not find column mapping for column: col2");
  storageHandler.checkTableSchemaMapping(description, table);
}
 
Example 15
@Test
public void testCheckListTableSchemaTypeValid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
          "col2:dynamo_col2#,col3:dynamo_col3#,col4:dynamo_col4#,col5:dynamo_col5#," +
          "col6:dynamo_col6#,col7:dynamo_col7#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "map<string,bigint>", ""));
  cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
  cols.add(new FieldSchema("col3", "array<map<string,double>>", ""));
  cols.add(new FieldSchema("col4", "array<map<string,string>>", ""));
  cols.add(new FieldSchema("col5", "array<bigint>", ""));
  cols.add(new FieldSchema("col6", "array<double>", ""));
  cols.add(new FieldSchema("col7", "array<string>", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);
  // This check is expected to pass for the given input
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 16 (project: beeju, file: HiveServer2CoreTest.java)
private Table createUnpartitionedTable(String databaseName, String tableName, HiveServer2Core server)
    throws Exception {
  Table table = new Table();
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}
 
Example 17 (project: beeju, file: HiveServer2CoreTest.java)
private Table createPartitionedTable(String databaseName, String tableName, HiveServer2Core server) throws Exception {
  Table table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(tableName);
  table.setPartitionKeys(Arrays.asList(new FieldSchema("partcol", "int", null)));
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}
 
Example 18
@Test
public void testCheckStructTableSchemaTypeInvalid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "struct<bignum:bigint,smallnum:tinyint>", ""));
  cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The hive type struct<bignum:bigint,smallnum:tinyint> is not " +
      "supported in DynamoDB");
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 19
private void setupHiveTables() throws TException, IOException {
  List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("p1"), newFieldSchema("p2"));

  File tableLocation = new File("db1", "table1");
  StorageDescriptor sd = newStorageDescriptor(tableLocation, "col0");
  table1 = newTable("table1", "db1", partitionKeys, sd);
  Partition partition1 = newPartition(table1, "value1", "value2");
  Partition partition2 = newPartition(table1, "value11", "value22");
  table1Partitions = Arrays.asList(partition1, partition2);
  table1PartitionNames = Arrays
      .asList(Warehouse.makePartName(partitionKeys, partition1.getValues()),
          Warehouse.makePartName(partitionKeys, partition2.getValues()));

  File tableLocation2 = new File("db2", "table2");
  StorageDescriptor sd2 = newStorageDescriptor(tableLocation2, "col0");
  table2 = newTable("table2", "db2", partitionKeys, sd2);
}
 
Example 20 (project: circus-train, file: ReplicaTest.java)
private Table newTable() {
  Table table = new Table();
  table.setDbName(DB_NAME);
  table.setTableName(TABLE_NAME);
  table.setTableType(TableType.EXTERNAL_TABLE.name());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation(tableLocation);
  table.setSd(sd);

  HashMap<String, String> parameters = new HashMap<>();
  parameters.put(StatsSetupConst.ROW_COUNT, "1");
  table.setParameters(parameters);

  table.setPartitionKeys(PARTITIONS);
  return table;
}
 
Example 21 (project: circus-train, file: DestructiveReplicaTest.java)
@Before
public void setUp() {
  SourceTable sourceTable = new SourceTable();
  sourceTable.setDatabaseName(DATABASE);
  sourceTable.setTableName(TABLE);
  tableReplication.setSourceTable(sourceTable);
  ReplicaTable replicaTable = new ReplicaTable();
  replicaTable.setDatabaseName(DATABASE);
  replicaTable.setTableName(REPLICA_TABLE);
  tableReplication.setReplicaTable(replicaTable);
  when(replicaMetaStoreClientSupplier.get()).thenReturn(client);
  replica = new DestructiveReplica(replicaMetaStoreClientSupplier, cleanupLocationManager, tableReplication);

  table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(REPLICA_TABLE);
  table.setPartitionKeys(Lists.newArrayList(new FieldSchema("part1", "string", "")));
  Map<String, String> parameters = new HashMap<>();
  parameters.put(CircusTrainTableParameter.SOURCE_TABLE.parameterName(), DATABASE + "." + TABLE);
  parameters.put(REPLICATION_EVENT.parameterName(), EVENT_ID);
  table.setParameters(parameters);
  StorageDescriptor sd1 = new StorageDescriptor();
  sd1.setLocation(tableLocation.toString());
  table.setSd(sd1);
}
 
Example 22 (project: flink, file: HiveTableSink.java)
private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns,
		StorageDescriptor sd) {
	String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase();
	int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
	String[] formatNames = new String[formatFieldCount];
	LogicalType[] formatTypes = new LogicalType[formatFieldCount];
	for (int i = 0; i < formatFieldCount; i++) {
		formatNames[i] = tableSchema.getFieldName(i).get();
		formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
	}
	RowType formatType = RowType.of(formatTypes, formatNames);
	Configuration formatConf = new Configuration(jobConf);
	sd.getSerdeInfo().getParameters().forEach(formatConf::set);
	if (serLib.contains("parquet")) {
		return Optional.of(ParquetRowDataBuilder.createWriterFactory(
				formatType, formatConf, hiveVersion.startsWith("3.")));
	} else if (serLib.contains("orc")) {
		TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
		return Optional.of(hiveShim.createOrcBulkWriterFactory(
				formatConf, typeDescription.toString(), formatTypes));
	} else {
		return Optional.empty();
	}
}
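The format dispatch relies on substring matches against the lower-cased SerDe class name. For reference, the standard Hive SerDe class names that would satisfy those checks are (stated as general Hive knowledge, not read from this snippet):

String parquetSerde = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"; // contains "parquet"
String orcSerde = "org.apache.hadoop.hive.ql.io.orc.OrcSerde"; // contains "orc"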
 
Example 23
@Test
public void testCheckTableSchemaTypeInvalidType() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "tinyint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The hive type tinyint is not supported in DynamoDB");
  storageHandler.checkTableSchemaType(description, table);
}
 
Example 24 (project: flink, file: HiveTableSource.java)
public static HiveTablePartition toHiveTablePartition(
		List<String> partitionKeys,
		String[] fieldNames,
		DataType[] fieldTypes,
		HiveShim shim,
		Properties tableProps,
		String defaultPartitionName,
		Partition partition) {
	StorageDescriptor sd = partition.getSd();
	Map<String, Object> partitionColValues = new HashMap<>();
	List<String> nameList = Arrays.asList(fieldNames);
	for (int i = 0; i < partitionKeys.size(); i++) {
		String partitionColName = partitionKeys.get(i);
		String partitionValue = partition.getValues().get(i);
		DataType type = fieldTypes[nameList.indexOf(partitionColName)];
		Object partitionObject;
		if (defaultPartitionName.equals(partitionValue)) {
			LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
			// While this matches Hive's behavior, it arguably should be null for string columns as well
			partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR ? defaultPartitionName : null;
		} else {
			partitionObject = restorePartitionValueFromFromType(shim, partitionValue, type);
		}
		partitionColValues.put(partitionColName, partitionObject);
	}
	return new HiveTablePartition(sd, partitionColValues, tableProps);
}
 
Example 25
public static StorageDescriptor create(String location) {
  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setInputFormat(AVRO_INPUT_FORMAT);
  storageDescriptor.setOutputFormat(AVRO_OUTPUT_FORMAT);
  storageDescriptor.setLocation(location);
  storageDescriptor.setCols(emptyList());

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setSerializationLib(AVRO_SERDE);
  storageDescriptor.setSerdeInfo(serdeInfo);

  return storageDescriptor;
}
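The AVRO_* constants are not shown in this snippet. Assuming they follow standard Hive Avro support, they would resolve to the usual container formats and SerDe:

// Assumed values (standard Hive Avro classes), not copied from the project:
private static final String AVRO_INPUT_FORMAT =
    "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat";
private static final String AVRO_OUTPUT_FORMAT =
    "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat";
private static final String AVRO_SERDE =
    "org.apache.hadoop.hive.serde2.avro.AvroSerDe";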
 
Example 26 (project: data-highway, file: AvroHiveTableStrategyTest.java)
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 27 (project: data-highway, file: AvroHiveTableStrategyTest.java)
@Test
public void alterHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  when(uriResolver.resolve(schema2, TABLE, 2))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  Table result = underTest.alterHiveTable(table, schema2, 2);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example 28 (project: presto, file: ThriftMetastoreUtil.java)
private static SerDeInfo getSerdeInfo(org.apache.hadoop.hive.metastore.api.Table table)
{
    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
    }
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    return serdeInfo;
}
 
Example 29
private Optional<Table> createEvolvedDestinationTable(String tableName, String dbName, String location,
    boolean withComment) {
  List<FieldSchema> cols = new ArrayList<>();
  // Existing columns that match avroToOrcSchemaEvolutionTest/source_schema_evolution_enabled.ddl
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldString" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldInt", "int",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldInt" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString" : ""));
  // The following column is skipped (simulating un-evolved schema):
  // Column name   : parentFieldRecord__nestedFieldInt
  // Column type   : int
  // Column comment: from flatten_source parentFieldRecord.nestedFieldInt
  cols.add(new FieldSchema("parentFieldInt", "int",
      withComment ? "from flatten_source parentFieldInt" : ""));
  // Extra schema
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString2", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString2" : ""));

  String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
  String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
  StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, outputFormat, false, 0,
      new SerDeInfo(), null, Lists.<Order>newArrayList(), null);
  Table table = new Table(tableName, dbName, "ketl_dev", 0, 0, 0, storageDescriptor,
      Lists.<FieldSchema>newArrayList(), Maps.<String,String>newHashMap(), "", "", "");

  return Optional.of(table);
}
 
Example 30 (project: presto, file: ThriftMetastoreUtil.java)
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage)
{
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream()
            .map(ThriftMetastoreUtil::toMetastoreApiFieldSchema)
            .collect(toImmutableList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setSkewedInfoIsSet(storage.isSkewed());
    sd.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (bucketProperty.isPresent()) {
        sd.setNumBuckets(bucketProperty.get().getBucketCount());
        sd.setBucketCols(bucketProperty.get().getBucketedBy());
        if (!bucketProperty.get().getSortedBy().isEmpty()) {
            sd.setSortCols(bucketProperty.get().getSortedBy().stream()
                    .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                    .collect(toImmutableList()));
        }
    }

    return sd;
}