use of org.apache.iceberg.Table in project hive by apache.
the class HiveIcebergSerDe method initialize.
@Override
public void initialize(@Nullable Configuration configuration, Properties serDeProperties, Properties partitionProperties) throws SerDeException {
  super.initialize(configuration, serDeProperties, partitionProperties);
  if (serDeProperties.get(InputFormatConfig.TABLE_SCHEMA) != null) {
    this.tableSchema = SchemaParser.fromJson((String) serDeProperties.get(InputFormatConfig.TABLE_SCHEMA));
    if (serDeProperties.get(InputFormatConfig.PARTITION_SPEC) != null) {
      PartitionSpec spec = PartitionSpecParser.fromJson(tableSchema, serDeProperties.getProperty(InputFormatConfig.PARTITION_SPEC));
      this.partitionColumns = spec.fields().stream().map(PartitionField::name).collect(Collectors.toList());
    } else {
      this.partitionColumns = ImmutableList.of();
    }
  } else {
    try {
      Table table = IcebergTableUtil.getTable(configuration, serDeProperties);
      // always prefer the original table schema if there is one
      this.tableSchema = table.schema();
      this.partitionColumns = table.spec().fields().stream().map(PartitionField::name).collect(Collectors.toList());
      LOG.info("Using schema from existing table {}", SchemaParser.toJson(tableSchema));
    } catch (Exception e) {
      // During table creation we might not have the schema information from the Iceberg table, nor from the HMS
      // table. In this case we have to generate the schema from the serDeProperties, which contain the info
      // provided in the CREATE TABLE query.
      boolean autoConversion = configuration.getBoolean(InputFormatConfig.SCHEMA_AUTO_CONVERSION, false);
      // If we cannot load the table, try the provided Hive schema
      this.tableSchema = hiveSchemaOrThrow(e, autoConversion);
      // This is only for table creation, so it is ok to have an empty partition column list
      this.partitionColumns = ImmutableList.of();
      // create the table for CTAS
      if (e instanceof NoSuchTableException &&
          Boolean.parseBoolean(serDeProperties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS))) {
        if (!Catalogs.hiveCatalog(configuration, serDeProperties)) {
          throw new SerDeException(CTAS_EXCEPTION_MSG);
        }
        createTableForCTAS(configuration, serDeProperties);
      }
    }
  }
  Schema projectedSchema;
  if (serDeProperties.get(HiveIcebergStorageHandler.WRITE_KEY) != null) {
    // when writing out data, we should not do projection pushdown
    projectedSchema = tableSchema;
  } else {
    configuration.setBoolean(InputFormatConfig.CASE_SENSITIVE, false);
    String[] selectedColumns = ColumnProjectionUtils.getReadColumnNames(configuration);
    // When the same table is joined multiple times, some selected columns may be duplicated. In that case a wrong
    // recordStructField position leads to a wrong value or an ArrayIndexOutOfBoundsException.
    String[] distinctSelectedColumns = Arrays.stream(selectedColumns).distinct().toArray(String[]::new);
    projectedSchema = distinctSelectedColumns.length > 0 ?
        tableSchema.caseInsensitiveSelect(distinctSelectedColumns) : tableSchema;
    // fall back to the full schema if the projection size does not match, i.e. the selectOperator's columns
    // cannot all be found in the inspector
    if (projectedSchema.columns().size() != distinctSelectedColumns.length) {
      projectedSchema = tableSchema;
    }
  }
  try {
    this.inspector = IcebergObjectInspector.create(projectedSchema);
  } catch (Exception e) {
    throw new SerDeException(e);
  }
}
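The projection branch above deduplicates the selected column names before asking Iceberg for a case-insensitive projection. A minimal standalone sketch of that dedup-and-project step; the schema and the selected column names here are made up for illustration:

import java.util.Arrays;

import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

public class ProjectionSketch {
  public static void main(String[] args) {
    // hypothetical table schema
    Schema tableSchema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "name", Types.StringType.get()),
        Types.NestedField.optional(3, "ts", Types.TimestampType.withZone()));

    // a self-join can report the same column more than once
    String[] selectedColumns = {"ID", "name", "ID"};

    // drop duplicates, then project case-insensitively; fall back to the full schema when nothing is selected
    String[] distinct = Arrays.stream(selectedColumns).distinct().toArray(String[]::new);
    Schema projected = distinct.length > 0 ? tableSchema.caseInsensitiveSelect(distinct) : tableSchema;

    System.out.println(projected);
  }
}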
use of org.apache.iceberg.Table in project hive by apache.
the class HiveIcebergStorageHandler method getPartitionTransformSpec.
@Override
public List<PartitionTransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
  List<PartitionTransformSpec> result = new ArrayList<>();
  TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
  Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties());
  return table.spec().fields().stream().map(f -> {
    PartitionTransformSpec spec = new PartitionTransformSpec();
    spec.setColumnName(table.schema().findColumnName(f.sourceId()));
    // right now the only way to fetch the transform type and its params is through the toString() call
    String transformName = f.transform().toString().toUpperCase();
    // if the transform name contains '[' it means it has some config params
    if (transformName.contains("[")) {
      spec.setTransformType(PartitionTransformSpec.TransformType.valueOf(transformName.substring(0, transformName.indexOf("["))));
      spec.setTransformParam(Optional.of(Integer.valueOf(transformName.substring(transformName.indexOf("[") + 1, transformName.indexOf("]")))));
    } else {
      spec.setTransformType(PartitionTransformSpec.TransformType.valueOf(transformName));
      spec.setTransformParam(Optional.empty());
    }
    return spec;
  }).collect(Collectors.toList());
}
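Since the transform type and its parameter are only available through toString(), the parsing boils down to splitting on the bracket. A minimal sketch of just that string handling, using a made-up transform string:

import java.util.Optional;

public class TransformNameSketch {
  public static void main(String[] args) {
    // f.transform().toString() yields values such as "bucket[16]", "truncate[4]" or "identity"
    String transformName = "bucket[16]".toUpperCase();

    String type;
    Optional<Integer> param;
    if (transformName.contains("[")) {
      // everything before '[' is the transform type, the number between the brackets is its parameter
      type = transformName.substring(0, transformName.indexOf("["));
      param = Optional.of(Integer.valueOf(
          transformName.substring(transformName.indexOf("[") + 1, transformName.indexOf("]"))));
    } else {
      type = transformName;
      param = Optional.empty();
    }

    System.out.println(type + " " + param); // prints: BUCKET Optional[16]
  }
}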
use of org.apache.iceberg.Table in project hive by apache.
the class HiveIcebergStorageHandler method table.
/**
 * Returns the Table that was serialized into the configuration, looked up by table name.
 * If the table's FileIO is missing its configuration, it is populated from the input config.
 *
 * @param config The configuration to read the serialized table from
 * @param name The name of the table we need, as returned by TableDesc.getTableName()
 * @return The Table
 */
public static Table table(Configuration config, String name) {
  Table table = SerializationUtil.deserializeFromBase64(config.get(InputFormatConfig.SERIALIZED_TABLE_PREFIX + name));
  checkAndSetIoConfig(config, table);
  return table;
}
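This lookup assumes the table was previously serialized into the job configuration under InputFormatConfig.SERIALIZED_TABLE_PREFIX + name. A minimal sketch of that round trip; the helper names here are made up, and the imports assume the iceberg-mr module is on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Table;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.util.SerializationUtil;

public class SerializedTableSketch {
  // hypothetical writer-side helper: store the table in the config under its name
  static void storeTable(Configuration config, Table table, String name) {
    config.set(InputFormatConfig.SERIALIZED_TABLE_PREFIX + name,
        SerializationUtil.serializeToBase64(table));
  }

  // reader-side counterpart: the same lookup HiveIcebergStorageHandler.table() performs
  static Table readTable(Configuration config, String name) {
    return SerializationUtil.deserializeFromBase64(
        config.get(InputFormatConfig.SERIALIZED_TABLE_PREFIX + name));
  }
}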
use of org.apache.iceberg.Table in project hive by apache.
the class HiveIcebergStorageHandler method getBasicStatistics.
@Override
public Map<String, String> getBasicStatistics(Partish partish) {
  org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
  TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
  Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
  Map<String, String> stats = new HashMap<>();
  if (table.currentSnapshot() != null) {
    Map<String, String> summary = table.currentSnapshot().summary();
    if (summary != null) {
      if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
        stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
      }
      if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
        stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
      }
      if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
        stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
      }
    }
  } else {
    stats.put(StatsSetupConst.NUM_FILES, "0");
    stats.put(StatsSetupConst.ROW_COUNT, "0");
    stats.put(StatsSetupConst.TOTAL_SIZE, "0");
  }
  return stats;
}
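The statistics come straight from the current snapshot's summary map, keyed by the SnapshotSummary constants used above. A minimal sketch of reading those totals from an already loaded table; the table argument is assumed to come from elsewhere:

import java.util.Map;

import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.Table;

public class SnapshotStatsSketch {
  static void printTotals(Table table) {
    if (table.currentSnapshot() == null) {
      // a table with no snapshot yet has no data
      System.out.println("empty table: 0 files, 0 rows, 0 bytes");
      return;
    }
    // the summary stores every value as a string
    Map<String, String> summary = table.currentSnapshot().summary();
    System.out.println("files: " + summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
    System.out.println("rows:  " + summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
    System.out.println("bytes: " + summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
  }
}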
use of org.apache.iceberg.Table in project hive by apache.
the class TestInputFormatReaderDeletes method createTable.
@Override
protected Table createTable(String name, Schema schema, PartitionSpec spec) throws IOException {
  Table table;
  File location = temp.newFolder(inputFormat, fileFormat.name());
  Assert.assertTrue(location.delete());
  helper = new TestHelper(conf, tables, location.toString(), schema, spec, fileFormat, temp);
  table = helper.createTable();
  // upgrade the table metadata to format version 2 so that the delete files read by this test are supported
  TableOperations ops = ((BaseTable) table).operations();
  TableMetadata meta = ops.current();
  ops.commit(meta, meta.upgradeToFormatVersion(2));
  return table;
}
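The final commit bumps the table to format version 2, which Iceberg requires for row-level deletes. An alternative sketch that sets the version at creation time instead, assuming a HadoopTables catalog and the standard format-version table property; this is not what the test above does, it is shown only for comparison:

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.hadoop.HadoopTables;

public class CreateV2TableSketch {
  static Table createV2Table(Configuration conf, Schema schema, PartitionSpec spec, String location) {
    // setting "format-version" at creation avoids the separate metadata upgrade commit
    return new HadoopTables(conf).create(
        schema, spec, Map.of(TableProperties.FORMAT_VERSION, "2"), location);
  }
}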