Use of org.apache.iceberg.Schema in the Apache Hive project.
Class TestHiveIcebergSchemaEvolution, method testRemoveColumnFromIcebergTable.
/**
 * Drops the first_name column through the Iceberg API and checks that Hive reads and writes
 * the table using only the remaining customer_id and last_name columns.
 */
@Test
public void testRemoveColumnFromIcebergTable() throws IOException {
  // Seed a customers table (customer_id, first_name, last_name) with the shared fixture rows.
  Table customers = testTables.createTable(
      shell,
      "customers",
      HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
      fileFormat,
      HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);

  // Schema evolution under test: drop first_name.
  customers.updateSchema().deleteColumn("first_name").commit();

  // Expected view of the table once first_name is gone.
  Schema schemaAfterDrop = new Schema(
      optional(1, "customer_id", Types.LongType.get()),
      optional(2, "last_name", Types.StringType.get(), "This is last name"));
  TestHelper.RecordsBuilder expectedBuilder = TestHelper.RecordsBuilder.newInstance(schemaAfterDrop)
      .add(0L, "Brown")
      .add(1L, "Green")
      .add(2L, "Pink");
  List<Record> expectedBeforeInsert = expectedBuilder.build();

  // A 'select *' from Hive must no longer return the first_name column.
  List<Object[]> rowsBeforeInsert = shell.executeStatement("SELECT * FROM default.customers");
  HiveIcebergTestUtils.validateData(
      expectedBeforeInsert,
      HiveIcebergTestUtils.valueForRow(schemaAfterDrop, rowsBeforeInsert),
      0);

  // Selecting the dropped column explicitly has to be rejected.
  AssertHelpers.assertThrows(
      "should throw exception",
      IllegalArgumentException.class,
      "Invalid table alias or column reference 'first_name'",
      () -> {
        shell.executeStatement("SELECT first_name FROM default.customers");
      });

  // Hive must accept an insert that supplies only the two remaining columns.
  shell.executeStatement("INSERT INTO default.customers values (4L, 'Magenta')");
  List<Object[]> rowsAfterInsert = shell.executeStatement("SELECT * FROM default.customers");
  expectedBuilder.add(4L, "Magenta");
  List<Record> expectedAfterInsert = expectedBuilder.build();
  HiveIcebergTestUtils.validateData(
      expectedAfterInsert,
      HiveIcebergTestUtils.valueForRow(schemaAfterDrop, rowsAfterInsert),
      0);
}
Use of org.apache.iceberg.Schema in the Apache Hive project.
Class TestHiveIcebergStorageHandlerNoScan, method testPartitionTransform.
/**
 * Creates a table through Hive DDL using every partition transform in PARTITIONED BY SPEC and
 * checks that the loaded Iceberg table reports the equivalent partition spec.
 */
@Test
public void testPartitionTransform() {
  Schema schema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "year_field", Types.DateType.get()),
      optional(3, "month_field", Types.TimestampType.withZone()),
      optional(4, "day_field", Types.TimestampType.withoutZone()),
      optional(5, "hour_field", Types.TimestampType.withoutZone()),
      optional(6, "truncate_field", Types.StringType.get()),
      optional(7, "bucket_field", Types.StringType.get()),
      optional(8, "identity_field", Types.StringType.get()));

  // The spec the DDL below is expected to produce, built directly with the Iceberg API.
  PartitionSpec expectedSpec = PartitionSpec.builderFor(schema)
      .year("year_field")
      .month("month_field")
      .day("day_field")
      .hour("hour_field")
      .truncate("truncate_field", 2)
      .bucket("bucket_field", 2)
      .identity("identity_field")
      .build();

  TableIdentifier identifier = TableIdentifier.of("default", "part_test");

  // Assemble the CREATE TABLE statement piece by piece, in the order the pieces appear in the SQL.
  StringBuilder ddl = new StringBuilder();
  ddl.append("CREATE EXTERNAL TABLE ").append(identifier);
  ddl.append(" PARTITIONED BY SPEC (year(year_field), month(month_field), day(day_field), hour(hour_field), ");
  ddl.append("truncate(2, truncate_field), bucket(2, bucket_field), identity_field)");
  ddl.append(" STORED BY ICEBERG ").append(testTables.locationForCreateTableSQL(identifier));
  ddl.append(" TBLPROPERTIES ('").append(InputFormatConfig.TABLE_SCHEMA);
  ddl.append("'='").append(SchemaParser.toJson(schema)).append("', ");
  ddl.append("'").append(InputFormatConfig.CATALOG_NAME);
  ddl.append("'='").append(testTables.catalogName()).append("')");
  shell.executeStatement(ddl.toString());

  Table created = testTables.loadTable(identifier);
  Assert.assertEquals(expectedSpec, created.spec());
}
Use of org.apache.iceberg.Schema in the Apache Hive project.
Class TestIcebergRecordObjectInspector, method testIcebergRecordObjectInspector.
/**
 * Checks that the object inspector created for a schema with a nested struct exposes both the
 * top-level fields and the nested struct's fields of a generated record.
 */
@Test
public void testIcebergRecordObjectInspector() {
  Schema schema = new Schema(
      required(1, "integer_field", Types.IntegerType.get()),
      required(2, "struct_field", Types.StructType.of(
          Types.NestedField.required(3, "string_field", Types.StringType.get()))));

  // One generated row; position 1 holds the nested struct.
  Record outer = RandomGenericData.generate(schema, 1, 0L).get(0);
  Record nested = outer.get(1, Record.class);

  StructObjectInspector outerInspector = (StructObjectInspector) IcebergObjectInspector.create(schema);

  // Top level: the full field list and each field looked up by name.
  Assert.assertEquals(
      ImmutableList.of(outer.get(0), outer.get(1)),
      outerInspector.getStructFieldsDataAsList(outer));

  StructField intRef = outerInspector.getStructFieldRef("integer_field");
  Assert.assertEquals(outer.get(0), outerInspector.getStructFieldData(outer, intRef));

  StructField structRef = outerInspector.getStructFieldRef("struct_field");
  Object nestedData = outerInspector.getStructFieldData(outer, structRef);
  Assert.assertEquals(nested, nestedData);

  // Nested level: the struct field's own inspector must resolve string_field.
  StructObjectInspector nestedInspector = (StructObjectInspector) structRef.getFieldObjectInspector();
  StructField stringRef = nestedInspector.getStructFieldRef("string_field");
  Assert.assertEquals(
      ImmutableList.of(nested.get(0)),
      nestedInspector.getStructFieldsDataAsList(nested));
  Assert.assertEquals(nested.get(0), nestedInspector.getStructFieldData(nestedData, stringRef));
}
Use of org.apache.iceberg.Schema in the Apache Hive project.
Class HiveIcebergSerDe, method initialize.
/**
 * Initializes this SerDe: resolves the Iceberg table schema and partition column names, then
 * builds the object inspector for the (possibly projected) schema.
 *
 * <p>The schema is resolved in this order:
 * <ol>
 *   <li>the JSON schema stored under {@code InputFormatConfig.TABLE_SCHEMA} in the SerDe properties,
 *       together with the optional {@code InputFormatConfig.PARTITION_SPEC};</li>
 *   <li>the table loaded through {@code IcebergTableUtil.getTable};</li>
 *   <li>if loading the table throws, the result of {@code hiveSchemaOrThrow}, with an empty partition
 *       column list. In this branch, a {@code NoSuchTableException} combined with the CTAS flag
 *       also triggers {@code createTableForCTAS}.</li>
 * </ol>
 *
 * <p>Unless {@code HiveIcebergStorageHandler.WRITE_KEY} is set, the inspector is built for the
 * de-duplicated read columns reported by {@code ColumnProjectionUtils}; the full table schema is used
 * when no columns are selected or when the projection does not yield one column per selected name.
 *
 * <p>NOTE(review): {@code configuration} is annotated {@code @Nullable} but is dereferenced without a
 * null check in the fallback branch and on the read path - confirm callers never pass null there.
 *
 * @param configuration job configuration; on the read path its {@code InputFormatConfig.CASE_SENSITIVE}
 *     flag is set to {@code false} as a side effect
 * @param serDeProperties table-level properties consulted for schema, partition spec, write key and
 *     CTAS flag
 * @param partitionProperties passed through to {@code super.initialize}; not otherwise read here
 * @throws SerDeException if a CTAS is attempted when {@code Catalogs.hiveCatalog} returns false, or if
 *     creating the object inspector fails; presumably also propagated from {@code hiveSchemaOrThrow}
 *     (not visible here - verify)
 */
@Override
public void initialize(@Nullable Configuration configuration, Properties serDeProperties, Properties partitionProperties) throws SerDeException {
super.initialize(configuration, serDeProperties, partitionProperties);
// Case 1: the schema was serialized into the SerDe properties - use it without loading the table.
if (serDeProperties.get(InputFormatConfig.TABLE_SCHEMA) != null) {
this.tableSchema = SchemaParser.fromJson((String) serDeProperties.get(InputFormatConfig.TABLE_SCHEMA));
if (serDeProperties.get(InputFormatConfig.PARTITION_SPEC) != null) {
// Partition column names are the names of the fields of the serialized partition spec.
PartitionSpec spec = PartitionSpecParser.fromJson(tableSchema, serDeProperties.getProperty(InputFormatConfig.PARTITION_SPEC));
this.partitionColumns = spec.fields().stream().map(PartitionField::name).collect(Collectors.toList());
} else {
this.partitionColumns = ImmutableList.of();
}
} else {
// Case 2: no serialized schema - try to load the table and take schema and spec from it.
try {
Table table = IcebergTableUtil.getTable(configuration, serDeProperties);
// always prefer the original table schema if there is one
this.tableSchema = table.schema();
this.partitionColumns = table.spec().fields().stream().map(PartitionField::name).collect(Collectors.toList());
LOG.info("Using schema from existing table {}", SchemaParser.toJson(tableSchema));
} catch (Exception e) {
// Case 3: any failure while loading the table lands here, not only a missing table.
// During table creation we might not have the schema information from the Iceberg table, nor from the HMS
// table. In this case we have to generate the schema using the serdeProperties which contains the info
// provided in the CREATE TABLE query.
boolean autoConversion = configuration.getBoolean(InputFormatConfig.SCHEMA_AUTO_CONVERSION, false);
// If we can not load the table try the provided hive schema
this.tableSchema = hiveSchemaOrThrow(e, autoConversion);
// This is only for table creation, it is ok to have an empty partition column list
this.partitionColumns = ImmutableList.of();
// create table for CTAS
// Only when the table is missing and the properties flag this as CTAS; refused with
// CTAS_EXCEPTION_MSG when Catalogs.hiveCatalog returns false for this configuration.
if (e instanceof NoSuchTableException && Boolean.parseBoolean(serDeProperties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS))) {
if (!Catalogs.hiveCatalog(configuration, serDeProperties)) {
throw new SerDeException(CTAS_EXCEPTION_MSG);
}
createTableForCTAS(configuration, serDeProperties);
}
}
}
// Decide which schema the object inspector describes.
Schema projectedSchema;
if (serDeProperties.get(HiveIcebergStorageHandler.WRITE_KEY) != null) {
// when writing out data, we should not do projection pushdown
projectedSchema = tableSchema;
} else {
// Read path: mark the configuration case-insensitive, matching the case-insensitive select below.
configuration.setBoolean(InputFormatConfig.CASE_SENSITIVE, false);
String[] selectedColumns = ColumnProjectionUtils.getReadColumnNames(configuration);
// When same table is joined multiple times, it is possible some selected columns are duplicated,
// in this case wrong recordStructField position leads wrong value or ArrayIndexOutOfBoundException
String[] distinctSelectedColumns = Arrays.stream(selectedColumns).distinct().toArray(String[]::new);
projectedSchema = distinctSelectedColumns.length > 0 ? tableSchema.caseInsensitiveSelect(distinctSelectedColumns) : tableSchema;
// Fall back to the full table schema when the projection does not contain exactly one top-level
// column per selected name (original note: or we cannot find selectOperator's column from inspector).
// With no selected columns, projectedSchema is already the table schema and stays that way.
if (projectedSchema.columns().size() != distinctSelectedColumns.length) {
projectedSchema = tableSchema;
}
}
try {
this.inspector = IcebergObjectInspector.create(projectedSchema);
} catch (Exception e) {
// Surface any inspector-creation failure as a SerDeException, keeping the cause.
throw new SerDeException(e);
}
}
Use of org.apache.iceberg.Schema in the Apache Hive project.
Class TestInputFormatReaderDeletes, method rowSet.
/**
 * Reads the table through the input format selected for this test run and returns the rows,
 * projected to the requested columns, as a {@code StructLikeSet}.
 *
 * @param name not used by this implementation
 * @param table table whose location is read and whose schema is projected
 * @param columns names of the columns to project
 * @return set holding every projected row produced by the matching input format(s)
 */
@Override
public StructLikeSet rowSet(String name, Table table, String... columns) {
  InputFormatConfig.ConfigBuilder builder = new InputFormatConfig.ConfigBuilder(conf).readFrom(table.location());
  Schema projected = table.schema().select(columns);
  StructLikeSet rows = StructLikeSet.create(projected.asStruct());

  rows.addAll(
      TestIcebergInputFormats.TESTED_INPUT_FORMATS.stream()
          // Only the input format this test instance is parameterized with.
          .filter(factory -> factory.name().equals(inputFormat))
          // Run the read with the projection applied and flatten the resulting records.
          .flatMap(factory -> factory.create(builder.project(projected).conf()).getRecords().stream())
          // A fresh wrapper per record, so no two set entries share a wrapper instance.
          .map(row -> new InternalRecordWrapper(projected.asStruct()).wrap(row))
          .collect(Collectors.toList()));

  return rows;
}
Aggregations