Use of org.apache.iceberg.Schema in project metacat by Netflix.
The class IcebergTableHandler, method update.
/**
 * Updates the iceberg schema if the provided tableInfo has updated field comments.
 *
 * @param tableInfo table information
 * @return true if an update is done
 */
public boolean update(final TableInfo tableInfo) {
    final List<FieldInfo> fields = tableInfo.getFields();
    // Skip when there are no fields, or when the previous-metadata-location parameter is
    // present: that parameter is only sent during data change and not during schema change.
    if (fields == null || fields.isEmpty()
        || !Strings.isNullOrEmpty(tableInfo.getMetadata().get(DirectSqlTable.PARAM_PREVIOUS_METADATA_LOCATION))) {
        return false;
    }
    final QualifiedName tableName = tableInfo.getName();
    final String tableMetadataLocation = HiveTableUtil.getIcebergTableMetadataLocation(tableInfo);
    if (Strings.isNullOrEmpty(tableMetadataLocation)) {
        final String message = String.format("No metadata location specified for table %s", tableName);
        log.error(message);
        throw new MetacatBadRequestException(message);
    }
    final IcebergMetastoreTables icebergMetastoreTables = new IcebergMetastoreTables(
        new IcebergTableOps(conf, tableMetadataLocation, connectorContext.getConfig(), icebergTableOpsProxy));
    final Table table = icebergMetastoreTables.loadTable(HiveTableUtil.qualifiedNameToTableIdentifier(tableName));
    final UpdateSchema updateSchema = table.updateSchema();
    final Schema schema = table.schema();
    // Stage a doc update for every field whose comment differs from the current iceberg doc.
    boolean commentsChanged = false;
    for (final FieldInfo field : fields) {
        final Types.NestedField existingField = schema.findField(field.getName());
        if (existingField != null && !Objects.equals(field.getComment(), existingField.doc())) {
            updateSchema.updateColumnDoc(field.getName(), field.getComment());
            commentsChanged = true;
        }
    }
    if (commentsChanged) {
        updateSchema.commit();
        // The commit produces a new metadata file; record the old/new locations on the
        // tableInfo so the caller can persist the location change.
        final String newTableMetadataLocation = icebergMetastoreTables.getTableOps().currentMetadataLocation();
        if (!tableMetadataLocation.equalsIgnoreCase(newTableMetadataLocation)) {
            tableInfo.getMetadata().put(DirectSqlTable.PARAM_PREVIOUS_METADATA_LOCATION, tableMetadataLocation);
            tableInfo.getMetadata().put(DirectSqlTable.PARAM_METADATA_LOCATION, newTableMetadataLocation);
        }
    }
    return commentsChanged;
}
Use of org.apache.iceberg.Schema in project metacat by Netflix.
The class HiveTypeConverter, method icebergeSchemaTofieldDtos.
/**
 * Converts an iceberg schema to a list of field DTOs.
 *
 * @param schema          iceberg schema
 * @param partitionFields partitioned fields
 * @return list of field Info
 */
public List<FieldInfo> icebergeSchemaTofieldDtos(final Schema schema, final List<PartitionField> partitionFields) {
    final List<FieldInfo> fields = Lists.newArrayList();
    // Resolve the source-column names of the partition fields once, into a set so the
    // per-column membership check below is O(1) instead of the previous O(partitions)
    // List.contains — the original was accidentally O(columns * partitions).
    final java.util.Set<String> partitionNames = partitionFields.stream()
        .map(f -> schema.findField(f.sourceId()).name())
        .collect(Collectors.toSet());
    for (final Types.NestedField field : schema.columns()) {
        final FieldInfo fieldInfo = new FieldInfo();
        fieldInfo.setName(field.name());
        final org.apache.iceberg.types.Type fieldType = field.type();
        // Keep the raw iceberg type string as the source type; map to the metacat type
        // by going through the hive type representation.
        fieldInfo.setSourceType(fieldType.toString());
        fieldInfo.setType(toMetacatType(fromIcebergToHiveType(fieldType)));
        fieldInfo.setIsNullable(field.isOptional());
        fieldInfo.setComment(field.doc());
        fieldInfo.setPartitionKey(partitionNames.contains(field.name()));
        fields.add(fieldInfo);
    }
    return fields;
}
Use of org.apache.iceberg.Schema in project incubator-gobblin by Apache.
The class IcebergUtils, method getIcebergSchema.
/**
 * Given an avro schema string and a hive table,
 * calculate the iceberg table schema and partition schema.
 * (E.g. we use 'datepartition' as the partition column, which is not included inside the data schema,
 * we'll need to add that column to data schema to construct table schema.)
 */
public static IcebergDataAndPartitionSchema getIcebergSchema(String schema, org.apache.hadoop.hive.metastore.api.Table table) {
    // Parse the data columns out of the provided avro schema string.
    final org.apache.iceberg.shaded.org.apache.avro.Schema avroDataSchema =
        new org.apache.iceberg.shaded.org.apache.avro.Schema.Parser().parse(schema);
    final List<Types.NestedField> tableColumns =
        Lists.newArrayList(AvroSchemaUtil.convert(avroDataSchema).asStructType().fields());
    // Derive an avro schema from the hive partition keys and convert it to iceberg fields.
    final org.apache.iceberg.shaded.org.apache.avro.Schema avroPartitionSchema =
        parseSchemaFromCols(table.getPartitionKeys(), table.getDbName(), table.getTableName(), true);
    final List<Types.NestedField> partitionColumns =
        AvroSchemaUtil.convert(avroPartitionSchema).asStructType().fields();
    Preconditions.checkArgument(
        partitionColumns.stream().allMatch(f -> f.type().isPrimitiveType()),
        "Only primitive fields are supported for partition columns");
    // Table schema = data columns + partition columns, re-numbered with fresh unique field ids.
    tableColumns.addAll(partitionColumns);
    final Types.StructType tableStruct = (Types.StructType) TypeUtil.assignFreshIds(
        Types.StructType.of(tableColumns), new AtomicInteger(0)::incrementAndGet);
    return new IcebergDataAndPartitionSchema(
        new org.apache.iceberg.Schema(tableStruct.fields()),
        new org.apache.iceberg.Schema(partitionColumns));
}
Use of org.apache.iceberg.Schema in project hive by Apache.
The class HiveTableTest, method testColumnTypeChangeInMetastore.
@Test
public void testColumnTypeChangeInMetastore() throws TException {
Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER);
// Expected schema once the three columns added below have been committed.
// NOTE(review): ids 1-4 assume the table starts with only column 1 ("id") — confirm
// against the fixture that creates TABLE_IDENTIFIER.
Schema expectedSchema = new Schema(Types.StructType.of(required(1, "id", Types.LongType.get()), optional(2, "data", Types.LongType.get()), optional(3, "string", Types.StringType.get()), optional(4, "int", Types.IntegerType.get())).fields());
// Add columns with different types, then verify we could delete one column in hive metastore
// as hive conf METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES was set to false. If this was set to true,
// an InvalidOperationException would thrown in method MetaStoreUtils#throwExceptionIfIncompatibleColTypeChange()
icebergTable.updateSchema().addColumn("data", Types.LongType.get()).addColumn("string", Types.StringType.get()).addColumn("int", Types.IntegerType.get()).commit();
Assert.assertEquals("Schema should match expected", expectedSchema.asStruct(), icebergTable.schema().asStruct());
// Expected schema after dropping "string": field id 3 is gone but the remaining ids are stable.
expectedSchema = new Schema(Types.StructType.of(required(1, "id", Types.LongType.get()), optional(2, "data", Types.LongType.get()), optional(4, "int", Types.IntegerType.get())).fields());
icebergTable.updateSchema().deleteColumn("string").commit();
Assert.assertEquals("Schema should match expected", expectedSchema.asStruct(), icebergTable.schema().asStruct());
}
Use of org.apache.iceberg.Schema in project hive by Apache.
The class TestHiveCatalog, method testCreateTableBuilder.
@Test
public void testCreateTableBuilder() throws Exception {
    // A two-column schema, bucketed on "data", created via the catalog's table builder.
    final Schema tableSchema = new Schema(
        required(1, "id", Types.IntegerType.get(), "unique ID"),
        required(2, "data", Types.StringType.get()));
    final PartitionSpec partitionSpec = PartitionSpec.builderFor(tableSchema).bucket("data", 16).build();
    final String tableLocation = temp.newFolder("tbl").toString();
    final TableIdentifier identifier = TableIdentifier.of(DB_NAME, "tbl");
    try {
        final Table created = catalog.buildTable(identifier, tableSchema)
            .withPartitionSpec(partitionSpec)
            .withLocation(tableLocation)
            .withProperty("key1", "value1")
            .withProperty("key2", "value2")
            .create();
        // The created table must reflect the location, schema, spec and properties set above.
        Assert.assertEquals(tableLocation, created.location());
        Assert.assertEquals(2, created.schema().columns().size());
        Assert.assertEquals(1, created.spec().fields().size());
        Assert.assertEquals("value1", created.properties().get("key1"));
        Assert.assertEquals("value2", created.properties().get("key2"));
    } finally {
        // Clean up so later tests can reuse DB_NAME/"tbl".
        catalog.dropTable(identifier);
    }
}
Aggregations