Use of org.apache.iceberg.Schema in project hive by apache.
The class HiveIcebergSerDe, method hiveSchemaOrThrow.
/**
 * Gets the Hive schema and throws an exception if it is not provided. In the latter case it adds the
 * previousException as a root cause.
 * @param previousException If we had an exception previously
 * @param autoConversion When <code>true</code>, convert unsupported types to more permissive ones, like tinyint to
 *          int
 * @return The Hive schema parsed from the serDeProperties provided when the SerDe was initialized
 * @throws SerDeException If there is no schema information in the serDeProperties
 */
private Schema hiveSchemaOrThrow(Exception previousException, boolean autoConversion) throws SerDeException {
  List<String> names = Lists.newArrayList();
  names.addAll(getColumnNames());
  names.addAll(getPartitionColumnNames());
  List<TypeInfo> types = Lists.newArrayList();
  types.addAll(getColumnTypes());
  types.addAll(getPartitionColumnTypes());
  List<String> comments = Lists.newArrayList();
  comments.addAll(getColumnComments());
  comments.addAll(getPartitionColumnComments());
  if (!names.isEmpty() && !types.isEmpty()) {
    Schema hiveSchema = HiveSchemaUtil.convert(names, types, comments, autoConversion);
    LOG.info("Using hive schema {}", SchemaParser.toJson(hiveSchema));
    return hiveSchema;
  } else {
    throw new SerDeException("Please provide an existing table or a valid schema", previousException);
  }
}
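For illustration, a minimal sketch of calling the underlying conversion directly. The column names, comments, and type choices below are made-up examples; HiveSchemaUtil.convert with the autoConversion flag is the same call used in the method above.

List<String> names = Arrays.asList("id", "age");
List<TypeInfo> types = Arrays.asList(
    TypeInfoUtils.getTypeInfoFromTypeString(serdeConstants.BIGINT_TYPE_NAME),
    TypeInfoUtils.getTypeInfoFromTypeString(serdeConstants.TINYINT_TYPE_NAME));
List<String> comments = Arrays.asList("row id", "user age");
// With autoConversion enabled, the tinyint column is widened to an Iceberg int
// instead of failing the conversion (per the javadoc above).
Schema schema = HiveSchemaUtil.convert(names, types, comments, true);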
Use of org.apache.iceberg.Schema in project hive by apache.
The class HiveIcebergStorageHandler, method overlayTableProperties.
/**
 * Stores the serializable table data in the configuration.
 * Currently the following is handled:
 * <ul>
 *   <li>Table - in case the table is serializable</li>
 *   <li>Location</li>
 *   <li>Schema</li>
 *   <li>Partition specification</li>
 *   <li>FileIO for handling table files</li>
 *   <li>Location provider used for file generation</li>
 *   <li>Encryption manager for encryption handling</li>
 * </ul>
 * @param configuration The configuration storing the catalog information
 * @param tableDesc The table descriptor which we want to store in the configuration
 * @param map The map of configuration properties to which we append the serialized data
 */
@VisibleForTesting
static void overlayTableProperties(Configuration configuration, TableDesc tableDesc, Map<String, String> map) {
  Properties props = tableDesc.getProperties();
  Table table = IcebergTableUtil.getTable(configuration, props);
  String schemaJson = SchemaParser.toJson(table.schema());

  // map overrides tableDesc properties
  Maps.fromProperties(props).entrySet().stream()
      .filter(entry -> !map.containsKey(entry.getKey()))
      .forEach(entry -> map.put(entry.getKey(), entry.getValue()));

  map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
  map.put(InputFormatConfig.TABLE_LOCATION, table.location());
  map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
  props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));

  // serialize table object into config
  Table serializableTable = SerializableTable.copyOf(table);
  checkAndSkipIoConfigSerialization(configuration, serializableTable);
  map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
      SerializationUtil.serializeToBase64(serializableTable));

  // We need to remove this, otherwise the job.xml will be invalid as column comments are separated with '\0' and
  // the serialization utils fail to serialize this character
  map.remove("columns.comments");

  // Save the schema into the table props as well, to avoid repeatedly hitting the HMS during SerDe initializations.
  // This is an exception to the interface documentation, but it is a safe operation to add this property.
  props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
}
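A hedged sketch of the read side, assuming a task only has the Configuration populated above. The conf and tableName variables are illustrative; SerializationUtil.deserializeFromBase64 and SchemaParser.fromJson are the counterparts of the serialization calls used in the method.

// Rebuild the serializable table from the base64 payload stored in the job configuration.
String serialized = conf.get(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableName);
Table table = SerializationUtil.deserializeFromBase64(serialized);
// The schema round-trips through its JSON form as well.
Schema schema = SchemaParser.fromJson(conf.get(InputFormatConfig.TABLE_SCHEMA));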
Use of org.apache.iceberg.Schema in project hive by apache.
The class TestHiveCatalog, method testTableName.
@Test
public void testTableName() {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  try {
    catalog.buildTable(tableIdent, schema).withPartitionSpec(spec).create();

    Table table = catalog.loadTable(tableIdent);
    Assert.assertEquals("Name must match", "hive.hivedb.tbl", table.name());

    TableIdentifier snapshotsTableIdent = TableIdentifier.of(DB_NAME, "tbl", "snapshots");
    Table snapshotsTable = catalog.loadTable(snapshotsTableIdent);
    Assert.assertEquals("Name must match", "hive.hivedb.tbl.snapshots", snapshotsTable.name());
  } finally {
    catalog.dropTable(tableIdent);
  }
}
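The three-part identifier works because the catalog resolves the extra name segment as an Iceberg metadata table. As a hedged extension of the same test, other metadata table names should resolve the same way; "history" is another built-in metadata table, and the expected name below is an assumption following the pattern asserted above.

TableIdentifier historyTableIdent = TableIdentifier.of(DB_NAME, "tbl", "history");
Table historyTable = catalog.loadTable(historyTableIdent);
Assert.assertEquals("Name must match", "hive.hivedb.tbl.history", historyTable.name());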
Use of org.apache.iceberg.Schema in project hive by apache.
The class HiveIcebergTestUtils, method createEqualityDeleteFile.
/**
 * @param table The table to create the delete file for
 * @param deleteFilePath The path where the delete file should be created, relative to the table location root
 * @param equalityFields List of field names that should play a role in the equality check
 * @param fileFormat The file format that should be used for writing out the delete file
 * @param rowsToDelete The rows that should be deleted. It's enough to fill out the fields that are relevant for the
 *          equality check, as listed in equalityFields; the rest of the fields are ignored
 * @return The DeleteFile created
 * @throws IOException If there is an error during DeleteFile write
 */
public static DeleteFile createEqualityDeleteFile(Table table, String deleteFilePath, List<String> equalityFields,
    FileFormat fileFormat, List<Record> rowsToDelete) throws IOException {
  // Resolve the equality field names to their Iceberg field IDs
  List<Integer> equalityFieldIds = equalityFields.stream()
      .map(fieldName -> table.schema().findField(fieldName).fieldId())
      .collect(Collectors.toList());
  Schema eqDeleteRowSchema = table.schema().select(equalityFields.toArray(new String[] {}));
  FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(
      table.schema(), table.spec(), ArrayUtil.toIntArray(equalityFieldIds), eqDeleteRowSchema, null);

  EncryptedOutputFile outputFile = table.encryption().encrypt(HadoopOutputFile.fromPath(
      new org.apache.hadoop.fs.Path(table.location(), deleteFilePath), new Configuration()));

  PartitionKey part = new PartitionKey(table.spec(), eqDeleteRowSchema);
  part.partition(rowsToDelete.get(0));

  EqualityDeleteWriter<Record> eqWriter = appenderFactory.newEqDeleteWriter(outputFile, fileFormat, part);
  try (EqualityDeleteWriter<Record> writer = eqWriter) {
    writer.deleteAll(rowsToDelete);
  }
  return eqWriter.toDeleteFile();
}
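A hedged usage sketch, assuming a table whose schema contains a long customer_id column; the field name, file path, and commit step are illustrative, while GenericRecord and RowDelta are standard Iceberg APIs.

// Only the equality fields have to be populated on the delete rows.
Record deleteRow = GenericRecord.create(table.schema());
deleteRow.setField("customer_id", 1L);
DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(
    table, "data/eq-delete-001", Collections.singletonList("customer_id"),
    FileFormat.PARQUET, Collections.singletonList(deleteRow));
// Committing the delete file removes every row whose customer_id equals 1.
table.newRowDelta().addDeletes(deleteFile).commit();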
Use of org.apache.iceberg.Schema in project hive by apache.
The class TestHiveSchemaUtil, method testConversionWithoutLastComment.
@Test
public void testConversionWithoutLastComment() {
  Schema expected = new Schema(
      optional(1, "customer_id", Types.LongType.get(), "customer comment"),
      optional(2, "first_name", Types.StringType.get(), null));
  Schema schema = HiveSchemaUtil.convert(
      Arrays.asList("customer_id", "first_name"),
      Arrays.asList(
          TypeInfoUtils.getTypeInfoFromTypeString(serdeConstants.BIGINT_TYPE_NAME),
          TypeInfoUtils.getTypeInfoFromTypeString(serdeConstants.STRING_TYPE_NAME)),
      Arrays.asList("customer comment"));
  Assert.assertEquals(expected.asStruct(), schema.asStruct());
}