Use of org.apache.iceberg.Table in project hive by apache.
The class TestHiveCatalog, method testCreateTableBuilder.
@Test
public void testCreateTableBuilder() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();

  try {
    Table table = catalog.buildTable(tableIdent, schema)
        .withPartitionSpec(spec)
        .withLocation(location)
        .withProperty("key1", "value1")
        .withProperty("key2", "value2")
        .create();

    Assert.assertEquals(location, table.location());
    Assert.assertEquals(2, table.schema().columns().size());
    Assert.assertEquals(1, table.spec().fields().size());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    catalog.dropTable(tableIdent);
  }
}
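The builder API above is the fluent route; the Catalog interface also offers a single-call createTable taking the same arguments, as the caching example further down uses. A minimal sketch of the equivalent one-liner, assuming the same catalog, schema, spec, and location fixtures as the test:

// Equivalent single-call creation via the Catalog interface.
Map<String, String> props = ImmutableMap.of("key1", "value1", "key2", "value2");
Table table = catalog.createTable(tableIdent, schema, spec, location, props);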
Use of org.apache.iceberg.Table in project hive by apache.
The class TestHiveCatalog, method testReplaceTxnBuilder.
@Test
public void testReplaceTxnBuilder() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();

  try {
    Transaction createTxn = catalog.buildTable(tableIdent, schema)
        .withPartitionSpec(spec)
        .withLocation(location)
        .withProperty("key1", "value1")
        .createOrReplaceTransaction();
    createTxn.commitTransaction();

    Table table = catalog.loadTable(tableIdent);
    Assert.assertEquals(1, table.spec().fields().size());

    String newLocation = temp.newFolder("tbl-2").toString();
    Transaction replaceTxn = catalog.buildTable(tableIdent, schema)
        .withProperty("key2", "value2")
        .withLocation(newLocation)
        .replaceTransaction();
    replaceTxn.commitTransaction();

    table = catalog.loadTable(tableIdent);
    Assert.assertEquals(newLocation, table.location());
    Assert.assertNull(table.currentSnapshot());

    PartitionSpec v1Expected = PartitionSpec.builderFor(table.schema())
        .alwaysNull("data", "data_bucket")
        .withSpecId(1)
        .build();
    Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    catalog.dropTable(tableIdent);
  }
}
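Replace transactions are only one flavor; any loaded Table can batch multiple metadata updates through newTransaction() and commit them atomically. A minimal sketch, assuming a table loaded as above (the "key3" property is illustrative):

Transaction txn = table.newTransaction();
// Stage a property change; it stays invisible to other readers until the
// transaction itself commits.
txn.updateProperties()
    .set("key3", "value3")
    .commit();
txn.commitTransaction(); // applies all staged changes atomically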
Use of org.apache.iceberg.Table in project hive by apache.
The class TestHiveCatalog, method testCreateTableWithCaching.
@Test
public void testCreateTableWithCaching() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();
  ImmutableMap<String, String> properties = ImmutableMap.of("key1", "value1", "key2", "value2");
  Catalog cachingCatalog = CachingCatalog.wrap(catalog);

  try {
    Table table = cachingCatalog.createTable(tableIdent, schema, spec, location, properties);

    Assert.assertEquals(location, table.location());
    Assert.assertEquals(2, table.schema().columns().size());
    Assert.assertEquals(1, table.spec().fields().size());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    cachingCatalog.dropTable(tableIdent);
  }
}
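The point of CachingCatalog.wrap is that repeated loads are served from an in-memory cache instead of going back to the metastore. A minimal sketch of what that buys; the assertSame assertion is an assumption about the wrapper's caching behavior, not part of the original test:

Table first = cachingCatalog.loadTable(tableIdent);
Table second = cachingCatalog.loadTable(tableIdent);
// Both loads should resolve to the same cached Table instance.
Assert.assertSame(first, second);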
Use of org.apache.iceberg.Table in project hive by apache.
The class HiveIcebergOutputFormat, method writer.
private static HiveIcebergRecordWriter writer(JobConf jc) {
  TaskAttemptID taskAttemptID = TezUtil.taskAttemptWrapper(jc);
  // The config comes from the FileSinkOperator, which has its own config for every target table
  Table table = HiveIcebergStorageHandler.table(jc, jc.get(hive_metastoreConstants.META_TABLE_NAME));
  Schema schema = HiveIcebergStorageHandler.schema(jc);
  PartitionSpec spec = table.spec();
  FileFormat fileFormat = FileFormat.valueOf(PropertyUtil.propertyAsString(
      table.properties(), TableProperties.DEFAULT_FILE_FORMAT,
      TableProperties.DEFAULT_FILE_FORMAT_DEFAULT).toUpperCase(Locale.ENGLISH));
  long targetFileSize = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
      TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  FileIO io = table.io();
  int partitionId = taskAttemptID.getTaskID().getId();
  int taskId = taskAttemptID.getId();
  String operationId = jc.get(HiveConf.ConfVars.HIVEQUERYID.varname) + "-" + taskAttemptID.getJobID();
  OutputFileFactory outputFileFactory = OutputFileFactory.builderFor(table, partitionId, taskId)
      .format(fileFormat)
      .operationId(operationId)
      .build();
  String tableName = jc.get(Catalogs.NAME);
  HiveFileWriterFactory hfwf = new HiveFileWriterFactory(
      table, fileFormat, schema, null, fileFormat, null, null, null, null);
  return new HiveIcebergRecordWriter(
      schema, spec, fileFormat, hfwf, outputFileFactory, io, targetFileSize, taskAttemptID, tableName);
}
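The writer's file format falls back to TableProperties.DEFAULT_FILE_FORMAT_DEFAULT ("parquet") when the table sets nothing, so the format can be switched per table through a property update. A minimal sketch of such an override; the "orc" value is illustrative:

table.updateProperties()
    .set(TableProperties.DEFAULT_FILE_FORMAT, "orc")
    .commit();
// The writer() method above would now resolve FileFormat.valueOf("ORC").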
Use of org.apache.iceberg.Table in project hive by apache.
The class HiveIcebergSerDe, method createTableForCTAS.
private void createTableForCTAS(Configuration configuration, Properties serDeProperties) {
  serDeProperties.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(tableSchema));
  // build the partition spec, if any partition columns were specified
  if (!getPartitionColumnNames().isEmpty()) {
    List<FieldSchema> partitionFields = IntStream.range(0, getPartitionColumnNames().size())
        .mapToObj(i -> new FieldSchema(
            getPartitionColumnNames().get(i), getPartitionColumnTypes().get(i).getTypeName(), null))
        .collect(Collectors.toList());
    PartitionSpec spec = HiveSchemaUtil.spec(tableSchema, partitionFields);
    serDeProperties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec));
  }
  // clean up the properties for table creation (so that internal serde props don't become table props)
  Properties createProps = getCTASTableCreationProperties(serDeProperties);
  // create the CTAS table
  LOG.info("Creating table {} for CTAS with schema: {}, and spec: {}",
      serDeProperties.get(Catalogs.NAME), tableSchema, serDeProperties.get(InputFormatConfig.PARTITION_SPEC));
  Catalogs.createTable(configuration, createProps);
  // set this in the query state so that we can roll back the table in the lifecycle hook in case of failures
  SessionStateUtil.addResource(
      configuration, InputFormatConfig.CTAS_TABLE_NAME, serDeProperties.getProperty(Catalogs.NAME));
}
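HiveSchemaUtil.spec maps the Hive partition columns onto the Iceberg schema; since Hive partition columns carry no transform, the expected outcome is identity partitioning. A minimal sketch of the equivalent direct construction, assuming a partition column named "dt" exists in tableSchema (the column name is hypothetical):

// Identity partitioning on a hypothetical "dt" column.
PartitionSpec spec = PartitionSpec.builderFor(tableSchema)
    .identity("dt")
    .build();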