Example 91 with Table

Use of org.apache.iceberg.Table in project hive by apache.

From class TestHiveCatalog, method testCreateTableBuilder.

@Test
public void testCreateTableBuilder() throws Exception {
    Schema schema = new Schema(
        required(1, "id", Types.IntegerType.get(), "unique ID"),
        required(2, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
    TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
    String location = temp.newFolder("tbl").toString();
    try {
        Table table = catalog.buildTable(tableIdent, schema)
            .withPartitionSpec(spec)
            .withLocation(location)
            .withProperty("key1", "value1")
            .withProperty("key2", "value2")
            .create();
        Assert.assertEquals(location, table.location());
        Assert.assertEquals(2, table.schema().columns().size());
        Assert.assertEquals(1, table.spec().fields().size());
        Assert.assertEquals("value1", table.properties().get("key1"));
        Assert.assertEquals("value2", table.properties().get("key2"));
    } finally {
        catalog.dropTable(tableIdent);
    }
}
Also used: TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
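
For comparison, the same builder API can stage the table creation inside a transaction, so nothing becomes visible in the catalog until the commit succeeds. A minimal sketch, assuming the same catalog, schema, and identifier as in the test above:

Transaction txn = catalog.buildTable(tableIdent, schema)
    .withProperty("key1", "value1")
    .createTransaction();
// no table is visible to other readers until this commit
txn.commitTransaction();
Table created = catalog.loadTable(tableIdent);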

Example 92 with Table

Use of org.apache.iceberg.Table in project hive by apache.

From class TestHiveCatalog, method testReplaceTxnBuilder.

@Test
public void testReplaceTxnBuilder() throws Exception {
    Schema schema = new Schema(
        required(1, "id", Types.IntegerType.get(), "unique ID"),
        required(2, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
    TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
    String location = temp.newFolder("tbl").toString();
    try {
        Transaction createTxn = catalog.buildTable(tableIdent, schema)
            .withPartitionSpec(spec)
            .withLocation(location)
            .withProperty("key1", "value1")
            .createOrReplaceTransaction();
        createTxn.commitTransaction();
        Table table = catalog.loadTable(tableIdent);
        Assert.assertEquals(1, table.spec().fields().size());
        String newLocation = temp.newFolder("tbl-2").toString();
        Transaction replaceTxn = catalog.buildTable(tableIdent, schema)
            .withProperty("key2", "value2")
            .withLocation(newLocation)
            .replaceTransaction();
        replaceTxn.commitTransaction();
        table = catalog.loadTable(tableIdent);
        Assert.assertEquals(newLocation, table.location());
        Assert.assertNull(table.currentSnapshot());
        PartitionSpec v1Expected = PartitionSpec.builderFor(table.schema())
            .alwaysNull("data", "data_bucket")
            .withSpecId(1)
            .build();
        Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec());
        Assert.assertEquals("value1", table.properties().get("key1"));
        Assert.assertEquals("value2", table.properties().get("key2"));
    } finally {
        catalog.dropTable(tableIdent);
    }
}
Also used: TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
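
The two transaction builders above behave differently when the table is absent: replaceTransaction() requires an existing table, while createOrReplaceTransaction() creates one if needed. A minimal sketch with a hypothetical identifier, assuming the same catalog and schema as above:

// "no_such_tbl" is a hypothetical, non-existent table name
TableIdentifier missing = TableIdentifier.of(DB_NAME, "no_such_tbl");
try {
    catalog.buildTable(missing, schema).replaceTransaction();
    Assert.fail("replaceTransaction should fail for a missing table");
} catch (NoSuchTableException e) {
    // expected: a replace requires an existing table
}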

Example 93 with Table

Use of org.apache.iceberg.Table in project hive by apache.

From class TestHiveCatalog, method testCreateTableWithCaching.

@Test
public void testCreateTableWithCaching() throws Exception {
    Schema schema = new Schema(
        required(1, "id", Types.IntegerType.get(), "unique ID"),
        required(2, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
    TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
    String location = temp.newFolder("tbl").toString();
    ImmutableMap<String, String> properties = ImmutableMap.of("key1", "value1", "key2", "value2");
    Catalog cachingCatalog = CachingCatalog.wrap(catalog);
    try {
        Table table = cachingCatalog.createTable(tableIdent, schema, spec, location, properties);
        Assert.assertEquals(location, table.location());
        Assert.assertEquals(2, table.schema().columns().size());
        Assert.assertEquals(1, table.spec().fields().size());
        Assert.assertEquals("value1", table.properties().get("key1"));
        Assert.assertEquals("value2", table.properties().get("key2"));
    } finally {
        cachingCatalog.dropTable(tableIdent);
    }
}
Also used: TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) Catalog(org.apache.iceberg.catalog.Catalog) CachingCatalog(org.apache.iceberg.CachingCatalog) Test(org.junit.Test)
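
The point of CachingCatalog.wrap is that repeated loads of the same identifier are served from an in-memory cache rather than the metastore. A minimal sketch of that expected behavior, reusing the table created above:

Table first = cachingCatalog.loadTable(tableIdent);
Table second = cachingCatalog.loadTable(tableIdent);
// the wrapped catalog is expected to return the cached instance on the second load
Assert.assertSame(first, second);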

Example 94 with Table

Use of org.apache.iceberg.Table in project hive by apache.

From class HiveIcebergOutputFormat, method writer.

private static HiveIcebergRecordWriter writer(JobConf jc) {
    TaskAttemptID taskAttemptID = TezUtil.taskAttemptWrapper(jc);
    // The config comes from the FileSinkOperator, which has its own config for every target table
    Table table = HiveIcebergStorageHandler.table(jc, jc.get(hive_metastoreConstants.META_TABLE_NAME));
    Schema schema = HiveIcebergStorageHandler.schema(jc);
    PartitionSpec spec = table.spec();
    FileFormat fileFormat = FileFormat.valueOf(
        PropertyUtil.propertyAsString(table.properties(), TableProperties.DEFAULT_FILE_FORMAT,
            TableProperties.DEFAULT_FILE_FORMAT_DEFAULT).toUpperCase(Locale.ENGLISH));
    long targetFileSize = PropertyUtil.propertyAsLong(table.properties(),
        TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
    FileIO io = table.io();
    int partitionId = taskAttemptID.getTaskID().getId();
    int taskId = taskAttemptID.getId();
    String operationId = jc.get(HiveConf.ConfVars.HIVEQUERYID.varname) + "-" + taskAttemptID.getJobID();
    OutputFileFactory outputFileFactory = OutputFileFactory.builderFor(table, partitionId, taskId)
        .format(fileFormat)
        .operationId(operationId)
        .build();
    String tableName = jc.get(Catalogs.NAME);
    // the null arguments leave sort order and delete-file configuration unset
    HiveFileWriterFactory hfwf = new HiveFileWriterFactory(table, fileFormat, schema, null, fileFormat, null, null, null, null);
    return new HiveIcebergRecordWriter(schema, spec, fileFormat, hfwf, outputFileFactory, io, targetFileSize, taskAttemptID, tableName);
}
Also used: OutputFileFactory(org.apache.iceberg.io.OutputFileFactory) Table(org.apache.iceberg.Table) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) PartitionSpec(org.apache.iceberg.PartitionSpec) FileIO(org.apache.iceberg.io.FileIO)
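
The file format resolution above falls back to Iceberg's defaults when the table sets nothing: TableProperties.DEFAULT_FILE_FORMAT is the key "write.format.default" and TableProperties.DEFAULT_FILE_FORMAT_DEFAULT is "parquet". A worked sketch of the lookup for a table with no explicit setting:

// assuming table.properties() does not contain "write.format.default"
String formatName = PropertyUtil.propertyAsString(
    table.properties(),
    TableProperties.DEFAULT_FILE_FORMAT,           // "write.format.default"
    TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);  // "parquet"
FileFormat format = FileFormat.valueOf(formatName.toUpperCase(Locale.ENGLISH));
// format == FileFormat.PARQUET for such a table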

Example 95 with Table

Use of org.apache.iceberg.Table in project hive by apache.

From class HiveIcebergSerDe, method createTableForCTAS.

private void createTableForCTAS(Configuration configuration, Properties serDeProperties) {
    serDeProperties.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(tableSchema));
    // build partition spec, if any
    if (!getPartitionColumnNames().isEmpty()) {
        List<FieldSchema> partitionFields = IntStream.range(0, getPartitionColumnNames().size())
            .mapToObj(i -> new FieldSchema(getPartitionColumnNames().get(i),
                getPartitionColumnTypes().get(i).getTypeName(), null))
            .collect(Collectors.toList());
        PartitionSpec spec = HiveSchemaUtil.spec(tableSchema, partitionFields);
        serDeProperties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec));
    }
    // clean up the properties for table creation (so that internal serde props don't become table props)
    Properties createProps = getCTASTableCreationProperties(serDeProperties);
    // create CTAS table
    LOG.info("Creating table {} for CTAS with schema: {}, and spec: {}", serDeProperties.get(Catalogs.NAME), tableSchema, serDeProperties.get(InputFormatConfig.PARTITION_SPEC));
    Catalogs.createTable(configuration, createProps);
    // set this in the query state so that we can rollback the table in the lifecycle hook in case of failures
    SessionStateUtil.addResource(configuration, InputFormatConfig.CTAS_TABLE_NAME, serDeProperties.getProperty(Catalogs.NAME));
}
Also used: IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) ColumnProjectionUtils(org.apache.hadoop.hive.serde2.ColumnProjectionUtils) LoggerFactory(org.slf4j.LoggerFactory) Writable(org.apache.hadoop.io.Writable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) PartitionField(org.apache.iceberg.PartitionField) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) IcebergObjectInspector(org.apache.iceberg.mr.hive.serde.objectinspector.IcebergObjectInspector) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Container(org.apache.iceberg.mr.mapred.Container) Nullable(javax.annotation.Nullable) Properties(java.util.Properties) Logger(org.slf4j.Logger) Table(org.apache.iceberg.Table) Collection(java.util.Collection) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Maps(org.apache.iceberg.relocated.com.google.common.collect.Maps) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) List(java.util.List) Record(org.apache.iceberg.data.Record) SerDeStats(org.apache.hadoop.hive.serde2.SerDeStats) PartitionSpec(org.apache.iceberg.PartitionSpec) TableProperties(org.apache.iceberg.TableProperties) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants)
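
The partition spec serialized into the serde properties above is rebuilt on the read side with PartitionSpecParser, which needs the same schema to resolve field ids. A minimal round-trip sketch, assuming tableSchema contains a "data" column:

PartitionSpec spec = PartitionSpec.builderFor(tableSchema).identity("data").build();
String json = PartitionSpecParser.toJson(spec);
// the JSON plus the original schema is enough to restore an equal spec
PartitionSpec restored = PartitionSpecParser.fromJson(tableSchema, json);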

Aggregations

Table (org.apache.iceberg.Table): 188 usages
Test (org.junit.Test): 132
Schema (org.apache.iceberg.Schema): 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 56
Record (org.apache.iceberg.data.Record): 56
PartitionSpec (org.apache.iceberg.PartitionSpec): 51
IOException (java.io.IOException): 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 27
List (java.util.List): 22
Map (java.util.Map): 20
DataFile (org.apache.iceberg.DataFile): 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException): 19
Collectors (java.util.stream.Collectors): 18
BaseTable (org.apache.iceberg.BaseTable): 18
Types (org.apache.iceberg.types.Types): 18
Properties (java.util.Properties): 17
Configuration (org.apache.hadoop.conf.Configuration): 17
Path (org.apache.hadoop.fs.Path): 17
FileFormat (org.apache.iceberg.FileFormat): 16
ArrayList (java.util.ArrayList): 15