Search in sources:

Example 11 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

From the class TestHiveCatalog, method testCreateTableDefaultSortOrder.

@Test
public void testCreateTableDefaultSortOrder() {
    // Schema under test: required int "id" (documented as a unique ID) and required string "data".
    Schema schema = new Schema(required(1, "id", Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()));
    // Partition by a 16-way bucket on the string column.
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
    TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
    try {
        Table table = catalog.createTable(tableIdent, schema, spec);
        // A table created without an explicit sort order must get the unsorted default (order id 0).
        Assert.assertEquals("Order ID must match", 0, table.sortOrder().orderId());
        // Fixed assertion message grammar: "must unsorted" -> "must be unsorted".
        Assert.assertTrue("Order must be unsorted", table.sortOrder().isUnsorted());
    } finally {
        // Always drop the table so the identifier is free for other tests.
        catalog.dropTable(tableIdent);
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 12 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

From the class TestHiveCatalog, method testCreateTableCustomSortOrder.

@Test
public void testCreateTableCustomSortOrder() {
    // Schema under test: required int "id" (documented as a unique ID) and required string "data".
    Schema schema = new Schema(required(1, "id", Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
    // Custom order: ascending on "id" with nulls first.
    SortOrder order = SortOrder.builderFor(schema).asc("id", NULLS_FIRST).build();
    TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
    try {
        Table table = catalog.buildTable(tableIdent, schema).withPartitionSpec(spec).withSortOrder(order).create();
        SortOrder sortOrder = table.sortOrder();
        // A custom order gets the first non-default order id (1) and carries exactly one field.
        Assert.assertEquals("Order ID must match", 1, sortOrder.orderId());
        Assert.assertEquals("Order must have 1 field", 1, sortOrder.fields().size());
        // Fixed assertion messages: removed stray trailing spaces after "match".
        Assert.assertEquals("Direction must match", ASC, sortOrder.fields().get(0).direction());
        Assert.assertEquals("Null order must match", NULLS_FIRST, sortOrder.fields().get(0).nullOrder());
        // An untransformed sort column is represented by the identity transform.
        Transform<?, ?> transform = Transforms.identity(Types.IntegerType.get());
        Assert.assertEquals("Transform must match", transform, sortOrder.fields().get(0).transform());
    } finally {
        // Always drop the table so the identifier is free for other tests.
        catalog.dropTable(tableIdent);
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) SortOrder(org.apache.iceberg.SortOrder) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 13 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

From the class TestHiveCatalog, method testCreateTableTxnBuilder.

@Test
public void testCreateTableTxnBuilder() throws Exception {
    // Two-column schema used for the transactional create.
    Schema schema = new Schema(required(1, "id", Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()));
    TableIdentifier identifier = TableIdentifier.of(DB_NAME, "tbl");
    String tableLocation = temp.newFolder("tbl").toString();
    try {
        // Stage the table creation inside a transaction, then commit it in one step.
        Transaction createTxn = catalog.buildTable(identifier, schema).withLocation(tableLocation).createTransaction();
        createTxn.commitTransaction();
        // Reload the committed table and verify location, column count, and default (unpartitioned) spec.
        Table loaded = catalog.loadTable(identifier);
        Assert.assertEquals(tableLocation, loaded.location());
        Assert.assertEquals(2, loaded.schema().columns().size());
        Assert.assertTrue(loaded.spec().isUnpartitioned());
    } finally {
        // Always drop the table so the identifier is free for other tests.
        catalog.dropTable(identifier);
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) Schema(org.apache.iceberg.Schema) Test(org.junit.Test)

Example 14 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

From the class TestHiveSchemaUtil, method testSchemaConvertToIcebergSchemaForEveryPrimitiveType.

@Test
public void testSchemaConvertToIcebergSchemaForEveryPrimitiveType() {
    // Convert a Hive field list covering every supported primitive type, then
    // compare the resulting Iceberg schema structurally against the expected one.
    Schema converted = HiveSchemaUtil.convert(getSupportedFieldSchemas());
    Assert.assertEquals(getSchemaWithSupportedTypes().asStruct(), converted.asStruct());
}
Also used : Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Test(org.junit.Test)

Example 15 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

From the class Catalogs, method createTable.

/**
 * Creates an Iceberg table using the catalog specified by the configuration.
 * <p>
 * The properties should contain the following values:
 * <ul>
 * <li>Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required
 * <li>Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required
 * <li>Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. Table will be unpartitioned if
 *  not provided
 * </ul><p>
 * Other properties will be handled over to the Table creation. The controlling properties above will not be
 * propagated.
 * @param conf a Hadoop conf
 * @param props the controlling properties
 * @return the created Iceberg table
 * @throws NullPointerException if the schema, or the required identifier/location, is not set
 */
public static Table createTable(Configuration conf, Properties props) {
    String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA);
    Preconditions.checkNotNull(schemaString, "Table schema not set");
    // Reuse the already-fetched (and null-checked) schema string instead of re-reading the property.
    Schema schema = SchemaParser.fromJson(schemaString);
    String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC);
    // Default to an unpartitioned table when no partition spec is supplied.
    PartitionSpec spec = PartitionSpec.unpartitioned();
    if (specString != null) {
        spec = PartitionSpecParser.fromJson(schema, specString);
    }
    String location = props.getProperty(LOCATION);
    String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
    // Create a table property map without the controlling properties
    Map<String, String> map = Maps.newHashMapWithExpectedSize(props.size());
    for (Object key : props.keySet()) {
        if (!PROPERTIES_TO_REMOVE.contains(key)) {
            map.put(key.toString(), props.get(key).toString());
        }
    }
    Optional<Catalog> catalog = loadCatalog(conf, catalogName);
    if (catalog.isPresent()) {
        // Catalog-managed table: a parseable identifier is mandatory; location may be null
        // (the catalog picks a default location in that case).
        String name = props.getProperty(NAME);
        Preconditions.checkNotNull(name, "Table identifier not set");
        return catalog.get().createTable(TableIdentifier.parse(name), schema, spec, location, map);
    }
    // No catalog configured: fall back to a path-based HadoopTables table, which requires a location.
    Preconditions.checkNotNull(location, "Table location not set");
    return new HadoopTables(conf).create(schema, spec, map, location);
}
Also used : Schema(org.apache.iceberg.Schema) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) PartitionSpec(org.apache.iceberg.PartitionSpec) Catalog(org.apache.iceberg.catalog.Catalog)

Aggregations

Schema (org.apache.iceberg.Schema)126 Test (org.junit.Test)93 Record (org.apache.iceberg.data.Record)68 Table (org.apache.iceberg.Table)55 PartitionSpec (org.apache.iceberg.PartitionSpec)39 GenericRecord (org.apache.iceberg.data.GenericRecord)36 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)30 List (java.util.List)21 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)20 IOException (java.io.IOException)16 Types (org.apache.iceberg.types.Types)16 ArrayList (java.util.ArrayList)15 Map (java.util.Map)14 HashMap (java.util.HashMap)13 FileFormat (org.apache.iceberg.FileFormat)13 UpdateSchema (org.apache.iceberg.UpdateSchema)12 Path (org.apache.hadoop.fs.Path)11 Collectors (java.util.stream.Collectors)10 ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList)10 TestHelper (org.apache.iceberg.mr.TestHelper)9