Search in sources :

Example 1 with StructLike

use of org.apache.iceberg.StructLike in project presto by prestodb.

the class PartitionTable method getPartitions.

/**
 * Groups the files planned by the given scan by partition tuple and accumulates
 * per-partition statistics.
 *
 * <p>The first file seen for a partition creates its {@link Partition} entry from that
 * file's metrics; every later file of the same partition is folded into the existing entry.
 *
 * @param tableScan scan whose planned files are aggregated
 * @return map from wrapped partition tuple to the aggregated {@link Partition}
 * @throws UncheckedIOException if an {@link IOException} is raised while the planned
 *         files are iterated or closed
 */
private Map<StructLikeWrapper, Partition> getPartitions(TableScan tableScan) {
    Map<StructLikeWrapper, Partition> partitionsByKey = new HashMap<>();
    try (CloseableIterable<FileScanTask> plannedTasks = tableScan.planFiles()) {
        for (FileScanTask task : plannedTasks) {
            DataFile file = task.file();
            StructLike partitionTuple = file.partition();
            // Wrap the tuple so it can serve as a hash-map key for the task's partition type.
            Types.StructType partitionType = task.spec().partitionType();
            StructLikeWrapper key = StructLikeWrapper.forType(partitionType).set(partitionTuple);

            Partition existing = partitionsByKey.get(key);
            if (existing == null) {
                // First file of this partition: seed the entry from the file's own metrics.
                partitionsByKey.put(
                        key,
                        new Partition(
                                idToTypeMapping,
                                nonPartitionPrimitiveColumns,
                                partitionTuple,
                                file.recordCount(),
                                file.fileSizeInBytes(),
                                toMap(file.lowerBounds()),
                                toMap(file.upperBounds()),
                                file.nullValueCounts(),
                                file.columnSizes()));
            } else {
                // Subsequent file: merge its counters and bounds into the running totals.
                existing.incrementFileCount();
                existing.incrementRecordCount(file.recordCount());
                existing.incrementSize(file.fileSizeInBytes());
                existing.updateMin(toMap(file.lowerBounds()), file.nullValueCounts(), file.recordCount());
                existing.updateMax(toMap(file.upperBounds()), file.nullValueCounts(), file.recordCount());
                existing.updateNullCount(file.nullValueCounts());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return partitionsByKey;
}
Also used : DataFile(org.apache.iceberg.DataFile) Types(org.apache.iceberg.types.Types) HashMap(java.util.HashMap) StructLikeWrapper(org.apache.iceberg.util.StructLikeWrapper) UncheckedIOException(java.io.UncheckedIOException) StructLike(org.apache.iceberg.StructLike) IOException(java.io.IOException) FileScanTask(org.apache.iceberg.FileScanTask)

Example 2 with StructLike

use of org.apache.iceberg.StructLike in project hive by apache.

the class TestHiveIcebergStorageHandlerLocalScan method runCreateAndReadTest.

/**
 * Executes the given CREATE statement, verifies the schema and partition spec of the
 * resulting Iceberg table, appends the supplied records partition by partition, and then
 * validates that selecting everything from the table returns those records.
 *
 * @param identifier table to load after creation and to query
 * @param createSQL statement that creates the table
 * @param expectedSchema schema the created table must have
 * @param expectedSpec partition spec the created table must have
 * @param data records to append, keyed by the partition they are written to
 * @throws IOException propagated from appending the data
 */
private void runCreateAndReadTest(TableIdentifier identifier, String createSQL, Schema expectedSchema, PartitionSpec expectedSpec, Map<StructLike, List<Record>> data) throws IOException {
    shell.executeStatement(createSQL);

    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    Assert.assertEquals(expectedSchema.asStruct(), icebergTable.schema().asStruct());
    Assert.assertEquals(expectedSpec, icebergTable.spec());

    // Append each partition's records and remember them as the expected result set.
    List<Record> expected = Lists.newArrayList();
    for (Map.Entry<StructLike, List<Record>> partitionData : data.entrySet()) {
        List<Record> partitionRecords = partitionData.getValue();
        testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, partitionData.getKey(), partitionRecords);
        expected.addAll(partitionRecords);
    }

    String selectAll = "SELECT * FROM " + identifier.toString();
    List<Object[]> resultRows = shell.executeStatement(selectAll);
    List<Record> actual = HiveIcebergTestUtils.valueForRow(icebergTable.schema(), resultRows);
    HiveIcebergTestUtils.validateData(expected, actual, 0);
}
Also used : GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) StructLike(org.apache.iceberg.StructLike)

Example 3 with StructLike

use of org.apache.iceberg.StructLike in project hive by apache.

the class TestHiveIcebergStorageHandlerLocalScan method testCreateTableWithColumnSpecificationPartitioned.

/**
 * Creates a table whose CREATE statement spells out the columns and a
 * {@code PARTITIONED BY (last_name ...)} clause, then checks via
 * {@code runCreateAndReadTest} that the table has the customer schema, an identity
 * partition spec on {@code last_name}, and returns the appended records.
 */
@Test
public void testCreateTableWithColumnSpecificationPartitioned() throws IOException {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");

    PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
            .identity("last_name")
            .build();

    // One customer record per last_name partition.
    Map<StructLike, List<Record>> data = ImmutableMap.<StructLike, List<Record>>builder()
            .put(Row.of("Brown"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(0)))
            .put(Row.of("Green"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(1)))
            .put(Row.of("Pink"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)))
            .build();

    String createSql = "CREATE EXTERNAL TABLE " + identifier
            + " (customer_id BIGINT, first_name STRING COMMENT 'This is first name') "
            + "PARTITIONED BY (last_name STRING COMMENT 'This is last name') STORED BY "
            + "ICEBERG "
            + testTables.locationForCreateTableSQL(identifier)
            + testTables.propertiesForCreateTableSQL(ImmutableMap.of());

    runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, data);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) ArrayList(java.util.ArrayList) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) List(java.util.List) StructLike(org.apache.iceberg.StructLike) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 4 with StructLike

use of org.apache.iceberg.StructLike in project hive by apache.

the class IcebergInternalRecordWrapper method converter.

/**
 * Returns a function that converts a raw value of the given type, or {@code null} when
 * this method defines no conversion for the type.
 *
 * <ul>
 *   <li>TIMESTAMP: the value is cast to {@code Long} and passed to
 *       {@code DateTimeUtil.timestamptzFromMicros}.</li>
 *   <li>STRUCT: the value is cast to {@link StructLike} and wrapped by an
 *       {@code IcebergInternalRecordWrapper} built from the struct type.</li>
 *   <li>LIST whose element type is a struct: every element is wrapped the same way and
 *       the results are collected into a new list.</li>
 *   <li>Anything else, including lists of non-struct elements: {@code null}.</li>
 * </ul>
 *
 * @param type type whose values need conversion
 * @return the conversion function, or {@code null} if none applies
 */
private static Function<Object, Object> converter(Type type) {
    Function<Object, Object> conversion = null;
    switch (type.typeId()) {
        case TIMESTAMP: {
            conversion = micros -> DateTimeUtil.timestamptzFromMicros((Long) micros);
            break;
        }
        case STRUCT: {
            // A single wrapper instance is shared by all invocations of the returned function.
            IcebergInternalRecordWrapper structWrapper = new IcebergInternalRecordWrapper(type.asStructType(), type.asStructType());
            conversion = value -> structWrapper.wrap((StructLike) value);
            break;
        }
        case LIST: {
            if (Type.TypeID.STRUCT.equals(type.asListType().elementType().typeId())) {
                StructType elementSchema = type.asListType().elementType().asStructType();
                // A new wrapper is created for every element rather than reused.
                // NOTE(review): presumably wrap() returns the wrapper itself, so sharing one
                // instance would alias all elements — confirm before changing.
                conversion = values -> ((List<?>) values).stream()
                        .map(element -> new IcebergInternalRecordWrapper(elementSchema, elementSchema).wrap((StructLike) element))
                        .collect(Collectors.toList());
            }
            break;
        }
        default:
            break;
    }
    return conversion;
}
Also used : Arrays(java.util.Arrays) Array(java.lang.reflect.Array) Types(org.apache.iceberg.types.Types) HashMap(java.util.HashMap) StructLike(org.apache.iceberg.StructLike) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) Type(org.apache.iceberg.types.Type) DateTimeUtil(org.apache.iceberg.util.DateTimeUtil) List(java.util.List) Record(org.apache.iceberg.data.Record) Map(java.util.Map) StructType(org.apache.iceberg.types.Types.StructType)

Example 5 with StructLike

use of org.apache.iceberg.StructLike in project incubator-gobblin by apache.

the class IcebergMetadataWriter method getIcebergPartitionVal.

/**
 * Derives the Iceberg partition value from the first {@link HiveSpec} of the given collection.
 *
 * @param specs collection of {@link HiveSpec}s; only the first element returned by its
 *        iterator is consulted
 * @param filePath path of the file being processed; used only in the error message
 * @param partitionSpec partition spec whose partition type is used to build the value
 * @return the partition value for the first spec's Hive partition, or {@code null} when
 *         that spec carries no Hive partition
 * @throws IOException if {@code specs} is {@code null} or empty
 */
private StructLike getIcebergPartitionVal(Collection<HiveSpec> specs, String filePath, PartitionSpec partitionSpec) throws IOException {
    if (specs == null || specs.isEmpty()) {
        throw new IOException("Cannot get hive spec for " + filePath);
    }

    HiveSpec firstSpec = specs.iterator().next();
    HivePartition hivePartition = firstSpec.getPartition().orNull();
    if (hivePartition == null) {
        // No Hive partition on the spec: there is no partition value to build.
        return null;
    }
    return IcebergUtils.getPartition(partitionSpec.partitionType(), hivePartition.getValues());
}
Also used : IOException(java.io.IOException) StructLike(org.apache.iceberg.StructLike) HivePartition(org.apache.gobblin.hive.HivePartition)

Aggregations

StructLike (org.apache.iceberg.StructLike)11 List (java.util.List)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 PartitionSpec (org.apache.iceberg.PartitionSpec)4 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)4 ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList)4 Test (org.junit.Test)4 IOException (java.io.IOException)3 DataFile (org.apache.iceberg.DataFile)3 HashSet (java.util.HashSet)2 Path (org.apache.hadoop.fs.Path)2 Record (org.apache.iceberg.data.Record)2 Type (org.apache.iceberg.types.Type)2 Types (org.apache.iceberg.types.Types)2 UncheckedIOException (java.io.UncheckedIOException)1 Array (java.lang.reflect.Array)1 ByteBuffer (java.nio.ByteBuffer)1 Arrays (java.util.Arrays)1 Map (java.util.Map)1