Example 6 with Schema

Use of org.apache.iceberg.Schema in project presto by prestodb.

Class IcebergPageSinkProvider, method createPageSink:

private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle) {
    HdfsContext hdfsContext = new HdfsContext(session, tableHandle.getSchemaName(), tableHandle.getTableName());
    // The schema and partition spec travel in the table handle as JSON; rehydrate them here.
    Schema schema = SchemaParser.fromJson(tableHandle.getSchemaAsJson());
    PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, tableHandle.getPartitionSpecAsJson());
    LocationProvider locationProvider = getLocationProvider(
            new SchemaTableName(tableHandle.getSchemaName(), tableHandle.getTableName()),
            tableHandle.getOutputPath(), tableHandle.getStorageProperties());
    return new IcebergPageSink(schema, partitionSpec, locationProvider, fileWriterFactory, pageIndexerFactory,
            hdfsEnvironment, hdfsContext, tableHandle.getInputColumns(), jsonCodec, session,
            tableHandle.getFileFormat());
}
Also used : IcebergUtil.getLocationProvider(com.facebook.presto.iceberg.IcebergUtil.getLocationProvider) LocationProvider(org.apache.iceberg.io.LocationProvider) Schema(org.apache.iceberg.Schema) HdfsContext(com.facebook.presto.hive.HdfsContext) PartitionSpec(org.apache.iceberg.PartitionSpec) SchemaTableName(com.facebook.presto.spi.SchemaTableName)
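
The table handle carries the schema and partition spec as JSON precisely so they can cross the coordinator/worker boundary; SchemaParser and PartitionSpecParser handle both directions. A minimal round-trip sketch (the two-column schema below is illustrative, not taken from the Presto code):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.types.Types;

public class SchemaRoundTrip {
    public static void main(String[] args) {
        Schema schema = new Schema(
                Types.NestedField.required(1, "id", Types.LongType.get()),
                Types.NestedField.optional(2, "data", Types.StringType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build();

        // Serialize, as the writable table handle does on the coordinator ...
        String schemaJson = SchemaParser.toJson(schema);
        String specJson = PartitionSpecParser.toJson(spec);

        // ... and rehydrate, as createPageSink does on the worker.
        Schema restored = SchemaParser.fromJson(schemaJson);
        PartitionSpec restoredSpec = PartitionSpecParser.fromJson(restored, specJson);
        System.out.println(restored.sameSchema(schema) && restoredSpec.equals(spec)); // true
    }
}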

Example 7 with Schema

Use of org.apache.iceberg.Schema in project presto by prestodb.

Class IcebergHadoopMetadata, method beginCreateTable:

@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Schema schema = toIcebergSchema(tableMetadata.getColumns());
    PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));
    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builder();
    FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
    propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
    if (tableMetadata.getComment().isPresent()) {
        propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
    }
    String formatVersion = getFormatVersion(tableMetadata.getProperties());
    if (formatVersion != null) {
        propertiesBuilder.put(FORMAT_VERSION, formatVersion);
    }
    try {
        // Stage the table in a catalog transaction; it becomes visible only on commit.
        transaction = resourceFactory.getCatalog(session).newCreateTableTransaction(
                toIcebergTableIdentifier(schemaTableName), schema, partitionSpec, propertiesBuilder.build());
    } catch (AlreadyExistsException e) {
        throw new TableAlreadyExistsException(schemaTableName);
    }
    Table icebergTable = transaction.table();
    return new IcebergWritableTableHandle(
            schemaName,
            tableName,
            SchemaParser.toJson(icebergTable.schema()),
            PartitionSpecParser.toJson(icebergTable.spec()),
            getColumns(icebergTable.schema(), typeManager),
            icebergTable.location(),
            fileFormat,
            icebergTable.properties());
}
Also used : TableAlreadyExistsException(com.facebook.presto.hive.TableAlreadyExistsException) SystemTable(com.facebook.presto.spi.SystemTable) IcebergUtil.getHadoopIcebergTable(com.facebook.presto.iceberg.IcebergUtil.getHadoopIcebergTable) Table(org.apache.iceberg.Table) AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) IcebergTableProperties.getFileFormat(com.facebook.presto.iceberg.IcebergTableProperties.getFileFormat) SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionSpec(org.apache.iceberg.PartitionSpec) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap)
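
newCreateTableTransaction is part of Iceberg's Catalog interface, so the same pattern works outside Presto. A minimal sketch against a HadoopCatalog, where the warehouse path, table identifier, and table property are assumptions for illustration:

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.types.Types;

public class CreateTableTransactionSketch {
    public static void main(String[] args) {
        HadoopCatalog catalog = new HadoopCatalog(new Configuration(), "/tmp/warehouse");
        Schema schema = new Schema(Types.NestedField.required(1, "id", Types.LongType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("id", 16).build();

        // Nothing is visible to readers until commitTransaction() succeeds.
        Transaction txn = catalog.newCreateTableTransaction(
                TableIdentifier.of("db", "events"), schema, spec,
                Map.of("write.format.default", "PARQUET"));
        // ... stage data files against txn.table() here ...
        txn.commitTransaction();
    }
}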

Example 8 with Schema

Use of org.apache.iceberg.Schema in project presto by prestodb.

Class IcebergFileWriterFactory, method createOrcWriter:

private IcebergFileWriter createOrcWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session) {
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
        DataSink orcDataSink = hdfsEnvironment.doAs(session.getUser(), () -> new OutputStreamDataSink(fileSystem.create(outputPath)));
        Callable<Void> rollbackAction = () -> {
            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.delete(outputPath, false));
            return null;
        };
        List<Types.NestedField> columnFields = icebergSchema.columns();
        List<String> fileColumnNames = columnFields.stream().map(Types.NestedField::name).collect(toImmutableList());
        List<Type> fileColumnTypes = columnFields.stream().map(Types.NestedField::type).map(type -> toPrestoType(type, typeManager)).collect(toImmutableList());
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    // Re-open the freshly written file so the validator can read it back.
                    return new HdfsOrcDataSource(
                            new OrcDataSourceId(outputPath.toString()),
                            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.getFileStatus(outputPath).getLen()),
                            getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session),
                            getOrcStreamBufferSize(session),
                            false,
                            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(outputPath)),
                            readStats);
                } catch (IOException e) {
                    throw new PrestoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        return new IcebergOrcFileWriter(
                icebergSchema,
                orcDataSink,
                rollbackAction,
                ORC,
                fileColumnNames,
                fileColumnTypes,
                toOrcType(icebergSchema),
                getCompressionCodec(session).getOrcCompressionKind(),
                orcFileWriterConfig.toOrcWriterOptionsBuilder()
                        .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                                .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                                .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                                .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                                .build())
                        .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session))
                        .build(),
                IntStream.range(0, fileColumnNames.size()).toArray(),
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .build(),
                UTC,
                validationInputFactory,
                getOrcOptimizedWriterValidateMode(session),
                orcWriterStats,
                dwrfEncryptionProvider,
                Optional.empty());
    } catch (IOException e) {
        throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) Types(org.apache.iceberg.types.Types) FileSystem(org.apache.hadoop.fs.FileSystem) DataSink(com.facebook.presto.common.io.DataSink) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) NodeVersion(com.facebook.presto.hive.NodeVersion) PRESTO_VERSION_NAME(com.facebook.presto.hive.HiveMetadata.PRESTO_VERSION_NAME) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getParquetWriterBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) HdfsContext(com.facebook.presto.hive.HdfsContext) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) ParquetSchemaUtil.convert(org.apache.iceberg.parquet.ParquetSchemaUtil.convert) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) Schema(org.apache.iceberg.Schema) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) List(java.util.List) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) ICEBERG_WRITER_OPEN_ERROR(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR) IcebergSessionProperties.isOrcOptimizedWriterValidate(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcOptimizedWriterValidate) Optional(java.util.Optional) OutputStreamDataSink(com.facebook.presto.common.io.OutputStreamDataSink) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) IntStream(java.util.stream.IntStream) HiveSessionProperties.getParquetWriterPageSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize) Callable(java.util.concurrent.Callable) PrestoException(com.facebook.presto.spi.PrestoException) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) IcebergSessionProperties.getCompressionCodec(com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) PrimitiveTypeMapBuilder.makeTypeMap(com.facebook.presto.iceberg.util.PrimitiveTypeMapBuilder.makeTypeMap) TypeConverter.toOrcType(com.facebook.presto.iceberg.TypeConverter.toOrcType) OrcWriterStats(com.facebook.presto.orc.OrcWriterStats) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) DefaultOrcWriterFlushPolicy(com.facebook.presto.orc.DefaultOrcWriterFlushPolicy) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) ICEBERG_WRITE_VALIDATION_FAILED(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITE_VALIDATION_FAILED) JobConf(org.apache.hadoop.mapred.JobConf) IcebergSessionProperties.getOrcOptimizedWriterValidateMode(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcOptimizedWriterValidateMode) PRESTO_QUERY_ID_NAME(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_QUERY_ID_NAME) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize)
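
Before constructing the writer, createOrcWriter flattens the Iceberg schema into parallel column-name and column-type lists. A standalone sketch of that projection with plain java.util.stream collectors and an illustrative two-column schema; note the Presto code additionally maps each Iceberg type to a Presto type via toPrestoType, while this sketch stops at the Iceberg types:

import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

public class ColumnProjectionSketch {
    public static void main(String[] args) {
        Schema schema = new Schema(
                Types.NestedField.required(1, "id", Types.LongType.get()),
                Types.NestedField.optional(2, "payload", Types.StringType.get()));

        // Parallel lists, index-aligned with schema.columns().
        List<String> names = schema.columns().stream()
                .map(Types.NestedField::name)
                .collect(Collectors.toList());
        List<Type> types = schema.columns().stream()
                .map(Types.NestedField::type)
                .collect(Collectors.toList());
        System.out.println(names + " -> " + types);
    }
}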

Example 9 with Schema

Use of org.apache.iceberg.Schema in project presto by prestodb.

Class TestPartitionFields, method partitionSpec:

private static PartitionSpec partitionSpec(Consumer<PartitionSpec.Builder> consumer) {
    Schema schema = new Schema(
            NestedField.required(1, "order_key", LongType.get()),
            NestedField.required(2, "ts", TimestampType.withoutZone()),
            NestedField.required(3, "price", DoubleType.get()),
            NestedField.optional(4, "comment", StringType.get()),
            NestedField.optional(5, "notes", ListType.ofRequired(6, StringType.get())));
    PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
    consumer.accept(builder);
    return builder.build();
}
Also used : Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec)
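
Because the helper takes a Consumer<PartitionSpec.Builder>, each test declares only the partitioning it cares about. Hypothetical call sites (not taken from TestPartitionFields itself) might look like:

PartitionSpec identitySpec = partitionSpec(builder -> builder.identity("order_key"));
PartitionSpec dayAndBucket = partitionSpec(builder -> builder.day("ts").bucket("order_key", 16));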

Example 10 with Schema

Use of org.apache.iceberg.Schema in project hive by apache.

Class TestHiveCatalog, method testDropNamespace:

@Test
public void testDropNamespace() throws TException {
    Namespace namespace = Namespace.of("dbname_drop");
    TableIdentifier identifier = TableIdentifier.of(namespace, "table");
    Schema schema = new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields());
    catalog.createNamespace(namespace, meta);
    catalog.createTable(identifier, schema);
    Map<String, String> namespaceMeta = catalog.loadNamespaceMetadata(namespace);
    Assert.assertEquals("apache", namespaceMeta.get("owner"));
    Assert.assertEquals("iceberg", namespaceMeta.get("group"));
    AssertHelpers.assertThrows("Should fail to drop namespace is not empty" + namespace, NamespaceNotEmptyException.class, "Namespace dbname_drop is not empty. One or more tables exist.", () -> {
        catalog.dropNamespace(namespace);
    });
    Assert.assertTrue(catalog.dropTable(identifier, true));
    Assert.assertTrue("Should drop namespace once it is empty", catalog.dropNamespace(namespace));
    Assert.assertFalse("Should return false when dropping a namespace that doesn't exist", catalog.dropNamespace(Namespace.of("db.ns1")));
    AssertHelpers.assertThrows("Should fail to load metadata for dropped namespace " + namespace, NoSuchNamespaceException.class, "Namespace does not exist: ", () -> {
        catalog.loadNamespaceMetadata(namespace);
    });
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Schema(org.apache.iceberg.Schema) Namespace(org.apache.iceberg.catalog.Namespace) Test(org.junit.Test)
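
The namespace lifecycle exercised here comes from Iceberg's SupportsNamespaces interface, which HiveCatalog implements. A minimal sketch, assuming a configured catalog instance and the same illustrative owner/group metadata as the test:

import java.util.Map;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;

public class NamespaceLifecycleSketch {
    static void run(SupportsNamespaces catalog) {
        Namespace ns = Namespace.of("dbname_drop");
        catalog.createNamespace(ns, Map.of("owner", "apache", "group", "iceberg"));
        Map<String, String> meta = catalog.loadNamespaceMetadata(ns);
        System.out.println(meta.get("owner"));
        // dropNamespace returns false when the namespace does not exist and
        // throws NamespaceNotEmptyException while tables remain inside it.
        boolean dropped = catalog.dropNamespace(ns);
        System.out.println(dropped);
    }
}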

Aggregations

Schema (org.apache.iceberg.Schema): 126
Test (org.junit.Test): 93
Record (org.apache.iceberg.data.Record): 68
Table (org.apache.iceberg.Table): 55
PartitionSpec (org.apache.iceberg.PartitionSpec): 39
GenericRecord (org.apache.iceberg.data.GenericRecord): 36
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 30
List (java.util.List): 21
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 20
IOException (java.io.IOException): 16
Types (org.apache.iceberg.types.Types): 16
ArrayList (java.util.ArrayList): 15
Map (java.util.Map): 14
HashMap (java.util.HashMap): 13
FileFormat (org.apache.iceberg.FileFormat): 13
UpdateSchema (org.apache.iceberg.UpdateSchema): 12
Path (org.apache.hadoop.fs.Path): 11
Collectors (java.util.stream.Collectors): 10
ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList): 10
TestHelper (org.apache.iceberg.mr.TestHelper): 9