Use of org.apache.iceberg.Schema in project presto by prestodb, from the class IcebergPageSinkProvider, method createPageSink:
private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle) {
    HdfsContext hdfsContext = new HdfsContext(session, tableHandle.getSchemaName(), tableHandle.getTableName());
    Schema schema = SchemaParser.fromJson(tableHandle.getSchemaAsJson());
    PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, tableHandle.getPartitionSpecAsJson());
    LocationProvider locationProvider = getLocationProvider(
            new SchemaTableName(tableHandle.getSchemaName(), tableHandle.getTableName()),
            tableHandle.getOutputPath(),
            tableHandle.getStorageProperties());
    return new IcebergPageSink(
            schema,
            partitionSpec,
            locationProvider,
            fileWriterFactory,
            pageIndexerFactory,
            hdfsEnvironment,
            hdfsContext,
            tableHandle.getInputColumns(),
            jsonCodec,
            session,
            tableHandle.getFileFormat());
}
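The table handle carries the schema and partition spec as JSON, so the page sink can be rebuilt on any worker without a live catalog connection. A minimal, self-contained sketch of that serialization round trip, using only the Iceberg parser APIs seen above (the table columns and spec here are invented for illustration):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.types.Types;

public class SchemaRoundTripSketch {
    public static void main(String[] args) {
        Schema schema = new Schema(
                Types.NestedField.required(1, "order_key", Types.LongType.get()),
                Types.NestedField.optional(2, "comment", Types.StringType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).identity("order_key").build();

        // Serialize, as the writable table handle stores them...
        String schemaJson = SchemaParser.toJson(schema);
        String specJson = PartitionSpecParser.toJson(spec);

        // ...and rebuild, as createPageSink does; note the spec is parsed
        // against the already-rebuilt schema.
        Schema rebuiltSchema = SchemaParser.fromJson(schemaJson);
        PartitionSpec rebuiltSpec = PartitionSpecParser.fromJson(rebuiltSchema, specJson);
        System.out.println(rebuiltSchema.sameSchema(schema)); // true
        System.out.println(rebuiltSpec.equals(spec)); // true
    }
}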
Use of org.apache.iceberg.Schema in project presto by prestodb, from the class IcebergHadoopMetadata, method beginCreateTable:
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Schema schema = toIcebergSchema(tableMetadata.getColumns());
    PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));

    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builder();
    FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
    propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
    if (tableMetadata.getComment().isPresent()) {
        propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
    }
    String formatVersion = getFormatVersion(tableMetadata.getProperties());
    if (formatVersion != null) {
        propertiesBuilder.put(FORMAT_VERSION, formatVersion);
    }

    try {
        transaction = resourceFactory.getCatalog(session).newCreateTableTransaction(
                toIcebergTableIdentifier(schemaTableName), schema, partitionSpec, propertiesBuilder.build());
    }
    catch (AlreadyExistsException e) {
        throw new TableAlreadyExistsException(schemaTableName);
    }

    Table icebergTable = transaction.table();
    return new IcebergWritableTableHandle(
            schemaName,
            tableName,
            SchemaParser.toJson(icebergTable.schema()),
            PartitionSpecParser.toJson(icebergTable.spec()),
            getColumns(icebergTable.schema(), typeManager),
            icebergTable.location(),
            fileFormat,
            icebergTable.properties());
}
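For readers outside the Presto codebase: resourceFactory.getCatalog(session) ultimately yields an Iceberg Catalog, and the transaction pattern above is plain Iceberg API. A hedged sketch against the generic interface, with an invented table name and properties; in the real connector the transaction is presumably committed later, once the writers have finished, rather than here:

import java.util.Map;

import com.google.common.collect.ImmutableMap;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.types.Types;

public class CreateTableTransactionSketch {
    public static Table begin(Catalog catalog) {
        Schema schema = new Schema(
                Types.NestedField.required(1, "order_key", Types.LongType.get()),
                Types.NestedField.optional(2, "comment", Types.StringType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("order_key", 16).build();
        Map<String, String> properties = ImmutableMap.of(
                TableProperties.DEFAULT_FILE_FORMAT, FileFormat.PARQUET.toString());

        Transaction transaction;
        try {
            transaction = catalog.newCreateTableTransaction(
                    TableIdentifier.of("tpch", "orders"), schema, spec, properties);
        }
        catch (AlreadyExistsException e) {
            throw new RuntimeException("Table already exists: tpch.orders", e);
        }
        // The table is visible to the transaction but not yet committed;
        // beginCreateTable hands its schema and spec back out as JSON here.
        return transaction.table();
    }
}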
Use of org.apache.iceberg.Schema in project presto by prestodb, from the class IcebergFileWriterFactory, method createOrcWriter:
private IcebergFileWriter createOrcWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session) {
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
        DataSink orcDataSink = hdfsEnvironment.doAs(session.getUser(), () -> new OutputStreamDataSink(fileSystem.create(outputPath)));
        // On failure, delete the partially written output file
        Callable<Void> rollbackAction = () -> {
            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.delete(outputPath, false));
            return null;
        };

        List<Types.NestedField> columnFields = icebergSchema.columns();
        List<String> fileColumnNames = columnFields.stream()
                .map(Types.NestedField::name)
                .collect(toImmutableList());
        List<Type> fileColumnTypes = columnFields.stream()
                .map(Types.NestedField::type)
                .map(type -> toPrestoType(type, typeManager))
                .collect(toImmutableList());

        // When write validation is enabled, the finished file is read back and checked
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(
                            new OrcDataSourceId(outputPath.toString()),
                            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.getFileStatus(outputPath).getLen()),
                            getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session),
                            getOrcStreamBufferSize(session),
                            false,
                            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(outputPath)),
                            readStats);
                }
                catch (IOException e) {
                    throw new PrestoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        return new IcebergOrcFileWriter(
                icebergSchema,
                orcDataSink,
                rollbackAction,
                ORC,
                fileColumnNames,
                fileColumnTypes,
                toOrcType(icebergSchema),
                getCompressionCodec(session).getOrcCompressionKind(),
                orcFileWriterConfig.toOrcWriterOptionsBuilder()
                        .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                                .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                                .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                                .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                                .build())
                        .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session))
                        .build(),
                IntStream.range(0, fileColumnNames.size()).toArray(),
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .build(),
                UTC,
                validationInputFactory,
                getOrcOptimizedWriterValidateMode(session),
                orcWriterStats,
                dwrfEncryptionProvider,
                Optional.empty());
    }
    catch (IOException e) {
        throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
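One detail worth isolating from the method above is the rollback action: cleanup is captured as a Callable<Void> so the file writer can delete its partially written output if the query fails. A stripped-down sketch of that pattern, without the Presto doAs wrapping (the class and method names here are invented):

import java.util.concurrent.Callable;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RollbackActionSketch {
    // Returns a deferred cleanup step; the writer invokes it only on failure.
    public static Callable<Void> deleteOnRollback(FileSystem fileSystem, Path outputPath) {
        return () -> {
            // Non-recursive delete: outputPath is a single data file
            fileSystem.delete(outputPath, false);
            return null;
        };
    }
}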
Use of org.apache.iceberg.Schema in project presto by prestodb, from the class TestPartitionFields, method partitionSpec:
private static PartitionSpec partitionSpec(Consumer<PartitionSpec.Builder> consumer) {
    Schema schema = new Schema(
            NestedField.required(1, "order_key", LongType.get()),
            NestedField.required(2, "ts", TimestampType.withoutZone()),
            NestedField.required(3, "price", DoubleType.get()),
            NestedField.optional(4, "comment", StringType.get()),
            NestedField.optional(5, "notes", ListType.ofRequired(6, StringType.get())));
    PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
    consumer.accept(builder);
    return builder.build();
}
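A hypothetical call to this helper, showing how a test would add partition fields through the consumer; the transforms chosen here are examples, not the actual test cases:

PartitionSpec spec = partitionSpec(builder -> builder.bucket("order_key", 16).day("ts"));
// spec now has two fields with Iceberg's default derived names:
// "order_key_bucket" and "ts_day".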
Use of org.apache.iceberg.Schema in project hive by apache, from the class TestHiveCatalog, method testDropNamespace:
@Test
public void testDropNamespace() throws TException {
    Namespace namespace = Namespace.of("dbname_drop");
    TableIdentifier identifier = TableIdentifier.of(namespace, "table");
    Schema schema = new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields());

    catalog.createNamespace(namespace, meta);
    catalog.createTable(identifier, schema);
    Map<String, String> nameMeta = catalog.loadNamespaceMetadata(namespace);
    Assert.assertEquals("apache", nameMeta.get("owner"));
    Assert.assertEquals("iceberg", nameMeta.get("group"));

    AssertHelpers.assertThrows("Should fail to drop non-empty namespace " + namespace,
            NamespaceNotEmptyException.class,
            "Namespace dbname_drop is not empty. One or more tables exist.",
            () -> catalog.dropNamespace(namespace));
    Assert.assertTrue(catalog.dropTable(identifier, true));
    Assert.assertTrue("Should drop namespace once it is empty", catalog.dropNamespace(namespace));
    Assert.assertFalse("Should return false when namespace doesn't exist", catalog.dropNamespace(Namespace.of("db.ns1")));
    AssertHelpers.assertThrows("Should fail to load metadata for dropped namespace " + namespace,
            NoSuchNamespaceException.class,
            "Namespace does not exist: ",
            () -> catalog.loadNamespaceMetadata(namespace));
}
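The ordering this test enforces generalizes beyond HiveCatalog: any catalog implementing SupportsNamespaces is expected to refuse to drop a non-empty namespace. A hedged sketch of the happy-path teardown against the generic interfaces (the class and method names here are invented):

import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;

public class NamespaceTeardownSketch {
    // Drop the tables first, then the namespace; dropping a non-empty
    // namespace throws NamespaceNotEmptyException instead.
    public static <C extends Catalog & SupportsNamespaces> boolean teardown(
            C catalog, Namespace namespace, TableIdentifier table) {
        catalog.dropTable(table, true); // purge = true also deletes data files
        return catalog.dropNamespace(namespace); // true if the namespace was dropped
    }
}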