Use of io.trino.plugin.iceberg.TableType.DATA in project trino by trinodb.
Class IcebergMetadata, method getTableProperties.
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle tableHandle)
{
    IcebergTableHandle table = (IcebergTableHandle) tableHandle;
    if (table.getSnapshotId().isEmpty()) {
        // A table with a missing snapshot id has no data to scan, so return TupleDomain.none() as the predicate
        return new ConnectorTableProperties(TupleDomain.none(), Optional.empty(), Optional.empty(), Optional.empty(), ImmutableList.of());
    }

    Table icebergTable = catalog.loadTable(session, table.getSchemaTableName());

    // Extract identity partition fields that are present in all partition specs, for creating the discrete predicates.
    Set<Integer> partitionSourceIds = identityPartitionColumnsInAllSpecs(icebergTable);

    TupleDomain<IcebergColumnHandle> enforcedPredicate = table.getEnforcedPredicate();

    DiscretePredicates discretePredicates = null;
    if (!partitionSourceIds.isEmpty()) {
        // Extract identity partition columns
        Map<Integer, IcebergColumnHandle> columns = getColumns(icebergTable.schema(), typeManager).stream()
                .filter(column -> partitionSourceIds.contains(column.getId()))
                .collect(toImmutableMap(IcebergColumnHandle::getId, Function.identity()));

        Supplier<List<FileScanTask>> lazyFiles = Suppliers.memoize(() -> {
            TableScan tableScan = icebergTable.newScan()
                    .useSnapshot(table.getSnapshotId().get())
                    .filter(toIcebergExpression(enforcedPredicate))
                    .includeColumnStats();
            try (CloseableIterable<FileScanTask> iterator = tableScan.planFiles()) {
                return ImmutableList.copyOf(iterator);
            }
            catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        });

        Iterable<FileScanTask> files = () -> lazyFiles.get().iterator();

        Iterable<TupleDomain<ColumnHandle>> discreteTupleDomain = Iterables.transform(files, fileScan -> {
            // Extract partition values in the data file
            Map<Integer, Optional<String>> partitionColumnValueStrings = getPartitionKeys(fileScan);
            Map<ColumnHandle, NullableValue> partitionValues = partitionSourceIds.stream()
                    .filter(partitionColumnValueStrings::containsKey)
                    .collect(toImmutableMap(
                            columns::get,
                            columnId -> {
                                IcebergColumnHandle column = columns.get(columnId);
                                Object prestoValue = deserializePartitionValue(
                                        column.getType(),
                                        partitionColumnValueStrings.get(columnId).orElse(null),
                                        column.getName());
                                return NullableValue.of(column.getType(), prestoValue);
                            }));
            return TupleDomain.fromFixedValues(partitionValues);
        });

        discretePredicates = new DiscretePredicates(
                columns.values().stream()
                        .map(ColumnHandle.class::cast)
                        .collect(toImmutableList()),
                discreteTupleDomain);
    }

    return new ConnectorTableProperties(
            // over all tableScan.planFiles() and caching partition values in table handle.
            enforcedPredicate.transformKeys(ColumnHandle.class::cast),
            // TODO: implement table partitioning
            Optional.empty(),
            Optional.empty(),
            Optional.ofNullable(discretePredicates),
            ImmutableList.of());
}
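
The helper identityPartitionColumnsInAllSpecs is referenced above but not shown in this excerpt. A minimal sketch of what it plausibly does, assuming it intersects the identity-transform source ids across every partition spec the table has had (the real method in IcebergMetadata may differ):

// Sketch, not copied from the source. Assumes imports of org.apache.iceberg.PartitionField,
// org.apache.iceberg.PartitionSpec, org.apache.iceberg.Table, java.util.HashSet, java.util.Set,
// and java.util.stream.Collectors.
private static Set<Integer> identityPartitionColumnsInAllSpecs(Table table)
{
    Set<Integer> commonSourceIds = null;
    for (PartitionSpec spec : table.specs().values()) {
        Set<Integer> identitySourceIds = spec.fields().stream()
                .filter(field -> field.transform().isIdentity())
                .map(PartitionField::sourceId)
                .collect(Collectors.toSet());
        if (commonSourceIds == null) {
            commonSourceIds = new HashSet<>(identitySourceIds);
        }
        else {
            commonSourceIds.retainAll(identitySourceIds);
        }
    }
    return commonSourceIds == null ? Set.of() : commonSourceIds;
}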
Use of io.trino.plugin.iceberg.TableType.DATA in project trino by trinodb.
Class IcebergMetadata, method finishInsert.
@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    IcebergWritableTableHandle table = (IcebergWritableTableHandle) insertHandle;
    Table icebergTable = transaction.table();

    List<CommitTaskData> commitTasks = fragments.stream()
            .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
            .collect(toImmutableList());

    Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
            .map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId())))
            .toArray(Type[]::new);

    AppendFiles appendFiles = transaction.newAppend();
    ImmutableSet.Builder<String> writtenFiles = ImmutableSet.builder();
    for (CommitTaskData task : commitTasks) {
        DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                .withPath(task.getPath())
                .withFileSizeInBytes(task.getFileSizeInBytes())
                .withFormat(table.getFileFormat().toIceberg())
                .withMetrics(task.getMetrics().metrics());
        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionDataJson = task.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
        }
        appendFiles.appendFile(builder.build());
        writtenFiles.add(task.getPath());
    }

    // try to leave as little garbage as possible behind
    if (table.getRetryMode() != NO_RETRIES) {
        cleanExtraOutputFiles(session, writtenFiles.build());
    }

    appendFiles.commit();
    transaction.commitTransaction();
    transaction = null;

    return Optional.of(new HiveWrittenPartitions(commitTasks.stream()
            .map(CommitTaskData::getPath)
            .collect(toImmutableList())));
}
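
For context, each loop iteration above builds an Iceberg DataFile and stages it on the transaction's AppendFiles. A standalone sketch of the same Iceberg API on an unpartitioned table; the path, size, and record count are made-up placeholders, and Trino commits through the connector transaction rather than directly on the table:

// Hypothetical values throughout; Trino supplies full metrics via withMetrics(task.getMetrics().metrics()).
DataFile dataFile = DataFiles.builder(PartitionSpec.unpartitioned())
        .withPath("s3://bucket/warehouse/tbl/data/00000-0-example.parquet")  // placeholder path
        .withFormat(FileFormat.PARQUET)
        .withFileSizeInBytes(2048L)   // placeholder size
        .withRecordCount(100L)        // placeholder row count
        .build();
icebergTable.newAppend()
        .appendFile(dataFile)
        .commit();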
Use of io.trino.plugin.iceberg.TableType.DATA in project trino by trinodb.
Class IcebergMetadata, method getWriteLayout.
private Optional<ConnectorTableLayout> getWriteLayout(Schema tableSchema, PartitionSpec partitionSpec, boolean forceRepartitioning)
{
    if (partitionSpec.isUnpartitioned()) {
        return Optional.empty();
    }

    Map<Integer, IcebergColumnHandle> columnById = getColumns(tableSchema, typeManager).stream()
            .collect(toImmutableMap(IcebergColumnHandle::getId, identity()));

    List<IcebergColumnHandle> partitioningColumns = partitionSpec.fields().stream()
            .sorted(Comparator.comparing(PartitionField::sourceId))
            .map(field -> requireNonNull(columnById.get(field.sourceId()), () -> "Cannot find source column for partitioning field " + field))
            .distinct()
            .collect(toImmutableList());
    List<String> partitioningColumnNames = partitioningColumns.stream()
            .map(IcebergColumnHandle::getName)
            .collect(toImmutableList());

    if (!forceRepartitioning && partitionSpec.fields().stream().allMatch(field -> field.transform().isIdentity())) {
        // Do not set partitioningHandle, to let the engine decide, based on statistics, whether to repartition the data.
        return Optional.of(new ConnectorTableLayout(partitioningColumnNames));
    }

    IcebergPartitioningHandle partitioningHandle = new IcebergPartitioningHandle(toPartitionFields(partitionSpec), partitioningColumns);
    return Optional.of(new ConnectorTableLayout(partitioningHandle, partitioningColumnNames));
}
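
To illustrate the branch above: when every partition field uses an identity transform and forceRepartitioning is false, only the column names are returned and the engine decides whether to repartition; any non-identity transform (or forced repartitioning) yields a layout with an IcebergPartitioningHandle. A small hypothetical spec built with the Iceberg API shows both cases:

// Hypothetical schema and specs, not taken from the source.
Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.required(2, "region", Types.StringType.get()));
PartitionSpec identityOnlySpec = PartitionSpec.builderFor(schema)
        .identity("region")          // identity-only: column-names-only layout
        .build();
PartitionSpec mixedSpec = PartitionSpec.builderFor(schema)
        .identity("region")
        .bucket("id", 16)            // non-identity transform: IcebergPartitioningHandle layout
        .build();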
Use of io.trino.plugin.iceberg.TableType.DATA in project trino by trinodb.
Class IcebergMetadata, method finishRefreshMaterializedView.
@Override
public Optional<ConnectorOutputMetadata> finishRefreshMaterializedView(ConnectorSession session, ConnectorTableHandle tableHandle, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics, List<ConnectorTableHandle> sourceTableHandles)
{
    // delete before insert .. simulating overwrite
    executeDelete(session, tableHandle);

    IcebergWritableTableHandle table = (IcebergWritableTableHandle) insertHandle;
    Table icebergTable = transaction.table();

    List<CommitTaskData> commitTasks = fragments.stream()
            .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
            .collect(toImmutableList());

    Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
            .map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId())))
            .toArray(Type[]::new);

    AppendFiles appendFiles = transaction.newFastAppend();
    ImmutableSet.Builder<String> writtenFiles = ImmutableSet.builder();
    for (CommitTaskData task : commitTasks) {
        DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                .withPath(task.getPath())
                .withFileSizeInBytes(task.getFileSizeInBytes())
                .withFormat(table.getFileFormat().toIceberg())
                .withMetrics(task.getMetrics().metrics());
        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionDataJson = task.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
        }
        appendFiles.appendFile(builder.build());
        writtenFiles.add(task.getPath());
    }

    String dependencies = sourceTableHandles.stream()
            .map(handle -> (IcebergTableHandle) handle)
            .filter(handle -> handle.getSnapshotId().isPresent())
            .map(handle -> handle.getSchemaTableName() + "=" + handle.getSnapshotId().get())
            .distinct()
            .collect(joining(","));

    // try to leave as little garbage as possible behind
    if (table.getRetryMode() != NO_RETRIES) {
        cleanExtraOutputFiles(session, writtenFiles.build());
    }

    // Update the 'dependsOnTables' property that tracks tables on which the materialized view depends
    // and the corresponding snapshot ids of the tables
    appendFiles.set(DEPENDS_ON_TABLES, dependencies);
    appendFiles.commit();
    transaction.commitTransaction();
    transaction = null;

    return Optional.of(new HiveWrittenPartitions(commitTasks.stream()
            .map(CommitTaskData::getPath)
            .collect(toImmutableList())));
}
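
The value stored under DEPENDS_ON_TABLES is a comma-separated list of schemaTableName=snapshotId pairs, per the joining(",") above. A sketch of how such a value could be parsed back when checking materialized view freshness; the example value and the consuming code are hypothetical:

// Assumes java.util.Arrays, java.util.Map and java.util.stream.Collectors.
String dependsOnTables = "sales.orders=1234567890123456789,sales.customers=987654321";  // made-up value
Map<String, Long> snapshotByTable = Arrays.stream(dependsOnTables.split(","))
        .map(entry -> entry.split("=", 2))
        .collect(Collectors.toMap(parts -> parts[0], parts -> Long.parseLong(parts[1])));
// snapshotByTable -> {sales.orders=1234567890123456789, sales.customers=987654321}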
Use of io.trino.plugin.iceberg.TableType.DATA in project trino by trinodb.
Class IcebergMetadata, method finishOptimize.
private void finishOptimize(ConnectorSession session, IcebergTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo)
{
    IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
    Table icebergTable = transaction.table();

    // files to be deleted
    Set<DataFile> scannedFiles = splitSourceInfo.stream()
            .map(DataFile.class::cast)
            .collect(toImmutableSet());

    List<CommitTaskData> commitTasks = fragments.stream()
            .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
            .collect(toImmutableList());

    Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
            .map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId())))
            .toArray(Type[]::new);

    Set<DataFile> newFiles = new HashSet<>();
    for (CommitTaskData task : commitTasks) {
        DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                .withPath(task.getPath())
                .withFileSizeInBytes(task.getFileSizeInBytes())
                .withFormat(optimizeHandle.getFileFormat().toIceberg())
                .withMetrics(task.getMetrics().metrics());
        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionDataJson = task.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
        }
        newFiles.add(builder.build());
    }

    if (scannedFiles.isEmpty() && newFiles.isEmpty()) {
        // Table scan turned out to be empty, nothing to commit
        transaction = null;
        return;
    }

    // try to leave as little garbage as possible behind
    if (optimizeHandle.isRetriesEnabled()) {
        cleanExtraOutputFiles(session, newFiles.stream()
                .map(dataFile -> dataFile.path().toString())
                .collect(toImmutableSet()));
    }

    RewriteFiles rewriteFiles = transaction.newRewrite();
    rewriteFiles.rewriteFiles(scannedFiles, newFiles);
    rewriteFiles.commit();
    transaction.commitTransaction();
    transaction = null;
}
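
The final commit relies on Iceberg's RewriteFiles operation, which removes the compacted-away files and adds their replacements in a single atomic snapshot. A bare-bones sketch against the Iceberg Table API, assuming scannedFiles and newFiles are Set<DataFile> instances built as above:

// Sketch of the same rewrite done directly on an org.apache.iceberg.Table.
Transaction txn = icebergTable.newTransaction();
RewriteFiles rewrite = txn.newRewrite();
rewrite.rewriteFiles(scannedFiles, newFiles);  // files to delete, files to add
rewrite.commit();                              // stages the rewrite within the transaction
txn.commitTransaction();                       // produces one new snapshot with the swap applied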