Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format in project trino by trinodb.
The class DeltaLakeMetadata, method finishStatisticsCollection.
/**
 * Finishes an ANALYZE run: merges the freshly computed column statistics with the
 * statistics already stored for the table and writes the merged result back.
 *
 * @param session the session used to read and write the stored statistics
 * @param table the table handle; it is cast to {@code DeltaLakeTableHandle} and must carry an analyze handle
 * @param computedStatistics the statistics computed by the current ANALYZE run
 * @throws IllegalArgumentException if no analyze handle is set on the table handle
 * @throws IllegalStateException if the analyze handle names a column set and the merged
 *         statistics do not cover exactly that set
 */
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle table, Collection<ComputedStatistics> computedStatistics) {
    DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) table;
    AnalyzeHandle analyzeHandle = tableHandle.getAnalyzeHandle()
            .orElseThrow(() -> new IllegalArgumentException("analyzeHandle not set"));
    String location = metastore.getTableLocation(tableHandle.getSchemaTableName(), session);
    Optional<DeltaLakeStatistics> oldStatistics = statisticsAccess.readDeltaLakeStatistics(session, location);

    // More elaborate logic for handling statistics model evolution may need to be introduced in the future;
    // for now simply reject the update when the stored model version is not the current one.
    oldStatistics.ifPresent(statistics -> checkArgument(
            statistics.getModelVersion() == DeltaLakeStatistics.CURRENT_MODEL_VERSION,
            "Existing table statistics are incompatible, run the drop statistics procedure on this table before re-analyzing"));

    Map<String, DeltaLakeColumnStatistics> oldColumnStatistics = oldStatistics.map(DeltaLakeStatistics::getColumnStatistics).orElseGet(ImmutableMap::of);
    Map<String, DeltaLakeColumnStatistics> newColumnStatistics = toDeltaLakeColumnStatistics(computedStatistics);

    Map<String, DeltaLakeColumnStatistics> mergedColumnStatistics = new HashMap<>();
    // Carry over previously stored statistics only for columns that are also present in the new statistics.
    for (Map.Entry<String, DeltaLakeColumnStatistics> oldEntry : oldColumnStatistics.entrySet()) {
        if (newColumnStatistics.containsKey(oldEntry.getKey())) {
            mergedColumnStatistics.put(oldEntry.getKey(), oldEntry.getValue());
        }
    }
    // Where an old entry was carried over, combine it with the new one via update(); otherwise store the new one as is.
    newColumnStatistics.forEach((columnName, columnStatistics) ->
            mergedColumnStatistics.merge(columnName, columnStatistics, DeltaLakeColumnStatistics::update));

    Optional<Instant> maxFileModificationTime = getMaxFileModificationTime(computedStatistics);
    // We do not want to hinder future calls to ANALYZE if one of the analyzed files has a modification time far in the future.
    // Therefore the value stored in extended_stats.json is capped to the current time as observed on the Trino coordinator.
    Instant finalAlreadyAnalyzedModifiedTimeMax = Instant.now();
    if (maxFileModificationTime.isPresent()) {
        finalAlreadyAnalyzedModifiedTimeMax = Comparators.min(maxFileModificationTime.get(), finalAlreadyAnalyzedModifiedTimeMax);
    }
    // Also ensure that the stored timestamp never moves backwards relative to the previous statistics.
    if (oldStatistics.isPresent()) {
        finalAlreadyAnalyzedModifiedTimeMax = Comparators.max(oldStatistics.get().getAlreadyAnalyzedModifiedTimeMax(), finalAlreadyAnalyzedModifiedTimeMax);
    }

    // Sanity validation: when an explicit column set was requested, the merged statistics must match it exactly.
    if (analyzeHandle.getColumns().isPresent() && !mergedColumnStatistics.keySet().equals(analyzeHandle.getColumns().get())) {
        throw new IllegalStateException(format(
                "Unexpected columns in mergedColumnStatistics %s; expected %s",
                mergedColumnStatistics.keySet(),
                analyzeHandle.getColumns().get()));
    }

    DeltaLakeStatistics mergedDeltaLakeStatistics = new DeltaLakeStatistics(finalAlreadyAnalyzedModifiedTimeMax, mergedColumnStatistics, analyzeHandle.getColumns());
    statisticsAccess.updateDeltaLakeStatistics(session, location, mergedDeltaLakeStatistics);
}
Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format in project trino by trinodb.
The class DeltaLakeMetadata, method appendInitialTableEntries.
/**
 * Appends the three entries that open a new table's transaction log, in this order:
 * commit info, protocol, and metadata.
 *
 * @param transactionLogWriter the writer the entries are appended to
 * @param columns the table columns, serialized into the metadata entry's schema JSON
 * @param partitionColumnNames the partition column names stored in the metadata entry
 * @param configuration the table configuration; an immutable copy is stored in the metadata entry
 * @param operation the operation name recorded in the commit info entry
 * @param session supplies the user and the query id recorded in the commit info entry
 * @param nodeVersion the node version embedded in the {@code trino-<version>-<id>} identifier
 * @param nodeId the node id embedded in the {@code trino-<version>-<id>} identifier
 */
private static void appendInitialTableEntries(TransactionLogWriter transactionLogWriter, List<DeltaLakeColumnHandle> columns, List<String> partitionColumnNames, Map<String, String> configuration, String operation, ConnectorSession session, String nodeVersion, String nodeId) {
    // A single timestamp is shared by the commit info and the metadata entry.
    long creationTimestamp = System.currentTimeMillis();

    CommitInfoEntry commitInfo = new CommitInfoEntry(
            0,
            creationTimestamp,
            session.getUser(),
            session.getUser(),
            operation,
            ImmutableMap.of("queryId", session.getQueryId()),
            null,
            null,
            "trino-" + nodeVersion + "-" + nodeId,
            0,
            ISOLATION_LEVEL,
            true);
    transactionLogWriter.appendCommitInfoEntry(commitInfo);

    ProtocolEntry protocol = new ProtocolEntry(READER_VERSION, WRITER_VERSION);
    transactionLogWriter.appendProtocolEntry(protocol);

    MetadataEntry metadata = new MetadataEntry(
            randomUUID().toString(),
            null,
            null,
            new Format("parquet", ImmutableMap.of()),
            serializeSchemaAsJson(columns),
            partitionColumnNames,
            ImmutableMap.copyOf(configuration),
            creationTimestamp);
    transactionLogWriter.appendMetadataEntry(metadata);
}
Use of io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format in project trino by trinodb.
The class DeltaLakeMetadata, method beginInsert.
/**
 * Validates that an insert into the given table is allowed and builds the insert handle.
 *
 * @param session the session of the inserting query
 * @param tableHandle the target table; it is cast to {@code DeltaLakeTableHandle}
 * @param columns the columns being inserted; each is cast to {@code DeltaLakeColumnHandle}
 * @return an insert handle carrying the table location, metadata entry, input columns and
 *         the current version obtained from {@code getMandatoryCurrentVersion}
 * @throws TrinoException with {@code NOT_SUPPORTED} when {@code allowWrite} rejects the table,
 *         or with {@code GENERIC_INTERNAL_ERROR} when an {@code IOException} occurs while
 *         obtaining the file system or the current version
 */
@Override
public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns) {
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) tableHandle;

    // Guard: refuse the insert up front when writes are not allowed for this table.
    if (!allowWrite(session, table)) {
        String scheme = new Path(table.getLocation()).toUri().getScheme();
        throw new TrinoException(NOT_SUPPORTED, format("Inserts are not supported on the %s filesystem", scheme));
    }
    checkSupportedWriterVersion(session, table.getSchemaTableName());

    List<DeltaLakeColumnHandle> inputColumns = columns.stream()
            .map(DeltaLakeColumnHandle.class::cast)
            .collect(toImmutableList());

    ConnectorTableMetadata tableMetadata = getTableMetadata(session, table);
    // Safeguard for cases where the input columns may differ from the table metadata case-sensitively.
    checkAllColumnsPassedOnInsert(tableMetadata, inputColumns);

    String tableLocation = getLocation(tableMetadata.getProperties());
    try {
        Path tablePath = new Path(tableLocation);
        HdfsContext hdfsContext = new HdfsContext(session);
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tablePath);
        return new DeltaLakeInsertTableHandle(
                table.getSchemaName(),
                table.getTableName(),
                tableLocation,
                table.getMetadataEntry(),
                inputColumns,
                getMandatoryCurrentVersion(fileSystem, tablePath));
    }
    catch (IOException e) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
    }
}
Aggregations