Use of io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE in project trino by trinodb.
From the class DeltaLakeMetadata, method createTable.
@Override
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting)
{
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    Database schema = metastore.getDatabase(schemaName)
            .orElseThrow(() -> new SchemaNotFoundException(schemaName));

    boolean external = true;
    String location = getLocation(tableMetadata.getProperties());
    if (location == null) {
        Optional<String> schemaLocation = getSchemaLocation(schema);
        if (schemaLocation.isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "The 'location' property must be specified either for the table or the schema");
        }
        location = new Path(schemaLocation.get(), tableName).toString();
        checkPathContainsNoFiles(session, new Path(location));
        external = false;
    }
    Path targetPath = new Path(location);
    ensurePathExists(session, targetPath);
    Path deltaLogDirectory = getTransactionLogDir(targetPath);
    Optional<Long> checkpointInterval = DeltaLakeTableProperties.getCheckpointInterval(tableMetadata.getProperties());

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), targetPath);
        if (!fileSystem.exists(deltaLogDirectory)) {
            validateTableColumns(tableMetadata);
            List<String> partitionColumns = getPartitionedBy(tableMetadata.getProperties());
            List<DeltaLakeColumnHandle> deltaLakeColumns = tableMetadata.getColumns().stream()
                    .map(column -> toColumnHandle(column, partitionColumns))
                    .collect(toImmutableList());
            TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriterWithoutTransactionIsolation(session, targetPath.toString());
            appendInitialTableEntries(transactionLogWriter, deltaLakeColumns, partitionColumns, buildDeltaMetadataConfiguration(checkpointInterval), CREATE_TABLE_OPERATION, session, nodeVersion, nodeId);
            setRollback(() -> deleteRecursivelyIfExists(new HdfsContext(session), hdfsEnvironment, deltaLogDirectory));
            transactionLogWriter.flush();
        }
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e);
    }

    Table.Builder tableBuilder = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(Optional.of(session.getUser()))
            .setTableType(external ? EXTERNAL_TABLE.name() : MANAGED_TABLE.name())
            .setDataColumns(DUMMY_DATA_COLUMNS)
            .setParameters(deltaTableProperties(session, location, external));
    setDeltaStorageFormat(tableBuilder, location, targetPath);
    Table table = tableBuilder.build();

    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
    metastore.createTable(session, table, principalPrivileges);
}
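The catch block above shows the error-handling pattern shared by all three usages on this page: any I/O failure on the write path is wrapped in a TrinoException carrying DELTA_LAKE_BAD_WRITE so it surfaces as a storage-side write error. Below is a minimal standalone sketch of that pattern; the wrapIoFailure helper and the IoAction interface are illustrative only and are not part of Trino.

// Minimal sketch (not part of the Trino codebase) of the DELTA_LAKE_BAD_WRITE
// wrapping pattern used in createTable above. The helper name wrapIoFailure and
// the IoAction interface are hypothetical; only DeltaLakeErrorCode and
// TrinoException come from Trino.
import io.trino.plugin.deltalake.DeltaLakeErrorCode;
import io.trino.spi.TrinoException;

import java.io.IOException;

public final class BadWriteWrapping
{
    private BadWriteWrapping() {}

    interface IoAction<T>
    {
        T run()
                throws IOException;
    }

    static <T> T wrapIoFailure(String location, IoAction<T> action)
    {
        try {
            return action.run();
        }
        catch (IOException e) {
            // Same shape as the catch block in createTable above
            throw new TrinoException(DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e);
        }
    }
}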
Use of io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE in project trino by trinodb.
From the class DeltaLakeMetadata, method finishOptimize.
private void finishOptimize(ConnectorSession session, DeltaLakeTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo)
{
    DeltaTableOptimizeHandle optimizeHandle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();
    long readVersion = optimizeHandle.getCurrentVersion().orElseThrow(() -> new IllegalArgumentException("currentVersion not set"));
    Optional<Long> checkpointInterval = optimizeHandle.getMetadataEntry().getCheckpointInterval();
    String tableLocation = executeHandle.getTableLocation();

    // paths to be deleted
    Set<Path> scannedPaths = splitSourceInfo.stream()
            .map(file -> new Path((String) file))
            .collect(toImmutableSet());

    // files to be added
    List<DataFileInfo> dataFileInfos = fragments.stream()
            .map(Slice::getBytes)
            .map(dataFileInfoCodec::fromJson)
            .collect(toImmutableList());

    boolean writeCommitted = false;
    try {
        TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, tableLocation);

        long createdTime = Instant.now().toEpochMilli();
        long commitVersion = readVersion + 1;
        transactionLogWriter.appendCommitInfoEntry(new CommitInfoEntry(
                commitVersion,
                createdTime,
                session.getUser(),
                session.getUser(),
                OPTIMIZE_OPERATION,
                ImmutableMap.of("queryId", session.getQueryId()),
                null,
                null,
                "trino-" + nodeVersion + "-" + nodeId,
                readVersion,
                ISOLATION_LEVEL,
                true));
        // TODO: Delta writes another field "operationMetrics" that I haven't
        // seen before. It contains delete/update metrics. Investigate/include it.

        long writeTimestamp = Instant.now().toEpochMilli();
        for (Path scannedPath : scannedPaths) {
            String relativePath = new Path(tableLocation).toUri().relativize(scannedPath.toUri()).toString();
            transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(relativePath, writeTimestamp, false));
        }

        // Note: during writes we want to preserve original case of partition columns
        List<String> partitionColumns = optimizeHandle.getMetadataEntry().getOriginalPartitionColumns();
        appendAddFileEntries(transactionLogWriter, dataFileInfos, partitionColumns, false);

        transactionLogWriter.flush();
        writeCommitted = true;
        writeCheckpointIfNeeded(session, executeHandle.getSchemaTableName(), checkpointInterval, commitVersion);
    }
    catch (Exception e) {
        if (!writeCommitted) {
            // TODO perhaps it should happen in a background thread
            cleanupFailedWrite(session, tableLocation, dataFileInfos);
        }
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e);
    }
}
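For context on the commitVersion computed above: each flushed commit becomes a new JSON entry in the table's _delta_log directory, conventionally named with the version number zero-padded to 20 digits. The standalone sketch below only illustrates that naming convention; the commitFile helper is hypothetical and not a Trino or Delta Lake API.

// Standalone sketch of the Delta Lake commit-file naming convention behind the
// commit written above. The commitFile helper is hypothetical, for illustration only.
import java.nio.file.Path;
import java.nio.file.Paths;

public final class TransactionLogNaming
{
    private TransactionLogNaming() {}

    // The OPTIMIZE commit above is written as version readVersion + 1
    static Path commitFile(String tableLocation, long commitVersion)
    {
        return Paths.get(tableLocation, "_delta_log", String.format("%020d.json", commitVersion));
    }

    public static void main(String[] args)
    {
        // e.g. readVersion = 41 -> commitVersion = 42
        System.out.println(commitFile("/warehouse/events", 42));
        // prints: /warehouse/events/_delta_log/00000000000000000042.json
    }
}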
Use of io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE in project trino by trinodb.
From the class DeltaLakePageSink, method createParquetFileWriter.
private FileWriter createParquetFileWriter(Path path)
{
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxBlockSize(getParquetWriterBlockSize(session))
            .setMaxPageSize(getParquetWriterPageSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompressionCodec(session).getParquetCompressionCodec();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        List<Type> parquetTypes = dataColumnTypes.stream()
                .map(type -> {
                    if (type instanceof TimestampWithTimeZoneType) {
                        verify(((TimestampWithTimeZoneType) type).getPrecision() == 3, "Unsupported type: %s", type);
                        return TIMESTAMP_MILLIS;
                    }
                    return type;
                })
                .collect(toImmutableList());

        // we use identity column mapping; input page already contains only data columns per
        // DeltaLakePageSink.getDataPage()
        int[] identityMapping = new int[dataColumnTypes.size()];
        for (int i = 0; i < identityMapping.length; ++i) {
            identityMapping[i] = i;
        }

        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(parquetTypes, dataColumnNames);
        return new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                parquetTypes,
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                identityMapping,
                compressionCodecName,
                trinoVersion);
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Error creating Parquet file", e);
    }
}
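The rollbackAction handed to ParquetFileWriter above captures a best-effort delete of the target file, so a write that fails after the file was created does not leave a partial Parquet file behind. Below is a self-contained sketch of the same pattern using java.nio instead of Hadoop's FileSystem; the simulated failure and class name are illustrative only.

// Self-contained sketch of the rollback-action pattern above, using java.nio
// rather than Hadoop's FileSystem. The simulated failure is for illustration only.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.Callable;

public final class RollbackActionSketch
{
    public static void main(String[] args) throws Exception
    {
        Path target = Files.createTempFile("part-00000", ".parquet");

        // Captured up front, invoked only if the write fails
        Callable<Void> rollbackAction = () -> {
            Files.deleteIfExists(target);
            return null;
        };

        try {
            // ... write pages to `target` here; simulate a failure instead:
            throw new IOException("simulated write failure");
        }
        catch (IOException e) {
            rollbackAction.call();
            System.out.println("write failed, partial file removed: " + !Files.exists(target));
        }
    }
}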