Use of io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR in project trino by trinodb.
The class HiveMetadata, method finishOptimize.
private void finishOptimize(ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle, Collection<Slice> fragments, List<Object> splitSourceInfo)
{
    // TODO lots of this is copied from finishInsert; refactoring opportunity
    HiveTableExecuteHandle handle = (HiveTableExecuteHandle) tableExecuteHandle;
    checkArgument(handle.getWriteDeclarationId().isPresent(), "no write declaration id present in tableExecuteHandle");

    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .collect(toImmutableList());

    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);

    Table table = metastore.getTable(handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
        throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during optimize");
    }

    // Support for bucketed tables is disabled, mostly so we do not need to think about grouped execution in an initial version. Possibly no change apart from testing is required.
    verify(handle.getBucketProperty().isEmpty(), "bucketed table not supported");

    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        // sanity check
        verify(partitionUpdate.getUpdateMode() == APPEND, "Expected partitionUpdate mode to be APPEND but got %s", partitionUpdate.getUpdateMode());

        if (partitionUpdate.getName().isEmpty()) {
            // operating on an unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during optimize");
            }
            metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), PartitionStatistics.empty(), handle.isRetriesEnabled());
        }
        else {
            // operating on a partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), PartitionStatistics.empty(), handle.isRetriesEnabled());
        }
    }

    // get filesystem
    FileSystem fs;
    try {
        fs = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(table.getStorage().getLocation()));
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_FILESYSTEM_ERROR, e);
    }

    // paths to be deleted
    Set<Path> scannedPaths = splitSourceInfo.stream()
            .map(file -> new Path((String) file))
            .collect(toImmutableSet());
    // track the remaining files to be deleted for error reporting
    Set<Path> remainingFilesToDelete = new HashSet<>(scannedPaths);

    // delete loop
    boolean someDeleted = false;
    Optional<Path> firstScannedPath = Optional.empty();
    try {
        for (Path scannedPath : scannedPaths) {
            if (firstScannedPath.isEmpty()) {
                firstScannedPath = Optional.of(scannedPath);
            }
            retry().run("delete " + scannedPath, () -> fs.delete(scannedPath, false));
            someDeleted = true;
            remainingFilesToDelete.remove(scannedPath);
        }
    }
    catch (Exception e) {
        if (!someDeleted && (firstScannedPath.isEmpty() || exists(fs, firstScannedPath.get()))) {
            // fs.delete above could throw an exception even though the file was actually deleted
            throw new TrinoException(HIVE_FILESYSTEM_ERROR, "Error while deleting original files", e);
        }
        // If we already deleted some original files, we disable the rollback routine so the newly written files are not deleted.
        // The reported exception message and log entry list the files which need to be cleaned up by the user manually.
        // Until the table is cleaned up, duplicate rows will be present.
        metastore.dropDeclaredIntentionToWrite(handle.getWriteDeclarationId().get());
        String errorMessage = "Error while deleting data files in FINISH phase of OPTIMIZE for table " + table.getTableName() + "; remaining files need to be deleted manually: " + remainingFilesToDelete;
        log.error(e, "%s", errorMessage);
        throw new TrinoException(HIVE_FILESYSTEM_ERROR, errorMessage, e);
    }
}
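The failure branch above distinguishes "nothing was deleted yet and the first file is still present" (safe to throw and let rollback proceed) from "some originals are already gone" (rollback must be disabled). The exists helper it relies on is not part of this excerpt; a minimal sketch of one plausible shape, assuming a plain Hadoop FileSystem check, could look like this (class and method placement are illustrative only):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class FileSystemChecks
{
    private FileSystemChecks() {}

    // Hypothetical shape of the exists(fs, path) helper used above: if the check
    // itself fails, report the file as absent so the caller takes the conservative
    // branch that disables rollback instead of deleting the newly written files.
    static boolean exists(FileSystem fs, Path path)
    {
        try {
            return fs.exists(path);
        }
        catch (IOException e) {
            return false;
        }
    }
}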
Use of io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR in project trino by trinodb.
The class SyncPartitionMetadataProcedure, method doSyncPartitionMetadata.
private void doSyncPartitionMetadata(ConnectorSession session, ConnectorAccessControl accessControl, String schemaName, String tableName, String mode, boolean caseSensitive)
{
    SyncMode syncMode = toSyncMode(mode);
    HdfsContext hdfsContext = new HdfsContext(session);
    SemiTransactionalHiveMetastore metastore = hiveMetadataFactory.create(session.getIdentity(), true).getMetastore();
    SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);

    Table table = metastore.getTable(schemaName, tableName)
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    if (table.getPartitionColumns().isEmpty()) {
        throw new TrinoException(INVALID_PROCEDURE_ARGUMENT, "Table is not partitioned: " + schemaTableName);
    }

    if (syncMode == SyncMode.ADD || syncMode == SyncMode.FULL) {
        accessControl.checkCanInsertIntoTable(null, new SchemaTableName(schemaName, tableName));
    }
    if (syncMode == SyncMode.DROP || syncMode == SyncMode.FULL) {
        accessControl.checkCanDeleteFromTable(null, new SchemaTableName(schemaName, tableName));
    }

    Path tableLocation = new Path(table.getStorage().getLocation());

    Set<String> partitionsToAdd;
    Set<String> partitionsToDrop;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tableLocation);
        List<String> partitionsInMetastore = metastore.getPartitionNames(schemaName, tableName)
                .orElseThrow(() -> new TableNotFoundException(schemaTableName));
        List<String> partitionsInFileSystem = listDirectory(fileSystem, fileSystem.getFileStatus(tableLocation), table.getPartitionColumns(), table.getPartitionColumns().size(), caseSensitive).stream()
                .map(fileStatus -> fileStatus.getPath().toUri())
                .map(uri -> tableLocation.toUri().relativize(uri).getPath())
                .collect(toImmutableList());

        // partitions in file system but not in metastore
        partitionsToAdd = difference(partitionsInFileSystem, partitionsInMetastore);
        // partitions in metastore but not in file system
        partitionsToDrop = difference(partitionsInMetastore, partitionsInFileSystem);
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_FILESYSTEM_ERROR, e);
    }

    syncPartitions(partitionsToAdd, partitionsToDrop, syncMode, metastore, session, table);
}
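The difference helper is also not shown in this excerpt. Assuming it is a plain set difference over partition names such as country=US/state=CA, a minimal sketch using Guava could look like the following; the class name is illustrative, not Trino's:

import com.google.common.collect.Sets;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Illustrative helper (hypothetical): partition names present in the first listing
// but absent from the second. difference(partitionsInFileSystem, partitionsInMetastore)
// yields partitions that exist on disk but are not registered in the metastore, and
// the reverse call yields stale metastore entries.
final class PartitionDiff
{
    private PartitionDiff() {}

    static Set<String> difference(List<String> left, List<String> right)
    {
        return Sets.difference(new HashSet<>(left), new HashSet<>(right));
    }
}

From SQL, this logic backs the connector's sync_partition_metadata procedure, invoked for example as CALL system.sync_partition_metadata('web', 'page_views', 'FULL') against a Hive catalog (schema and table names here are examples).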
Use of io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR in project trino by trinodb.
The class S3SelectRecordCursorProvider, method createRecordCursor.
@Override
public Optional<ReaderRecordCursorWithProjections> createRecordCursor(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Properties schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, TypeManager typeManager, boolean s3SelectPushdownEnabled)
{
    if (!s3SelectPushdownEnabled) {
        return Optional.empty();
    }

    try {
        this.hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_FILESYSTEM_ERROR, "Failed getting FileSystem: " + path, e);
    }

    Optional<ReaderColumns> projectedReaderColumns = projectBaseColumns(columns);
    // Ignore predicates on partial columns for now.
    effectivePredicate = effectivePredicate.filter((column, domain) -> column.isBaseColumn());

    String serdeName = getDeserializerClassName(schema);
    if (CSV_SERDES.contains(serdeName)) {
        List<HiveColumnHandle> readerColumns = projectedReaderColumns
                .map(ReaderColumns::get)
                .map(readColumns -> readColumns.stream().map(HiveColumnHandle.class::cast).collect(toUnmodifiableList()))
                .orElse(columns);

        IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(typeManager);
        String ionSqlQuery = queryBuilder.buildSql(readerColumns, effectivePredicate);
        S3SelectLineRecordReader recordReader = new S3SelectCsvRecordReader(configuration, path, start, length, schema, ionSqlQuery, s3ClientFactory);

        RecordCursor cursor = new S3SelectRecordCursor<>(configuration, path, recordReader, length, schema, readerColumns);
        return Optional.of(new ReaderRecordCursorWithProjections(cursor, projectedReaderColumns));
    }

    // unsupported serdes
    return Optional.empty();
}
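Returning Optional.empty() when pushdown is disabled or the SerDe is unsupported lets the engine fall through to another reader for the same split. The following is a simplified, hypothetical illustration of that first-provider-wins pattern, not Trino's actual dispatch code:

import java.util.List;
import java.util.Optional;
import java.util.function.Supplier;

// Hypothetical illustration of the fallback pattern: each provider either produces
// a result or returns Optional.empty(), and the caller keeps the first non-empty answer.
final class FirstMatch
{
    private FirstMatch() {}

    static <T> Optional<T> firstPresent(List<Supplier<Optional<T>>> providers)
    {
        for (Supplier<Optional<T>> provider : providers) {
            Optional<T> result = provider.get();
            if (result.isPresent()) {
                return result;
            }
        }
        return Optional.empty();
    }
}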
Use of io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR in project trino by trinodb.
The class GenericHiveRecordCursorProvider, method createRecordCursor.
@Override
public Optional<ReaderRecordCursorWithProjections> createRecordCursor(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Properties schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, TypeManager typeManager, boolean s3SelectPushdownEnabled)
{
    configuration.setInt(LineRecordReader.MAX_LINE_LENGTH, textMaxLineLengthBytes);

    // make sure the FileSystem is created with the proper Configuration object
    try {
        this.hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_FILESYSTEM_ERROR, "Failed getting FileSystem: " + path, e);
    }

    Optional<ReaderColumns> projections = projectBaseColumns(columns);
    List<HiveColumnHandle> readerColumns = projections
            .map(ReaderColumns::get)
            .map(columnHandles -> columnHandles.stream().map(HiveColumnHandle.class::cast).collect(toUnmodifiableList()))
            .orElse(columns);

    RecordCursor cursor = hdfsEnvironment.doAs(session.getIdentity(), () -> {
        RecordReader<?, ?> recordReader = HiveUtil.createRecordReader(configuration, path, start, length, schema, readerColumns);
        try {
            return new GenericHiveRecordCursor<>(configuration, path, genericRecordReader(recordReader), length, schema, readerColumns);
        }
        catch (Exception e) {
            try {
                recordReader.close();
            }
            catch (IOException closeException) {
                if (e != closeException) {
                    e.addSuppressed(closeException);
                }
            }
            throw e;
        }
    });

    return Optional.of(new ReaderRecordCursorWithProjections(cursor, projections));
}
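The catch block above closes the freshly opened RecordReader when cursor construction fails and attaches any secondary close failure as a suppressed exception, so the original error is the one that surfaces. A standalone sketch of that pattern follows; the names are illustrative and not Trino API:

import java.io.Closeable;
import java.io.IOException;

// Illustrative helper (hypothetical): build a value from an already-open resource,
// closing the resource if the build fails and attaching the close failure as a
// suppressed exception so the original cause is preserved.
final class CloseOnFailure
{
    private CloseOnFailure() {}

    interface ThrowingSupplier<T>
    {
        T get() throws Exception;
    }

    static <T> T buildOrClose(Closeable resource, ThrowingSupplier<T> builder) throws Exception
    {
        try {
            return builder.get();
        }
        catch (Exception e) {
            try {
                resource.close();
            }
            catch (IOException closeException) {
                if (e != closeException) {
                    e.addSuppressed(closeException);
                }
            }
            throw e;
        }
    }
}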