Use of io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA in project trino by trinodb.
Example from the class GlueStatConverter, method toGlueColumnStatisticsData.
private static ColumnStatisticsData toGlueColumnStatisticsData(HiveColumnStatistics statistics, HiveType columnType, OptionalLong rowCount) {
    TypeInfo typeInfo = columnType.getTypeInfo();
    checkArgument(typeInfo.getCategory() == PRIMITIVE, "Unsupported statistics type: %s", columnType);
    ColumnStatisticsData catalogColumnStatisticsData = new ColumnStatisticsData();
    switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
        case BOOLEAN:
        {
            BooleanColumnStatisticsData data = new BooleanColumnStatisticsData();
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            statistics.getBooleanStatistics().ifPresent(booleanStatistics -> {
                booleanStatistics.getFalseCount().ifPresent(data::setNumberOfFalses);
                booleanStatistics.getTrueCount().ifPresent(data::setNumberOfTrues);
            });
            catalogColumnStatisticsData.setType(ColumnStatisticsType.BOOLEAN.toString());
            catalogColumnStatisticsData.setBooleanColumnStatisticsData(data);
            break;
        }
        case BINARY:
        {
            BinaryColumnStatisticsData data = new BinaryColumnStatisticsData();
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            data.setMaximumLength(statistics.getMaxValueSizeInBytes().orElse(0));
            data.setAverageLength(getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0));
            catalogColumnStatisticsData.setType(ColumnStatisticsType.BINARY.toString());
            catalogColumnStatisticsData.setBinaryColumnStatisticsData(data);
            break;
        }
        case DATE:
        {
            DateColumnStatisticsData data = new DateColumnStatisticsData();
            statistics.getDateStatistics().ifPresent(dateStatistics -> {
                dateStatistics.getMin().ifPresent(value -> data.setMinimumValue(localDateToDate(value)));
                dateStatistics.getMax().ifPresent(value -> data.setMaximumValue(localDateToDate(value)));
            });
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumberOfDistinctValues);
            catalogColumnStatisticsData.setType(ColumnStatisticsType.DATE.toString());
            catalogColumnStatisticsData.setDateColumnStatisticsData(data);
            break;
        }
        case DECIMAL:
        {
            DecimalColumnStatisticsData data = new DecimalColumnStatisticsData();
            statistics.getDecimalStatistics().ifPresent(decimalStatistics -> {
                decimalStatistics.getMin().ifPresent(value -> data.setMinimumValue(bigDecimalToGlueDecimal(value)));
                decimalStatistics.getMax().ifPresent(value -> data.setMaximumValue(bigDecimalToGlueDecimal(value)));
            });
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumberOfDistinctValues);
            catalogColumnStatisticsData.setType(ColumnStatisticsType.DECIMAL.toString());
            catalogColumnStatisticsData.setDecimalColumnStatisticsData(data);
            break;
        }
        case FLOAT:
        case DOUBLE:
        {
            DoubleColumnStatisticsData data = new DoubleColumnStatisticsData();
            statistics.getDoubleStatistics().ifPresent(doubleStatistics -> {
                doubleStatistics.getMin().ifPresent(data::setMinimumValue);
                doubleStatistics.getMax().ifPresent(data::setMaximumValue);
            });
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumberOfDistinctValues);
            catalogColumnStatisticsData.setType(ColumnStatisticsType.DOUBLE.toString());
            catalogColumnStatisticsData.setDoubleColumnStatisticsData(data);
            break;
        }
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case TIMESTAMP:
        {
            LongColumnStatisticsData data = new LongColumnStatisticsData();
            statistics.getIntegerStatistics().ifPresent(stats -> {
                stats.getMin().ifPresent(data::setMinimumValue);
                stats.getMax().ifPresent(data::setMaximumValue);
            });
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumberOfDistinctValues);
            catalogColumnStatisticsData.setType(ColumnStatisticsType.LONG.toString());
            catalogColumnStatisticsData.setLongColumnStatisticsData(data);
            break;
        }
        case VARCHAR:
        case CHAR:
        case STRING:
        {
            StringColumnStatisticsData data = new StringColumnStatisticsData();
            statistics.getNullsCount().ifPresent(data::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumberOfDistinctValues);
            data.setMaximumLength(statistics.getMaxValueSizeInBytes().orElse(0));
            data.setAverageLength(getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0));
            catalogColumnStatisticsData.setType(ColumnStatisticsType.STRING.toString());
            catalogColumnStatisticsData.setStringColumnStatisticsData(data);
            break;
        }
        default:
            throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics type: " + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
    }
    return catalogColumnStatisticsData;
}
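For context, the conversion pattern above can be exercised in isolation. The following is a minimal, hand-written sketch (not Trino code) that builds a Glue ColumnStatisticsData for a DOUBLE column using only the AWS Glue model setters that already appear in the method; the statistic values are made up purely for illustration.

// Hypothetical values; in the method above they come from HiveColumnStatistics.
DoubleColumnStatisticsData doubleData = new DoubleColumnStatisticsData();
doubleData.setMinimumValue(0.5);
doubleData.setMaximumValue(99.5);
doubleData.setNumberOfNulls(10L);
doubleData.setNumberOfDistinctValues(42L);
ColumnStatisticsData glueStatistics = new ColumnStatisticsData();
glueStatistics.setType(ColumnStatisticsType.DOUBLE.toString());
glueStatistics.setDoubleColumnStatisticsData(doubleData);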
Use of io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA in project trino by trinodb.
Example from the class TrinoHiveCatalog, method doGetMaterializedView.
private Optional<ConnectorMaterializedViewDefinition> doGetMaterializedView(ConnectorSession session, SchemaTableName schemaViewName) {
    Optional<io.trino.plugin.hive.metastore.Table> tableOptional = metastore.getTable(schemaViewName.getSchemaName(), schemaViewName.getTableName());
    if (tableOptional.isEmpty()) {
        return Optional.empty();
    }
    io.trino.plugin.hive.metastore.Table table = tableOptional.get();
    if (!isPrestoView(table) || !isHiveOrPrestoView(table) || !table.getParameters().containsKey(STORAGE_TABLE)) {
        return Optional.empty();
    }
    io.trino.plugin.hive.metastore.Table materializedView = tableOptional.get();
    String storageTable = materializedView.getParameters().get(STORAGE_TABLE);
    checkState(storageTable != null, "Storage table missing in definition of materialized view " + schemaViewName);
    IcebergMaterializedViewDefinition definition = decodeMaterializedViewData(materializedView.getViewOriginalText()
            .orElseThrow(() -> new TrinoException(HIVE_INVALID_METADATA, "No view original text: " + schemaViewName)));
    Table icebergTable;
    try {
        icebergTable = loadTable(session, new SchemaTableName(schemaViewName.getSchemaName(), storageTable));
    } catch (RuntimeException e) {
        // The materialized view could be removed concurrently. This may manifest in a number of ways, e.g.
        // - io.trino.spi.connector.TableNotFoundException
        // - org.apache.iceberg.exceptions.NotFoundException when accessing manifest file
        // - other failures when reading storage table's metadata files
        // Retry, as we're catching broadly.
        metastore.invalidateTable(schemaViewName.getSchemaName(), schemaViewName.getTableName());
        metastore.invalidateTable(schemaViewName.getSchemaName(), storageTable);
        throw new MaterializedViewMayBeBeingRemovedException(e);
    }
    ImmutableMap.Builder<String, Object> properties = ImmutableMap.builder();
    properties.put(FILE_FORMAT_PROPERTY, IcebergUtil.getFileFormat(icebergTable));
    if (!icebergTable.spec().fields().isEmpty()) {
        properties.put(PARTITIONING_PROPERTY, toPartitionFields(icebergTable.spec()));
    }
    return Optional.of(new ConnectorMaterializedViewDefinition(
            definition.getOriginalSql(),
            Optional.of(new CatalogSchemaTableName(catalogName.toString(), new SchemaTableName(schemaViewName.getSchemaName(), storageTable))),
            definition.getCatalog(),
            definition.getSchema(),
            definition.getColumns().stream().map(column -> new ConnectorMaterializedViewDefinition.Column(column.getName(), column.getType())).collect(toImmutableList()),
            definition.getComment(),
            materializedView.getOwner(),
            properties.buildOrThrow()));
}
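The catch block above deliberately converts broad failures into MaterializedViewMayBeBeingRemovedException so that callers can retry. As a hedged illustration (the wrapper name and the retry bound below are not part of Trino), a caller inside the same class might do:

private Optional<ConnectorMaterializedViewDefinition> getMaterializedViewWithRetries(ConnectorSession session, SchemaTableName schemaViewName) {
    int attemptsLeft = 3; // arbitrary bound for this sketch
    while (true) {
        try {
            return doGetMaterializedView(session, schemaViewName);
        } catch (MaterializedViewMayBeBeingRemovedException e) {
            if (--attemptsLeft == 0) {
                throw e;
            }
            // doGetMaterializedView already invalidated the cached metastore entries, so simply try again
        }
    }
}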
Use of io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA in project trino by trinodb.
Example from the class HiveMetadata, method columnMetadataGetter.
private static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(Table table) {
    ImmutableList.Builder<String> columnNames = ImmutableList.builder();
    table.getPartitionColumns().stream().map(Column::getName).forEach(columnNames::add);
    table.getDataColumns().stream().map(Column::getName).forEach(columnNames::add);
    List<String> allColumnNames = columnNames.build();
    if (allColumnNames.size() > Sets.newHashSet(allColumnNames).size()) {
        throw new TrinoException(HIVE_INVALID_METADATA, format("Hive metadata for table %s is invalid: Table descriptor contains duplicate columns", table.getTableName()));
    }
    List<Column> tableColumns = table.getDataColumns();
    ImmutableMap.Builder<String, Optional<String>> builder = ImmutableMap.builder();
    for (Column field : concat(tableColumns, table.getPartitionColumns())) {
        if (field.getComment().isPresent() && !field.getComment().get().equals("from deserializer")) {
            builder.put(field.getName(), field.getComment());
        } else {
            builder.put(field.getName(), Optional.empty());
        }
    }
    // add hidden columns
    builder.put(PATH_COLUMN_NAME, Optional.empty());
    if (table.getStorage().getBucketProperty().isPresent()) {
        builder.put(BUCKET_COLUMN_NAME, Optional.empty());
    }
    builder.put(FILE_SIZE_COLUMN_NAME, Optional.empty());
    builder.put(FILE_MODIFIED_TIME_COLUMN_NAME, Optional.empty());
    if (!table.getPartitionColumns().isEmpty()) {
        builder.put(PARTITION_COLUMN_NAME, Optional.empty());
    }
    if (isFullAcidTable(table.getParameters())) {
        for (String name : AcidSchema.ACID_COLUMN_NAMES) {
            builder.put(name, Optional.empty());
        }
    }
    Map<String, Optional<String>> columnComment = builder.buildOrThrow();
    return handle -> ColumnMetadata.builder()
            .setName(handle.getName())
            .setType(handle.getType())
            .setComment(columnComment.get(handle.getName()))
            .setExtraInfo(Optional.ofNullable(columnExtraInfo(handle.isPartitionKey())))
            .setHidden(handle.isHidden())
            .build();
}
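As a usage sketch (columnHandles below is assumed to be the table's list of HiveColumnHandle; it is not part of the method above), the returned function is applied per column handle to build the table's column metadata:

Function<HiveColumnHandle, ColumnMetadata> metadataGetter = columnMetadataGetter(table);
List<ColumnMetadata> columns = columnHandles.stream()
        .map(metadataGetter)
        .collect(toImmutableList());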
Use of io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA in project trino by trinodb.
Example from the class HiveWriterFactory, method createWriter.
public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) {
    if (bucketCount.isPresent()) {
        checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
        checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(), "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
    } else {
        checkArgument(bucketNumber.isEmpty(), "Bucket number provided for table that is not bucketed");
    }
    List<String> partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position);
    Optional<String> partitionName;
    if (!partitionColumnNames.isEmpty()) {
        partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));
    } else {
        partitionName = Optional.empty();
    }
    // attempt to get the existing partition (if this is an existing partitioned table)
    Optional<Partition> partition = Optional.empty();
    if (!partitionValues.isEmpty() && table != null) {
        partition = pageSinkMetadataProvider.getPartition(partitionValues);
    }
    UpdateMode updateMode;
    Properties schema;
    WriteInfo writeInfo;
    StorageFormat outputStorageFormat;
    if (partition.isEmpty()) {
        if (table == null) {
            // Write to: a new partition in a new partitioned table,
            // or a new unpartitioned table.
            updateMode = UpdateMode.NEW;
            schema = new Properties();
            schema.setProperty(IOConstants.COLUMNS, dataColumns.stream().map(DataColumn::getName).collect(joining(",")));
            schema.setProperty(IOConstants.COLUMNS_TYPES, dataColumns.stream().map(DataColumn::getHiveType).map(HiveType::getHiveTypeName).map(HiveTypeName::toString).collect(joining(":")));
            if (partitionName.isEmpty()) {
                // new unpartitioned table
                writeInfo = locationService.getTableWriteInfo(locationHandle, false);
            } else {
                // a new partition in a new partitioned table
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
                if (!writeInfo.getWriteMode().isWritePathSameAsTargetPath()) {
                    // verify that the target directory for the partition does not already exist
                    if (HiveWriteUtils.pathExists(new HdfsContext(session), hdfsEnvironment, writeInfo.getTargetPath())) {
                        throw new TrinoException(HIVE_PATH_ALREADY_EXISTS, format("Target directory for new partition '%s' of table '%s.%s' already exists: %s", partitionName, schemaName, tableName, writeInfo.getTargetPath()));
                    }
                }
            }
        } else {
            // Write to: a new partition in an existing partitioned table, or an existing unpartitioned table
            if (partitionName.isPresent()) {
                // a new partition in an existing partitioned table
                updateMode = UpdateMode.NEW;
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
            } else {
                switch (insertExistingPartitionsBehavior) {
                    case APPEND:
                        updateMode = UpdateMode.APPEND;
                        writeInfo = locationService.getTableWriteInfo(locationHandle, false);
                        break;
                    case OVERWRITE:
                        updateMode = UpdateMode.OVERWRITE;
                        writeInfo = locationService.getTableWriteInfo(locationHandle, true);
                        break;
                    case ERROR:
                        throw new TrinoException(HIVE_TABLE_READ_ONLY, "Unpartitioned Hive tables are immutable");
                    default:
                        throw new IllegalArgumentException("Unsupported insert existing table behavior: " + insertExistingPartitionsBehavior);
                }
            }
            schema = getHiveSchema(table);
        }
        if (partitionName.isPresent()) {
            // Write to a new partition
            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
        } else {
            // Write to a new/existing unpartitioned table
            outputStorageFormat = fromHiveStorageFormat(tableStorageFormat);
        }
    } else {
        switch (insertExistingPartitionsBehavior) {
            // Write to: an existing partition in an existing partitioned table
            case APPEND:
                // Append to an existing partition
                updateMode = UpdateMode.APPEND;
                // Check the column types in partition schema match the column types in table schema
                List<Column> tableColumns = table.getDataColumns();
                List<Column> existingPartitionColumns = partition.get().getColumns();
                for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) {
                    HiveType tableType = tableColumns.get(i).getType();
                    HiveType partitionType = existingPartitionColumns.get(i).getType();
                    if (!tableType.equals(partitionType)) {
                        throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" +
                                "You are trying to write into an existing partition in a table. " +
                                "The table schema has changed since the creation of the partition. " +
                                "Inserting rows into such partition is not supported. " +
                                "The column '%s' in table '%s' is declared as type '%s', " +
                                "but partition '%s' declared column '%s' as type '%s'.",
                                tableColumns.get(i).getName(), tableName, tableType, partitionName, existingPartitionColumns.get(i).getName(), partitionType));
                    }
                }
                HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get());
                outputStorageFormat = partition.get().getStorage().getStorageFormat();
                schema = getHiveSchema(partition.get(), table);
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
                break;
            case OVERWRITE:
                // Overwrite an existing partition
                //
                // The behavior of overwrite is considered as if first dropping the partition and inserting a new partition, thus:
                // * No partition writable check is required.
                // * Table schema and storage format is used for the new partition (instead of existing partition schema and storage format).
                updateMode = UpdateMode.OVERWRITE;
                outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
                schema = getHiveSchema(table);
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(), partitionName.get());
                break;
            case ERROR:
                throw new TrinoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionName.get());
            default:
                throw new IllegalArgumentException(format("Unsupported insert existing partitions behavior: %s", insertExistingPartitionsBehavior));
        }
    }
    additionalTableParameters.forEach(schema::setProperty);
    validateSchema(partitionName, schema);
    int bucketToUse = bucketNumber.isEmpty() ? 0 : bucketNumber.getAsInt();
    Path path;
    String fileNameWithExtension;
    if (transaction.isAcidTransactionRunning()) {
        String subdir = computeAcidSubdir(transaction);
        Path subdirPath = new Path(writeInfo.getWritePath(), subdir);
        path = createHiveBucketPath(subdirPath, bucketToUse, table.getParameters());
        fileNameWithExtension = path.getName();
    } else {
        String fileName = computeFileName(bucketNumber);
        fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat);
        path = new Path(writeInfo.getWritePath(), fileNameWithExtension);
    }
    boolean useAcidSchema = isCreateTransactionalTable || (table != null && isFullAcidTable(table.getParameters()));
    FileWriter hiveFileWriter = null;
    for (HiveFileWriterFactory fileWriterFactory : fileWriterFactories) {
        Optional<FileWriter> fileWriter = fileWriterFactory.createFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, conf, session, bucketNumber, transaction, useAcidSchema, WriterKind.INSERT);
        if (fileWriter.isPresent()) {
            hiveFileWriter = fileWriter.get();
            break;
        }
    }
    if (hiveFileWriter == null) {
        hiveFileWriter = new RecordFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, partitionStorageFormat.getEstimatedWriterMemoryUsage(), conf, typeManager, parquetTimeZone, session);
    }
    String writerImplementation = hiveFileWriter.getClass().getName();
    Consumer<HiveWriter> onCommit = hiveWriter -> {
        Optional<Long> size;
        try {
            size = Optional.of(hiveWriter.getWrittenBytes());
        } catch (RuntimeException e) {
            // Do not fail the query if file system is not available
            size = Optional.empty();
        }
        eventClient.post(new WriteCompletedEvent(
                session.getQueryId(),
                path.toString(),
                schemaName,
                tableName,
                partitionName.orElse(null),
                outputStorageFormat.getOutputFormat(),
                writerImplementation,
                nodeManager.getCurrentNode().getVersion(),
                nodeManager.getCurrentNode().getHost(),
                session.getIdentity().getPrincipal().map(Principal::getName).orElse(null),
                nodeManager.getEnvironment(),
                sessionProperties,
                size.orElse(null),
                hiveWriter.getRowCount()));
    };
    if (!sortedBy.isEmpty()) {
        FileSystem fileSystem;
        Path tempFilePath;
        if (sortedWritingTempStagingPathEnabled) {
            String tempPrefix = sortedWritingTempStagingPath.replace("${USER}", new HdfsContext(session).getIdentity().getUser());
            tempFilePath = new Path(tempPrefix, ".tmp-sort." + path.getParent().getName() + "." + path.getName());
        } else {
            tempFilePath = new Path(path.getParent(), ".tmp-sort." + path.getName());
        }
        try {
            Configuration configuration = new Configuration(conf);
            // Explicitly set the default FS to local file system to avoid getting HDFS when sortedWritingTempStagingPath specifies no scheme
            configuration.set(FS_DEFAULT_NAME_KEY, "file:///");
            fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), tempFilePath, configuration);
        } catch (IOException e) {
            throw new TrinoException(HIVE_WRITER_OPEN_ERROR, e);
        }
        List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager, getTimestampPrecision(session))).collect(toImmutableList());
        Map<String, Integer> columnIndexes = new HashMap<>();
        for (int i = 0; i < dataColumns.size(); i++) {
            columnIndexes.put(dataColumns.get(i).getName(), i);
        }
        List<Integer> sortFields = new ArrayList<>();
        List<SortOrder> sortOrders = new ArrayList<>();
        for (SortingColumn column : sortedBy) {
            Integer index = columnIndexes.get(column.getColumnName());
            if (index == null) {
                throw new TrinoException(HIVE_INVALID_METADATA, format("Sorting column '%s' does not exist in table '%s.%s'", column.getColumnName(), schemaName, tableName));
            }
            sortFields.add(index);
            sortOrders.add(column.getOrder().getSortOrder());
        }
        hiveFileWriter = new SortingFileWriter(fileSystem, tempFilePath, hiveFileWriter, sortBufferSize, maxOpenSortFiles, types, sortFields, sortOrders, pageSorter, typeManager.getTypeOperators(), OrcFileWriterFactory::createOrcDataSink);
    }
    return new HiveWriter(hiveFileWriter, partitionName, updateMode, fileNameWithExtension, writeInfo.getWritePath().toString(), writeInfo.getTargetPath().toString(), onCommit, hiveWriterStats);
}
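For reference, FileUtils.makePartName (used above to derive partitionName) joins partition column names and values into the conventional Hive directory segment. A small illustrative call, with made-up column names and values:

// expected to produce a name of the form ds=2024-01-01/country=US (special characters are escaped)
String examplePartitionName = FileUtils.makePartName(
        ImmutableList.of("ds", "country"),
        ImmutableList.of("2024-01-01", "US"));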