Use of io.trino.spi.type.TypeManager in project trino by trinodb.
From class TestIonSqlQueryBuilder, method testDecimalColumns:
@Test
public void testDecimalColumns() {
    TypeManager typeManager = TESTING_TYPE_MANAGER;
    IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(typeManager);
    List<HiveColumnHandle> columns = ImmutableList.of(
            createBaseColumn("quantity", 0, HiveType.valueOf("decimal(20,0)"), DecimalType.createDecimalType(), REGULAR, Optional.empty()),
            createBaseColumn("extendedprice", 1, HiveType.valueOf("decimal(20,2)"), DecimalType.createDecimalType(), REGULAR, Optional.empty()),
            createBaseColumn("discount", 2, HiveType.valueOf("decimal(10,2)"), DecimalType.createDecimalType(), REGULAR, Optional.empty()));
    DecimalType decimalType = DecimalType.createDecimalType(10, 2);
    TupleDomain<HiveColumnHandle> tupleDomain = withColumnDomains(ImmutableMap.of(
            columns.get(0), Domain.create(ofRanges(Range.lessThan(DecimalType.createDecimalType(20, 0), longDecimal("50"))), false),
            columns.get(1), Domain.create(ofRanges(Range.equal(HiveType.valueOf("decimal(20,2)").getType(typeManager), longDecimal("0.05"))), false),
            columns.get(2), Domain.create(ofRanges(Range.range(decimalType, shortDecimal("0.0"), true, shortDecimal("0.02"), true)), false)));
    assertEquals("SELECT s._1, s._2, s._3 FROM S3Object s WHERE ((case s._1 when '' then null else CAST(s._1 AS DECIMAL(20,0)) end < 50)) AND " +
                    "(case s._2 when '' then null else CAST(s._2 AS DECIMAL(20,2)) end = 0.05) AND ((case s._3 when '' then null else CAST(s._3 AS DECIMAL(10,2)) " +
                    "end >= 0.00 AND case s._3 when '' then null else CAST(s._3 AS DECIMAL(10,2)) end <= 0.02))",
            queryBuilder.buildSql(columns, tupleDomain));
}
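The lookup this test relies on is the TypeManager resolving a Hive type string to a concrete Trino type, the same call used for the equality range above. A minimal sketch of that resolution on its own, assuming the same TESTING_TYPE_MANAGER (the printed output is only illustrative):

// Resolve a Hive decimal type string to a Trino DecimalType via the TypeManager.
TypeManager typeManager = TESTING_TYPE_MANAGER;
DecimalType resolved = (DecimalType) HiveType.valueOf("decimal(20,2)").getType(typeManager);
// Precision 20 and scale 2 come straight from the Hive type string.
System.out.println(resolved.getPrecision() + "," + resolved.getScale());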
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
From class DeltaLakePageSourceProvider, method createPageSource:
@Override
public ConnectorPageSource createPageSource(
        ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit,
        ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter)
{
    DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;
    // We reach here when we could not prune the split using file level stats, table predicate
    // and the dynamic filter in the coordinator during split generation. The file level stats
    // in DeltaLakeSplit#filePredicate could help to prune this split when a more selective dynamic filter
    // is available now, without having to access parquet file footer for row-group stats.
    // We avoid sending DeltaLakeSplit#splitPredicate to workers by using table.getPredicate() here.
    TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(
            table.getNonPartitionConstraint(),
            split.getStatisticsPredicate(),
            dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
    if (filteredSplitPredicate.isNone()) {
        return new EmptyPageSource();
    }
    List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream()
            .map(DeltaLakeColumnHandle.class::cast)
            .collect(toImmutableList());
    Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream()
            .filter(column -> column.getColumnType() == REGULAR)
            .collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream()
            .map(DeltaLakeColumnHandle::toHiveColumnHandle)
            .collect(toImmutableList());
    Path path = new Path(split.getPath());
    HdfsContext hdfsContext = new HdfsContext(session);
    TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));
    if (table.getWriteType().isPresent()) {
        return new DeltaLakeUpdatablePageSource(
                table, deltaLakeColumns, partitionKeys, split.getPath(), split.getFileSize(), split.getFileModifiedTime(),
                session, executorService, hdfsEnvironment, hdfsContext, parquetDateTimeZone, parquetReaderOptions,
                parquetPredicate, typeManager, updateResultJsonCodec);
    }
    ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(
            path, split.getStart(), split.getLength(), split.getFileSize(), hiveColumnHandles, parquetPredicate,
            true, hdfsEnvironment, hdfsEnvironment.getConfiguration(hdfsContext, path), session.getIdentity(),
            parquetDateTimeZone, fileFormatDataSourceStats,
            parquetReaderOptions
                    .withMaxReadBlockSize(getParquetMaxReadBlockSize(session))
                    .withUseColumnIndex(isParquetUseColumnIndex(session)));
    verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");
    return new DeltaLakePageSource(
            deltaLakeColumns, partitionKeys, pageSource.get(),
            split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
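The early return above hinges on TupleDomain.intersect collapsing contradictory predicates to "none". A minimal sketch of that behavior in isolation, using made-up string column keys and values instead of real DeltaLakeColumnHandles (BIGINT is assumed statically imported):

// Two single-value domains on the same column that cannot both hold.
TupleDomain<String> enforced = TupleDomain.withColumnDomains(ImmutableMap.of("col", Domain.singleValue(BIGINT, 1L)));
TupleDomain<String> dynamic = TupleDomain.withColumnDomains(ImmutableMap.of("col", Domain.singleValue(BIGINT, 2L)));
// The intersection is "none", the same condition that makes createPageSource
// return an EmptyPageSource without reading any data.
boolean prunable = TupleDomain.intersect(ImmutableList.of(enforced, dynamic)).isNone();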
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
From class RowParametricType, method createType:
@Override
public Type createType(TypeManager typeManager, List<TypeParameter> parameters) {
    checkArgument(!parameters.isEmpty(), "Row type must have at least one parameter");
    checkArgument(parameters.stream().allMatch(parameter -> parameter.getKind() == ParameterKind.NAMED_TYPE),
            "Expected only named types as a parameters, got %s", parameters);
    List<TypeSignatureParameter> typeSignatureParameters = parameters.stream()
            .map(TypeParameter::getNamedType)
            .map(parameter -> TypeSignatureParameter.namedTypeParameter(
                    new NamedTypeSignature(parameter.getName(), parameter.getType().getTypeSignature())))
            .collect(toList());
    List<RowType.Field> fields = parameters.stream()
            .map(TypeParameter::getNamedType)
            .map(parameter -> new RowType.Field(parameter.getName().map(RowFieldName::getName), parameter.getType()))
            .collect(toList());
    return RowType.createWithTypeSignature(new TypeSignature(StandardTypes.ROW, typeSignatureParameters), fields);
}
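For comparison, a sketch of building a comparable row type such as row(name varchar, id bigint) directly with the RowType factory methods rather than through TypeParameters; the typeManager variable and the VARCHAR/BIGINT static imports are assumed to be in scope:

// Construct a row type with two named fields directly from field definitions.
RowType rowType = RowType.rowType(
        RowType.field("name", VARCHAR),
        RowType.field("id", BIGINT));
// Its signature can be resolved back through the TypeManager, the same
// resolution path that ends up in the parametric factory above.
Type resolved = typeManager.getType(rowType.getTypeSignature());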
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
From class IcebergSplitSource, method getNextBatch:
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
    long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
    if (dynamicFilter.isAwaitable() && timeLeft > 0) {
        return dynamicFilter.isBlocked()
                .thenApply(ignored -> EMPTY_BATCH)
                .completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
    }
    if (combinedScanIterable == null) {
        // Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
        this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
        TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
        // TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
        TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
        if (!simplifiedPredicate.equals(fullPredicate)) {
            // Pushed down predicate was simplified, always evaluate it against individual splits
            this.pushedDownDynamicFilterPredicate = TupleDomain.all();
        }
        TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
        if (effectivePredicate.isNone()) {
            finish();
            return completedFuture(NO_MORE_SPLITS_BATCH);
        }
        Expression filterExpression = toIcebergExpression(effectivePredicate);
        this.combinedScanIterable = tableScan.filter(filterExpression).includeColumnStats().planTasks();
        this.fileScanIterator = Streams.stream(combinedScanIterable)
                .map(CombinedScanTask::files)
                .flatMap(Collection::stream)
                .iterator();
    }
    TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
    if (dynamicFilterPredicate.isNone()) {
        finish();
        return completedFuture(NO_MORE_SPLITS_BATCH);
    }
    Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (fileScanTasks.hasNext()) {
        FileScanTask scanTask = fileScanTasks.next();
        if (!scanTask.deletes().isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
        }
        if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
            continue;
        }
        IcebergSplit icebergSplit = toIcebergSplit(scanTask);
        Schema fileSchema = scanTask.spec().schema();
        Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream()
                .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager))
                .collect(toImmutableSet());
        Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
            Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
            for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
                Object partitionValue = deserializePartitionValue(
                        partitionColumn.getType(),
                        icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null),
                        partitionColumn.getName());
                NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
                bindings.put(partitionColumn, bindingValue);
            }
            return bindings;
        });
        if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
            if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
                continue;
            }
            if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
                continue;
            }
        }
        if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
            continue;
        }
        if (recordScannedFiles) {
            scannedFiles.add(scanTask.file());
        }
        splits.add(icebergSplit);
    }
    return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}
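partitionMatchesPredicate is not shown here; the following is only a hedged sketch of the kind of per-column check such pruning performs, assuming a single partitionColumn handle from identityPartitionColumns and the memoized partitionValues supplier built above, not the method's actual body:

// Does the dynamically filtered domain for this partition column still admit
// the partition's value? If not, the split can be skipped.
Domain domain = dynamicFilterPredicate.getDomains().orElseThrow().get(partitionColumn);
NullableValue value = partitionValues.get().get(partitionColumn);
boolean matches = (domain == null) || domain.includesNullableValue(value.getValue());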
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
From class TestDeltaLakeMetadata, method setUp:
@BeforeClass
public void setUp() throws IOException {
    temporaryCatalogDirectory = createTempDirectory("HiveCatalog").toFile();
    Map<String, String> config = ImmutableMap.<String, String>builder()
            .put("hive.metastore", "file")
            .put("hive.metastore.catalog.dir", temporaryCatalogDirectory.getPath())
            .buildOrThrow();
    Bootstrap app = new Bootstrap(
            // connector dependencies
            new JsonModule(),
            binder -> {
                ConnectorContext context = new TestingConnectorContext();
                binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion()));
                binder.bind(CatalogName.class).toInstance(new CatalogName("test"));
                binder.bind(TypeManager.class).toInstance(context.getTypeManager());
                binder.bind(NodeManager.class).toInstance(context.getNodeManager());
                binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory());
            },
            // connector modules
            new DeltaLakeMetastoreModule(),
            new DeltaLakeModule(),
            // test setup
            binder -> {
                binder.bind(HdfsEnvironment.class).toInstance(HDFS_ENVIRONMENT);
            },
            new AbstractModule() {
                @Provides
                public DeltaLakeMetastore getDeltaLakeMetastore(
                        @RawHiveMetastoreFactory HiveMetastoreFactory hiveMetastoreFactory,
                        TransactionLogAccess transactionLogAccess,
                        TypeManager typeManager,
                        CachingDeltaLakeStatisticsAccess statistics) {
                    return new HiveMetastoreBackedDeltaLakeMetastore(hiveMetastoreFactory.createMetastore(Optional.empty()), transactionLogAccess, typeManager, statistics);
                }
            });
    Injector injector = app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize();
    deltaLakeMetadataFactory = injector.getInstance(DeltaLakeMetadataFactory.class);
    injector.getInstance(DeltaLakeMetastore.class).createDatabase(Database.builder()
            .setDatabaseName(DATABASE_NAME)
            .setOwnerName(Optional.of("test"))
            .setOwnerType(Optional.of(USER))
            .setLocation(Optional.empty())
            .build());
}
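Outside the Guice wiring, the TypeManager bound above comes straight from the testing context. A minimal sketch of obtaining and using it directly; the VARCHAR round trip is only illustrative:

// TestingConnectorContext supplies the TypeManager instance that
// binder.bind(TypeManager.class) hands to the Delta Lake modules above.
ConnectorContext context = new TestingConnectorContext();
TypeManager typeManager = context.getTypeManager();
Type resolved = typeManager.getType(VarcharType.VARCHAR.getTypeSignature());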