Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.
From class ExtractSpatialJoins, method tryCreateSpatialJoin:
private static Result tryCreateSpatialJoin(
        Context context,
        JoinNode joinNode,
        Expression filter,
        PlanNodeId nodeId,
        List<Symbol> outputSymbols,
        FunctionCall spatialFunction,
        Optional<Expression> radius,
        PlannerContext plannerContext,
        SplitManager splitManager,
        PageSourceManager pageSourceManager,
        TypeAnalyzer typeAnalyzer)
{
    // TODO Add support for distributed left spatial joins
    Optional<String> spatialPartitioningTableName = joinNode.getType() == INNER
            ? getSpatialPartitioningTableName(context.getSession())
            : Optional.empty();
    Optional<KdbTree> kdbTree = spatialPartitioningTableName.map(tableName ->
            loadKdbTree(tableName, context.getSession(), plannerContext.getMetadata(), splitManager, pageSourceManager));

    List<Expression> arguments = spatialFunction.getArguments();
    verify(arguments.size() == 2);
    Expression firstArgument = arguments.get(0);
    Expression secondArgument = arguments.get(1);

    // Do not rewrite joins over SphericalGeography arguments
    Type sphericalGeographyType = plannerContext.getTypeManager().getType(SPHERICAL_GEOGRAPHY_TYPE_SIGNATURE);
    if (typeAnalyzer.getType(context.getSession(), context.getSymbolAllocator().getTypes(), firstArgument).equals(sphericalGeographyType)
            || typeAnalyzer.getType(context.getSession(), context.getSymbolAllocator().getTypes(), secondArgument).equals(sphericalGeographyType)) {
        return Result.empty();
    }

    Set<Symbol> firstSymbols = extractUnique(firstArgument);
    Set<Symbol> secondSymbols = extractUnique(secondArgument);
    if (firstSymbols.isEmpty() || secondSymbols.isEmpty()) {
        return Result.empty();
    }

    Optional<Symbol> newFirstSymbol = newGeometrySymbol(context, firstArgument, plannerContext.getTypeManager());
    Optional<Symbol> newSecondSymbol = newGeometrySymbol(context, secondArgument, plannerContext.getTypeManager());

    PlanNode leftNode = joinNode.getLeft();
    PlanNode rightNode = joinNode.getRight();

    PlanNode newLeftNode;
    PlanNode newRightNode;

    // Check if the order of arguments of the spatial function matches the order of join sides
    int alignment = checkAlignment(joinNode, firstSymbols, secondSymbols);
    if (alignment > 0) {
        newLeftNode = newFirstSymbol.map(symbol -> addProjection(context, leftNode, symbol, firstArgument)).orElse(leftNode);
        newRightNode = newSecondSymbol.map(symbol -> addProjection(context, rightNode, symbol, secondArgument)).orElse(rightNode);
    }
    else if (alignment < 0) {
        newLeftNode = newSecondSymbol.map(symbol -> addProjection(context, leftNode, symbol, secondArgument)).orElse(leftNode);
        newRightNode = newFirstSymbol.map(symbol -> addProjection(context, rightNode, symbol, firstArgument)).orElse(rightNode);
    }
    else {
        return Result.empty();
    }

    Expression newFirstArgument = toExpression(newFirstSymbol, firstArgument);
    Expression newSecondArgument = toExpression(newSecondSymbol, secondArgument);

    Optional<Symbol> leftPartitionSymbol = Optional.empty();
    Optional<Symbol> rightPartitionSymbol = Optional.empty();
    if (kdbTree.isPresent()) {
        // Allocate INTEGER-typed partition id ("pid") symbols for both join sides
        leftPartitionSymbol = Optional.of(context.getSymbolAllocator().newSymbol("pid", INTEGER));
        rightPartitionSymbol = Optional.of(context.getSymbolAllocator().newSymbol("pid", INTEGER));

        if (alignment > 0) {
            newLeftNode = addPartitioningNodes(plannerContext, context, newLeftNode, leftPartitionSymbol.get(), kdbTree.get(), newFirstArgument, Optional.empty());
            newRightNode = addPartitioningNodes(plannerContext, context, newRightNode, rightPartitionSymbol.get(), kdbTree.get(), newSecondArgument, radius);
        }
        else {
            newLeftNode = addPartitioningNodes(plannerContext, context, newLeftNode, leftPartitionSymbol.get(), kdbTree.get(), newSecondArgument, Optional.empty());
            newRightNode = addPartitioningNodes(plannerContext, context, newRightNode, rightPartitionSymbol.get(), kdbTree.get(), newFirstArgument, radius);
        }
    }

    Expression newSpatialFunction = FunctionCallBuilder.resolve(context.getSession(), plannerContext.getMetadata())
            .setName(spatialFunction.getName())
            .addArgument(GEOMETRY_TYPE_SIGNATURE, newFirstArgument)
            .addArgument(GEOMETRY_TYPE_SIGNATURE, newSecondArgument)
            .build();
    Expression newFilter = replaceExpression(filter, ImmutableMap.of(spatialFunction, newSpatialFunction));

    return Result.ofPlanNode(new SpatialJoinNode(
            nodeId,
            SpatialJoinNode.Type.fromJoinNodeType(joinNode.getType()),
            newLeftNode,
            newRightNode,
            outputSymbols,
            newFilter,
            leftPartitionSymbol,
            rightPartitionSymbol,
            kdbTree.map(KdbTreeUtils::toJson)));
}
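The "pid" partition symbols above are typed as INTEGER; at the SPI level, INTEGER values are written and read as Java longs. A minimal, self-contained sketch of that round trip, assuming only trino-spi on the classpath (the class name IntegerTypeRoundTrip is ours, not Trino's):

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;

import static io.trino.spi.type.IntegerType.INTEGER;
import static java.lang.Math.toIntExact;

public class IntegerTypeRoundTrip
{
    public static void main(String[] args)
    {
        // INTEGER values travel through the engine as Java longs,
        // so writes use writeLong and reads use getLong + toIntExact
        BlockBuilder builder = INTEGER.createBlockBuilder(null, 3);
        INTEGER.writeLong(builder, 0);
        INTEGER.writeLong(builder, 1);
        INTEGER.writeLong(builder, 2);
        Block partitionIds = builder.build();

        for (int position = 0; position < partitionIds.getPositionCount(); position++) {
            int pid = toIntExact(INTEGER.getLong(partitionIds, position));
            System.out.println("pid = " + pid);
        }
    }
}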
Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.
From class PagesRTreeIndex, method findJoinPositions:
/**
* Returns an array of addresses from {@link PagesIndex#valueAddresses} corresponding
* to rows with matching geometries.
* <p>
* The caller is responsible for calling {@link #isJoinPositionEligible(int, int, Page)}
* for each of these addresses to apply additional join filters.
*/
@Override
public int[] findJoinPositions(int probePosition, Page probe, int probeGeometryChannel, Optional<Integer> probePartitionChannel)
{
    Block probeGeometryBlock = probe.getBlock(probeGeometryChannel);
    if (probeGeometryBlock.isNull(probePosition)) {
        return EMPTY_ADDRESSES;
    }

    // The partition channel, when present, carries INTEGER partition ids; -1 means "not partitioned"
    int probePartition = probePartitionChannel
            .map(channel -> toIntExact(INTEGER.getLong(probe.getBlock(channel), probePosition)))
            .orElse(-1);

    Slice slice = probeGeometryBlock.getSlice(probePosition, 0, probeGeometryBlock.getSliceLength(probePosition));
    OGCGeometry probeGeometry = deserialize(slice);
    verifyNotNull(probeGeometry);
    if (probeGeometry.isEmpty()) {
        return EMPTY_ADDRESSES;
    }

    boolean probeIsPoint = probeGeometry instanceof OGCPoint;

    IntArrayList matchingPositions = new IntArrayList();
    Envelope envelope = getEnvelope(probeGeometry);
    rtree.query(envelope, item -> {
        GeometryWithPosition geometryWithPosition = (GeometryWithPosition) item;
        OGCGeometry buildGeometry = geometryWithPosition.getGeometry();
        // For partitioned joins, only count a pair in the partition that passes
        // the reference-point test, so matches are not duplicated across partitions
        if (partitions.isEmpty() || (probePartition == geometryWithPosition.getPartition()
                && (probeIsPoint || (buildGeometry instanceof OGCPoint) || testReferencePoint(envelope, buildGeometry, probePartition)))) {
            if (radiusChannel == -1) {
                if (spatialRelationshipTest.apply(buildGeometry, probeGeometry, OptionalDouble.empty())) {
                    matchingPositions.add(geometryWithPosition.getPosition());
                }
            }
            else {
                if (spatialRelationshipTest.apply(buildGeometry, probeGeometry, OptionalDouble.of(getRadius(geometryWithPosition.getPosition())))) {
                    matchingPositions.add(geometryWithPosition.getPosition());
                }
            }
        }
    });
    return matchingPositions.toIntArray(null);
}
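The rtree.query(envelope, visitor) call above follows the JTS ItemVisitor pattern. A small standalone sketch of that pattern, assuming org.locationtech.jts on the classpath (the String payloads stand in for GeometryWithPosition):

import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.index.strtree.STRtree;

public class RTreeQuerySketch
{
    public static void main(String[] args)
    {
        STRtree rtree = new STRtree();
        // Index items by their bounding boxes
        rtree.insert(new Envelope(0, 1, 0, 1), "build-row-0");
        rtree.insert(new Envelope(5, 6, 5, 6), "build-row-1");
        rtree.build();

        // Visit every indexed item whose envelope intersects the probe envelope;
        // exact geometry predicates are then evaluated inside the visitor, as above
        Envelope probe = new Envelope(0.5, 0.7, 0.5, 0.7);
        rtree.query(probe, item -> System.out.println("candidate: " + item));
    }
}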
Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.
From class OrcPageSourceFactory, method createOrcPageSource:
private ConnectorPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long estimatedFileSize,
        List<HiveColumnHandle> columns,
        List<HiveColumnHandle> projections,
        boolean useOrcColumnNames,
        boolean isFullAcid,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone legacyFileTimeZone,
        OrcReaderOptions options,
        Optional<AcidInfo> acidInfo,
        OptionalInt bucketNumber,
        boolean originalFile,
        AcidTransaction transaction,
        FileFormatDataSourceStats stats)
{
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());

    OrcDataSource orcDataSource;
    boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(
                new OrcDataSourceId(path.toString()),
                estimatedFileSize,
                options,
                inputStream,
                stats);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
    try {
        Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
        if (optionalOrcReader.isEmpty()) {
            return new EmptyPageSource();
        }
        OrcReader reader = optionalOrcReader.get();
        if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
            validateOrcAcidVersion(path, reader);
        }

        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
        List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
        List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
        List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
        if (isFullAcid && !originalFilesPresent) {
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));

            fileColumns = ensureColumnNameConsistency(
                    acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(),
                    columns);

            // ACID metadata columns: originalTransaction is BIGINT, bucket is INTEGER, rowId is BIGINT
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());

            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadLayouts.add(fullyProjectedLayout());

            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
        }

        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }

        Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
        Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            projectionsByColumnName = projections.stream()
                    .collect(Collectors.groupingBy(
                            HiveColumnHandle::getBaseColumnName,
                            mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        }
        else {
            projectionsByColumnIndex = projections.stream()
                    .collect(Collectors.groupingBy(
                            HiveColumnHandle::getBaseHiveColumnIndex,
                            mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        }

        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder()
                .setBloomFiltersEnabled(options.isBloomFiltersEnabled())
                .setDomainCompactionThreshold(domainCompactionThreshold);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains()
                .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));

        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        for (HiveColumnHandle column : columns) {
            OrcColumn orcColumn = null;
            OrcReader.ProjectedLayout projectedLayout = null;
            Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;

            if (useOrcColumnNames || isFullAcid) {
                String columnName = column.getName().toLowerCase(ENGLISH);
                orcColumn = fileColumnsByName.get(columnName);
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
                    columnDomains = effectivePredicateDomains.entrySet().stream()
                            .filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName))
                            .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            }
            else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
                    columnDomains = effectivePredicateDomains.entrySet().stream()
                            .filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex())
                            .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            }

            Type readType = column.getType();
            if (orcColumn != null) {
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                fileReadLayouts.add(projectedLayout);

                // Add predicates on top-level and nested columns
                for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
                    OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
                    if (nestedColumn != null) {
                        predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
                    }
                }
            }
            else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }

        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns,
                fileReadTypes,
                fileReadLayouts,
                predicateBuilder.build(),
                start,
                length,
                legacyFileTimeZone,
                memoryUsage,
                INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSource.getId(), exception),
                NameBasedFieldMapper::create);

        Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(
                path.getName(),
                new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats),
                identity,
                configuration,
                hdfsEnvironment,
                info,
                bucketNumber,
                memoryUsage));

        Optional<Long> originalFileRowId = acidInfo
                .filter(OrcPageSourceFactory::hasOriginalFiles)
                .map(info -> OriginalFilesUtils.getPrecedingRowCount(
                        acidInfo.get().getOriginalFiles(),
                        path,
                        hdfsEnvironment,
                        identity,
                        options,
                        configuration,
                        stats));

        if (transaction.isDelete()) {
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
            }
            else {
                columnAdaptations.add(ColumnAdaptation.rowIdColumn());
            }
        }
        else if (transaction.isUpdate()) {
            HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor()
                    .orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
            List<HiveColumnHandle> dependencyColumns = projections.stream()
                    .filter(HiveColumnHandle::isBaseColumn)
                    .collect(toImmutableList());
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
            }
            else {
                columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
            }
        }

        return new OrcPageSource(
                recordReader,
                columnAdaptations,
                orcDataSource,
                deletedRows,
                originalFileRowId,
                memoryUsage,
                stats);
    }
    catch (Exception e) {
        try {
            orcDataSource.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new TrinoException(HIVE_MISSING_DATA, message, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
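The predicateBuilder above collects per-column Domains so the record reader can skip stripes and row groups that cannot match. A hedged sketch of those same builder calls in isolation (the column id of 2 and the single-value domain are invented for illustration, not taken from a real file):

import io.trino.orc.TupleDomainOrcPredicate;
import io.trino.orc.metadata.OrcColumnId;
import io.trino.spi.predicate.Domain;

import static io.trino.spi.type.IntegerType.INTEGER;

public class OrcPredicateSketch
{
    // Mirrors the predicateBuilder calls in createOrcPageSource above
    public static TupleDomainOrcPredicate samplePredicate()
    {
        return TupleDomainOrcPredicate.builder()
                .setBloomFiltersEnabled(true)
                .setDomainCompactionThreshold(100)
                // Prune stripes and row groups where the ORC column with id 2
                // (assumed here to be an INTEGER column) cannot contain 42
                .addColumn(new OrcColumnId(2), Domain.singleValue(INTEGER, 42L))
                .build();
    }
}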
Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.
From class BenchmarkFileFormatsUtils, method createTpchDataSet:
public static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns)
{
    List<String> columnNames = columns.stream().map(TpchColumn::getColumnName).collect(toList());
    List<Type> columnTypes = columns.stream()
            .map(BenchmarkFileFormatsUtils::getColumnType)
            // Fall back to VARCHAR for DATE columns when the format has no native date support
            .map(type -> format.supportsDate() || !DATE.equals(type) ? type : createUnboundedVarcharType())
            .collect(toList());

    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    long dataSize = 0;
    for (E row : tpchTable.createGenerator(10, 1, 1)) {
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            TpchColumn<E> column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch (column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(row));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(row));
                    break;
                case DATE:
                    if (format.supportsDate()) {
                        DATE.writeLong(blockBuilder, column.getDate(row));
                    }
                    else {
                        createUnboundedVarcharType().writeString(blockBuilder, column.getString(row));
                    }
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(row));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(row)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
        if (pageBuilder.isFull()) {
            Page page = pageBuilder.build();
            pages.add(page);
            pageBuilder.reset();
            dataSize += page.getSizeInBytes();
            if (dataSize >= MIN_DATA_SIZE) {
                break;
            }
        }
    }
    if (!pageBuilder.isEmpty()) {
        pages.add(pageBuilder.build());
    }
    return new TestData(columnNames, columnTypes, pages.build());
}
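The same PageBuilder pattern in isolation: a sketch, assuming only trino-spi on the classpath, that fills a one-column INTEGER page (the class name IntegerPageSketch is ours):

import io.trino.spi.Page;
import io.trino.spi.PageBuilder;
import io.trino.spi.block.BlockBuilder;

import java.util.List;

import static io.trino.spi.type.IntegerType.INTEGER;

public class IntegerPageSketch
{
    public static void main(String[] args)
    {
        // Declare one INTEGER channel; PageBuilder sizes the block builders for us
        PageBuilder pageBuilder = new PageBuilder(List.of(INTEGER));
        for (int value = 0; value < 5; value++) {
            pageBuilder.declarePosition();
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);
            INTEGER.writeLong(blockBuilder, value);
        }
        Page page = pageBuilder.build();
        System.out.printf("positions=%s, bytes=%s%n", page.getPositionCount(), page.getSizeInBytes());
    }
}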
Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.
From class TestTableScanRedirectionWithPushdown, method createLocalQueryRunner:
private LocalQueryRunner createLocalQueryRunner(ApplyTableScanRedirect applyTableScanRedirect, Optional<ApplyProjection> applyProjection, Optional<ApplyFilter> applyFilter)
{
    LocalQueryRunner queryRunner = LocalQueryRunner.create(MOCK_SESSION);
    MockConnectorFactory.Builder builder = MockConnectorFactory.builder()
            .withGetTableHandle((session, schemaTableName) -> new MockConnectorTableHandle(schemaTableName))
            .withGetColumns(name -> {
                if (name.equals(SOURCE_TABLE)) {
                    return ImmutableList.of(
                            new ColumnMetadata(SOURCE_COLUMN_NAME_A, INTEGER),
                            new ColumnMetadata(SOURCE_COLUMN_NAME_B, INTEGER),
                            new ColumnMetadata(SOURCE_COLUMN_NAME_C, VARCHAR),
                            new ColumnMetadata(SOURCE_COLUMN_NAME_D, ROW_TYPE));
                }
                if (name.equals(DESTINATION_TABLE)) {
                    return ImmutableList.of(
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_A, INTEGER),
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_B, INTEGER),
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_C, ROW_TYPE),
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_D, BOGUS));
                }
                throw new IllegalArgumentException();
            })
            .withApplyTableScanRedirect(applyTableScanRedirect);
    applyProjection.ifPresent(builder::withApplyProjection);
    applyFilter.ifPresent(builder::withApplyFilter);
    queryRunner.createCatalog(MOCK_CATALOG, builder.build(), ImmutableMap.of());
    return queryRunner;
}
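For reference, ColumnMetadata simply pairs a column name with a trino-spi Type, which is how the mock connector above exposes its INTEGER columns. A tiny sketch with invented column names:

import io.trino.spi.connector.ColumnMetadata;

import java.util.List;

import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.spi.type.VarcharType.VARCHAR;

public class ColumnMetadataSketch
{
    public static void main(String[] args)
    {
        // Column definitions as a connector would expose them; names are invented
        List<ColumnMetadata> columns = List.of(
                new ColumnMetadata("order_key", INTEGER),
                new ColumnMetadata("comment", VARCHAR));
        columns.forEach(column ->
                System.out.println(column.getName() + ": " + column.getType().getDisplayName()));
    }
}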