
Example 1 with INTEGER

Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

From the class ExtractSpatialJoins, method tryCreateSpatialJoin:

private static Result tryCreateSpatialJoin(
        Context context,
        JoinNode joinNode,
        Expression filter,
        PlanNodeId nodeId,
        List<Symbol> outputSymbols,
        FunctionCall spatialFunction,
        Optional<Expression> radius,
        PlannerContext plannerContext,
        SplitManager splitManager,
        PageSourceManager pageSourceManager,
        TypeAnalyzer typeAnalyzer) {
    // TODO Add support for distributed left spatial joins
    Optional<String> spatialPartitioningTableName = joinNode.getType() == INNER ? getSpatialPartitioningTableName(context.getSession()) : Optional.empty();
    Optional<KdbTree> kdbTree = spatialPartitioningTableName.map(tableName -> loadKdbTree(tableName, context.getSession(), plannerContext.getMetadata(), splitManager, pageSourceManager));
    List<Expression> arguments = spatialFunction.getArguments();
    verify(arguments.size() == 2);
    Expression firstArgument = arguments.get(0);
    Expression secondArgument = arguments.get(1);
    Type sphericalGeographyType = plannerContext.getTypeManager().getType(SPHERICAL_GEOGRAPHY_TYPE_SIGNATURE);
    if (typeAnalyzer.getType(context.getSession(), context.getSymbolAllocator().getTypes(), firstArgument).equals(sphericalGeographyType)
            || typeAnalyzer.getType(context.getSession(), context.getSymbolAllocator().getTypes(), secondArgument).equals(sphericalGeographyType)) {
        return Result.empty();
    }
    Set<Symbol> firstSymbols = extractUnique(firstArgument);
    Set<Symbol> secondSymbols = extractUnique(secondArgument);
    if (firstSymbols.isEmpty() || secondSymbols.isEmpty()) {
        return Result.empty();
    }
    Optional<Symbol> newFirstSymbol = newGeometrySymbol(context, firstArgument, plannerContext.getTypeManager());
    Optional<Symbol> newSecondSymbol = newGeometrySymbol(context, secondArgument, plannerContext.getTypeManager());
    PlanNode leftNode = joinNode.getLeft();
    PlanNode rightNode = joinNode.getRight();
    PlanNode newLeftNode;
    PlanNode newRightNode;
    // Check if the order of arguments of the spatial function matches the order of join sides
    int alignment = checkAlignment(joinNode, firstSymbols, secondSymbols);
    if (alignment > 0) {
        newLeftNode = newFirstSymbol.map(symbol -> addProjection(context, leftNode, symbol, firstArgument)).orElse(leftNode);
        newRightNode = newSecondSymbol.map(symbol -> addProjection(context, rightNode, symbol, secondArgument)).orElse(rightNode);
    } else if (alignment < 0) {
        newLeftNode = newSecondSymbol.map(symbol -> addProjection(context, leftNode, symbol, secondArgument)).orElse(leftNode);
        newRightNode = newFirstSymbol.map(symbol -> addProjection(context, rightNode, symbol, firstArgument)).orElse(rightNode);
    } else {
        return Result.empty();
    }
    Expression newFirstArgument = toExpression(newFirstSymbol, firstArgument);
    Expression newSecondArgument = toExpression(newSecondSymbol, secondArgument);
    Optional<Symbol> leftPartitionSymbol = Optional.empty();
    Optional<Symbol> rightPartitionSymbol = Optional.empty();
    if (kdbTree.isPresent()) {
        leftPartitionSymbol = Optional.of(context.getSymbolAllocator().newSymbol("pid", INTEGER));
        rightPartitionSymbol = Optional.of(context.getSymbolAllocator().newSymbol("pid", INTEGER));
        if (alignment > 0) {
            newLeftNode = addPartitioningNodes(plannerContext, context, newLeftNode, leftPartitionSymbol.get(), kdbTree.get(), newFirstArgument, Optional.empty());
            newRightNode = addPartitioningNodes(plannerContext, context, newRightNode, rightPartitionSymbol.get(), kdbTree.get(), newSecondArgument, radius);
        } else {
            newLeftNode = addPartitioningNodes(plannerContext, context, newLeftNode, leftPartitionSymbol.get(), kdbTree.get(), newSecondArgument, Optional.empty());
            newRightNode = addPartitioningNodes(plannerContext, context, newRightNode, rightPartitionSymbol.get(), kdbTree.get(), newFirstArgument, radius);
        }
    }
    Expression newSpatialFunction = FunctionCallBuilder.resolve(context.getSession(), plannerContext.getMetadata())
            .setName(spatialFunction.getName())
            .addArgument(GEOMETRY_TYPE_SIGNATURE, newFirstArgument)
            .addArgument(GEOMETRY_TYPE_SIGNATURE, newSecondArgument)
            .build();
    Expression newFilter = replaceExpression(filter, ImmutableMap.of(spatialFunction, newSpatialFunction));
    return Result.ofPlanNode(new SpatialJoinNode(
            nodeId,
            SpatialJoinNode.Type.fromJoinNodeType(joinNode.getType()),
            newLeftNode,
            newRightNode,
            outputSymbols,
            newFilter,
            leftPartitionSymbol,
            rightPartitionSymbol,
            kdbTree.map(KdbTreeUtils::toJson)));
}

Example 2 with INTEGER

Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

From the class PagesRTreeIndex, method findJoinPositions:

/**
 * Returns an array of addresses from {@link PagesIndex#valueAddresses} corresponding
 * to rows with matching geometries.
 * <p>
 * The caller is responsible for calling {@link #isJoinPositionEligible(int, int, Page)}
 * for each of these addresses to apply additional join filters.
 */
@Override
public int[] findJoinPositions(int probePosition, Page probe, int probeGeometryChannel, Optional<Integer> probePartitionChannel) {
    Block probeGeometryBlock = probe.getBlock(probeGeometryChannel);
    if (probeGeometryBlock.isNull(probePosition)) {
        return EMPTY_ADDRESSES;
    }
    int probePartition = probePartitionChannel.map(channel -> toIntExact(INTEGER.getLong(probe.getBlock(channel), probePosition))).orElse(-1);
    Slice slice = probeGeometryBlock.getSlice(probePosition, 0, probeGeometryBlock.getSliceLength(probePosition));
    OGCGeometry probeGeometry = deserialize(slice);
    verifyNotNull(probeGeometry);
    if (probeGeometry.isEmpty()) {
        return EMPTY_ADDRESSES;
    }
    boolean probeIsPoint = probeGeometry instanceof OGCPoint;
    IntArrayList matchingPositions = new IntArrayList();
    Envelope envelope = getEnvelope(probeGeometry);
    rtree.query(envelope, item -> {
        GeometryWithPosition geometryWithPosition = (GeometryWithPosition) item;
        OGCGeometry buildGeometry = geometryWithPosition.getGeometry();
        if (partitions.isEmpty() || (probePartition == geometryWithPosition.getPartition()
                && (probeIsPoint || (buildGeometry instanceof OGCPoint) || testReferencePoint(envelope, buildGeometry, probePartition)))) {
            if (radiusChannel == -1) {
                if (spatialRelationshipTest.apply(buildGeometry, probeGeometry, OptionalDouble.empty())) {
                    matchingPositions.add(geometryWithPosition.getPosition());
                }
            } else {
                if (spatialRelationshipTest.apply(geometryWithPosition.getGeometry(), probeGeometry, OptionalDouble.of(getRadius(geometryWithPosition.getPosition())))) {
                    matchingPositions.add(geometryWithPosition.getPosition());
                }
            }
        }
    });
    return matchingPositions.toIntArray(null);
}
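
For reference, here is a minimal standalone sketch (not part of the Trino source above) of the INTEGER read/write pattern these examples rely on: writeLong into a BlockBuilder on the build side, and getLong plus toIntExact on the read side, as in the probePartition lookup above. The class and method names are illustrative.

import static io.trino.spi.type.IntegerType.INTEGER;
import static java.lang.Math.toIntExact;

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;

public final class IntegerBlockSketch {

    private IntegerBlockSketch() {}

    // INTEGER stores its values as Java longs, so writeLong is the canonical writer
    public static Block buildIntegerBlock(int... values) {
        BlockBuilder builder = INTEGER.createBlockBuilder(null, values.length);
        for (int value : values) {
            INTEGER.writeLong(builder, value);
        }
        return builder.build();
    }

    // Mirrors the probePartition read above: getLong, then narrow with toIntExact
    public static int readInteger(Block block, int position) {
        return toIntExact(INTEGER.getLong(block, position));
    }
}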

Example 3 with INTEGER

Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

From the class OrcPageSourceFactory, method createOrcPageSource:

private ConnectorPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long estimatedFileSize,
        List<HiveColumnHandle> columns,
        List<HiveColumnHandle> projections,
        boolean useOrcColumnNames,
        boolean isFullAcid,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone legacyFileTimeZone,
        OrcReaderOptions options,
        Optional<AcidInfo> acidInfo,
        OptionalInt bucketNumber,
        boolean originalFile,
        AcidTransaction transaction,
        FileFormatDataSourceStats stats) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), estimatedFileSize, options, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
    try {
        Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
        if (optionalOrcReader.isEmpty()) {
            return new EmptyPageSource();
        }
        OrcReader reader = optionalOrcReader.get();
        if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
            validateOrcAcidVersion(path, reader);
        }
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
        List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
        List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
        List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
        if (isFullAcid && !originalFilesPresent) {
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(), columns);
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
        Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            projectionsByColumnName = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseColumnName, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        } else {
            projectionsByColumnIndex = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseHiveColumnIndex, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled()).setDomainCompactionThreshold(domainCompactionThreshold);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        for (HiveColumnHandle column : columns) {
            OrcColumn orcColumn = null;
            OrcReader.ProjectedLayout projectedLayout = null;
            Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;
            if (useOrcColumnNames || isFullAcid) {
                String columnName = column.getName().toLowerCase(ENGLISH);
                orcColumn = fileColumnsByName.get(columnName);
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
                    columnDomains = effectivePredicateDomains.entrySet().stream()
                            .filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName))
                            .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            } else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
                    columnDomains = effectivePredicateDomains.entrySet().stream()
                            .filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex())
                            .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            }
            Type readType = column.getType();
            if (orcColumn != null) {
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                fileReadLayouts.add(projectedLayout);
                // Add predicates on top-level and nested columns
                for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
                    OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
                    if (nestedColumn != null) {
                        predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
                    }
                }
            } else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns,
                fileReadTypes,
                fileReadLayouts,
                predicateBuilder.build(),
                start,
                length,
                legacyFileTimeZone,
                memoryUsage,
                INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSource.getId(), exception),
                NameBasedFieldMapper::create);
        Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(
                path.getName(),
                new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats),
                identity,
                configuration,
                hdfsEnvironment,
                info,
                bucketNumber,
                memoryUsage));
        Optional<Long> originalFileRowId = acidInfo.filter(OrcPageSourceFactory::hasOriginalFiles)
                .map(info -> OriginalFilesUtils.getPrecedingRowCount(
                        acidInfo.get().getOriginalFiles(),
                        path,
                        hdfsEnvironment,
                        identity,
                        options,
                        configuration,
                        stats));
        if (transaction.isDelete()) {
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
            } else {
                columnAdaptations.add(ColumnAdaptation.rowIdColumn());
            }
        } else if (transaction.isUpdate()) {
            HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
            List<HiveColumnHandle> dependencyColumns = projections.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
            } else {
                columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
            }
        }
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, originalFileRowId, memoryUsage, stats);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new TrinoException(HIVE_MISSING_DATA, message, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
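
A hedged sketch of the predicate construction above, in isolation: it constrains a single INTEGER column to one value, the way predicateBuilder registers each pushed-down domain. The column id (3), the value (42L), and the helper class are made-up placeholders, not taken from the source.

import static io.trino.spi.type.IntegerType.INTEGER;

import io.trino.orc.TupleDomainOrcPredicate;
import io.trino.orc.metadata.OrcColumnId;
import io.trino.spi.predicate.Domain;

public final class OrcPredicateSketch {

    private OrcPredicateSketch() {}

    public static TupleDomainOrcPredicate integerEqualsPredicate() {
        // Domain.singleValue expects the type's stack representation,
        // which for INTEGER is a long; column id 3 is a placeholder
        return TupleDomainOrcPredicate.builder()
                .setBloomFiltersEnabled(true)
                .setDomainCompactionThreshold(100)
                .addColumn(new OrcColumnId(3), Domain.singleValue(INTEGER, 42L))
                .build();
    }
}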

Example 4 with INTEGER

Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

From the class BenchmarkFileFormatsUtils, method createTpchDataSet:

public static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
    List<String> columnNames = columns.stream().map(TpchColumn::getColumnName).collect(toList());
    List<Type> columnTypes = columns.stream()
            .map(BenchmarkFileFormatsUtils::getColumnType)
            .map(type -> format.supportsDate() || !DATE.equals(type) ? type : createUnboundedVarcharType())
            .collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    long dataSize = 0;
    for (E row : tpchTable.createGenerator(10, 1, 1)) {
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            TpchColumn<E> column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(row));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(row));
                    break;
                case DATE:
                    if (format.supportsDate()) {
                        DATE.writeLong(blockBuilder, column.getDate(row));
                    } else {
                        createUnboundedVarcharType().writeString(blockBuilder, column.getString(row));
                    }
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(row));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(row)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
        if (pageBuilder.isFull()) {
            Page page = pageBuilder.build();
            pages.add(page);
            pageBuilder.reset();
            dataSize += page.getSizeInBytes();
            if (dataSize >= MIN_DATA_SIZE) {
                break;
            }
        }
    }
    if (!pageBuilder.isEmpty()) {
        pages.add(pageBuilder.build());
    }
    return new TestData(columnNames, columnTypes, pages.build());
}
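
The same write path in isolation: a minimal sketch that builds a one-column INTEGER page with PageBuilder, following the declarePosition/write pattern above. The class name is illustrative.

import static io.trino.spi.type.IntegerType.INTEGER;

import com.google.common.collect.ImmutableList;
import io.trino.spi.Page;
import io.trino.spi.PageBuilder;
import io.trino.spi.block.BlockBuilder;

public final class IntegerPageSketch {

    private IntegerPageSketch() {}

    public static Page buildIntegerPage(int... values) {
        PageBuilder pageBuilder = new PageBuilder(ImmutableList.of(INTEGER));
        BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);
        for (int value : values) {
            // one declarePosition per row, then one write per channel
            pageBuilder.declarePosition();
            INTEGER.writeLong(blockBuilder, value);
        }
        return pageBuilder.build();
    }
}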

Example 5 with INTEGER

Use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

From the class TestTableScanRedirectionWithPushdown, method createLocalQueryRunner:

private LocalQueryRunner createLocalQueryRunner(ApplyTableScanRedirect applyTableScanRedirect, Optional<ApplyProjection> applyProjection, Optional<ApplyFilter> applyFilter) {
    LocalQueryRunner queryRunner = LocalQueryRunner.create(MOCK_SESSION);
    MockConnectorFactory.Builder builder = MockConnectorFactory.builder()
            .withGetTableHandle((session, schemaTableName) -> new MockConnectorTableHandle(schemaTableName))
            .withGetColumns(name -> {
                if (name.equals(SOURCE_TABLE)) {
                    return ImmutableList.of(
                            new ColumnMetadata(SOURCE_COLUMN_NAME_A, INTEGER),
                            new ColumnMetadata(SOURCE_COLUMN_NAME_B, INTEGER),
                            new ColumnMetadata(SOURCE_COLUMN_NAME_C, VARCHAR),
                            new ColumnMetadata(SOURCE_COLUMN_NAME_D, ROW_TYPE));
                }
                else if (name.equals(DESTINATION_TABLE)) {
                    return ImmutableList.of(
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_A, INTEGER),
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_B, INTEGER),
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_C, ROW_TYPE),
                            new ColumnMetadata(DESTINATION_COLUMN_NAME_D, BOGUS));
                }
                throw new IllegalArgumentException();
            })
            .withApplyTableScanRedirect(applyTableScanRedirect);
    applyProjection.ifPresent(builder::withApplyProjection);
    applyFilter.ifPresent(builder::withApplyFilter);
    queryRunner.createCatalog(MOCK_CATALOG, builder.build(), ImmutableMap.of());
    return queryRunner;
}
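
For completeness, a small hedged sketch of the ColumnMetadata construction the mock connector uses above; the helper class and its name parameter are illustrative.

import static io.trino.spi.type.IntegerType.INTEGER;

import io.trino.spi.connector.ColumnMetadata;

public final class ColumnMetadataSketch {

    private ColumnMetadataSketch() {}

    public static ColumnMetadata integerColumn(String name) {
        // pairs a column name with the Trino INTEGER type, as in withGetColumns above
        return new ColumnMetadata(name, INTEGER);
    }
}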
