use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
the class TestJoinStatsRule method testStatsForInnerJoinWithTwoEquiClausesAndNonEqualityFunction.
@Test
public void testStatsForInnerJoinWithTwoEquiClausesAndNonEqualityFunction() {
// LEFT_JOIN_COLUMN_2 = RIGHT_JOIN_COLUMN_2 is the more selective clause
double firstClauseSelectivity = 1.0 / LEFT_JOIN_COLUMN_2_NDV * LEFT_JOIN_COLUMN_2_NON_NULLS * RIGHT_JOIN_COLUMN_2_NON_NULLS;
double secondClauseSelectivity = 1.0 / LEFT_JOIN_COLUMN_NDV * LEFT_JOIN_COLUMN_NON_NULLS * RIGHT_JOIN_COLUMN_NON_NULLS;
double innerJoinRowCount = LEFT_ROWS_COUNT * RIGHT_ROWS_COUNT * firstClauseSelectivity * Math.pow(secondClauseSelectivity, 0.5) * // LEFT_JOIN_COLUMN < 10 non equality filter
0.3333333333;
PlanNodeStatsEstimate innerJoinStats = planNodeStats(innerJoinRowCount, symbolStatistics(LEFT_JOIN_COLUMN, 5.0, 10.0, 0.0, RIGHT_JOIN_COLUMN_NDV * 0.3333333333), symbolStatistics(RIGHT_JOIN_COLUMN, 5.0, 20.0, 0.0, RIGHT_JOIN_COLUMN_NDV), symbolStatistics(LEFT_JOIN_COLUMN_2, 100.0, 200.0, 0.0, RIGHT_JOIN_COLUMN_2_NDV), symbolStatistics(RIGHT_JOIN_COLUMN_2, 100.0, 200.0, 0.0, RIGHT_JOIN_COLUMN_2_NDV));
tester().assertStatsFor(testSessionBuilder().setSystemProperty(JOIN_MULTI_CLAUSE_INDEPENDENCE_FACTOR, "0.5").build(), pb -> {
Symbol leftJoinColumnSymbol = pb.symbol(LEFT_JOIN_COLUMN, BIGINT);
Symbol rightJoinColumnSymbol = pb.symbol(RIGHT_JOIN_COLUMN, DOUBLE);
Symbol leftJoinColumnSymbol2 = pb.symbol(LEFT_JOIN_COLUMN_2, BIGINT);
Symbol rightJoinColumnSymbol2 = pb.symbol(RIGHT_JOIN_COLUMN_2, DOUBLE);
ComparisonExpression leftJoinColumnLessThanTen = new ComparisonExpression(ComparisonExpression.Operator.LESS_THAN, leftJoinColumnSymbol.toSymbolReference(), new LongLiteral("10"));
return pb.join(INNER, pb.values(leftJoinColumnSymbol, leftJoinColumnSymbol2), pb.values(rightJoinColumnSymbol, rightJoinColumnSymbol2), ImmutableList.of(new EquiJoinClause(leftJoinColumnSymbol2, rightJoinColumnSymbol2), new EquiJoinClause(leftJoinColumnSymbol, rightJoinColumnSymbol)), ImmutableList.of(leftJoinColumnSymbol, leftJoinColumnSymbol2), ImmutableList.of(rightJoinColumnSymbol, rightJoinColumnSymbol2), Optional.of(leftJoinColumnLessThanTen));
}).withSourceStats(0, planNodeStats(LEFT_ROWS_COUNT, LEFT_JOIN_COLUMN_STATS, LEFT_JOIN_COLUMN_2_STATS)).withSourceStats(1, planNodeStats(RIGHT_ROWS_COUNT, RIGHT_JOIN_COLUMN_STATS, RIGHT_JOIN_COLUMN_2_STATS)).check(stats -> stats.equalTo(innerJoinStats));
}
use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
the class TupleDomainParquetPredicate method getDomain.
/**
* Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
* Both arrays must have the same length.
*/
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue, DateTimeZone timeZone) {
checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
if (type.equals(BOOLEAN)) {
boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
if (hasTrueValues && hasFalseValues) {
return Domain.all(type);
}
if (hasTrueValues) {
return Domain.create(ValueSet.of(type, true), hasNullValue);
}
if (hasFalseValues) {
return Domain.create(ValueSet.of(type, false), hasNullValue);
}
// All nulls case is handled earlier
throw new VerifyException("Impossible boolean statistics");
}
if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
long min = asLong(minimums.get(i));
long max = asLong(maximums.get(i));
if (isStatisticsOverflow(type, min, max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type instanceof DecimalType) {
DecimalType decimalType = (DecimalType) type;
List<Range> ranges = new ArrayList<>();
if (decimalType.isShort()) {
for (int i = 0; i < minimums.size(); i++) {
Object min = minimums.get(i);
Object max = maximums.get(i);
long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
if (isStatisticsOverflow(type, minValue, maxValue)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, minValue, true, maxValue, true));
}
} else {
for (int i = 0; i < minimums.size(); i++) {
Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());
ranges.add(Range.range(type, min, true, max, true));
}
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(REAL)) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Float min = (Float) minimums.get(i);
Float max = (Float) maximums.get(i);
if (min.isNaN() || max.isNaN()) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(DOUBLE)) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Double min = (Double) minimums.get(i);
Double max = (Double) maximums.get(i);
if (min.isNaN() || max.isNaN()) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type instanceof VarcharType) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
ranges.add(Range.range(type, min, true, max, true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type instanceof TimestampType) {
if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
List<Object> values = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Object min = minimums.get(i);
Object max = maximums.get(i);
// available and valid in that special case
if (!(min instanceof Binary) || !(max instanceof Binary) || !min.equals(max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
}
return Domain.multipleValues(type, values, hasNullValue);
}
if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
if (!(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation)) {
// Invalid statistics. Unit and UTC adjustment are not known
return Domain.create(ValueSet.all(type), hasNullValue);
}
TimestampLogicalTypeAnnotation timestampTypeAnnotation = (TimestampLogicalTypeAnnotation) logicalTypeAnnotation;
// Bail out if the precision is not known
if (timestampTypeAnnotation.getUnit() == null) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
long min = (long) minimums.get(i);
long max = (long) maximums.get(i);
ranges.add(Range.range(type, timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())), true, timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())), true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
}
return Domain.create(ValueSet.all(type), hasNullValue);
}
use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
the class TestLogicalPlanner method testWithTies.
@Test
public void testWithTies() {
assertPlan("SELECT name, regionkey FROM nation ORDER BY regionkey FETCH FIRST 6 ROWS WITH TIES", any(strictProject(ImmutableMap.of("name", new ExpressionMatcher("name"), "regionkey", new ExpressionMatcher("regionkey")), topNRanking(pattern -> pattern.specification(ImmutableList.of(), ImmutableList.of("regionkey"), ImmutableMap.of("regionkey", SortOrder.ASC_NULLS_LAST)).rankingType(RANK).maxRankingPerPartition(6).partial(false), anyTree(tableScan("nation", ImmutableMap.of("name", "name", "regionkey", "regionkey")))))));
assertPlan("SELECT name, regionkey FROM nation ORDER BY regionkey OFFSET 10 ROWS FETCH FIRST 6 ROWS WITH TIES", any(strictProject(ImmutableMap.of("name", new ExpressionMatcher("name"), "regionkey", new ExpressionMatcher("regionkey")), filter("row_num > BIGINT '10'", rowNumber(pattern -> pattern.partitionBy(ImmutableList.of()), strictProject(ImmutableMap.of("name", new ExpressionMatcher("name"), "regionkey", new ExpressionMatcher("regionkey")), topNRanking(pattern -> pattern.specification(ImmutableList.of(), ImmutableList.of("regionkey"), ImmutableMap.of("regionkey", SortOrder.ASC_NULLS_LAST)).rankingType(RANK).maxRankingPerPartition(16).partial(false), anyTree(tableScan("nation", ImmutableMap.of("name", "name", "regionkey", "regionkey")))))).withAlias("row_num", new RowNumberSymbolMatcher())))));
}
use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
the class TestHashJoinOperator method testUnwrapsLazyBlocks.
@Test
public void testUnwrapsLazyBlocks() {
TaskContext taskContext = createTaskContext();
DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext();
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(((leftPosition, leftPage, rightPosition, rightPage) -> {
// force loading of probe block
rightPage.getBlock(1).getLoadedBlock();
return true;
}));
RowPagesBuilder buildPages = rowPagesBuilder(false, Ints.asList(0), ImmutableList.of(BIGINT)).addSequencePage(1, 0);
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, true, taskContext, buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
RowPagesBuilder probePages = rowPagesBuilder(false, Ints.asList(0), ImmutableList.of(BIGINT, BIGINT));
List<Page> probeInput = probePages.addSequencePage(1, 0, 0).build();
probeInput = probeInput.stream().map(page -> new Page(page.getBlock(0), new LazyBlock(1, () -> page.getBlock(1)))).collect(toImmutableList());
OperatorFactory joinOperatorFactory = operatorFactories.innerJoin(0, new PlanNodeId("test"), lookupSourceFactory, false, false, true, probePages.getTypes(), Ints.asList(0), getHashChannelAsInt(probePages), Optional.empty(), OptionalInt.of(1), PARTITIONING_SPILLER_FACTORY, TYPE_OPERATOR_FACTORY);
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(executor, buildSideSetup);
Operator operator = joinOperatorFactory.createOperator(driverContext);
assertTrue(operator.needsInput());
operator.addInput(probeInput.get(0));
operator.finish();
Page output = operator.getOutput();
assertFalse(output.getBlock(1) instanceof LazyBlock);
}
use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.
the class TestHashJoinOperator method testYield.
@Test
public void testYield() {
// create a filter function that yields for every probe match
// verify we will yield #match times totally
TaskContext taskContext = createTaskContext();
DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext();
// force a yield for every match
AtomicInteger filterFunctionCalls = new AtomicInteger();
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(((leftPosition, leftPage, rightPosition, rightPage) -> {
filterFunctionCalls.incrementAndGet();
driverContext.getYieldSignal().forceYieldForTesting();
return true;
}));
// build with 40 entries
int entries = 40;
RowPagesBuilder buildPages = rowPagesBuilder(true, Ints.asList(0), ImmutableList.of(BIGINT)).addSequencePage(entries, 42);
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, true, taskContext, buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe matching the above 40 entries
RowPagesBuilder probePages = rowPagesBuilder(false, Ints.asList(0), ImmutableList.of(BIGINT));
List<Page> probeInput = probePages.addSequencePage(100, 0).build();
OperatorFactory joinOperatorFactory = operatorFactories.innerJoin(0, new PlanNodeId("test"), lookupSourceFactory, false, false, true, probePages.getTypes(), Ints.asList(0), getHashChannelAsInt(probePages), Optional.empty(), OptionalInt.of(1), PARTITIONING_SPILLER_FACTORY, TYPE_OPERATOR_FACTORY);
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(executor, buildSideSetup);
Operator operator = joinOperatorFactory.createOperator(driverContext);
assertTrue(operator.needsInput());
operator.addInput(probeInput.get(0));
operator.finish();
// we will yield 40 times due to filterFunction
for (int i = 0; i < entries; i++) {
driverContext.getYieldSignal().setWithDelay(5 * SECONDS.toNanos(1), driverContext.getYieldExecutor());
filterFunctionCalls.set(0);
assertNull(operator.getOutput());
assertEquals(filterFunctionCalls.get(), 1, "Expected join to stop processing (yield) after calling filter function once");
driverContext.getYieldSignal().reset();
}
// delayed yield is not going to prevent operator from producing a page now (yield won't be forced because filter function won't be called anymore)
driverContext.getYieldSignal().setWithDelay(5 * SECONDS.toNanos(1), driverContext.getYieldExecutor());
// expect output page to be produced within few calls to getOutput(), e.g. to facilitate spill
Page output = null;
for (int i = 0; output == null && i < 5; i++) {
output = operator.getOutput();
}
assertNotNull(output);
driverContext.getYieldSignal().reset();
// make sure we have all 4 entries
assertEquals(output.getPositionCount(), entries);
}
Aggregations