use of io.trino.spi.type.VarcharType in project trino by trinodb.
the class DeltaLakeParquetStatisticsUtils method getMax.
/**
 * Extracts the maximum value from Parquet column statistics and converts it to the
 * representation chosen for the given Trino type.
 *
 * <ul>
 *   <li>DATE: ISO local date string.</li>
 *   <li>TIMESTAMP WITH TIME ZONE: ISO instant string in UTC; values decoded from binary
 *       statistics are rounded up to the next millisecond when they carry finer precision.</li>
 *   <li>TINYINT/SMALLINT/INTEGER/BIGINT, REAL, DOUBLE: the boxed statistics value as-is.</li>
 *   <li>DECIMAL: plain (non-scientific) string using the scale of the Parquet logical type.</li>
 *   <li>VARCHAR: the statistics bytes decoded as UTF-8.</li>
 * </ul>
 *
 * @param type the Trino type of the column
 * @param statistics the Parquet statistics of the column
 * @return the converted maximum, or empty when the statistics carry no maximum, the type is
 *         BOOLEAN, or the type/statistics combination is not handled (a warning is logged
 *         in the last case)
 * @throws IllegalArgumentException if a branch guarded by {@code checkArgument} sees a
 *         statistics class (or, for DECIMAL, a logical type) it does not accept
 */
private static Optional<Object> getMax(Type type, Statistics<?> statistics) {
    if (statistics.genericGetMax() == null) {
        return Optional.empty();
    }

    if (type.equals(DateType.DATE)) {
        checkArgument(statistics instanceof IntStatistics, "Column with DATE type contained invalid statistics: %s", statistics);
        int epochDay = ((IntStatistics) statistics).genericGetMax();
        return Optional.of(LocalDate.ofEpochDay(epochDay).format(ISO_LOCAL_DATE));
    }

    if (type instanceof TimestampWithTimeZoneType) {
        if (statistics instanceof LongStatistics) {
            long epochMillis = ((LongStatistics) statistics).genericGetMax();
            ZonedDateTime utcTime = ZonedDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC);
            return Optional.of(ISO_INSTANT.format(utcTime));
        }
        if (statistics instanceof BinaryStatistics) {
            DecodedTimestamp decoded = decodeInt96Timestamp(((BinaryStatistics) statistics).genericGetMax());
            Instant exactInstant = Instant.ofEpochSecond(decoded.getEpochSeconds(), decoded.getNanosOfSecond());
            ZonedDateTime exact = ZonedDateTime.ofInstant(exactInstant, UTC);
            ZonedDateTime roundedUp = exact.truncatedTo(MILLIS);
            // This is a maximum: if truncation dropped sub-millisecond digits, move up one
            // millisecond so the reported value is never below the real one.
            if (exact.isAfter(roundedUp)) {
                roundedUp = roundedUp.plus(1, MILLIS);
            }
            return Optional.of(ISO_INSTANT.format(roundedUp));
        }
        // Any other statistics class falls through to the checks below.
    }

    if (type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER)) {
        checkArgument(statistics instanceof IntStatistics || statistics instanceof LongStatistics, "Column with %s type contained invalid statistics: %s", type, statistics);
        Object integralMax = statistics.genericGetMax();
        return Optional.of(integralMax);
    }

    if (type.equals(REAL)) {
        checkArgument(statistics instanceof FloatStatistics, "Column with REAL type contained invalid statistics: %s", statistics);
        FloatStatistics floatStatistics = (FloatStatistics) statistics;
        return Optional.of(floatStatistics.genericGetMax());
    }

    if (type.equals(DOUBLE)) {
        checkArgument(statistics instanceof DoubleStatistics, "Column with DOUBLE type contained invalid statistics: %s", statistics);
        DoubleStatistics doubleStatistics = (DoubleStatistics) statistics;
        return Optional.of(doubleStatistics.genericGetMax());
    }

    if (type instanceof DecimalType) {
        LogicalTypeAnnotation annotation = statistics.type().getLogicalTypeAnnotation();
        checkArgument(annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation, "DECIMAL column had invalid Parquet Logical Type: %s", annotation);
        int scale = ((LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) annotation).getScale();

        if (statistics instanceof IntStatistics) {
            BigDecimal fromInt = BigDecimal.valueOf(((IntStatistics) statistics).getMax()).movePointLeft(scale);
            return Optional.of(fromInt.toPlainString());
        }
        if (statistics instanceof LongStatistics) {
            BigDecimal fromLong = BigDecimal.valueOf(((LongStatistics) statistics).getMax()).movePointLeft(scale);
            return Optional.of(fromLong.toPlainString());
        }
        if (statistics instanceof BinaryStatistics) {
            // The bytes are interpreted as the unscaled value (two's-complement, via BigInteger).
            BigInteger unscaled = new BigInteger(((BinaryStatistics) statistics).genericGetMax().getBytes());
            return Optional.of(new BigDecimal(unscaled, scale).toPlainString());
        }
        // Any other statistics class falls through to the checks below.
    }

    if (type instanceof VarcharType) {
        byte[] utf8Bytes = ((BinaryStatistics) statistics).genericGetMax().getBytes();
        return Optional.of(new String(utf8Bytes, UTF_8));
    }

    if (type.equals(BOOLEAN)) {
        // Boolean columns do not collect min/max stats
        return Optional.empty();
    }

    LOG.warn("Accumulating Parquet statistics with Trino type: %s and Parquet statistics of type: %s is not supported", type, statistics);
    return Optional.empty();
}
use of io.trino.spi.type.VarcharType in project trino by trinodb.
the class TestHiveBucketing method toNativeContainerValue.
/**
 * Converts a Hive-side value into the object the given Trino type uses as its native
 * container value.
 *
 * <p>Array, row and map values are written into a single-entry block via
 * {@code appendToBlockBuilder} and read back with {@code type.getObject}. Scalar values are
 * either returned unchanged or cast/widened as each branch below shows.
 *
 * @param type the Trino type describing {@code hiveValue}
 * @param hiveValue the value to convert; may be {@code null}
 * @return the converted value, or {@code null} when {@code hiveValue} is {@code null}
 * @throws IllegalArgumentException if {@code type} is none of the handled types
 */
private static Object toNativeContainerValue(Type type, Object hiveValue) {
    if (hiveValue == null) {
        return null;
    }

    boolean isContainer = type instanceof ArrayType || type instanceof RowType || type instanceof MapType;
    if (isContainer) {
        BlockBuilder container = type.createBlockBuilder(null, 1);
        BlockBuilder entryBuilder = container.beginBlockEntry();
        if (type instanceof ArrayType) {
            // Every element uses the single element type parameter.
            for (Object element : (Iterable<?>) hiveValue) {
                appendToBlockBuilder(type.getTypeParameters().get(0), element, entryBuilder);
            }
        }
        else if (type instanceof RowType) {
            // The i-th value is written with the i-th type parameter.
            int fieldIndex = 0;
            for (Object fieldValue : (Iterable<?>) hiveValue) {
                appendToBlockBuilder(type.getTypeParameters().get(fieldIndex), fieldValue, entryBuilder);
                fieldIndex++;
            }
        }
        else {
            // MapType: key then value for each entry, typed by parameters 0 and 1.
            for (Entry<?, ?> mapEntry : ((Map<?, ?>) hiveValue).entrySet()) {
                appendToBlockBuilder(type.getTypeParameters().get(0), mapEntry.getKey(), entryBuilder);
                appendToBlockBuilder(type.getTypeParameters().get(1), mapEntry.getValue(), entryBuilder);
            }
        }
        container.closeEntry();
        return type.getObject(container, 0);
    }

    // These types take the Hive value unchanged.
    if (type instanceof BooleanType || type instanceof BigintType || type instanceof DoubleType) {
        return hiveValue;
    }

    // Narrow integral types are widened to long.
    if (type instanceof TinyintType) {
        return (long) (byte) hiveValue;
    }
    if (type instanceof SmallintType) {
        return (long) (short) hiveValue;
    }
    if (type instanceof IntegerType) {
        return (long) (int) hiveValue;
    }

    if (type instanceof RealType) {
        // The raw IEEE-754 bits of the float, widened to long.
        int rawBits = Float.floatToRawIntBits((float) hiveValue);
        return (long) rawBits;
    }
    if (type instanceof VarcharType) {
        return Slices.utf8Slice(hiveValue.toString());
    }
    if (type instanceof DateType) {
        return (long) ((Date) hiveValue).toEpochDay();
    }

    throw new IllegalArgumentException("Unsupported bucketing type: " + type);
}
use of io.trino.spi.type.VarcharType in project trino by trinodb.
the class CassandraPageSink method appendColumn.
/**
 * Reads the value at {@code position} of the given channel and appends its Java
 * representation to {@code values}.
 *
 * <p>A null position appends {@code null}. Otherwise the conversion is chosen by the
 * channel's type taken from {@code columnTypes}.
 *
 * @param values the list receiving the converted value
 * @param page the page to read from
 * @param position the row position within the page
 * @param channel the index of the block within the page and of its type in {@code columnTypes}
 * @throws TrinoException with {@code NOT_SUPPORTED} if the channel's type is not handled
 */
private void appendColumn(List<Object> values, Page page, int position, int channel) {
    Block block = page.getBlock(channel);
    Type type = columnTypes.get(channel);

    if (block.isNull(position)) {
        values.add(null);
        return;
    }
    if (BOOLEAN.equals(type)) {
        values.add(type.getBoolean(block, position));
        return;
    }
    if (BIGINT.equals(type)) {
        values.add(type.getLong(block, position));
        return;
    }
    if (INTEGER.equals(type)) {
        values.add(toIntExact(type.getLong(block, position)));
        return;
    }
    if (SMALLINT.equals(type)) {
        values.add(Shorts.checkedCast(type.getLong(block, position)));
        return;
    }
    if (TINYINT.equals(type)) {
        values.add(SignedBytes.checkedCast(type.getLong(block, position)));
        return;
    }
    if (DOUBLE.equals(type)) {
        values.add(type.getDouble(block, position));
        return;
    }
    if (REAL.equals(type)) {
        // The long read from the block holds the float's int bits.
        int floatBits = toIntExact(type.getLong(block, position));
        values.add(intBitsToFloat(floatBits));
        return;
    }
    if (DATE.equals(type)) {
        values.add(toCassandraDate.apply(type.getLong(block, position)));
        return;
    }
    if (TIMESTAMP_TZ_MILLIS.equals(type)) {
        long packedValue = type.getLong(block, position);
        values.add(new Timestamp(unpackMillisUtc(packedValue)));
        return;
    }
    if (type instanceof VarcharType) {
        values.add(type.getSlice(block, position).toStringUtf8());
        return;
    }
    if (VARBINARY.equals(type)) {
        values.add(type.getSlice(block, position).toByteBuffer());
        return;
    }
    if (UuidType.UUID.equals(type)) {
        values.add(trinoUuidToJavaUuid(type.getSlice(block, position)));
        return;
    }

    throw new TrinoException(NOT_SUPPORTED, "Unsupported column type: " + type.getDisplayName());
}
use of io.trino.spi.type.VarcharType in project trino by trinodb.
the class TestDetermineJoinDistributionType method testReplicatesWhenSourceIsSmall.
/**
 * Checks the distribution type chosen when stats are overridden on both the join's direct
 * inputs ("valuesA", "filterB") and on the values node beneath the filter ("valuesB").
 * Covers three plans: the filtered side as right input, the same join with sides swapped,
 * and a LEFT join where only the "valuesB" stats are known.
 */
@Test
public void testReplicatesWhenSourceIsSmall() {
    // variable width so that average row size is respected
    VarcharType unboundedVarchar = createUnboundedVarcharType();
    int rowsInA = 10_000;
    int rowsInB = 10;

    // "valuesA": output size exceeds AUTOMATIC_RESTRICTED limit
    PlanNodeStatsEstimate statsForValuesA = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInA)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("A1"),
                    new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10)))
            .build();
    // "filterB": output size exceeds AUTOMATIC_RESTRICTED limit
    PlanNodeStatsEstimate statsForFilterB = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInB)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("B1"),
                    new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10)))
            .build();
    // "valuesB": output size does not exceed AUTOMATIC_RESTRICTED limit
    PlanNodeStatsEstimate statsForValuesB = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInB)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("B1"),
                    new SymbolStatsEstimate(0, 100, 0, 64, 10)))
            .build();

    // immediate join sources exceeds AUTOMATIC_RESTRICTED limit but build tables are small
    // therefore replicated distribution type is chosen
    assertDetermineJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB")
            .overrideStats("valuesA", statsForValuesA)
            .overrideStats("filterB", statsForFilterB)
            .overrideStats("valuesB", statsForValuesB)
            .on(planBuilder -> {
                Symbol symbolA = planBuilder.symbol("A1", unboundedVarchar);
                Symbol symbolB = planBuilder.symbol("B1", unboundedVarchar);
                return planBuilder.join(
                        INNER,
                        planBuilder.values(new PlanNodeId("valuesA"), rowsInA, symbolA),
                        planBuilder.filter(
                                new PlanNodeId("filterB"),
                                TRUE_LITERAL,
                                planBuilder.values(new PlanNodeId("valuesB"), rowsInB, symbolB)),
                        ImmutableList.of(new JoinNode.EquiJoinClause(symbolA, symbolB)),
                        ImmutableList.of(symbolA),
                        ImmutableList.of(symbolB),
                        Optional.empty());
            })
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    filter("true", values(ImmutableMap.of("B1", 0)))));

    // same but with join sides reversed
    assertDetermineJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB")
            .overrideStats("valuesA", statsForValuesA)
            .overrideStats("filterB", statsForFilterB)
            .overrideStats("valuesB", statsForValuesB)
            .on(planBuilder -> {
                Symbol symbolA = planBuilder.symbol("A1", unboundedVarchar);
                Symbol symbolB = planBuilder.symbol("B1", unboundedVarchar);
                return planBuilder.join(
                        INNER,
                        planBuilder.filter(
                                new PlanNodeId("filterB"),
                                TRUE_LITERAL,
                                planBuilder.values(new PlanNodeId("valuesB"), rowsInB, symbolB)),
                        planBuilder.values(new PlanNodeId("valuesA"), rowsInA, symbolA),
                        ImmutableList.of(new JoinNode.EquiJoinClause(symbolB, symbolA)),
                        ImmutableList.of(symbolB),
                        ImmutableList.of(symbolA),
                        Optional.empty());
            })
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    filter("true", values(ImmutableMap.of("B1", 0)))));

    // only probe side (with small tables) source stats are available, join sides should be flipped
    assertDetermineJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB")
            .overrideStats("valuesA", PlanNodeStatsEstimate.unknown())
            .overrideStats("filterB", PlanNodeStatsEstimate.unknown())
            .overrideStats("valuesB", statsForValuesB)
            .on(planBuilder -> {
                Symbol symbolA = planBuilder.symbol("A1", unboundedVarchar);
                Symbol symbolB = planBuilder.symbol("B1", unboundedVarchar);
                return planBuilder.join(
                        LEFT,
                        planBuilder.filter(
                                new PlanNodeId("filterB"),
                                TRUE_LITERAL,
                                planBuilder.values(new PlanNodeId("valuesB"), rowsInB, symbolB)),
                        planBuilder.values(new PlanNodeId("valuesA"), rowsInA, symbolA),
                        ImmutableList.of(new JoinNode.EquiJoinClause(symbolB, symbolA)),
                        ImmutableList.of(symbolB),
                        ImmutableList.of(symbolA),
                        Optional.empty());
            })
            .matches(join(
                    RIGHT,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(PARTITIONED),
                    values(ImmutableMap.of("A1", 0)),
                    filter("true", values(ImmutableMap.of("B1", 0)))));
}
use of io.trino.spi.type.VarcharType in project trino by trinodb.
the class TestDetermineJoinDistributionType method testReplicatesWhenNotRestricted.
/**
 * Runs the same inner join twice under AUTOMATIC distribution with a 100MB
 * JOIN_MAX_BROADCAST_TABLE_SIZE: first with the smaller overridden stats, expecting
 * REPLICATED, then with much larger per-symbol stats, expecting PARTITIONED.
 */
@Test
public void testReplicatesWhenNotRestricted() {
    // variable width so that average row size is respected
    VarcharType unboundedVarchar = createUnboundedVarcharType();
    int rowsInA = 10_000;
    int rowsInB = 10;

    PlanNodeStatsEstimate smallProbeStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInA)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("A1"),
                    new SymbolStatsEstimate(0, 100, 0, 640000, 10)))
            .build();
    PlanNodeStatsEstimate smallBuildStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInB)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("B1"),
                    new SymbolStatsEstimate(0, 100, 0, 640000, 10)))
            .build();

    // B table is small enough to be replicated in AUTOMATIC_RESTRICTED mode
    assertDetermineJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB")
            .overrideStats("valuesA", smallProbeStats)
            .overrideStats("valuesB", smallBuildStats)
            .on(planBuilder -> {
                Symbol symbolA = planBuilder.symbol("A1", unboundedVarchar);
                Symbol symbolB = planBuilder.symbol("B1", unboundedVarchar);
                return planBuilder.join(
                        INNER,
                        planBuilder.values(new PlanNodeId("valuesA"), rowsInA, symbolA),
                        planBuilder.values(new PlanNodeId("valuesB"), rowsInB, symbolB),
                        ImmutableList.of(new JoinNode.EquiJoinClause(symbolA, symbolB)),
                        ImmutableList.of(symbolA),
                        ImmutableList.of(symbolB),
                        Optional.empty());
            })
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));

    PlanNodeStatsEstimate largeProbeStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInA)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("A1"),
                    new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10)))
            .build();
    PlanNodeStatsEstimate largeBuildStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowsInB)
            .addSymbolStatistics(ImmutableMap.of(
                    new Symbol("B1"),
                    new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10)))
            .build();

    // B table exceeds AUTOMATIC_RESTRICTED limit therefore it is partitioned
    assertDetermineJoinDistributionType()
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB")
            .overrideStats("valuesA", largeProbeStats)
            .overrideStats("valuesB", largeBuildStats)
            .on(planBuilder -> {
                Symbol symbolA = planBuilder.symbol("A1", unboundedVarchar);
                Symbol symbolB = planBuilder.symbol("B1", unboundedVarchar);
                return planBuilder.join(
                        INNER,
                        planBuilder.values(new PlanNodeId("valuesA"), rowsInA, symbolA),
                        planBuilder.values(new PlanNodeId("valuesB"), rowsInB, symbolB),
                        ImmutableList.of(new JoinNode.EquiJoinClause(symbolA, symbolB)),
                        ImmutableList.of(symbolA),
                        ImmutableList.of(symbolB),
                        Optional.empty());
            })
            .matches(join(
                    INNER,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(PARTITIONED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
Aggregations