Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.
Class TestSampleStatsRule, method testStatsForSampleNode.
/**
 * Checks the statistics derived for a Bernoulli sample node with ratio 0.33
 * over a two-column values source ("a" BIGINT, "b" DOUBLE) whose source
 * estimate reports 100 rows. The expected output is 33 rows with the
 * per-symbol statistics asserted below.
 */
@Test
public void testStatsForSampleNode() {
    // Source statistics for symbol "a": bounded range.
    SymbolStatsEstimate sourceStatsA = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(20)
            .setNullsFraction(0.3)
            .setLowValue(1)
            .setHighValue(30)
            .build();

    // Source statistics for symbol "b": range is open at the high end.
    SymbolStatsEstimate sourceStatsB = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(40)
            .setNullsFraction(0.6)
            .setLowValue(13.5)
            .setHighValue(POSITIVE_INFINITY)
            .build();

    PlanNodeStatsEstimate sourceStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(100)
            .addSymbolStatistics(new Symbol("a"), sourceStatsA)
            .addSymbolStatistics(new Symbol("b"), sourceStatsB)
            .build();

    tester()
            .assertStatsFor(planBuilder -> {
                Symbol a = planBuilder.symbol("a", BIGINT);
                Symbol b = planBuilder.symbol("b", DOUBLE);
                return planBuilder.sample(.33, SampleNode.Type.BERNOULLI, planBuilder.values(a, b));
            })
            .withSourceStats(sourceStats)
            .check(check -> check
                    .outputRowsCount(33)
                    .symbolStats("a", assertion -> assertion
                            .dataSizeUnknown()
                            .distinctValuesCount(20)
                            .nullsFraction(0.3)
                            .lowValue(1)
                            .highValue(30))
                    .symbolStats("b", assertion -> assertion
                            .dataSizeUnknown()
                            .distinctValuesCount(23.1)
                            .nullsFraction(0.3)
                            .lowValue(13.5)
                            .highValueUnknown()));
}
Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.
Class TestQuantileDigestAggregationFunction, method assertPercentilesWithinError.
/**
 * Asserts that the values a serialized quantile digest reports for the given
 * percentiles fall between the lower and upper bounds computed from the raw
 * rows and the allowed error.
 *
 * @param type        element type name substituted into {@code qdigest(%s)}
 * @param binary      serialized digest, embedded as a hex literal in the query
 * @param error       error passed to the bound computations
 * @param rows        values the bounds are computed from
 * @param percentiles percentiles to check; they are sorted before use
 */
private void assertPercentilesWithinError(String type, SqlVarbinary binary, double error, List<? extends Number> rows, double[] percentiles) {
    List<Double> sortedPercentiles = Arrays.stream(percentiles)
            .sorted()
            .boxed()
            .collect(toImmutableList());
    List<Number> lowerBounds = sortedPercentiles.stream()
            .map(percentile -> getLowerBound(error, rows, percentile))
            .collect(toImmutableList());
    List<Number> upperBounds = sortedPercentiles.stream()
            .map(percentile -> getUpperBound(error, rows, percentile))
            .collect(toImmutableList());

    // Pieces shared by both generated expressions.
    String digestHex = binary.toString().replaceAll("\\s+", " ");
    String percentileArray = ARRAY_JOINER.join(sortedPercentiles);
    List<Boolean> allTrue = Collections.nCopies(percentiles.length, true);

    // Ensure that the lower bound of each item in the distribution is not greater than the chosen quantiles
    String lowerBoundCheck = format(
            "zip_with(values_at_quantiles(CAST(X'%s' AS qdigest(%s)), ARRAY[%s]), ARRAY[%s], (value, lowerbound) -> value >= lowerbound)",
            digestHex,
            type,
            percentileArray,
            ARRAY_JOINER.join(lowerBounds));
    functionAssertions.assertFunction(lowerBoundCheck, new ArrayType(BOOLEAN), allTrue);

    // Ensure that the upper bound of each item in the distribution is not less than the chosen quantiles
    String upperBoundCheck = format(
            "zip_with(values_at_quantiles(CAST(X'%s' AS qdigest(%s)), ARRAY[%s]), ARRAY[%s], (value, upperbound) -> value <= upperbound)",
            digestHex,
            type,
            percentileArray,
            ARRAY_JOINER.join(upperBounds));
    functionAssertions.assertFunction(upperBoundCheck, new ArrayType(BOOLEAN), allTrue);
}
Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.
Class TestHistogram, method testManyValuesInducingRehash.
/**
 * Runs one grouped aggregator over many groups of string values and checks,
 * group by group, that the aggregation output matches a locally computed
 * value-to-count map.
 *
 * <p>Each group receives {@code itemCount} strings. A string is normally one
 * digit repeated {@code itemCount} times, so it repeats across groups; with
 * probability {@code distinctFraction} it is prefixed with the group id so
 * that it is unique to that group. Judging by the method name, the volume of
 * values is meant to force the aggregation's internal hash structure to grow.
 *
 * @param aggregationFunction aggregation under test; supplies the grouped
 *                            aggregator and the final type used in the assertion
 */
private static void testManyValuesInducingRehash(TestingAggregationFunction aggregationFunction) {
    // Written as a double literal: 0.1f would be widened to 0.100000001...
    double distinctFraction = 0.1;
    int numGroups = 50000;
    int itemCount = 30;
    Random random = new Random();
    GroupedAggregator groupedAggregator = aggregationFunction
            .createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty())
            .createGroupedAggregator();

    for (int j = 0; j < numGroups; j++) {
        Map<String, Long> expectedValues = new HashMap<>();
        List<String> valueList = new ArrayList<>();
        for (int i = 0; i < itemCount; i++) {
            String str = String.valueOf(i % 10);
            String item = IntStream.range(0, itemCount).mapToObj(x -> str).collect(Collectors.joining());
            if (random.nextDouble() < distinctFraction) {
                // produce a unique value for the histogram
                item = j + "-" + item;
            }
            // The value is recorded the same way whether or not it was made unique.
            valueList.add(item);
            expectedValues.merge(item, 1L, Long::sum);
        }

        Block block = createStringsBlock(valueList);
        AggregationTestInputBuilder testInputBuilder = new AggregationTestInputBuilder(new Block[] {block}, aggregationFunction);
        AggregationTestInput test1 = testInputBuilder.build();
        // The same aggregator instance is reused for every group id j.
        test1.runPagesOnAggregatorWithAssertion(j, aggregationFunction.getFinalType(), groupedAggregator, new AggregationTestOutput(expectedValues));
    }
}
Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.
Class RcFileTester, method preprocessWriteValueOld.
/**
 * Converts a test value into the object form returned for the given type,
 * recursing into array, map and row values.
 *
 * @param format file format; only consulted for {@code TIMESTAMP_MILLIS}, where
 *               {@code Format.BINARY} triggers a time-zone conversion of the millis
 * @param type   type describing {@code value}
 * @param value  value to convert; {@code null} is returned unchanged
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 * @throws IllegalArgumentException if {@code type} is not one of the handled types
 */
private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
    if (value == null) {
        return null;
    }

    // Values passed through untouched.
    if (type.equals(BOOLEAN)) {
        return value;
    }

    // Numeric types: narrow or widen to the matching Java primitive wrapper.
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }

    if (type instanceof VarcharType) {
        return value;
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }

    // Temporal types.
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS)) {
        long rawMillis = ((SqlTimestamp) value).getMillis();
        long storedMillis = (format == Format.BINARY)
                ? HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(rawMillis, false)
                : rawMillis;
        return Timestamp.ofEpochMilli(storedMillis);
    }

    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }

    // Container types: convert each nested value with its own type parameter.
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        List<Object> convertedElements = new ArrayList<>();
        for (Object element : (List<?>) value) {
            convertedElements.add(preprocessWriteValueOld(format, elementType, element));
        }
        return convertedElements;
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> convertedEntries = new HashMap<>();
        ((Map<?, ?>) value).forEach((key, entryValue) -> convertedEntries.put(
                preprocessWriteValueOld(format, keyType, key),
                preprocessWriteValueOld(format, valueType, entryValue)));
        return convertedEntries;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> convertedFields = new ArrayList<>();
        // Field values and field types are paired by position.
        int position = 0;
        while (position < fieldValues.size()) {
            convertedFields.add(preprocessWriteValueOld(format, fieldTypes.get(position), fieldValues.get(position)));
            position++;
        }
        return convertedFields;
    }

    throw new IllegalArgumentException("unsupported type: " + type);
}
Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.
Class TestComparisonStatsCalculator, method symbolToSymbolEqualStats.
/**
 * Checks the statistics calculated for symbol-to-symbol equality comparisons
 * across four range relationships: equal ranges, one range inside the other,
 * partially overlapping ranges, and overlapping ranges with very different
 * cardinalities. In every case the uninvolved symbol "z" is expected to keep
 * its statistics apart from NDV capping to the output row count.
 */
@Test
public void symbolToSymbolEqualStats() {
    // z's stats should be unchanged when not involved, except NDV capping to row count

    // Equal ranges
    double equalRangesRowCount = 2.7;
    ComparisonExpression uEqualsW = new ComparisonExpression(EQUAL, new SymbolReference("u"), new SymbolReference("w"));
    assertCalculate(uEqualsW)
            .outputRowsCount(equalRangesRowCount)
            .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), equalRangesRowCount)))
            .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), equalRangesRowCount)))
            .symbolStats("z", equalTo(capNDV(zStats, equalRangesRowCount)));

    // One symbol's range is within the other's
    double nestedRangeRowCount = 9.375;
    ComparisonExpression xEqualsY = new ComparisonExpression(EQUAL, new SymbolReference("x"), new SymbolReference("y"));
    assertCalculate(xEqualsY)
            .outputRowsCount(nestedRangeRowCount)
            .symbolStats("x", stats -> {
                stats.averageRowSize(4).lowValue(0).highValue(5).distinctValuesCount(9.375).nullsFraction(0);
            })
            .symbolStats("y", stats -> {
                stats.averageRowSize(4).lowValue(0).highValue(5).distinctValuesCount(9.375).nullsFraction(0);
            })
            .symbolStats("z", equalTo(capNDV(zStats, nestedRangeRowCount)));

    // Partially overlapping ranges
    double partialOverlapRowCount = 16.875;
    ComparisonExpression xEqualsW = new ComparisonExpression(EQUAL, new SymbolReference("x"), new SymbolReference("w"));
    assertCalculate(xEqualsW)
            .outputRowsCount(partialOverlapRowCount)
            .symbolStats("x", stats -> {
                stats.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(16.875).nullsFraction(0);
            })
            .symbolStats("w", stats -> {
                stats.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(16.875).nullsFraction(0);
            })
            .symbolStats("z", equalTo(capNDV(zStats, partialOverlapRowCount)));

    // None of the ranges is included in the other, and one symbol has much higher cardinality, so that it has bigger NDV in intersect than the other in total
    double skewedCardinalityRowCount = 2.25;
    ComparisonExpression xEqualsU = new ComparisonExpression(EQUAL, new SymbolReference("x"), new SymbolReference("u"));
    assertCalculate(xEqualsU)
            .outputRowsCount(skewedCardinalityRowCount)
            .symbolStats("x", stats -> {
                stats.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(2.25).nullsFraction(0);
            })
            .symbolStats("u", stats -> {
                stats.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(2.25).nullsFraction(0);
            })
            .symbolStats("z", equalTo(capNDV(zStats, skewedCardinalityRowCount)));
}
Aggregations