Example 11 with DOUBLE

Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.

The class TestSampleStatsRule, method testStatsForSampleNode.

@Test
public void testStatsForSampleNode() {
    tester().assertStatsFor(pb -> {
        Symbol a = pb.symbol("a", BIGINT);
        Symbol b = pb.symbol("b", DOUBLE);
        return pb.sample(0.33, SampleNode.Type.BERNOULLI, pb.values(a, b));
    })
            .withSourceStats(PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(100)
                    .addSymbolStatistics(new Symbol("a"), SymbolStatsEstimate.builder()
                            .setDistinctValuesCount(20)
                            .setNullsFraction(0.3)
                            .setLowValue(1)
                            .setHighValue(30)
                            .build())
                    .addSymbolStatistics(new Symbol("b"), SymbolStatsEstimate.builder()
                            .setDistinctValuesCount(40)
                            .setNullsFraction(0.6)
                            .setLowValue(13.5)
                            .setHighValue(POSITIVE_INFINITY)
                            .build())
                    .build())
            .check(check -> check
                    .outputRowsCount(33)
                    .symbolStats("a", assertion -> assertion
                            .dataSizeUnknown()
                            .distinctValuesCount(20)
                            .nullsFraction(0.3)
                            .lowValue(1)
                            .highValue(30))
                    .symbolStats("b", assertion -> assertion
                            .dataSizeUnknown()
                            .distinctValuesCount(23.1)
                            .nullsFraction(0.3)
                            .lowValue(13.5)
                            .highValueUnknown()));
}
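The asserted numbers are consistent with simple arithmetic, sketched below (an illustration, not part of the test; it assumes the sample rule scales the output row count by the sample ratio and caps a symbol's NDV at its expected non-null row count):

// Hedged walkthrough of the asserted values (hypothetical variable names):
double outputRows = 100 * 0.33;                  // 33, as asserted by outputRowsCount(33)
double nonNullRowsB = outputRows * (1 - 0.3);    // 23.1 non-null rows for b, using the asserted nulls fraction
double cappedNdvB = Math.min(40, nonNullRowsB);  // 23.1: b's source NDV (40) capped at the non-null row count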
Also used : DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) Symbol(io.trino.sql.planner.Symbol) POSITIVE_INFINITY(java.lang.Double.POSITIVE_INFINITY) BIGINT(io.trino.spi.type.BigintType.BIGINT) SampleNode(io.trino.sql.planner.plan.SampleNode) Test(org.testng.annotations.Test)

Example 12 with DOUBLE

Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.

The class TestQuantileDigestAggregationFunction, method assertPercentilesWithinError.

private void assertPercentilesWithinError(String type, SqlVarbinary binary, double error, List<? extends Number> rows, double[] percentiles) {
    List<Double> boxedPercentiles = Arrays.stream(percentiles).sorted().boxed().collect(toImmutableList());
    List<Number> lowerBounds = boxedPercentiles.stream().map(percentile -> getLowerBound(error, rows, percentile)).collect(toImmutableList());
    List<Number> upperBounds = boxedPercentiles.stream().map(percentile -> getUpperBound(error, rows, percentile)).collect(toImmutableList());
    // Ensure each estimated quantile value is at least the corresponding lower bound
    functionAssertions.assertFunction(
            format(
                    "zip_with(values_at_quantiles(CAST(X'%s' AS qdigest(%s)), ARRAY[%s]), ARRAY[%s], (value, lowerbound) -> value >= lowerbound)",
                    binary.toString().replaceAll("\\s+", " "),
                    type,
                    ARRAY_JOINER.join(boxedPercentiles),
                    ARRAY_JOINER.join(lowerBounds)),
            new ArrayType(BOOLEAN),
            Collections.nCopies(percentiles.length, true));
    // Ensure each estimated quantile value is at most the corresponding upper bound
    functionAssertions.assertFunction(
            format(
                    "zip_with(values_at_quantiles(CAST(X'%s' AS qdigest(%s)), ARRAY[%s]), ARRAY[%s], (value, upperbound) -> value <= upperbound)",
                    binary.toString().replaceAll("\\s+", " "),
                    type,
                    ARRAY_JOINER.join(boxedPercentiles),
                    ARRAY_JOINER.join(upperBounds)),
            new ArrayType(BOOLEAN),
            Collections.nCopies(percentiles.length, true));
}
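The helpers getLowerBound and getUpperBound are not shown in this snippet. A minimal sketch of the idea follows (names, rounding, and clamping are assumptions, not the test's exact code): a q-digest with maximum rank error `error` may return, for quantile p, any input whose rank lies within [p - error, p + error], so the bounds are simply the sorted inputs at those shifted ranks.

// Hypothetical bound helpers; not the test's actual implementation.
private static Number boundAt(List<? extends Number> rows, double quantile) {
    List<Double> sorted = rows.stream()
            .map(Number::doubleValue)
            .sorted()
            .collect(toImmutableList());
    // clamp the shifted rank into the valid index range
    int index = (int) Math.round(quantile * (sorted.size() - 1));
    return sorted.get(Math.max(0, Math.min(sorted.size() - 1, index)));
}

private static Number getLowerBound(double error, List<? extends Number> rows, double quantile) {
    return boundAt(rows, quantile - error);
}

private static Number getUpperBound(double error, List<? extends Number> rows, double quantile) {
    return boundAt(rows, quantile + error);
}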
Also used : Arrays(java.util.Arrays) QuantileDigest(io.airlift.stats.QuantileDigest) TypeSignatureProvider.fromTypes(io.trino.sql.analyzer.TypeSignatureProvider.fromTypes) TestingFunctionResolution(io.trino.metadata.TestingFunctionResolution) Page(io.trino.spi.Page) BlockAssertions.createLongSequenceBlock(io.trino.block.BlockAssertions.createLongSequenceBlock) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) Test(org.testng.annotations.Test) BlockAssertions.createDoubleSequenceBlock(io.trino.block.BlockAssertions.createDoubleSequenceBlock) BlockAssertions.createSequenceBlockOfReal(io.trino.block.BlockAssertions.createSequenceBlockOfReal) NaN(java.lang.Double.NaN) Block(io.trino.spi.block.Block) FloatingPointBitsConverterUtil.floatToSortableInt(io.trino.operator.aggregation.FloatingPointBitsConverterUtil.floatToSortableInt) BlockAssertions.createRLEBlock(io.trino.block.BlockAssertions.createRLEBlock) Integer.min(java.lang.Integer.min) QDIGEST_EQUALITY(io.trino.operator.aggregation.TestMergeQuantileDigestFunction.QDIGEST_EQUALITY) Floats(com.google.common.primitives.Floats) LongStream(java.util.stream.LongStream) BlockAssertions.createDoublesBlock(io.trino.block.BlockAssertions.createDoublesBlock) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) StandardTypes(io.trino.spi.type.StandardTypes) ArrayType(io.trino.spi.type.ArrayType) FloatingPointBitsConverterUtil.doubleToSortableLong(io.trino.operator.aggregation.FloatingPointBitsConverterUtil.doubleToSortableLong) Collectors(java.util.stream.Collectors) AbstractTestFunctions(io.trino.operator.scalar.AbstractTestFunctions) BlockAssertions.createBlockOfReals(io.trino.block.BlockAssertions.createBlockOfReals) String.format(java.lang.String.format) QualifiedName(io.trino.sql.tree.QualifiedName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) Integer.max(java.lang.Integer.max) SqlVarbinary(io.trino.spi.type.SqlVarbinary) List(java.util.List) AggregationTestUtils.assertAggregation(io.trino.operator.aggregation.AggregationTestUtils.assertAggregation) BIGINT(io.trino.spi.type.BigintType.BIGINT) BlockAssertions.createLongsBlock(io.trino.block.BlockAssertions.createLongsBlock) TypeSignatureProvider(io.trino.sql.analyzer.TypeSignatureProvider) Collections(java.util.Collections) REAL(io.trino.spi.type.RealType.REAL) Joiner(com.google.common.base.Joiner)

Example 13 with DOUBLE

Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.

The class TestHistogram, method testManyValuesInducingRehash.

private static void testManyValuesInducingRehash(TestingAggregationFunction aggregationFunction) {
    double distinctFraction = 0.1;
    int numGroups = 50000;
    int itemCount = 30;
    Random random = new Random();
    GroupedAggregator groupedAggregator = aggregationFunction.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()).createGroupedAggregator();
    for (int j = 0; j < numGroups; j++) {
        Map<String, Long> expectedValues = new HashMap<>();
        List<String> valueList = new ArrayList<>();
        for (int i = 0; i < itemCount; i++) {
            String str = String.valueOf(i % 10);
            // repeat the digit itemCount times to build a 30-character value
            String item = IntStream.range(0, itemCount).mapToObj(x -> str).collect(Collectors.joining());
            if (random.nextDouble() < distinctFraction) {
                // prefix with the group index to produce a value unique to this group
                item = j + "-" + item;
            }
            valueList.add(item);
            expectedValues.compute(item, (k, v) -> v == null ? 1L : v + 1);
        }
        Block block = createStringsBlock(valueList);
        AggregationTestInputBuilder testInputBuilder = new AggregationTestInputBuilder(new Block[] { block }, aggregationFunction);
        AggregationTestInput test1 = testInputBuilder.build();
        test1.runPagesOnAggregatorWithAssertion(j, aggregationFunction.getFinalType(), groupedAggregator, new AggregationTestOutput(expectedValues));
    }
}
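With 50,000 groups, the grouped histogram state grows far past any small initial capacity, which is the rehash the test name refers to. As a side note, the per-item counting can also be written with Map.merge; a small equivalent sketch:

// Equivalent to expectedValues.compute(item, (k, v) -> v == null ? 1L : v + 1)
expectedValues.merge(item, 1L, Long::sum);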
Also used : DateTimeZone(org.joda.time.DateTimeZone) TypeSignatureProvider.fromTypes(io.trino.sql.analyzer.TypeSignatureProvider.fromTypes) TestingFunctionResolution(io.trino.metadata.TestingFunctionResolution) AggregationTestInput(io.trino.operator.aggregation.groupby.AggregationTestInput) Test(org.testng.annotations.Test) Random(java.util.Random) AggregationTestInputBuilder(io.trino.operator.aggregation.groupby.AggregationTestInputBuilder) Block(io.trino.spi.block.Block) DateTimeEncoding.unpackZoneKey(io.trino.spi.type.DateTimeEncoding.unpackZoneKey) Map(java.util.Map) TIMESTAMP_WITH_TIME_ZONE(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE) RowType(io.trino.spi.type.RowType) ImmutableMap(com.google.common.collect.ImmutableMap) BlockAssertions.createDoublesBlock(io.trino.block.BlockAssertions.createDoublesBlock) OperatorAssertion.toRow(io.trino.operator.OperatorAssertion.toRow) DateTimeEncoding.packDateTimeWithZone(io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone) ArrayType(io.trino.spi.type.ArrayType) Collectors(java.util.stream.Collectors) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) BlockAssertions.createLongsBlock(io.trino.block.BlockAssertions.createLongsBlock) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) AggregationTestOutput(io.trino.operator.aggregation.groupby.AggregationTestOutput) IntStream(java.util.stream.IntStream) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) SINGLE(io.trino.sql.planner.plan.AggregationNode.Step.SINGLE) HashMap(java.util.HashMap) OptionalInt(java.util.OptionalInt) StructuralTestUtil.mapBlockOf(io.trino.util.StructuralTestUtil.mapBlockOf) ArrayList(java.util.ArrayList) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) TimeZoneKey(io.trino.spi.type.TimeZoneKey) Histogram(io.trino.operator.aggregation.histogram.Histogram) DateTimeZoneIndex.getDateTimeZone(io.trino.util.DateTimeZoneIndex.getDateTimeZone) BlockAssertions.createStringsBlock(io.trino.block.BlockAssertions.createStringsBlock) MapType(io.trino.spi.type.MapType) BlockAssertions.createBooleansBlock(io.trino.block.BlockAssertions.createBooleansBlock) DateTime(org.joda.time.DateTime) Ints(com.google.common.primitives.Ints) BlockAssertions.createStringArraysBlock(io.trino.block.BlockAssertions.createStringArraysBlock) QualifiedName(io.trino.sql.tree.QualifiedName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TimeZoneKey.getTimeZoneKey(io.trino.spi.type.TimeZoneKey.getTimeZoneKey) StructuralTestUtil.mapType(io.trino.util.StructuralTestUtil.mapType) AggregationTestUtils.assertAggregation(io.trino.operator.aggregation.AggregationTestUtils.assertAggregation) Assert.assertTrue(org.testng.Assert.assertTrue) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 14 with DOUBLE

Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.

The class RcFileTester, method preprocessWriteValueOld.

private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS)) {
        long millis = ((SqlTimestamp) value).getMillis();
        if (format == Format.BINARY) {
            millis = HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false);
        }
        return Timestamp.ofEpochMilli(millis);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(format, elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(format, keyType, entry.getKey()), preprocessWriteValueOld(format, valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(format, fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
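The container branches recurse per element with the corresponding type parameter. A minimal usage sketch (hypothetical values, relying on the RcFileTester context above):

// ARRAY(BIGINT): each element is passed back through preprocessWriteValueOld with the
// element type, so the result is a List<Long> suitable for the old Hive writer.
Object converted = preprocessWriteValueOld(Format.BINARY, new ArrayType(BIGINT), ImmutableList.of(1L, 2L, 3L));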
Also used : SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec) PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY(io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY) DateTimeZone(org.joda.time.DateTimeZone) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Date(org.apache.hadoop.hive.common.type.Date) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) Decimals.rescale(io.trino.spi.type.Decimals.rescale) FileSplit(org.apache.hadoop.mapred.FileSplit) RcFileDecoderUtils.findFirstSyncPosition(io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) LazyBinaryArray(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray) IntWritable(org.apache.hadoop.io.IntWritable) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) MICROSECONDS_PER_MILLISECOND(io.trino.type.DateTimes.MICROSECONDS_PER_MILLISECOND) UncheckedIOException(java.io.UncheckedIOException) BooleanWritable(org.apache.hadoop.io.BooleanWritable) RecordReader(org.apache.hadoop.mapred.RecordReader) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Lz4Codec(org.apache.hadoop.io.compress.Lz4Codec) Iterables(com.google.common.collect.Iterables) Slice(io.airlift.slice.Slice) 
TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) StructObject(org.apache.hadoop.hive.serde2.StructObject) Page(io.trino.spi.Page) SqlDecimal(io.trino.spi.type.SqlDecimal) Functions.constant(com.google.common.base.Functions.constant) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) ArrayList(java.util.ArrayList) NONE(io.trino.rcfile.RcFileTester.Compression.NONE) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) Lists(com.google.common.collect.Lists) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) BZIP2(io.trino.rcfile.RcFileTester.Compression.BZIP2) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) HadoopNative(io.trino.hadoop.HadoopNative) LinkedHashSet(java.util.LinkedHashSet) Int128(io.trino.spi.type.Int128) Properties(java.util.Properties) MapType(io.trino.spi.type.MapType) AbstractIterator(com.google.common.collect.AbstractIterator) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) NULL(org.apache.hadoop.mapred.Reporter.NULL) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) SqlVarbinary(io.trino.spi.type.SqlVarbinary) SIZE_OF_LONG(io.airlift.slice.SizeOf.SIZE_OF_LONG) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) TINYINT(io.trino.spi.type.TinyintType.TINYINT) BlockBuilder(io.trino.spi.block.BlockBuilder) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) FloatWritable(org.apache.hadoop.io.FloatWritable) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) Iterables.transform(com.google.common.collect.Iterables.transform) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) LongWritable(org.apache.hadoop.io.LongWritable) SNAPPY(io.trino.rcfile.RcFileTester.Compression.SNAPPY) TextRcFileEncoding(io.trino.rcfile.text.TextRcFileEncoding) SqlTimestamp(io.trino.spi.type.SqlTimestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Block(io.trino.spi.block.Block) PRESTO_RCFILE_WRITER_VERSION(io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) RowType(io.trino.spi.type.RowType) SIZE_OF_INT(io.airlift.slice.SizeOf.SIZE_OF_INT) ImmutableSet(com.google.common.collect.ImmutableSet) 
DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) SESSION(io.trino.testing.TestingConnectorSession.SESSION) ArrayType(io.trino.spi.type.ArrayType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SqlDate(io.trino.spi.type.SqlDate) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Decimals(io.trino.spi.type.Decimals) Entry(java.util.Map.Entry) LZ4(io.trino.rcfile.RcFileTester.Compression.LZ4) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) DecimalType(io.trino.spi.type.DecimalType) MAP(io.trino.spi.type.StandardTypes.MAP) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Assert.assertNull(org.testng.Assert.assertNull) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) DoubleWritable(org.apache.hadoop.io.DoubleWritable) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) VarcharType(io.trino.spi.type.VarcharType) OutputStreamSliceOutput(io.airlift.slice.OutputStreamSliceOutput) COMPRESS_CODEC(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC) ImmutableList(com.google.common.collect.ImmutableList) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Math.toIntExact(java.lang.Math.toIntExact) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Iterators.advance(com.google.common.collect.Iterators.advance) FileInputStream(java.io.FileInputStream) JobConf(org.apache.hadoop.mapred.JobConf) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) Collectors.toList(java.util.stream.Collectors.toList) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Closeable(java.io.Closeable) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Collections(java.util.Collections) InputStream(java.io.InputStream) ZLIB(io.trino.rcfile.RcFileTester.Compression.ZLIB)

Example 15 with DOUBLE

Use of io.trino.spi.type.DoubleType.DOUBLE in project trino by trinodb.

The class TestComparisonStatsCalculator, method symbolToSymbolEqualStats.

@Test
public void symbolToSymbolEqualStats() {
    // z's stats should be unchanged when it is not involved, except for capping its NDV to the row count
    // Equal ranges
    double rowCount = 2.7;
    assertCalculate(new ComparisonExpression(EQUAL, new SymbolReference("u"), new SymbolReference("w")))
            .outputRowsCount(rowCount)
            .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount)))
            .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount)))
            .symbolStats("z", equalTo(capNDV(zStats, rowCount)));
    // One symbol's range is within the other's
    rowCount = 9.375;
    assertCalculate(new ComparisonExpression(EQUAL, new SymbolReference("x"), new SymbolReference("y")))
            .outputRowsCount(rowCount)
            .symbolStats("x", symbolAssert -> {
                symbolAssert.averageRowSize(4).lowValue(0).highValue(5).distinctValuesCount(9.375).nullsFraction(0);
            })
            .symbolStats("y", symbolAssert -> {
                symbolAssert.averageRowSize(4).lowValue(0).highValue(5).distinctValuesCount(9.375).nullsFraction(0);
            })
            .symbolStats("z", equalTo(capNDV(zStats, rowCount)));
    // Partially overlapping ranges
    rowCount = 16.875;
    assertCalculate(new ComparisonExpression(EQUAL, new SymbolReference("x"), new SymbolReference("w")))
            .outputRowsCount(rowCount)
            .symbolStats("x", symbolAssert -> {
                symbolAssert.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(16.875).nullsFraction(0);
            })
            .symbolStats("w", symbolAssert -> {
                symbolAssert.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(16.875).nullsFraction(0);
            })
            .symbolStats("z", equalTo(capNDV(zStats, rowCount)));
    // Neither range contains the other, and one symbol has much higher cardinality, so its NDV within the intersection exceeds the other symbol's total NDV
    rowCount = 2.25;
    assertCalculate(new ComparisonExpression(EQUAL, new SymbolReference("x"), new SymbolReference("u")))
            .outputRowsCount(rowCount)
            .symbolStats("x", symbolAssert -> {
                symbolAssert.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(2.25).nullsFraction(0);
            })
            .symbolStats("u", symbolAssert -> {
                symbolAssert.averageRowSize(6).lowValue(0).highValue(10).distinctValuesCount(2.25).nullsFraction(0);
            })
            .symbolStats("z", equalTo(capNDV(zStats, rowCount)));
}
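The asserted row counts match the shape of the usual equality-selectivity estimate. A rough sketch of that textbook formula (an illustration only; the calculator's exact rule also accounts for range overlap and is not reproduced here):

// Textbook estimate for "a = b": drop null rows on both sides, then assume each row
// matches with probability 1 / max(ndvA, ndvB).
static double estimateEqualityRows(double inputRows, double nullsA, double nullsB, double ndvA, double ndvB) {
    return inputRows * (1 - nullsA) * (1 - nullsB) / Math.max(ndvA, ndvB);
}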
Also used : TypeAnalyzer.createTestingTypeAnalyzer(io.trino.sql.planner.TypeAnalyzer.createTestingTypeAnalyzer) POSITIVE_INFINITY(java.lang.Double.POSITIVE_INFINITY) DoubleType(io.trino.spi.type.DoubleType) Type(io.trino.spi.type.Type) Test(org.testng.annotations.Test) NEGATIVE_INFINITY(java.lang.Double.NEGATIVE_INFINITY) ArrayList(java.util.ArrayList) Cast(io.trino.sql.tree.Cast) VarcharType(io.trino.spi.type.VarcharType) NaN(java.lang.Double.NaN) LongLiteral(io.trino.sql.tree.LongLiteral) GREATER_THAN(io.trino.sql.tree.ComparisonExpression.Operator.GREATER_THAN) Symbol(io.trino.sql.planner.Symbol) StringLiteral(io.trino.sql.tree.StringLiteral) ImmutableMap(com.google.common.collect.ImmutableMap) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) TypeSignatureTranslator.toSqlType(io.trino.sql.analyzer.TypeSignatureTranslator.toSqlType) LESS_THAN(io.trino.sql.tree.ComparisonExpression.Operator.LESS_THAN) Math.min(java.lang.Math.min) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) DoubleLiteral(io.trino.sql.tree.DoubleLiteral) String.format(java.lang.String.format) Collectors.joining(java.util.stream.Collectors.joining) Objects(java.util.Objects) Consumer(java.util.function.Consumer) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) EQUAL(io.trino.sql.tree.ComparisonExpression.Operator.EQUAL) NOT_EQUAL(io.trino.sql.tree.ComparisonExpression.Operator.NOT_EQUAL) List(java.util.List) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) BIGINT(io.trino.spi.type.BigintType.BIGINT) PLANNER_CONTEXT(io.trino.sql.planner.TestingPlannerContext.PLANNER_CONTEXT) SymbolReference(io.trino.sql.tree.SymbolReference) Double.isNaN(java.lang.Double.isNaN) TypeProvider(io.trino.sql.planner.TypeProvider) Expression(io.trino.sql.tree.Expression) Session(io.trino.Session)

Aggregations

DOUBLE (io.trino.spi.type.DoubleType.DOUBLE): 32
BIGINT (io.trino.spi.type.BigintType.BIGINT): 28
List (java.util.List): 23
Optional (java.util.Optional): 22
ImmutableList (com.google.common.collect.ImmutableList): 21
ImmutableMap (com.google.common.collect.ImmutableMap): 19
Type (io.trino.spi.type.Type): 18
Test (org.testng.annotations.Test): 18
String.format (java.lang.String.format): 17
Map (java.util.Map): 17
INTEGER (io.trino.spi.type.IntegerType.INTEGER): 15
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 14
DATE (io.trino.spi.type.DateType.DATE): 13
SMALLINT (io.trino.spi.type.SmallintType.SMALLINT): 13
TINYINT (io.trino.spi.type.TinyintType.TINYINT): 13
VarcharType (io.trino.spi.type.VarcharType): 13
ArrayList (java.util.ArrayList): 13
REAL (io.trino.spi.type.RealType.REAL): 12
Slice (io.airlift.slice.Slice): 11
BOOLEAN (io.trino.spi.type.BooleanType.BOOLEAN): 11