Example 41 with BIGINT

Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.

From the class TestOrcReaderPositions, method testRowGroupSkipping. The file holds 142,000 sequential values in a single stripe; with ORC's default row-group size of 10,000 rows, the predicate below matches only the two row groups starting at rows 50,000 and 60,000, so the reader should return exactly rows 50,000 through 69,999.

@Test
public void testRowGroupSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create a single-stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);
        // test reading two row groups from middle of file
        OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == rowCount) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return (stats.getMin() == 50_000) || (stats.getMin() == 60_000);
        };
        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
            assertEquals(reader.getFileRowCount(), rowCount);
            assertEquals(reader.getReaderRowCount(), rowCount);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            long position = 50_000;
            while (true) {
                Page page = reader.nextPage();
                if (page == null) {
                    break;
                }
                page = page.getLoadedPage();
                Block block = page.getBlock(0);
                for (int i = 0; i < block.getPositionCount(); i++) {
                    assertEquals(BIGINT.getLong(block, i), position + i);
                }
                assertEquals(reader.getFilePosition(), position);
                assertEquals(reader.getReaderPosition(), position);
                position += page.getPositionCount();
            }
            assertEquals(position, 70_000);
            assertEquals(reader.getFilePosition(), rowCount);
            assertEquals(reader.getReaderPosition(), rowCount);
        }
    }
}
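
createSequentialFile is a private helper of this test class. A plausible sketch of it, assuming it writes a single BIGINT column whose value equals the row index (which is what the assertions above rely on) through the Hive writer helpers the test imports; treat the names and signatures as indicative rather than exact:

private static void createSequentialFile(File file, int count) throws IOException, SerDeException {
    // sketch: serialize rows 0..count-1 into one bigint column via the Hive ORC writer
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, (long) i);
        writer.write(serde.serialize(row, objectInspector));
    }
    writer.close(false);
}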

Example 42 with BIGINT

Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.

From the class TestOrcReaderPositions, method testStripeSkipping. createMultiStripeFile writes 100 rows across five stripes of 20 rows each, with each value three times its row index (hence the stripe statistics 60..117 and 180..237 below); the predicate keeps only the second and fourth stripes, so the reader returns 40 rows while file positions still advance through all 100.

@Test
public void testStripeSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());
        // test reading second and fourth stripes
        OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
        };
        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
            assertEquals(reader.getFileRowCount(), 100);
            assertEquals(reader.getReaderRowCount(), 40);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            // second stripe
            Page page = reader.nextPage().getLoadedPage();
            assertEquals(page.getPositionCount(), 20);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFilePosition(), 20);
            assertCurrentBatch(page, 1);
            // fourth stripe
            page = reader.nextPage().getLoadedPage();
            assertEquals(page.getPositionCount(), 20);
            assertEquals(reader.getReaderPosition(), 20);
            assertEquals(reader.getFilePosition(), 60);
            assertCurrentBatch(page, 3);
            page = reader.nextPage();
            assertNull(page);
            assertEquals(reader.getReaderPosition(), 40);
            assertEquals(reader.getFilePosition(), 100);
        }
    }
}
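
assertCurrentBatch is another private helper. A plausible sketch, assuming each stripe holds 20 rows and every value is three times its row index (both consistent with the stripe statistics the predicate matches above):

private static void assertCurrentBatch(Page page, int stripe) {
    // sketch: stripe n (0-indexed) covers rows n*20 .. n*20+19, each holding the value rowIndex * 3
    Block block = page.getBlock(0);
    for (int i = 0; i < block.getPositionCount(); i++) {
        assertEquals(BIGINT.getLong(block, i), ((stripe * 20L) + i) * 3);
    }
}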

Example 43 with BIGINT

Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.

From the class RcFileTester, method preprocessWriteValueOld, which converts a Trino-native value (SqlDate, SqlTimestamp, SqlDecimal, nested lists, maps, and rows) into the equivalent Hive object-model value that the legacy RCFile serializers expect.

private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS)) {
        long millis = ((SqlTimestamp) value).getMillis();
        if (format == Format.BINARY) {
            millis = HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false);
        }
        return Timestamp.ofEpochMilli(millis);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(format, elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(format, keyType, entry.getKey()), preprocessWriteValueOld(format, valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(format, fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
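
The conversion recurses through container types; a short usage sketch with illustrative values (the method is private, so real callers sit inside RcFileTester):

// bigint: any Number is narrowed to a Long
Object hiveLong = preprocessWriteValueOld(Format.BINARY, BIGINT, 42L);
// date: SqlDate's epoch-day count becomes a Hive Date
Object hiveDate = preprocessWriteValueOld(Format.BINARY, DATE, new SqlDate(19000));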

Example 44 with BIGINT

Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.

From the class TestDefaultJdbcMetadata, method testAggregationPushdownForTableHandle.

@Test
public void testAggregationPushdownForTableHandle() {
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(new JdbcMetadataSessionProperties(new JdbcMetadataConfig().setAggregationPushdownEnabled(true), Optional.empty()).getSessionProperties())
            .build();
    ColumnHandle groupByColumn = metadata.getColumnHandles(session, tableHandle).get("text");
    Function<ConnectorTableHandle, Optional<AggregationApplicationResult<ConnectorTableHandle>>> applyAggregation = handle -> metadata.applyAggregation(
            session,
            handle,
            ImmutableList.of(new AggregateFunction("count", BIGINT, List.of(), List.of(), false, Optional.empty())),
            ImmutableMap.of(),
            ImmutableList.of(ImmutableList.of(groupByColumn)));
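    // the aggregate above is count(*) with a single grouping set on the "text" column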
    ConnectorTableHandle baseTableHandle = metadata.getTableHandle(session, new SchemaTableName("example", "numbers"));
    Optional<AggregationApplicationResult<ConnectorTableHandle>> aggregationResult = applyAggregation.apply(baseTableHandle);
    assertThat(aggregationResult).isPresent();
    SchemaTableName noAggregationPushdownTable = new SchemaTableName("example", "no_aggregation_pushdown");
    metadata.createTable(SESSION, new ConnectorTableMetadata(noAggregationPushdownTable, ImmutableList.of(new ColumnMetadata("text", VARCHAR))), false);
    ConnectorTableHandle noAggregationPushdownTableHandle = metadata.getTableHandle(session, noAggregationPushdownTable);
    aggregationResult = applyAggregation.apply(noAggregationPushdownTableHandle);
    assertThat(aggregationResult).isEmpty();
}

Example 45 with BIGINT

Use of io.trino.spi.type.BigintType.BIGINT in project trino by trinodb.

From the class TestHashJoinOperator, method testProbeOuterJoinWithFilterFunction. The build side carries keys "20" through "29" and the probe side keys "20" through "34"; the filter additionally requires the probe's second channel to be at least 1025, so only probe keys "25" through "29" find a match and every other probe row comes back null-padded, as the expected result shows.

@Test(dataProvider = "hashJoinTestValues")
public void testProbeOuterJoinWithFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
    TaskContext taskContext = createTaskContext();
    InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(
            (leftPosition, leftPage, rightPosition, rightPage) -> BIGINT.getLong(rightPage.getBlock(1), rightPosition) >= 1025);
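    // rightPage here is the probe side: a candidate match survives the filter only when the
    // probe row's second channel is at least 1025 (probe keys "25" through "29" below)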
    // build factory
    List<Type> buildTypes = ImmutableList.of(VARCHAR, BIGINT, BIGINT);
    RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT, BIGINT)).addSequencePage(10, 20, 30, 40);
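    // addSequencePage(10, 20, 30, 40) produces 10 build rows counting up per column:
    // ("20", 30, 40) through ("29", 39, 49)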
    BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, parallelBuild, taskContext, buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
    JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
    // probe factory
    List<Type> probeTypes = ImmutableList.of(VARCHAR, BIGINT, BIGINT);
    RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
    List<Page> probeInput = probePages.addSequencePage(15, 20, 1020, 2020).build();
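    // addSequencePage(15, 20, 1020, 2020) produces 15 probe rows:
    // ("20", 1020, 2020) through ("34", 1034, 2034)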
    OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages, true);
    // build drivers and operators
    instantiateBuildDrivers(buildSideSetup, taskContext);
    buildLookupSource(executor, buildSideSetup);
    // expected
    MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes))
            .row("20", 1020L, 2020L, null, null, null)
            .row("21", 1021L, 2021L, null, null, null)
            .row("22", 1022L, 2022L, null, null, null)
            .row("23", 1023L, 2023L, null, null, null)
            .row("24", 1024L, 2024L, null, null, null)
            .row("25", 1025L, 2025L, "25", 35L, 45L)
            .row("26", 1026L, 2026L, "26", 36L, 46L)
            .row("27", 1027L, 2027L, "27", 37L, 47L)
            .row("28", 1028L, 2028L, "28", 38L, 48L)
            .row("29", 1029L, 2029L, "29", 39L, 49L)
            .row("30", 1030L, 2030L, null, null, null)
            .row("31", 1031L, 2031L, null, null, null)
            .row("32", 1032L, 2032L, null, null, null)
            .row("33", 1033L, 2033L, null, null, null)
            .row("34", 1034L, 2034L, null, null, null)
            .build();
    assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}

Aggregations

BIGINT (io.trino.spi.type.BigintType.BIGINT): 106
ImmutableList (com.google.common.collect.ImmutableList): 99
Optional (java.util.Optional): 87
Test (org.testng.annotations.Test): 86
ImmutableMap (com.google.common.collect.ImmutableMap): 84
VARCHAR (io.trino.spi.type.VarcharType.VARCHAR): 44
List (java.util.List): 44
Map (java.util.Map): 44
ColumnHandle (io.trino.spi.connector.ColumnHandle): 38
Type (io.trino.spi.type.Type): 38
Symbol (io.trino.sql.planner.Symbol): 38
QualifiedName (io.trino.sql.tree.QualifiedName): 38
ImmutableSet (com.google.common.collect.ImmutableSet): 37
TupleDomain (io.trino.spi.predicate.TupleDomain): 36
PlanMatchPattern.values (io.trino.sql.planner.assertions.PlanMatchPattern.values): 36
TableHandle (io.trino.metadata.TableHandle): 35
BaseRuleTest (io.trino.sql.planner.iterative.rule.test.BaseRuleTest): 35
Session (io.trino.Session): 34
PlanMatchPattern.filter (io.trino.sql.planner.assertions.PlanMatchPattern.filter): 33
PlanBuilder.expression (io.trino.sql.planner.iterative.rule.test.PlanBuilder.expression): 32
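
Every example above funnels reads and writes through the BIGINT type object rather than casting blocks by hand. A minimal standalone round trip (the class name and values are illustrative; createBlockBuilder, writeLong, and getLong are the standard Trino SPI calls used throughout the snippets above):

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;

import static io.trino.spi.type.BigintType.BIGINT;

public class BigintRoundTrip {
    public static void main(String[] args) {
        // write three long values through the type API; a null BlockBuilderStatus is fine standalone
        BlockBuilder builder = BIGINT.createBlockBuilder(null, 3);
        for (long value : new long[] {1, 2, 3}) {
            BIGINT.writeLong(builder, value);
        }
        Block block = builder.build();
        // read them back the same way the tests above do
        for (int position = 0; position < block.getPositionCount(); position++) {
            System.out.println(BIGINT.getLong(block, position));
        }
    }
}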