
Example 26 with RowType

Use of io.trino.spi.type.RowType in the trinodb/trino project.

The class OrcTester, method testStructRoundTrip.

private void testStructRoundTrip(Type type, List<?> values) throws Exception {
    Type rowType = rowType(type, type, type);
    // values in simple struct
    testRoundTripType(rowType, values.stream().map(OrcTester::toHiveStruct).collect(toList()));
    if (structuralNullTestsEnabled) {
        // values and nulls in simple struct
        testRoundTripType(rowType, insertNullEvery(5, values).stream().map(OrcTester::toHiveStruct).collect(toList()));
        // all null values in simple struct
        testRoundTripType(rowType, values.stream().map(value -> toHiveStruct(null)).collect(toList()));
    }
    if (missingStructFieldsTestsEnabled) {
        // read with a wider struct than was written; the extra trailing fields come back as null
        Type readType = rowType(type, type, type, type, type, type);
        Type writeType = rowType(type, type, type);
        List<?> writeValues = values.stream().map(OrcTester::toHiveStruct).collect(toList());
        List<?> readValues = values.stream().map(OrcTester::toHiveStructWithNull).collect(toList());
        assertRoundTrip(writeType, readType, writeValues, readValues);
    }
}
Also used : TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) OrcType(io.trino.orc.metadata.OrcType) MapType(io.trino.spi.type.MapType) VarbinaryType(io.trino.spi.type.VarbinaryType) CharType(io.trino.spi.type.CharType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType)
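
For context, a minimal sketch of the two helpers this test leans on but the page does not show. The bodies are assumptions inferred from the call sites (rowType(type, type, type) and toHiveStruct(value)), not the verified OrcTester source:

static Type rowType(Type... fieldTypes) {
    // name the fields field_0, field_1, ... and wrap them in a RowType
    ImmutableList.Builder<RowType.Field> fields = ImmutableList.builder();
    for (int i = 0; i < fieldTypes.length; i++) {
        fields.add(new RowType.Field(Optional.of("field_" + i), fieldTypes[i]));
    }
    return RowType.from(fields.build());
}

static List<Object> toHiveStruct(Object input) {
    // one logical value repeated across the three struct fields;
    // Arrays.asList (unlike List.of) tolerates the nulls used in the all-null test
    return Arrays.asList(input, input, input);
}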

Example 27 with RowType

Use of io.trino.spi.type.RowType in the trinodb/trino project.

The class TestOrcWithoutRowGroupInfo, method testAndVerifyResults.

private void testAndVerifyResults(OrcPredicate orcPredicate) throws Exception {
    // this file was written by minor compaction in hive
    File file = new File(getResource("orcFileWithoutRowGroupInfo.orc").toURI());
    OrcReader orcReader = createOrcReader(new FileOrcDataSource(file, new OrcReaderOptions()), new OrcReaderOptions()).orElseThrow();
    assertEquals(orcReader.getFooter().getNumberOfRows(), 2);
    assertEquals(orcReader.getFooter().getRowsInRowGroup(), OptionalInt.empty());
    RowType rowType = RowType.from(ImmutableList.of(RowType.field("a", BIGINT)));
    // read types follow the Hive ACID layout: operation, originalTransaction, bucket, rowId, currentTransaction, row
    OrcRecordReader reader = orcReader.createRecordReader(
            orcReader.getRootColumn().getNestedColumns(),
            ImmutableList.of(INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, rowType),
            orcPredicate,
            DateTimeZone.UTC,
            newSimpleAggregatedMemoryContext(),
            INITIAL_BATCH_SIZE,
            RuntimeException::new);
    int rows = 0;
    while (true) {
        Page page = reader.nextPage();
        if (page == null) {
            break;
        }
        page = page.getLoadedPage();
        rows += page.getPositionCount();
        Block rowBlock = page.getBlock(5);
        for (int position = 0; position < page.getPositionCount(); position++) {
            // decode the nested BIGINT field of the "row" struct at each position
            BIGINT.getLong(rowType.getObject(rowBlock, position), 0);
        }
    }
    assertEquals(rows, reader.getFileRowCount());
}
Also used : OrcReader.createOrcReader(io.trino.orc.OrcReader.createOrcReader) RowType(io.trino.spi.type.RowType) Block(io.trino.spi.block.Block) Page(io.trino.spi.Page) File(java.io.File)
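
A hedged restatement of the decoding idiom in the loop above, pulled into a helper so the intermediate Block is explicit (the method name readFieldA is illustrative, not from the source):

static long readFieldA(RowType rowType, Block rowBlock, int position) {
    // one position of the row block yields a single-row Block of field values
    Block fields = (Block) rowType.getObject(rowBlock, position);
    return BIGINT.getLong(fields, 0); // field 0 is "a" per RowType.field("a", BIGINT)
}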

Example 28 with RowType

Use of io.trino.spi.type.RowType in the trinodb/trino project.

The class RcFileTester, method preprocessWriteValueOld.

// Converts a Trino-side value into the object form expected by Hive's old
// ObjectInspector-based serializers, recursing into arrays, maps, and rows.
private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS)) {
        long millis = ((SqlTimestamp) value).getMillis();
        if (format == Format.BINARY) {
            millis = HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false);
        }
        return Timestamp.ofEpochMilli(millis);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(format, elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(format, keyType, entry.getKey()), preprocessWriteValueOld(format, valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(format, fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec) PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY(io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY) DateTimeZone(org.joda.time.DateTimeZone) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Date(org.apache.hadoop.hive.common.type.Date) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) Decimals.rescale(io.trino.spi.type.Decimals.rescale) FileSplit(org.apache.hadoop.mapred.FileSplit) RcFileDecoderUtils.findFirstSyncPosition(io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) LazyBinaryArray(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray) IntWritable(org.apache.hadoop.io.IntWritable) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) MICROSECONDS_PER_MILLISECOND(io.trino.type.DateTimes.MICROSECONDS_PER_MILLISECOND) UncheckedIOException(java.io.UncheckedIOException) BooleanWritable(org.apache.hadoop.io.BooleanWritable) RecordReader(org.apache.hadoop.mapred.RecordReader) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Lz4Codec(org.apache.hadoop.io.compress.Lz4Codec) Iterables(com.google.common.collect.Iterables) Slice(io.airlift.slice.Slice) 
TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) StructObject(org.apache.hadoop.hive.serde2.StructObject) Page(io.trino.spi.Page) SqlDecimal(io.trino.spi.type.SqlDecimal) Functions.constant(com.google.common.base.Functions.constant) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) ArrayList(java.util.ArrayList) NONE(io.trino.rcfile.RcFileTester.Compression.NONE) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) Lists(com.google.common.collect.Lists) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) BZIP2(io.trino.rcfile.RcFileTester.Compression.BZIP2) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) HadoopNative(io.trino.hadoop.HadoopNative) LinkedHashSet(java.util.LinkedHashSet) Int128(io.trino.spi.type.Int128) Properties(java.util.Properties) MapType(io.trino.spi.type.MapType) AbstractIterator(com.google.common.collect.AbstractIterator) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) NULL(org.apache.hadoop.mapred.Reporter.NULL) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) SqlVarbinary(io.trino.spi.type.SqlVarbinary) SIZE_OF_LONG(io.airlift.slice.SizeOf.SIZE_OF_LONG) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) TINYINT(io.trino.spi.type.TinyintType.TINYINT) BlockBuilder(io.trino.spi.block.BlockBuilder) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) FloatWritable(org.apache.hadoop.io.FloatWritable) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) Iterables.transform(com.google.common.collect.Iterables.transform) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) LongWritable(org.apache.hadoop.io.LongWritable) SNAPPY(io.trino.rcfile.RcFileTester.Compression.SNAPPY) TextRcFileEncoding(io.trino.rcfile.text.TextRcFileEncoding) SqlTimestamp(io.trino.spi.type.SqlTimestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Block(io.trino.spi.block.Block) PRESTO_RCFILE_WRITER_VERSION(io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) RowType(io.trino.spi.type.RowType) SIZE_OF_INT(io.airlift.slice.SizeOf.SIZE_OF_INT) ImmutableSet(com.google.common.collect.ImmutableSet) 
DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) SESSION(io.trino.testing.TestingConnectorSession.SESSION) ArrayType(io.trino.spi.type.ArrayType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SqlDate(io.trino.spi.type.SqlDate) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Decimals(io.trino.spi.type.Decimals) Entry(java.util.Map.Entry) LZ4(io.trino.rcfile.RcFileTester.Compression.LZ4) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) DecimalType(io.trino.spi.type.DecimalType) MAP(io.trino.spi.type.StandardTypes.MAP) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Assert.assertNull(org.testng.Assert.assertNull) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) DoubleWritable(org.apache.hadoop.io.DoubleWritable) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) VarcharType(io.trino.spi.type.VarcharType) OutputStreamSliceOutput(io.airlift.slice.OutputStreamSliceOutput) COMPRESS_CODEC(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC) ImmutableList(com.google.common.collect.ImmutableList) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Math.toIntExact(java.lang.Math.toIntExact) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Iterators.advance(com.google.common.collect.Iterators.advance) FileInputStream(java.io.FileInputStream) JobConf(org.apache.hadoop.mapred.JobConf) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) Collectors.toList(java.util.stream.Collectors.toList) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Closeable(java.io.Closeable) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Collections(java.util.Collections) InputStream(java.io.InputStream) ZLIB(io.trino.rcfile.RcFileTester.Compression.ZLIB)
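
A hypothetical invocation, assuming the TINYINT and VARCHAR constants from the imports above; it shows how the method recurses into a row value field by field:

// convert one struct value of row(TINYINT, VARCHAR) for Hive's old writer path
Type struct = RowType.anonymous(ImmutableList.of(TINYINT, VARCHAR));
Object hiveStruct = preprocessWriteValueOld(Format.BINARY, struct, Arrays.asList((byte) 1, "x"));
// result: [1, "x"] - the TINYINT field narrows via Number.byteValue(), the VARCHAR field passes through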

Example 29 with RowType

Use of io.trino.spi.type.RowType in the trinodb/trino project.

The class RcFileTester, method writeValue.

// Writes a single value into the BlockBuilder via the Trino type's write methods,
// recursing into arrays, maps, and rows.
private static void writeValue(Type type, BlockBuilder blockBuilder, Object value) {
    if (value == null) {
        blockBuilder.appendNull();
    } else {
        if (BOOLEAN.equals(type)) {
            type.writeBoolean(blockBuilder, (Boolean) value);
        } else if (TINYINT.equals(type)) {
            type.writeLong(blockBuilder, ((Number) value).longValue());
        } else if (SMALLINT.equals(type)) {
            type.writeLong(blockBuilder, ((Number) value).longValue());
        } else if (INTEGER.equals(type)) {
            type.writeLong(blockBuilder, ((Number) value).longValue());
        } else if (BIGINT.equals(type)) {
            type.writeLong(blockBuilder, ((Number) value).longValue());
        } else if (Decimals.isShortDecimal(type)) {
            type.writeLong(blockBuilder, ((SqlDecimal) value).toBigDecimal().unscaledValue().longValue());
        } else if (Decimals.isLongDecimal(type)) {
            type.writeObject(blockBuilder, Int128.valueOf(((SqlDecimal) value).toBigDecimal().unscaledValue()));
        } else if (REAL.equals(type)) {
            type.writeLong(blockBuilder, Float.floatToIntBits((Float) value));
        } else if (DOUBLE.equals(type)) {
            type.writeDouble(blockBuilder, ((Number) value).doubleValue());
        } else if (VARCHAR.equals(type)) {
            type.writeSlice(blockBuilder, Slices.utf8Slice((String) value));
        } else if (VARBINARY.equals(type)) {
            type.writeSlice(blockBuilder, Slices.wrappedBuffer(((SqlVarbinary) value).getBytes()));
        } else if (DATE.equals(type)) {
            long days = ((SqlDate) value).getDays();
            type.writeLong(blockBuilder, days);
        } else if (TIMESTAMP_MILLIS.equals(type)) {
            long millis = ((SqlTimestamp) value).getMillis();
            type.writeLong(blockBuilder, millis * MICROSECONDS_PER_MILLISECOND);
        } else {
            if (type instanceof ArrayType) {
                List<?> array = (List<?>) value;
                Type elementType = type.getTypeParameters().get(0);
                BlockBuilder arrayBlockBuilder = blockBuilder.beginBlockEntry();
                for (Object elementValue : array) {
                    writeValue(elementType, arrayBlockBuilder, elementValue);
                }
                blockBuilder.closeEntry();
            } else if (type instanceof MapType) {
                Map<?, ?> map = (Map<?, ?>) value;
                Type keyType = type.getTypeParameters().get(0);
                Type valueType = type.getTypeParameters().get(1);
                BlockBuilder mapBlockBuilder = blockBuilder.beginBlockEntry();
                for (Entry<?, ?> entry : map.entrySet()) {
                    writeValue(keyType, mapBlockBuilder, entry.getKey());
                    writeValue(valueType, mapBlockBuilder, entry.getValue());
                }
                blockBuilder.closeEntry();
            } else if (type instanceof RowType) {
                List<?> array = (List<?>) value;
                List<Type> fieldTypes = type.getTypeParameters();
                BlockBuilder rowBlockBuilder = blockBuilder.beginBlockEntry();
                for (int fieldId = 0; fieldId < fieldTypes.size(); fieldId++) {
                    Type fieldType = fieldTypes.get(fieldId);
                    writeValue(fieldType, rowBlockBuilder, array.get(fieldId));
                }
                blockBuilder.closeEntry();
            } else {
                throw new IllegalArgumentException("Unsupported type " + type);
            }
        }
    }
}
Also used : SqlVarbinary(io.trino.spi.type.SqlVarbinary) RowType(io.trino.spi.type.RowType) SqlDecimal(io.trino.spi.type.SqlDecimal) SqlTimestamp(io.trino.spi.type.SqlTimestamp) MapType(io.trino.spi.type.MapType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) Entry(java.util.Map.Entry) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) StructObject(org.apache.hadoop.hive.serde2.StructObject) Map(java.util.Map) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) ImmutableMap(com.google.common.collect.ImmutableMap) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) HashMap(java.util.HashMap) BlockBuilder(io.trino.spi.block.BlockBuilder)
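
A hypothetical usage sketch, assuming the older Trino BlockBuilder SPI that beginBlockEntry()/closeEntry() belong to:

// serialize one row(BIGINT, VARCHAR) value into a single-position block
Type rowType = RowType.anonymous(ImmutableList.of(BIGINT, VARCHAR));
BlockBuilder builder = rowType.createBlockBuilder(null, 1);
writeValue(rowType, builder, Arrays.asList(42L, "hello"));
Block block = builder.build();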

Example 30 with RowType

Use of io.trino.spi.type.RowType in the trinodb/trino project.

The class CheckpointSchemaManager, method getAddEntryType.

// Builds the RowType describing the "add" entry of a Delta Lake checkpoint,
// including the stats_parsed struct with min/max values and null counts.
public RowType getAddEntryType(MetadataEntry metadataEntry) {
    List<ColumnMetadata> allColumns = extractSchema(metadataEntry, typeManager);
    List<ColumnMetadata> minMaxColumns = columnsWithStats(metadataEntry, typeManager);
    ImmutableList.Builder<RowType.Field> minMaxFields = ImmutableList.builder();
    for (ColumnMetadata dataColumn : minMaxColumns) {
        Type type = dataColumn.getType();
        if (type instanceof TimestampWithTimeZoneType) {
            minMaxFields.add(RowType.field(dataColumn.getName(), TimestampType.TIMESTAMP_MILLIS));
        } else {
            minMaxFields.add(RowType.field(dataColumn.getName(), type));
        }
    }
    ImmutableList.Builder<RowType.Field> statsColumns = ImmutableList.builder();
    statsColumns.add(RowType.field("numRecords", BigintType.BIGINT));
    List<RowType.Field> minMax = minMaxFields.build();
    if (!minMax.isEmpty()) {
        RowType minMaxType = RowType.from(minMax);
        statsColumns.add(RowType.field("minValues", minMaxType));
        statsColumns.add(RowType.field("maxValues", minMaxType));
    }
    statsColumns.add(RowType.field("nullCount", RowType.from(allColumns.stream().map(column -> buildNullCountType(Optional.of(column.getName()), column.getType())).collect(toImmutableList()))));
    MapType stringMap = (MapType) typeManager.getType(TypeSignature.mapType(VarcharType.VARCHAR.getTypeSignature(), VarcharType.VARCHAR.getTypeSignature()));
    List<RowType.Field> addFields = ImmutableList.of(
            RowType.field("path", VarcharType.createUnboundedVarcharType()),
            RowType.field("partitionValues", stringMap),
            RowType.field("size", BigintType.BIGINT),
            RowType.field("modificationTime", BigintType.BIGINT),
            RowType.field("dataChange", BooleanType.BOOLEAN),
            RowType.field("stats", VarcharType.createUnboundedVarcharType()),
            RowType.field("stats_parsed", RowType.from(statsColumns.build())),
            RowType.field("tags", stringMap));
    return RowType.from(addFields);
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) RowType(io.trino.spi.type.RowType) IntegerType(io.trino.spi.type.IntegerType) MapType(io.trino.spi.type.MapType) Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) BooleanType(io.trino.spi.type.BooleanType) TimestampType(io.trino.spi.type.TimestampType) BigintType(io.trino.spi.type.BigintType) VarcharType(io.trino.spi.type.VarcharType) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList)
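
getAddEntryType calls buildNullCountType, which this page does not show. A plausible sketch, inferred from the Delta checkpoint layout where a nested struct column carries per-field null counts while a leaf column carries a single count (the body is an assumption, not the verified Trino source):

private static RowType.Field buildNullCountType(Optional<String> columnName, Type columnType) {
    if (columnType instanceof RowType rowType) {
        // nested struct: null counts are themselves a struct, one entry per field
        return new RowType.Field(columnName, RowType.from(rowType.getFields().stream()
                .map(field -> buildNullCountType(field.getName(), field.getType()))
                .collect(toImmutableList())));
    }
    // leaf column: a plain BIGINT count
    return new RowType.Field(columnName, BigintType.BIGINT);
}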

Aggregations

Usage counts across the collected RowType examples:

RowType (io.trino.spi.type.RowType): 90
ArrayType (io.trino.spi.type.ArrayType): 51
MapType (io.trino.spi.type.MapType): 42
Type (io.trino.spi.type.Type): 42
ImmutableList (com.google.common.collect.ImmutableList): 30
VarcharType (io.trino.spi.type.VarcharType): 28
BlockBuilder (io.trino.spi.block.BlockBuilder): 26
DecimalType (io.trino.spi.type.DecimalType): 21
Test (org.testng.annotations.Test): 21
ImmutableMap (com.google.common.collect.ImmutableMap): 20
Block (io.trino.spi.block.Block): 20
List (java.util.List): 20
Map (java.util.Map): 18
ArrayList (java.util.ArrayList): 17
Optional (java.util.Optional): 17
CharType (io.trino.spi.type.CharType): 16
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 15
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 12
TimestampWithTimeZoneType (io.trino.spi.type.TimestampWithTimeZoneType): 12
HashMap (java.util.HashMap): 12