Search in sources :

Example 66 with ArrayType

use of io.trino.spi.type.ArrayType in project trino by trinodb.

Found in the class AbstractTestParquetReader, in the method testComplexNestedStructs.

@Test
public void testComplexNestedStructs() throws Exception {
    int n = 30;
    Iterable<Integer> mapKeys = intsBetween(0, n);
    Iterable<Integer> intPrimitives = limit(cycle(asList(1, null, 3, null, 5, null, 7, null, null, null, 11, null, 13)), n);
    Iterable<String> stringPrimitives = limit(cycle(asList(null, "value2", "value3", null, null, "value6", "value7")), n);
    Iterable<Double> doublePrimitives = limit(cycle(asList(1.1, null, 3.3, null, 5.5, null, 7.7, null, null, null, 11.11, null, 13.13)), n);
    Iterable<Boolean> booleanPrimitives = limit(cycle(asList(null, true, false, null, null, true, false)), n);
    Iterable<String> mapStringKeys = Stream.generate(() -> UUID.randomUUID().toString()).limit(n).collect(Collectors.toList());
    Iterable<Map<Integer, String>> mapsIntString = createNullableTestMaps(mapKeys, stringPrimitives);
    Iterable<List<String>> arraysString = createNullableTestArrays(stringPrimitives);
    Iterable<Map<Integer, Double>> mapsIntDouble = createNullableTestMaps(mapKeys, doublePrimitives);
    Iterable<List<Boolean>> arraysBoolean = createNullableTestArrays(booleanPrimitives);
    Iterable<Map<String, String>> mapsStringString = createNullableTestMaps(mapStringKeys, stringPrimitives);
    List<String> struct1FieldNames = asList("mapIntStringField", "stringArrayField", "intField");
    Iterable<?> structs1 = createNullableTestStructs(mapsIntString, arraysString, intPrimitives);
    ObjectInspector struct1ObjectInspector = getStandardStructObjectInspector(struct1FieldNames, asList(getStandardMapObjectInspector(javaIntObjectInspector, javaStringObjectInspector), getStandardListObjectInspector(javaStringObjectInspector), javaIntObjectInspector));
    Type struct1Type = RowType.from(asList(field("mapIntStringField", mapType(INTEGER, VARCHAR)), field("stringArrayField", new ArrayType(VARCHAR)), field("intField", INTEGER)));
    List<String> struct2FieldNames = asList("mapIntStringField", "stringArrayField", "structField");
    Iterable<?> structs2 = createNullableTestStructs(mapsIntString, arraysString, structs1);
    ObjectInspector struct2ObjectInspector = getStandardStructObjectInspector(struct2FieldNames, asList(getStandardMapObjectInspector(javaIntObjectInspector, javaStringObjectInspector), getStandardListObjectInspector(javaStringObjectInspector), struct1ObjectInspector));
    Type struct2Type = RowType.from(asList(field("mapIntStringField", mapType(INTEGER, VARCHAR)), field("stringArrayField", new ArrayType(VARCHAR)), field("structField", struct1Type)));
    List<String> struct3FieldNames = asList("mapIntDoubleField", "booleanArrayField", "booleanField");
    Iterable<?> structs3 = createNullableTestStructs(mapsIntDouble, arraysBoolean, booleanPrimitives);
    ObjectInspector struct3ObjectInspector = getStandardStructObjectInspector(struct3FieldNames, asList(getStandardMapObjectInspector(javaIntObjectInspector, javaDoubleObjectInspector), getStandardListObjectInspector(javaBooleanObjectInspector), javaBooleanObjectInspector));
    Type struct3Type = RowType.from(asList(field("mapIntDoubleField", mapType(INTEGER, DOUBLE)), field("booleanArrayField", new ArrayType(BOOLEAN)), field("booleanField", BOOLEAN)));
    List<String> struct4FieldNames = asList("mapIntDoubleField", "booleanArrayField", "structField");
    Iterable<?> structs4 = createNullableTestStructs(mapsIntDouble, arraysBoolean, structs3);
    ObjectInspector struct4ObjectInspector = getStandardStructObjectInspector(struct4FieldNames, asList(getStandardMapObjectInspector(javaIntObjectInspector, javaDoubleObjectInspector), getStandardListObjectInspector(javaBooleanObjectInspector), struct3ObjectInspector));
    Type struct4Type = RowType.from(asList(field("mapIntDoubleField", mapType(INTEGER, DOUBLE)), field("booleanArrayField", new ArrayType(BOOLEAN)), field("structField", struct3Type)));
    List<String> structFieldNames = asList("structField1", "structField2", "structField3", "structField4", "mapIntDoubleField", "booleanArrayField", "mapStringStringField");
    List<ObjectInspector> objectInspectors = asList(struct1ObjectInspector, struct2ObjectInspector, struct3ObjectInspector, struct4ObjectInspector, getStandardMapObjectInspector(javaIntObjectInspector, javaDoubleObjectInspector), getStandardListObjectInspector(javaBooleanObjectInspector), getStandardMapObjectInspector(javaStringObjectInspector, javaStringObjectInspector));
    List<Type> types = ImmutableList.of(struct1Type, struct2Type, struct3Type, struct4Type, mapType(INTEGER, DOUBLE), new ArrayType(BOOLEAN), mapType(VARCHAR, VARCHAR));
    Iterable<?>[] values = new Iterable<?>[] { structs1, structs2, structs3, structs4, mapsIntDouble, arraysBoolean, mapsStringString };
    tester.assertRoundTrip(objectInspectors, values, values, structFieldNames, types, Optional.empty());
}
Also used : ArrayType(io.trino.spi.type.ArrayType) Collections.singletonList(java.util.Collections.singletonList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) 
PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) BigInteger(java.math.BigInteger) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) MessageType(org.apache.parquet.schema.MessageType) StructuralTestUtil.mapType(io.trino.testing.StructuralTestUtil.mapType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.testng.annotations.Test)

Example 67 with ArrayType

use of io.trino.spi.type.ArrayType in project trino by trinodb.

Found in the class AbstractTestParquetReader, in the method testNestedArrays.

@Test
public void testNestedArrays() throws Exception {
    int nestingLevel = ThreadLocalRandom.current().nextInt(1, 15);
    ObjectInspector objectInspector = getStandardListObjectInspector(javaIntObjectInspector);
    Type type = new ArrayType(INTEGER);
    Iterable<?> values = limit(cycle(asList(1, null, 3, null, 5, null, 7, null, null, null, 11, null, 13)), 3_210);
    for (int i = 0; i < nestingLevel; i++) {
        values = createNullableTestArrays(values);
        objectInspector = getStandardListObjectInspector(objectInspector);
        type = new ArrayType(type);
    }
    values = createTestArrays(values);
    tester.testRoundTrip(objectInspector, values, values, type);
}
Also used : ArrayType(io.trino.spi.type.ArrayType) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) 
ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) MessageType(org.apache.parquet.schema.MessageType) StructuralTestUtil.mapType(io.trino.testing.StructuralTestUtil.mapType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) Test(org.testng.annotations.Test)

Example 68 with ArrayType

use of io.trino.spi.type.ArrayType in project trino by trinodb.

Found in the class AbstractTestParquetReader, in the method testArraySchemas.

/**
 * Test reading various arrays schemas compatible with spec
 * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
 */
@Test
public void testArraySchemas() throws Exception {
    MessageType parquetMrNullableSpecSchema = parseMessageType("message hive_schema {" + "  optional group my_list (LIST){" + "    repeated group list {" + "        required int32 element;" + "    }" + "  }" + "} ");
    Iterable<List<Integer>> nonNullArrayElements = createTestArrays(intsBetween(0, 31_234));
    tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), nonNullArrayElements, nonNullArrayElements, "my_list", new ArrayType(INTEGER), Optional.of(parquetMrNullableSpecSchema));
    MessageType parquetMrNonNullSpecSchema = parseMessageType("message hive_schema {" + "  required group my_list (LIST){" + "    repeated group list {" + "        optional int32 element;" + "    }" + "  }" + "} ");
    Iterable<List<Integer>> values = createTestArrays(limit(cycle(asList(1, null, 3, 5, null, null, null, 7, 11, null, 13, 17)), 30_000));
    tester.assertRoundTrip(singletonList(getStandardListObjectInspector(javaIntObjectInspector)), new Iterable<?>[] { values }, new Iterable<?>[] { values }, singletonList("my_list"), singletonList(new ArrayType(INTEGER)), Optional.of(parquetMrNonNullSpecSchema));
    MessageType sparkSchema = parseMessageType("message hive_schema {" + "  optional group my_list (LIST){" + "    repeated group list {" + "        optional int32 element;" + "    }" + "  }" + "} ");
    tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(sparkSchema));
    MessageType hiveSchema = parseMessageType("message hive_schema {" + "  optional group my_list (LIST){" + "    repeated group bag {" + "        optional int32 array_element;" + "    }" + "  }" + "} ");
    tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(hiveSchema));
    MessageType customNamingSchema = parseMessageType("message hive_schema {" + "  optional group my_list (LIST){" + "    repeated group bag {" + "        optional int32 array;" + "    }" + "  }" + "} ");
    tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(customNamingSchema));
}
Also used : ArrayType(io.trino.spi.type.ArrayType) Collections.singletonList(java.util.Collections.singletonList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) MessageType(org.apache.parquet.schema.MessageType) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) Test(org.testng.annotations.Test)

Example 69 with ArrayType

use of io.trino.spi.type.ArrayType in project trino by trinodb.

Found in the class AbstractTestParquetReader, in the method testStructOfTwoArrays.

@Test
public void testStructOfTwoArrays() throws Exception {
    Iterable<List<Integer>> intArrayField = createNullableTestArrays(limit(cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17)), 30_000));
    Iterable<List<String>> stringArrayField = createNullableTestArrays(transform(intsBetween(0, 30_000), Object::toString));
    List<List<?>> values = createTestStructs(stringArrayField, intArrayField);
    List<String> structFieldNames = asList("stringArrayField", "intArrayField");
    Type structType = RowType.from(asList(field("stringArrayField", new ArrayType(VARCHAR)), field("intArrayField", new ArrayType(INTEGER))));
    tester.testRoundTrip(getStandardStructObjectInspector(structFieldNames, asList(getStandardListObjectInspector(javaStringObjectInspector), getStandardListObjectInspector(javaIntObjectInspector))), values, values, structType);
}
Also used : ArrayType(io.trino.spi.type.ArrayType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) MessageType(org.apache.parquet.schema.MessageType) StructuralTestUtil.mapType(io.trino.testing.StructuralTestUtil.mapType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) Collections.singletonList(java.util.Collections.singletonList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Test(org.testng.annotations.Test)

Example 70 with ArrayType

use of io.trino.spi.type.ArrayType in project trino by trinodb.

Found in the class AbstractTestParquetReader, in the method testNewAvroArray.

@Test
public void testNewAvroArray() throws Exception {
    MessageType parquetMrAvroSchema = parseMessageType("message avro_schema_new { " + "  optional group my_list (LIST) { " + "    repeated group list { " + "      optional int32 element; " + "    } " + "  } " + "}");
    Iterable<List<Integer>> values = createTestArrays(limit(cycle(asList(1, null, 3, 5, null, null, null, 7, 11, null, 13, 17)), 30_000));
    tester.testRoundTrip(getStandardListObjectInspector(javaIntObjectInspector), values, values, "my_list", new ArrayType(INTEGER), Optional.of(parquetMrAvroSchema));
}
Also used : ArrayType(io.trino.spi.type.ArrayType) Collections.singletonList(java.util.Collections.singletonList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) MessageType(org.apache.parquet.schema.MessageType) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) Test(org.testng.annotations.Test)

Aggregations

ArrayType (io.trino.spi.type.ArrayType)289 Test (org.testng.annotations.Test)205 Type (io.trino.spi.type.Type)92 RowType (io.trino.spi.type.RowType)86 ImmutableList (com.google.common.collect.ImmutableList)66 List (java.util.List)62 ArrayList (java.util.ArrayList)59 MapType (io.trino.spi.type.MapType)43 Arrays.asList (java.util.Arrays.asList)36 Collections.singletonList (java.util.Collections.singletonList)34 VarcharType (io.trino.spi.type.VarcharType)32 VarcharType.createUnboundedVarcharType (io.trino.spi.type.VarcharType.createUnboundedVarcharType)32 BlockBuilder (io.trino.spi.block.BlockBuilder)31 MessageType (org.apache.parquet.schema.MessageType)31 DecimalType.createDecimalType (io.trino.spi.type.DecimalType.createDecimalType)30 MessageTypeParser.parseMessageType (org.apache.parquet.schema.MessageTypeParser.parseMessageType)27 DecimalType (io.trino.spi.type.DecimalType)26 StructuralTestUtil.mapType (io.trino.testing.StructuralTestUtil.mapType)24 Block (io.trino.spi.block.Block)23 Map (java.util.Map)23