Search in sources:

Example 16 with ArrayType

use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

Class JsonToRowDataConverters, method createArrayConverter.

/**
 * Creates a converter that turns a JSON array node into a {@link GenericArrayData}.
 *
 * <p>Every child node is passed through the converter created for the array's element type.
 * The backing Java array is allocated reflectively so that its component type is the internal
 * conversion class of that element type.
 *
 * @param arrayType logical array type whose element type drives the conversion
 * @return a converter that casts its input to {@link ArrayNode} and converts each element
 */
private JsonToRowDataConverter createArrayConverter(ArrayType arrayType) {
    final LogicalType elementType = arrayType.getElementType();
    final JsonToRowDataConverter perElement = createConverter(elementType);
    final Class<?> componentClass = LogicalTypeUtils.toInternalConversionClass(elementType);
    return jsonNode -> {
        final ArrayNode arrayNode = (ArrayNode) jsonNode;
        final int length = arrayNode.size();
        final Object[] converted = (Object[]) Array.newInstance(componentClass, length);
        for (int index = 0; index < length; index++) {
            converted[index] = perElement.convert(arrayNode.get(index));
        }
        return new GenericArrayData(converted);
    };
}
Also used : ISO8601_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT(org.apache.flink.formats.common.TimeFormats.ISO8601_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT) Array(java.lang.reflect.Array) GenericArrayData(org.apache.flink.table.data.GenericArrayData) IntType(org.apache.flink.table.types.logical.IntType) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) SQL_TIMESTAMP_FORMAT(org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_FORMAT) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) MapType(org.apache.flink.table.types.logical.MapType) RowType(org.apache.flink.table.types.logical.RowType) TemporalQueries(java.time.temporal.TemporalQueries) ArrayNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode) TemporalAccessor(java.time.temporal.TemporalAccessor) BigDecimal(java.math.BigDecimal) SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT(org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT) TextNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.TextNode) GenericRowData(org.apache.flink.table.data.GenericRowData) DecimalType(org.apache.flink.table.types.logical.DecimalType) LogicalTypeFamily(org.apache.flink.table.types.logical.LogicalTypeFamily) GenericMapData(org.apache.flink.table.data.GenericMapData) Map(java.util.Map) LocalTime(java.time.LocalTime) ZoneOffset(java.time.ZoneOffset) ISO_LOCAL_DATE(java.time.format.DateTimeFormatter.ISO_LOCAL_DATE) LogicalTypeUtils(org.apache.flink.table.types.logical.utils.LogicalTypeUtils) MultisetType(org.apache.flink.table.types.logical.MultisetType) ISO8601_TIMESTAMP_FORMAT(org.apache.flink.formats.common.TimeFormats.ISO8601_TIMESTAMP_FORMAT) RowData(org.apache.flink.table.data.RowData) Iterator(java.util.Iterator) TimestampData(org.apache.flink.table.data.TimestampData) ObjectNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode) 
TableException(org.apache.flink.table.api.TableException) DecimalData(org.apache.flink.table.data.DecimalData) IOException(java.io.IOException) ArrayType(org.apache.flink.table.types.logical.ArrayType) TimestampFormat(org.apache.flink.formats.common.TimestampFormat) Serializable(java.io.Serializable) SQL_TIME_FORMAT(org.apache.flink.formats.common.TimeFormats.SQL_TIME_FORMAT) StringData(org.apache.flink.table.data.StringData) LogicalType(org.apache.flink.table.types.logical.LogicalType) LocalDate(java.time.LocalDate) Internal(org.apache.flink.annotation.Internal) GenericArrayData(org.apache.flink.table.data.GenericArrayData) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) ArrayNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode)

Example 17 with ArrayType

use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

Class OrcBulkRowDataWriterTest, method initInput.

/**
 * Resets the shared fixture before each test: the four field types and two input rows.
 *
 * <p>The field types are, in order: a string, an int, an array of single-field rows and a map
 * from string to a (string, timestamp) row. Both rows are built by {@link #createNestedRow} so
 * that they differ only in the values passed in.
 */
@Before
public void initInput() {
    input = new ArrayList<>();
    fieldTypes = new LogicalType[4];
    fieldTypes[0] = new VarCharType();
    fieldTypes[1] = new IntType();
    List<RowType.RowField> arrayRowFieldList = Collections.singletonList(new RowType.RowField("_col2_col0", new VarCharType()));
    fieldTypes[2] = new ArrayType(new RowType(arrayRowFieldList));
    List<RowType.RowField> mapRowFieldList = Arrays.asList(new RowType.RowField("_col3_col0", new VarCharType()), new RowType.RowField("_col3_col1", new TimestampType()));
    fieldTypes[3] = new MapType(new VarCharType(), new RowType(mapRowFieldList));
    input.add(createNestedRow("_col_0_string_1", 1, "_col_2_row_0_string_1", "_col_2_row_1_string_1", "_col_3_map_key_1", "_col_3_map_value_string_1"));
    input.add(createNestedRow("_col_0_string_2", 2, "_col_2_row_0_string_2", "_col_2_row_1_string_2", "_col_3_map_key_2", "_col_3_map_value_string_2"));
}

/**
 * Builds one four-field input row matching the layout set up in {@link #initInput()}.
 *
 * @param stringValue value of field 0
 * @param intValue value of field 1
 * @param firstArrayString string held by the first row of the array in field 2
 * @param secondArrayString string held by the second row of the array in field 2
 * @param mapKey key of the single map entry in field 3
 * @param mapValueString string held by the row stored under {@code mapKey}
 * @return the populated row
 */
private static GenericRowData createNestedRow(String stringValue, int intValue, String firstArrayString, String secondArrayString, String mapKey, String mapValueString) {
    GenericRowData rowData = new GenericRowData(4);
    rowData.setField(0, new BinaryStringData(stringValue));
    rowData.setField(1, intValue);

    // Field 2: array holding two single-field rows.
    GenericRowData arrayValue1 = new GenericRowData(1);
    arrayValue1.setField(0, new BinaryStringData(firstArrayString));
    GenericRowData arrayValue2 = new GenericRowData(1);
    arrayValue2.setField(0, new BinaryStringData(secondArrayString));
    rowData.setField(2, new GenericArrayData(new Object[] { arrayValue1, arrayValue2 }));

    // Field 3: map with a single entry whose value is a (string, timestamp) row.
    GenericRowData mapValue = new GenericRowData(2);
    mapValue.setField(0, new BinaryStringData(mapValueString));
    mapValue.setField(1, TimestampData.fromTimestamp(new Timestamp(3600000)));
    Map<StringData, RowData> mapDataMap = new HashMap<>();
    mapDataMap.put(new BinaryStringData(mapKey), mapValue);
    rowData.setField(3, new GenericMapData(mapDataMap));
    return rowData;
}
Also used : GenericMapData(org.apache.flink.table.data.GenericMapData) GenericArrayData(org.apache.flink.table.data.GenericArrayData) RowType(org.apache.flink.table.types.logical.RowType) Timestamp(java.sql.Timestamp) MapType(org.apache.flink.table.types.logical.MapType) IntType(org.apache.flink.table.types.logical.IntType) ArrayType(org.apache.flink.table.types.logical.ArrayType) TimestampType(org.apache.flink.table.types.logical.TimestampType) GenericRowData(org.apache.flink.table.data.GenericRowData) VarCharType(org.apache.flink.table.types.logical.VarCharType) BinaryStringData(org.apache.flink.table.data.binary.BinaryStringData) Map(java.util.Map) HashMap(java.util.HashMap) Before(org.junit.Before)

Example 18 with ArrayType

use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

Class ParquetSchemaConverter, method convertToParquetType.

/**
 * Converts a logical type into the Parquet schema type with the given name and repetition.
 *
 * <p>Scalar types become primitives, some carrying an {@link OriginalType} annotation. Arrays
 * and maps use the {@link ConversionPatterns} list and map layouts, and rows become a
 * {@link GroupType}. Element, key and value types of arrays and maps are converted through the
 * two-argument {@code convertToParquetType} overload, which is declared outside this method.
 *
 * @param name name of the resulting Parquet type
 * @param type logical type to convert
 * @param repetition repetition applied to the resulting Parquet type
 * @return the Parquet type for {@code type}
 * @throws UnsupportedOperationException if the type root is not handled here
 */
private static Type convertToParquetType(String name, LogicalType type, Type.Repetition repetition) {
    switch (type.getTypeRoot()) {
        // --- BINARY-backed types ---
        case CHAR:
        case VARCHAR:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
                    .as(OriginalType.UTF8)
                    .named(name);
        case BINARY:
        case VARBINARY:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
                    .named(name);

        // --- INT32-backed types ---
        case TINYINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.INT_8)
                    .named(name);
        case SMALLINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.INT_16)
                    .named(name);
        case INTEGER:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .named(name);
        case DATE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.DATE)
                    .named(name);
        case TIME_WITHOUT_TIME_ZONE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.TIME_MILLIS)
                    .named(name);

        // --- remaining primitives ---
        case BIGINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
                    .named(name);
        case BOOLEAN:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition)
                    .named(name);
        case FLOAT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition)
                    .named(name);
        case DOUBLE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition)
                    .named(name);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition)
                    .named(name);
        case DECIMAL: {
            final DecimalType decimalType = (DecimalType) type;
            final int decimalPrecision = decimalType.getPrecision();
            final int decimalScale = decimalType.getScale();
            // The fixed byte length is derived from the precision.
            final int byteLength = computeMinBytesForDecimalPrecision(decimalPrecision);
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
                    .precision(decimalPrecision)
                    .scale(decimalScale)
                    .length(byteLength)
                    .as(OriginalType.DECIMAL)
                    .named(name);
        }

        // --- nested types ---
        case ARRAY: {
            final Type elementType = convertToParquetType(LIST_ELEMENT_NAME, ((ArrayType) type).getElementType());
            return ConversionPatterns.listOfElements(repetition, name, elementType);
        }
        case MAP: {
            final MapType asMap = (MapType) type;
            final Type keyType = convertToParquetType("key", asMap.getKeyType());
            final Type valueType = convertToParquetType("value", asMap.getValueType());
            return ConversionPatterns.mapType(repetition, name, MAP_REPEATED_NAME, keyType, valueType);
        }
        case ROW:
            return new GroupType(repetition, name, convertToParquetTypes((RowType) type));

        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) GroupType(org.apache.parquet.schema.GroupType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) MapType(org.apache.flink.table.types.logical.MapType)

Example 19 with ArrayType

use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

Class OrcFileSystemITCase, method initNestedTypesFile.

/**
 * Writes the given rows into a new temporary folder and returns that folder's absolute path.
 *
 * <p>The rows go through a bulk-format {@link StreamingFileSink} backed by an
 * {@link OrcBulkWriterFactory}, driven by a test harness. After all rows are processed, one
 * snapshot is taken and checkpoint 1 is reported as completed.
 *
 * @param data rows to write; each is expected to match the four-field nested schema below
 * @return absolute path of the output folder
 * @throws Exception if creating the folder or running the harness fails
 */
private String initNestedTypesFile(List<RowData> data) throws Exception {
    final RowType arrayElementType = new RowType(
            Collections.singletonList(new RowType.RowField("_col2_col0", new VarCharType())));
    final RowType mapValueType = new RowType(
            Arrays.asList(
                    new RowType.RowField("_col3_col0", new VarCharType()),
                    new RowType.RowField("_col3_col1", new TimestampType())));
    final LogicalType[] fieldTypes = {
        new VarCharType(),
        new IntType(),
        new ArrayType(arrayElementType),
        new MapType(new VarCharType(), mapValueType)
    };
    // Schema string handed to the vectorizer alongside the logical field types above.
    final String schema = "struct<_col0:string,_col1:int,_col2:array<struct<_col2_col0:string>>," + "_col3:map<string,struct<_col3_col0:string,_col3_col1:timestamp>>>";

    final File outDir = TEMPORARY_FOLDER.newFolder();

    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final RowDataVectorizer vectorizer = new RowDataVectorizer(schema, fieldTypes);
    final OrcBulkWriterFactory<RowData> writer =
            new OrcBulkWriterFactory<>(vectorizer, writerProps, new Configuration());

    final org.apache.flink.core.fs.Path basePath = new org.apache.flink.core.fs.Path(outDir.toURI());
    final StreamingFileSink<RowData> sink =
            StreamingFileSink.forBulkFormat(basePath, writer)
                    .withBucketCheckInterval(10000)
                    .build();

    try (OneInputStreamOperatorTestHarness<RowData, Object> harness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        harness.setup();
        harness.open();

        // Timestamps start at 1 and grow by one per element; the snapshot uses the next value.
        int time = 0;
        for (RowData row : data) {
            time++;
            harness.processElement(row, time);
        }
        time++;
        harness.snapshot(1, time);
        harness.notifyOfCompletedCheckpoint(1);
    }
    return outDir.getAbsolutePath();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) Properties(java.util.Properties) MapType(org.apache.flink.table.types.logical.MapType) IntType(org.apache.flink.table.types.logical.IntType) ArrayType(org.apache.flink.table.types.logical.ArrayType) OrcBulkWriterFactory(org.apache.flink.orc.writer.OrcBulkWriterFactory) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) RowDataVectorizer(org.apache.flink.orc.vector.RowDataVectorizer) TimestampType(org.apache.flink.table.types.logical.TimestampType) VarCharType(org.apache.flink.table.types.logical.VarCharType) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) OrcFile(org.apache.orc.OrcFile) File(java.io.File)

Example 20 with ArrayType

use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

Class ArrowReaderWriterTest, method init.

@BeforeClass
public static void init() {
    // Precisions used for each of the time and timestamp type families below.
    final int[] precisions = {0, 2, 4, 8};

    // Scalar types.
    fieldTypes.add(new TinyIntType());
    fieldTypes.add(new SmallIntType());
    fieldTypes.add(new IntType());
    fieldTypes.add(new BigIntType());
    fieldTypes.add(new BooleanType());
    fieldTypes.add(new FloatType());
    fieldTypes.add(new DoubleType());
    fieldTypes.add(new VarCharType());
    fieldTypes.add(new VarBinaryType());
    fieldTypes.add(new DecimalType(10, 3));
    fieldTypes.add(new DateType());

    // Temporal types, one family at a time so the field order stays TIME, LOCAL ZONED, TIMESTAMP.
    for (int precision : precisions) {
        fieldTypes.add(new TimeType(precision));
    }
    for (int precision : precisions) {
        fieldTypes.add(new LocalZonedTimestampType(precision));
    }
    for (int precision : precisions) {
        fieldTypes.add(new TimestampType(precision));
    }

    // Nested types: an array of strings, then a row that itself contains an array and a row.
    fieldTypes.add(new ArrayType(new VarCharType()));
    final RowType innerRow = new RowType(
            Arrays.asList(
                    new RowType.RowField("e1", new IntType()),
                    new RowType.RowField("e2", new VarCharType())));
    rowFieldType = new RowType(
            Arrays.asList(
                    new RowType.RowField("a", new IntType()),
                    new RowType.RowField("b", new VarCharType()),
                    new RowType.RowField("c", new ArrayType(new VarCharType())),
                    new RowType.RowField("d", new TimestampType(2)),
                    new RowType.RowField("e", innerRow)));
    fieldTypes.add(rowFieldType);

    // The overall row type names each collected field "f<index>".
    final int fieldCount = fieldTypes.size();
    final List<RowType.RowField> rowFields = new ArrayList<>(fieldCount);
    for (int index = 0; index < fieldCount; index++) {
        rowFields.add(new RowType.RowField("f" + index, fieldTypes.get(index)));
    }
    rowType = new RowType(rowFields);

    allocator = ArrowUtils.getRootAllocator().newChildAllocator("stdout", 0, Long.MAX_VALUE);
}
Also used : VarBinaryType(org.apache.flink.table.types.logical.VarBinaryType) BooleanType(org.apache.flink.table.types.logical.BooleanType) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) ArrayList(java.util.ArrayList) BigIntType(org.apache.flink.table.types.logical.BigIntType) RowType(org.apache.flink.table.types.logical.RowType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) TimeType(org.apache.flink.table.types.logical.TimeType) ArrayType(org.apache.flink.table.types.logical.ArrayType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) DoubleType(org.apache.flink.table.types.logical.DoubleType) DecimalType(org.apache.flink.table.types.logical.DecimalType) TimestampType(org.apache.flink.table.types.logical.TimestampType) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) VarCharType(org.apache.flink.table.types.logical.VarCharType) DateType(org.apache.flink.table.types.logical.DateType) BeforeClass(org.junit.BeforeClass)

Aggregations

ArrayType (org.apache.flink.table.types.logical.ArrayType)28 LogicalType (org.apache.flink.table.types.logical.LogicalType)18 RowType (org.apache.flink.table.types.logical.RowType)18 DecimalType (org.apache.flink.table.types.logical.DecimalType)11 MapType (org.apache.flink.table.types.logical.MapType)11 TimestampType (org.apache.flink.table.types.logical.TimestampType)10 IntType (org.apache.flink.table.types.logical.IntType)8 ArrayList (java.util.ArrayList)7 GenericRowData (org.apache.flink.table.data.GenericRowData)7 RowData (org.apache.flink.table.data.RowData)7 VarCharType (org.apache.flink.table.types.logical.VarCharType)7 LocalTime (java.time.LocalTime)6 Internal (org.apache.flink.annotation.Internal)6 LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType)6 Serializable (java.io.Serializable)5 Array (java.lang.reflect.Array)5 LocalDate (java.time.LocalDate)5 LocalDateTime (java.time.LocalDateTime)5 Map (java.util.Map)5 JsonNode (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode)5