Example 6 with GenericRowData

Use of org.apache.flink.table.data.GenericRowData in project flink by apache.

The class CsvToRowDataConverters, method createRowConverter:

public CsvToRowDataConverter createRowConverter(RowType rowType, boolean isTopLevel) {
    final CsvToRowDataConverter[] fieldConverters =
            rowType.getFields().stream()
                    .map(RowType.RowField::getType)
                    .map(this::createNullableConverter)
                    .toArray(CsvToRowDataConverter[]::new);
    final String[] fieldNames = rowType.getFieldNames().toArray(new String[0]);
    final int arity = fieldNames.length;
    return jsonNode -> {
        int nodeSize = jsonNode.size();
        if (nodeSize != 0) {
            validateArity(arity, nodeSize, ignoreParseErrors);
        } else {
            return null;
        }
        GenericRowData row = new GenericRowData(arity);
        for (int i = 0; i < arity; i++) {
            JsonNode field;
            // Jackson only supports mapping by name in the first level
            if (isTopLevel) {
                field = jsonNode.get(fieldNames[i]);
            } else {
                field = jsonNode.get(i);
            }
            try {
                if (field == null) {
                    row.setField(i, null);
                } else {
                    row.setField(i, fieldConverters[i].convert(field));
                }
            } catch (Throwable t) {
                throw new RuntimeException(String.format("Fail to deserialize at field: %s.", fieldNames[i]), t);
            }
        }
        return row;
    };
}
Also used : Array(java.lang.reflect.Array) GenericArrayData(org.apache.flink.table.data.GenericArrayData) LocalDateTime(java.time.LocalDateTime) SQL_TIMESTAMP_FORMAT(org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_FORMAT) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) RowType(org.apache.flink.table.types.logical.RowType) ArrayNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode) BigDecimal(java.math.BigDecimal) SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT(org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT) GenericRowData(org.apache.flink.table.data.GenericRowData) DecimalType(org.apache.flink.table.types.logical.DecimalType) LocalTime(java.time.LocalTime) TimeType(org.apache.flink.table.types.logical.TimeType) LogicalTypeUtils(org.apache.flink.table.types.logical.utils.LogicalTypeUtils) RowData(org.apache.flink.table.data.RowData) TimestampData(org.apache.flink.table.data.TimestampData) DecimalData(org.apache.flink.table.data.DecimalData) IOException(java.io.IOException) ArrayType(org.apache.flink.table.types.logical.ArrayType) Serializable(java.io.Serializable) Date(java.sql.Date) Converter(org.apache.flink.formats.common.Converter) StringData(org.apache.flink.table.data.StringData) LogicalType(org.apache.flink.table.types.logical.LogicalType) DateTimeFormatter(java.time.format.DateTimeFormatter) Internal(org.apache.flink.annotation.Internal)
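A minimal sketch (not from the Flink source; the field names and values are illustrative) of the kind of row the converter above produces:

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;

public class GenericRowDataSketch {
    public static void main(String[] args) {
        // Roughly what converting a CSV line such as "a,10" against
        // ROW<name STRING, cnt INT> would yield.
        GenericRowData row = new GenericRowData(2);
        row.setField(0, StringData.fromString("a")); // strings are stored as StringData
        row.setField(1, 10);                         // INT is stored as java.lang.Integer
        // Fields left unset (or set to null, as in the converter) simply stay null.
        System.out.println(row); // prints +I(a,10)
    }
}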

Example 7 with GenericRowData

Use of org.apache.flink.table.data.GenericRowData in project flink by apache.

The class HiveLookupJoinITCase, method testPartitionFetcherAndReader:

@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partition
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql(
                    "insert overwrite partition_table values "
                            + "(1,'a',08,2019,'08','01'),"
                            + "(1,'a',10,2020,'08','31'),"
                            + "(2,'a',21,2020,'08','31'),"
                            + "(2,'b',22,2020,'08','31'),"
                            + "(3,'c',33,2020,'09','31')")
            .await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction = getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer =
            InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
    while ((row = reader.read(reuse)) != null) {
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
Also used : PartitionReader(org.apache.flink.connector.file.table.PartitionReader) PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN) Arrays(java.util.Arrays) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) BeforeClass(org.junit.BeforeClass) PARTITION_TIME_EXTRACTOR_KIND(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_KIND) CatalogTable(org.apache.flink.table.catalog.CatalogTable) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) ArrayList(java.util.ArrayList) GenericRowData(org.apache.flink.table.data.GenericRowData) InternalSerializers(org.apache.flink.table.runtime.typeutils.InternalSerializers) Duration(java.time.Duration) DynamicTableSourceFactory(org.apache.flink.table.factories.DynamicTableSourceFactory) STREAMING_SOURCE_PARTITION_ORDER(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER) TableEnvironment(org.apache.flink.table.api.TableEnvironment) AfterClass(org.junit.AfterClass) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) RowData(org.apache.flink.table.data.RowData) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) TestValuesTableFactory(org.apache.flink.table.planner.factories.TestValuesTableFactory) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) TableImpl(org.apache.flink.table.api.internal.TableImpl) TestCollectionTableFactory(org.apache.flink.table.planner.factories.utils.TestCollectionTableFactory) HiveTestUtils(org.apache.flink.table.catalog.hive.HiveTestUtils) STREAMING_SOURCE_ENABLE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_ENABLE) List(java.util.List) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) SqlDialect(org.apache.flink.table.api.SqlDialect) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) Row(org.apache.flink.types.Row) Comparator(java.util.Comparator) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) TableEnvironmentInternal(org.apache.flink.table.api.internal.TableEnvironmentInternal) Assert.assertEquals(org.junit.Assert.assertEquals)
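A minimal sketch (not part of the test above) of why the reuse-and-copy pattern matters: a reader may refill the same reuse row on every read(), so each buffered row must be deep-copied before the next call.

import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.runtime.typeutils.InternalSerializers;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;

public class ReuseCopySketch {
    public static void main(String[] args) {
        TypeSerializer<RowData> serializer =
                InternalSerializers.create(RowType.of(new IntType(), new IntType()));
        GenericRowData reuse = new GenericRowData(2);
        reuse.setField(0, 1);
        reuse.setField(1, 2);
        RowData copy = serializer.copy(reuse); // independent snapshot of the current contents
        reuse.setField(0, 99);                 // simulate the reader refilling the reuse row
        System.out.println(copy.getInt(0));    // still prints 1
    }
}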

Example 8 with GenericRowData

Use of org.apache.flink.table.data.GenericRowData in project flink by apache.

The class MaxwellJsonDeserializationSchema, method emitRow:

private void emitRow(GenericRowData rootRow, GenericRowData physicalRow, Collector<RowData> out) {
    // shortcut in case no output projection is required
    if (!hasMetadata) {
        out.collect(physicalRow);
        return;
    }
    final int metadataArity = metadataConverters.length;
    final GenericRowData producedRow = new GenericRowData(physicalRow.getRowKind(), fieldCount + metadataArity);
    for (int physicalPos = 0; physicalPos < fieldCount; physicalPos++) {
        producedRow.setField(physicalPos, physicalRow.getField(physicalPos));
    }
    for (int metadataPos = 0; metadataPos < metadataArity; metadataPos++) {
        producedRow.setField(fieldCount + metadataPos, metadataConverters[metadataPos].convert(rootRow));
    }
    out.collect(producedRow);
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData)
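A minimal sketch (not from the Flink source) of the detail the projection above relies on: the two-argument GenericRowData constructor carries the changelog RowKind over to the produced row, so e.g. a DELETE stays a DELETE.

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.types.RowKind;

public class RowKindSketch {
    public static void main(String[] args) {
        GenericRowData physical = new GenericRowData(RowKind.DELETE, 1);
        physical.setField(0, 42);
        // Append one metadata column while preserving the row kind:
        GenericRowData produced = new GenericRowData(physical.getRowKind(), 2);
        produced.setField(0, physical.getField(0));
        produced.setField(1, StringData.fromString("meta")); // illustrative metadata value
        System.out.println(produced); // prints -D(42,meta)
    }
}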

Example 9 with GenericRowData

Use of org.apache.flink.table.data.GenericRowData in project flink by apache.

The class JsonRowDataSerDeSchemaTest, method testSerializationMapNullKey:

@Test
public void testSerializationMapNullKey() throws Exception {
    RowType rowType = (RowType) ROW(FIELD("nestedMap", MAP(STRING(), MAP(STRING(), INT())))).getLogicalType();
    // test data
    // use a LinkedHashMap to preserve entry order
    Map<StringData, Integer> map = new LinkedHashMap<>();
    map.put(StringData.fromString("no-null key"), 1);
    map.put(StringData.fromString(null), 2);
    GenericMapData mapData = new GenericMapData(map);
    Map<StringData, GenericMapData> nestedMap = new LinkedHashMap<>();
    nestedMap.put(StringData.fromString("no-null key"), mapData);
    nestedMap.put(StringData.fromString(null), mapData);
    GenericMapData nestedMapData = new GenericMapData(nestedMap);
    GenericRowData rowData = new GenericRowData(1);
    rowData.setField(0, nestedMapData);
    JsonRowDataSerializationSchema serializationSchema1 =
            new JsonRowDataSerializationSchema(
                    rowType, TimestampFormat.SQL, JsonFormatOptions.MapNullKeyMode.FAIL, "null", true);
    // expected exception message for serializationSchema1
    String errorMessage1 =
            "JSON format doesn't support to serialize map data with null keys."
                    + " You can drop null key entries or encode null in literals by specifying map-null-key.mode option.";
    JsonRowDataSerializationSchema serializationSchema2 =
            new JsonRowDataSerializationSchema(
                    rowType, TimestampFormat.SQL, JsonFormatOptions.MapNullKeyMode.DROP, "null", true);
    // expected result for serializationSchema2
    String expectResult2 = "{\"nestedMap\":{\"no-null key\":{\"no-null key\":1}}}";
    JsonRowDataSerializationSchema serializationSchema3 =
            new JsonRowDataSerializationSchema(
                    rowType, TimestampFormat.SQL, JsonFormatOptions.MapNullKeyMode.LITERAL, "nullKey", true);
    // expected result for serializationSchema3
    String expectResult3 =
            "{\"nestedMap\":{\"no-null key\":{\"no-null key\":1,\"nullKey\":2},\"nullKey\":{\"no-null key\":1,\"nullKey\":2}}}";
    try {
        // MapNullKeyMode.FAIL should throw an exception
        serializationSchema1.serialize(rowData);
        Assert.fail("expecting exception message: " + errorMessage1);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage1));
    }
    // MapNullKeyMode.DROP silently drops null-key entries
    byte[] actual2 = serializationSchema2.serialize(rowData);
    assertEquals(expectResult2, new String(actual2));
    // MapNullKeyMode.LITERAL encodes null keys with the configured literal
    byte[] actual3 = serializationSchema3.serialize(rowData);
    assertEquals(expectResult3, new String(actual3));
}
Also used : GenericMapData(org.apache.flink.table.data.GenericMapData) RowType(org.apache.flink.table.types.logical.RowType) GenericRowData(org.apache.flink.table.data.GenericRowData) StringData(org.apache.flink.table.data.StringData) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)
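A minimal sketch (outside the test) of the trick the test relies on: StringData.fromString(null) returns null, so the maps above really do contain null keys for the serializer to handle.

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.flink.table.data.GenericMapData;
import org.apache.flink.table.data.StringData;

public class NullKeySketch {
    public static void main(String[] args) {
        Map<StringData, Integer> map = new LinkedHashMap<>();
        map.put(StringData.fromString("k"), 1);
        map.put(StringData.fromString(null), 2); // a genuine null key
        GenericMapData mapData = new GenericMapData(map);
        System.out.println(mapData.size()); // prints 2
    }
}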

Example 10 with GenericRowData

Use of org.apache.flink.table.data.GenericRowData in project flink by apache.

The class RowDataVectorizer, method convert:

/**
 * Converts an ArrayData into a RowData so that {@link RowDataVectorizer#setColumn(int,
 * ColumnVector, LogicalType, RowData, int)} can be called recursively on the array elements.
 *
 * @param arrayData the input ArrayData.
 * @param arrayFieldType the LogicalType of the input ArrayData's elements.
 * @return the converted RowData.
 */
private static RowData convert(ArrayData arrayData, LogicalType arrayFieldType) {
    GenericRowData rowData = new GenericRowData(arrayData.size());
    ArrayData.ElementGetter elementGetter = ArrayData.createElementGetter(arrayFieldType);
    for (int i = 0; i < arrayData.size(); i++) {
        rowData.setField(i, elementGetter.getElementOrNull(arrayData, i));
    }
    return rowData;
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) ArrayData(org.apache.flink.table.data.ArrayData)
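A minimal sketch (not from the Flink source) exercising the same element-getter loop on a concrete INT array:

import org.apache.flink.table.data.ArrayData;
import org.apache.flink.table.data.GenericArrayData;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.types.logical.IntType;

public class ArrayToRowSketch {
    public static void main(String[] args) {
        ArrayData arrayData = new GenericArrayData(new Integer[] {1, null, 3});
        ArrayData.ElementGetter elementGetter = ArrayData.createElementGetter(new IntType());
        GenericRowData rowData = new GenericRowData(arrayData.size());
        for (int i = 0; i < arrayData.size(); i++) {
            // getElementOrNull handles null elements, so nullable arrays are safe
            rowData.setField(i, elementGetter.getElementOrNull(arrayData, i));
        }
        System.out.println(rowData); // prints +I(1,null,3)
    }
}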

Aggregations

GenericRowData (org.apache.flink.table.data.GenericRowData): 94
RowData (org.apache.flink.table.data.RowData): 32
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 16
Test (org.junit.Test): 14
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 13
RowType (org.apache.flink.table.types.logical.RowType): 13
IOException (java.io.IOException): 11
ArrayList (java.util.ArrayList): 11
IntType (org.apache.flink.table.types.logical.IntType): 11
List (java.util.List): 9
LogicalType (org.apache.flink.table.types.logical.LogicalType): 9
GenericArrayData (org.apache.flink.table.data.GenericArrayData): 6
StringData (org.apache.flink.table.data.StringData): 6
Arrays (java.util.Arrays): 5
HashMap (java.util.HashMap): 5
OutputStream (java.io.OutputStream): 4
PrintStream (java.io.PrintStream): 4
Collections (java.util.Collections): 4
Random (java.util.Random): 4
Consumer (java.util.function.Consumer): 4