Example 26 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

From the class DebeziumJsonFormatFactory, method createEncodingFormat:

@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    validateEncodingFormatOptions(formatOptions);
    TimestampFormat timestampFormat = JsonFormatOptionsUtil.getTimestampFormat(formatOptions);
    JsonFormatOptions.MapNullKeyMode mapNullKeyMode = JsonFormatOptionsUtil.getMapNullKeyMode(formatOptions);
    String mapNullKeyLiteral = formatOptions.get(JSON_MAP_NULL_KEY_LITERAL);
    final boolean encodeDecimalAsPlainNumber = formatOptions.get(ENCODE_DECIMAL_AS_PLAIN_NUMBER);
    return new EncodingFormat<SerializationSchema<RowData>>() {

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.newBuilder()
                    .addContainedKind(RowKind.INSERT)
                    .addContainedKind(RowKind.UPDATE_BEFORE)
                    .addContainedKind(RowKind.UPDATE_AFTER)
                    .addContainedKind(RowKind.DELETE)
                    .build();
        }

        @Override
        public SerializationSchema<RowData> createRuntimeEncoder(DynamicTableSink.Context context, DataType consumedDataType) {
            final RowType rowType = (RowType) consumedDataType.getLogicalType();
            return new DebeziumJsonSerializationSchema(
                    rowType, timestampFormat, mapNullKeyMode, mapNullKeyLiteral, encodeDecimalAsPlainNumber);
        }
    };
}
Also used: EncodingFormat (org.apache.flink.table.connector.format.EncodingFormat), JsonFormatOptions (org.apache.flink.formats.json.JsonFormatOptions), RowData (org.apache.flink.table.data.RowData), DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType), TimestampFormat (org.apache.flink.formats.common.TimestampFormat)
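
For orientation, here is a minimal hedged sketch of building the serializer directly with the same five constructor arguments the anonymous EncodingFormat passes in createRuntimeEncoder. The schema fields ("id", "name") and the chosen option values are illustrative assumptions, not values from the source.

// Hedged sketch: direct construction mirroring the factory above.
// Field names and option values below are made up for illustration.
RowType rowType =
        (RowType) DataTypes.ROW(
                        DataTypes.FIELD("id", DataTypes.BIGINT()),
                        DataTypes.FIELD("name", DataTypes.STRING()))
                .getLogicalType();
SerializationSchema<RowData> serializer =
        new DebeziumJsonSerializationSchema(
                rowType,
                TimestampFormat.SQL,                   // timestamp rendering
                JsonFormatOptions.MapNullKeyMode.FAIL, // behavior on null map keys
                "null",                                // literal used in LITERAL mode
                true);                                 // encode decimals as plain numbers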

Example 27 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

From the class MaxwellJsonDeserializationSchema, method createJsonRowType:

// --------------------------------------------------------------------------------------------
private static RowType createJsonRowType(DataType physicalDataType, List<ReadableMetadata> readableMetadata) {
    DataType root =
            DataTypes.ROW(
                    DataTypes.FIELD("data", physicalDataType),
                    DataTypes.FIELD("old", physicalDataType),
                    DataTypes.FIELD("type", DataTypes.STRING()));
    // append fields that are required for reading metadata in the root
    final List<DataTypes.Field> rootMetadataFields =
            readableMetadata.stream()
                    .map(m -> m.requiredJsonField)
                    .distinct()
                    .collect(Collectors.toList());
    return (RowType) DataTypeUtils.appendRowFields(root, rootMetadataFields).getLogicalType();
}
Also used: DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType)
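
To make the produced shape concrete, a small hedged illustration: for a physical type ROW<id INT> and no readable metadata, the method yields the row type shown in the trailing comment. (createJsonRowType is private, so this is for reasoning only; the field name "id" is hypothetical.)

// Illustration only; assumes access to the private method above.
DataType physical = DataTypes.ROW(DataTypes.FIELD("id", DataTypes.INT()));
RowType jsonRowType = createJsonRowType(physical, Collections.emptyList());
// jsonRowType is now: ROW<`data` ROW<`id` INT>, `old` ROW<`id` INT>, `type` STRING>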

Example 28 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

From the class OrcNoHiveColumnarRowInputFormat, method createPartitionedFormat:

/**
 * Creates a partitioned {@link OrcColumnarRowInputFormat}; the partition columns can be
 * generated from the split.
 */
public static <SplitT extends FileSourceSplit>
        OrcColumnarRowInputFormat<VectorizedRowBatch, SplitT> createPartitionedFormat(
                Configuration hadoopConfig,
                RowType tableType,
                List<String> partitionKeys,
                PartitionFieldExtractor<SplitT> extractor,
                int[] selectedFields,
                List<OrcFilters.Predicate> conjunctPredicates,
                int batchSize,
                Function<RowType, TypeInformation<RowData>> rowTypeInfoFactory) {
    // TODO FLINK-25113 all this partition keys code should be pruned from the orc format,
    // because now FileSystemTableSource uses FileInfoExtractorBulkFormat for reading partition
    // keys.
    String[] tableFieldNames = tableType.getFieldNames().toArray(new String[0]);
    LogicalType[] tableFieldTypes = tableType.getChildren().toArray(new LogicalType[0]);
    List<String> orcFieldNames = getNonPartNames(tableFieldNames, partitionKeys);
    int[] orcSelectedFields = getSelectedOrcFields(tableFieldNames, selectedFields, orcFieldNames);
    ColumnBatchFactory<VectorizedRowBatch, SplitT> batchGenerator = (SplitT split, VectorizedRowBatch rowBatch) -> {
        // create and initialize the row batch
        ColumnVector[] vectors = new ColumnVector[selectedFields.length];
        for (int i = 0; i < vectors.length; i++) {
            String name = tableFieldNames[selectedFields[i]];
            LogicalType type = tableFieldTypes[selectedFields[i]];
            vectors[i] =
                    partitionKeys.contains(name)
                            ? createFlinkVectorFromConstant(
                                    type, extractor.extract(split, name, type), batchSize)
                            : createFlinkVector(rowBatch.cols[orcFieldNames.indexOf(name)]);
        }
        return new VectorizedColumnBatch(vectors);
    };
    return new OrcColumnarRowInputFormat<>(
            new OrcNoHiveShim(),
            hadoopConfig,
            convertToOrcTypeWithPart(tableFieldNames, tableFieldTypes, partitionKeys),
            orcSelectedFields,
            conjunctPredicates,
            batchSize,
            batchGenerator,
            rowTypeInfoFactory.apply(
                    new RowType(
                            Arrays.stream(selectedFields)
                                    .mapToObj(i -> tableType.getFields().get(i))
                                    .collect(Collectors.toList()))));
}
Also used: LogicalType (org.apache.flink.table.types.logical.LogicalType), RowType (org.apache.flink.table.types.logical.RowType), VectorizedRowBatch (org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch), VectorizedColumnBatch (org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch), OrcColumnarRowInputFormat (org.apache.flink.orc.OrcColumnarRowInputFormat), OrcNoHiveShim (org.apache.flink.orc.nohive.shim.OrcNoHiveShim)
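
A hedged call-site sketch follows. PartitionFieldExtractor.forFileSystem as the extractor and an InternalTypeInfo::of method reference as the row-type-info factory are assumptions about available helpers; the partition key "dt", the projected indices, and the batch size are illustrative.

// Hedged sketch of invoking createPartitionedFormat; all concrete values are illustrative.
OrcColumnarRowInputFormat<VectorizedRowBatch, FileSourceSplit> format =
        createPartitionedFormat(
                hadoopConf,                        // org.apache.hadoop.conf.Configuration
                tableRowType,                      // full table schema as a RowType
                Collections.singletonList("dt"),   // partition column(s)
                PartitionFieldExtractor.forFileSystem("__DEFAULT_PARTITION__"), // assumed helper
                new int[] {0, 1},                  // projected field indices
                Collections.emptyList(),           // no pushed-down ORC predicates
                2048,                              // rows per vectorized batch
                InternalTypeInfo::of);             // RowType -> TypeInformation<RowData>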

Example 29 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

From the class JsonRowDataSerDeSchemaTest, method testSerializationMapNullKey:

@Test
public void testSerializationMapNullKey() throws Exception {
    RowType rowType = (RowType) ROW(FIELD("nestedMap", MAP(STRING(), MAP(STRING(), INT())))).getLogicalType();
    // test data; use LinkedHashMap to keep the entry order deterministic
    Map<StringData, Integer> map = new LinkedHashMap<>();
    map.put(StringData.fromString("no-null key"), 1);
    map.put(StringData.fromString(null), 2);
    GenericMapData mapData = new GenericMapData(map);
    Map<StringData, GenericMapData> nestedMap = new LinkedHashMap<>();
    nestedMap.put(StringData.fromString("no-null key"), mapData);
    nestedMap.put(StringData.fromString(null), mapData);
    GenericMapData nestedMapData = new GenericMapData(nestedMap);
    GenericRowData rowData = new GenericRowData(1);
    rowData.setField(0, nestedMapData);
    JsonRowDataSerializationSchema serializationSchema1 =
            new JsonRowDataSerializationSchema(
                    rowType, TimestampFormat.SQL, JsonFormatOptions.MapNullKeyMode.FAIL, "null", true);
    // expected failure message for serializationSchema1
    String errorMessage1 =
            "JSON format doesn't support to serialize map data with null keys."
                    + " You can drop null key entries or encode null in literals by specifying map-null-key.mode option.";
    JsonRowDataSerializationSchema serializationSchema2 =
            new JsonRowDataSerializationSchema(
                    rowType, TimestampFormat.SQL, JsonFormatOptions.MapNullKeyMode.DROP, "null", true);
    // expected result for serializationSchema2
    String expectResult2 = "{\"nestedMap\":{\"no-null key\":{\"no-null key\":1}}}";
    JsonRowDataSerializationSchema serializationSchema3 =
            new JsonRowDataSerializationSchema(
                    rowType, TimestampFormat.SQL, JsonFormatOptions.MapNullKeyMode.LITERAL, "nullKey", true);
    // expected result for serializationSchema3
    String expectResult3 =
            "{\"nestedMap\":{\"no-null key\":{\"no-null key\":1,\"nullKey\":2},\"nullKey\":{\"no-null key\":1,\"nullKey\":2}}}";
    try {
        // FAIL mode: serializing a null map key should throw
        serializationSchema1.serialize(rowData);
        Assert.fail("expecting exception message: " + errorMessage1);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage1));
    }
    // DROP mode: entries with null keys are silently dropped
    byte[] actual2 = serializationSchema2.serialize(rowData);
    assertEquals(expectResult2, new String(actual2));
    // LITERAL mode: null keys are replaced with the configured literal
    byte[] actual3 = serializationSchema3.serialize(rowData);
    assertEquals(expectResult3, new String(actual3));
}
Also used: GenericMapData (org.apache.flink.table.data.GenericMapData), RowType (org.apache.flink.table.types.logical.RowType), GenericRowData (org.apache.flink.table.data.GenericRowData), StringData (org.apache.flink.table.data.StringData), LinkedHashMap (java.util.LinkedHashMap), Test (org.junit.Test)
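
The same three behaviors are normally selected through format options rather than direct construction. A hedged sketch, assuming the 'map-null-key.mode' and 'map-null-key.literal' option keys and reusing the JsonFormatOptionsUtil helper seen in Example 26:

// Hedged sketch: resolving the mode from format options (option keys are an assumption).
Configuration formatOptions = new Configuration();
formatOptions.setString("map-null-key.mode", "LITERAL"); // FAIL | DROP | LITERAL
formatOptions.setString("map-null-key.literal", "nullKey");
JsonFormatOptions.MapNullKeyMode mode = JsonFormatOptionsUtil.getMapNullKeyMode(formatOptions);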

Example 30 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

From the class JsonRowDataSerDeSchemaTest, method testSerializationDecimalEncode:

@Test
public void testSerializationDecimalEncode() throws Exception {
    RowType schema =
            (RowType) ROW(
                            FIELD("decimal1", DECIMAL(9, 6)),
                            FIELD("decimal2", DECIMAL(20, 0)),
                            FIELD("decimal3", DECIMAL(11, 9)))
                    .getLogicalType();
    TypeInformation<RowData> resultTypeInfo = InternalTypeInfo.of(schema);
    JsonRowDataDeserializationSchema deserializer =
            new JsonRowDataDeserializationSchema(
                    schema, resultTypeInfo, false, false, TimestampFormat.ISO_8601);
    // encodeDecimalAsPlainNumber = true: decimals are written without scientific notation
    JsonRowDataSerializationSchema plainDecimalSerializer =
            new JsonRowDataSerializationSchema(
                    schema, TimestampFormat.ISO_8601, JsonFormatOptions.MapNullKeyMode.LITERAL, "null", true);
    // encodeDecimalAsPlainNumber = false: large/small decimals may use scientific notation
    JsonRowDataSerializationSchema scientificDecimalSerializer =
            new JsonRowDataSerializationSchema(
                    schema, TimestampFormat.ISO_8601, JsonFormatOptions.MapNullKeyMode.LITERAL, "null", false);
    String plainDecimalJson = "{\"decimal1\":123.456789,\"decimal2\":454621864049246170,\"decimal3\":0.000000027}";
    RowData rowData = deserializer.deserialize(plainDecimalJson.getBytes());
    String plainDecimalResult = new String(plainDecimalSerializer.serialize(rowData));
    assertEquals(plainDecimalJson, plainDecimalResult);
    String scientificDecimalJson = "{\"decimal1\":123.456789,\"decimal2\":4.5462186404924617E+17,\"decimal3\":2.7E-8}";
    String scientificDecimalResult = new String(scientificDecimalSerializer.serialize(rowData));
    assertEquals(scientificDecimalJson, scientificDecimalResult);
}
Also used: GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), RowType (org.apache.flink.table.types.logical.RowType), Test (org.junit.Test)
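
The flag's effect mirrors the two textual forms of java.math.BigDecimal; a minimal standalone sketch of the difference, with no Flink classes involved:

// Plain vs. scientific rendering, matching the two expected JSON strings above.
java.math.BigDecimal d = new java.math.BigDecimal("4.5462186404924617E+17");
System.out.println(d.toPlainString()); // 454621864049246170
System.out.println(d.toString());      // 4.5462186404924617E+17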

Aggregations

RowType (org.apache.flink.table.types.logical.RowType): 212 usages
RowData (org.apache.flink.table.data.RowData): 108 usages
LogicalType (org.apache.flink.table.types.logical.LogicalType): 59 usages
DataType (org.apache.flink.table.types.DataType): 57 usages
Transformation (org.apache.flink.api.dag.Transformation): 50 usages
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 46 usages
TableException (org.apache.flink.table.api.TableException): 37 usages
Test (org.junit.Test): 36 usages
GenericRowData (org.apache.flink.table.data.GenericRowData): 33 usages
ArrayList (java.util.ArrayList): 28 usages
List (java.util.List): 28 usages
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 26 usages
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 25 usages
CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext): 22 usages
TableConfig (org.apache.flink.table.api.TableConfig): 19 usages
ArrayType (org.apache.flink.table.types.logical.ArrayType): 19 usages
TimestampType (org.apache.flink.table.types.logical.TimestampType): 19 usages
DecimalType (org.apache.flink.table.types.logical.DecimalType): 17 usages
Collections (java.util.Collections): 16 usages
AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList): 16 usages