Search in sources :

Example 1 with ConstraintEnforcer

use of org.apache.flink.table.runtime.operators.sink.ConstraintEnforcer in project flink by apache.

the class CommonExecSink method applyConstraintValidations.

/**
 * Apply an operator to filter or report error to process not-null values for not-null fields.
 */
private Transformation<RowData> applyConstraintValidations(Transformation<RowData> inputTransform, ReadableConfig config, RowType physicalRowType) {
    final ConstraintEnforcer.Builder validatorBuilder = ConstraintEnforcer.newBuilder();
    final String[] fieldNames = physicalRowType.getFieldNames().toArray(new String[0]);
    // Build NOT NULL enforcer
    final int[] notNullFieldIndices = getNotNullFieldIndices(physicalRowType);
    if (notNullFieldIndices.length > 0) {
        final ExecutionConfigOptions.NotNullEnforcer notNullEnforcer = config.get(ExecutionConfigOptions.TABLE_EXEC_SINK_NOT_NULL_ENFORCER);
        final List<String> notNullFieldNames = Arrays.stream(notNullFieldIndices).mapToObj(idx -> fieldNames[idx]).collect(Collectors.toList());
        validatorBuilder.addNotNullConstraint(notNullEnforcer, notNullFieldIndices, notNullFieldNames, fieldNames);
    }
    final ExecutionConfigOptions.TypeLengthEnforcer typeLengthEnforcer = config.get(ExecutionConfigOptions.TABLE_EXEC_SINK_TYPE_LENGTH_ENFORCER);
    // Build CHAR/VARCHAR length enforcer
    final List<ConstraintEnforcer.FieldInfo> charFieldInfo = getFieldInfoForLengthEnforcer(physicalRowType, LengthEnforcerType.CHAR);
    if (!charFieldInfo.isEmpty()) {
        final List<String> charFieldNames = charFieldInfo.stream().map(cfi -> fieldNames[cfi.fieldIdx()]).collect(Collectors.toList());
        validatorBuilder.addCharLengthConstraint(typeLengthEnforcer, charFieldInfo, charFieldNames, fieldNames);
    }
    // Build BINARY/VARBINARY length enforcer
    final List<ConstraintEnforcer.FieldInfo> binaryFieldInfo = getFieldInfoForLengthEnforcer(physicalRowType, LengthEnforcerType.BINARY);
    if (!binaryFieldInfo.isEmpty()) {
        final List<String> binaryFieldNames = binaryFieldInfo.stream().map(cfi -> fieldNames[cfi.fieldIdx()]).collect(Collectors.toList());
        validatorBuilder.addBinaryLengthConstraint(typeLengthEnforcer, binaryFieldInfo, binaryFieldNames, fieldNames);
    }
    ConstraintEnforcer constraintEnforcer = validatorBuilder.build();
    if (constraintEnforcer != null) {
        return ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationMeta(CONSTRAINT_VALIDATOR_TRANSFORMATION, constraintEnforcer.getOperatorName(), "ConstraintEnforcer", config), constraintEnforcer, getInputTypeInfo(), inputTransform.getParallelism());
    } else {
        // there are no not-null fields, just skip adding the enforcer operator
        return inputTransform;
    }
}
Also used : TransformationMetadata(org.apache.flink.table.planner.plan.nodes.exec.utils.TransformationMetadata) Arrays(java.util.Arrays) InputProperty(org.apache.flink.table.planner.plan.nodes.exec.InputProperty) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) ExecNode(org.apache.flink.table.planner.plan.nodes.exec.ExecNode) CharType(org.apache.flink.table.types.logical.CharType) KeySelectorUtil(org.apache.flink.table.planner.plan.utils.KeySelectorUtil) InternalSerializers(org.apache.flink.table.runtime.typeutils.InternalSerializers) ConstraintEnforcer(org.apache.flink.table.runtime.operators.sink.ConstraintEnforcer) OutputFormat(org.apache.flink.api.common.io.OutputFormat) SinkUpsertMaterializer(org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializer) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) StateConfigUtil(org.apache.flink.table.runtime.util.StateConfigUtil) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) Collectors(java.util.stream.Collectors) JsonProperty(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty) SimpleOperatorFactory(org.apache.flink.streaming.api.operators.SimpleOperatorFactory) List(java.util.List) InternalTypeInfo(org.apache.flink.table.runtime.typeutils.InternalTypeInfo) LogicalType(org.apache.flink.table.types.logical.LogicalType) DataStreamSinkProvider(org.apache.flink.table.connector.sink.DataStreamSinkProvider) LegacySinkTransformation(org.apache.flink.streaming.api.transformations.LegacySinkTransformation) Optional(java.util.Optional) ExecNodeBase(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeBase) KeyGroupRangeAssignment(org.apache.flink.runtime.state.KeyGroupRangeAssignment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) SinkRuntimeProvider(org.apache.flink.table.connector.sink.DynamicTableSink.SinkRuntimeProvider) IntStream(java.util.stream.IntStream) BinaryType(org.apache.flink.table.types.logical.BinaryType) ParallelismProvider(org.apache.flink.table.connector.ParallelismProvider) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) MultipleTransformationTranslator(org.apache.flink.table.planner.plan.nodes.exec.MultipleTransformationTranslator) StreamRecordTimestampInserter(org.apache.flink.table.runtime.operators.sink.StreamRecordTimestampInserter) TransformationSinkProvider(org.apache.flink.table.planner.connectors.TransformationSinkProvider) RowType(org.apache.flink.table.types.logical.RowType) ArrayList(java.util.ArrayList) SinkV2Provider(org.apache.flink.table.connector.sink.SinkV2Provider) OutputFormatSinkFunction(org.apache.flink.streaming.api.functions.sink.OutputFormatSinkFunction) DynamicTableSinkSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.DynamicTableSinkSpec) ExecNodeUtil(org.apache.flink.table.planner.plan.nodes.exec.utils.ExecNodeUtil) ReadableConfig(org.apache.flink.configuration.ReadableConfig) SinkFunctionProvider(org.apache.flink.table.connector.sink.SinkFunctionProvider) DataStreamSink(org.apache.flink.streaming.api.datastream.DataStreamSink) ExecNodeContext(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeContext) RowData(org.apache.flink.table.data.RowData) ProviderContext(org.apache.flink.table.connector.ProviderContext) SinkOperator(org.apache.flink.table.runtime.operators.sink.SinkOperator) TableException(org.apache.flink.table.api.TableException) SinkProvider(org.apache.flink.table.connector.sink.SinkProvider) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) DataStream(org.apache.flink.streaming.api.datastream.DataStream) OutputFormatProvider(org.apache.flink.table.connector.sink.OutputFormatProvider) KeyGroupStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner) EqualiserCodeGenerator(org.apache.flink.table.planner.codegen.EqualiserCodeGenerator) SinkRuntimeProviderContext(org.apache.flink.table.runtime.connector.sink.SinkRuntimeProviderContext) RowKind(org.apache.flink.types.RowKind) StreamExecNode(org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecNode) GeneratedRecordEqualiser(org.apache.flink.table.runtime.generated.GeneratedRecordEqualiser) Transformation(org.apache.flink.api.dag.Transformation) InputTypeConfigurable(org.apache.flink.api.java.typeutils.InputTypeConfigurable) ExecutionConfigOptions(org.apache.flink.table.api.config.ExecutionConfigOptions) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) LogicalTypeChecks(org.apache.flink.table.types.logical.utils.LogicalTypeChecks) ExecutionConfigOptions(org.apache.flink.table.api.config.ExecutionConfigOptions) ConstraintEnforcer(org.apache.flink.table.runtime.operators.sink.ConstraintEnforcer)

Aggregations

ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 Optional (java.util.Optional)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 OutputFormat (org.apache.flink.api.common.io.OutputFormat)1 Transformation (org.apache.flink.api.dag.Transformation)1 InputTypeConfigurable (org.apache.flink.api.java.typeutils.InputTypeConfigurable)1 ReadableConfig (org.apache.flink.configuration.ReadableConfig)1 KeyGroupRangeAssignment (org.apache.flink.runtime.state.KeyGroupRangeAssignment)1 JsonProperty (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty)1 DataStream (org.apache.flink.streaming.api.datastream.DataStream)1 DataStreamSink (org.apache.flink.streaming.api.datastream.DataStreamSink)1 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)1 OutputFormatSinkFunction (org.apache.flink.streaming.api.functions.sink.OutputFormatSinkFunction)1 SinkFunction (org.apache.flink.streaming.api.functions.sink.SinkFunction)1 SimpleOperatorFactory (org.apache.flink.streaming.api.operators.SimpleOperatorFactory)1 LegacySinkTransformation (org.apache.flink.streaming.api.transformations.LegacySinkTransformation)1 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)1