
Example 1 with ScanTableSource

Use of org.apache.flink.table.connector.source.ScanTableSource in project flink by apache.

From the class DynamicSourceUtils, method prepareDynamicSource.

/**
 * Prepares the given {@link DynamicTableSource}. It checks whether the source is compatible with
 * the given schema and applies initial parameters.
 */
public static void prepareDynamicSource(String tableDebugName, ResolvedCatalogTable table, DynamicTableSource source, boolean isBatchMode, ReadableConfig config) {
    final ResolvedSchema schema = table.getResolvedSchema();
    validateAndApplyMetadata(tableDebugName, schema, source);
    if (source instanceof ScanTableSource) {
        validateScanSource(tableDebugName, schema, (ScanTableSource) source, isBatchMode, config);
    }
    // lookup table source is validated in LookupJoin node
}
Also used: ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema)
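
For context, here is a minimal hypothetical ScanTableSource that would pass the validation above: it declares an insert-only changelog, which is accepted in both batch and streaming mode. The class name and the no-op source function are illustrative only, not part of Flink.

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;
import org.apache.flink.table.data.RowData;

/** Hypothetical connector class; insert-only, so it passes scan-source validation. */
public class ExampleScanSource implements ScanTableSource {

    @Override
    public ChangelogMode getChangelogMode() {
        // Insert-only streams are valid in both batch and streaming mode.
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
        // A bounded no-op SourceFunction keeps the sketch self-contained.
        return SourceFunctionProvider.of(
                new SourceFunction<RowData>() {
                    @Override
                    public void run(SourceContext<RowData> ctx) {}

                    @Override
                    public void cancel() {}
                },
                true);
    }

    @Override
    public DynamicTableSource copy() {
        return new ExampleScanSource();
    }

    @Override
    public String asSummaryString() {
        return "ExampleScanSource";
    }
}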

Example 2 with ScanTableSource

Use of org.apache.flink.table.connector.source.ScanTableSource in project flink by apache.

From the class CommonExecTableSourceScan, method translateToPlanInternal.

@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final StreamExecutionEnvironment env = planner.getExecEnv();
    final TransformationMetadata meta = createTransformationMeta(SOURCE_TRANSFORMATION, config);
    final InternalTypeInfo<RowData> outputTypeInfo = InternalTypeInfo.of((RowType) getOutputType());
    final ScanTableSource tableSource = tableSourceSpec.getScanTableSource(planner.getFlinkContext());
    ScanTableSource.ScanRuntimeProvider provider = tableSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
    if (provider instanceof SourceFunctionProvider) {
        final SourceFunctionProvider sourceFunctionProvider = (SourceFunctionProvider) provider;
        final SourceFunction<RowData> function = sourceFunctionProvider.createSourceFunction();
        final Transformation<RowData> transformation = createSourceFunctionTransformation(env, function, sourceFunctionProvider.isBounded(), meta.getName(), outputTypeInfo);
        return meta.fill(transformation);
    } else if (provider instanceof InputFormatProvider) {
        final InputFormat<RowData, ?> inputFormat = ((InputFormatProvider) provider).createInputFormat();
        final Transformation<RowData> transformation = createInputFormatTransformation(env, inputFormat, outputTypeInfo, meta.getName());
        return meta.fill(transformation);
    } else if (provider instanceof SourceProvider) {
        final Source<RowData, ?, ?> source = ((SourceProvider) provider).createSource();
        // TODO: Push down watermark strategy to source scan
        final Transformation<RowData> transformation = env.fromSource(source, WatermarkStrategy.noWatermarks(), meta.getName(), outputTypeInfo).getTransformation();
        return meta.fill(transformation);
    } else if (provider instanceof DataStreamScanProvider) {
        Transformation<RowData> transformation = ((DataStreamScanProvider) provider).produceDataStream(createProviderContext(), env).getTransformation();
        meta.fill(transformation);
        transformation.setOutputType(outputTypeInfo);
        return transformation;
    } else if (provider instanceof TransformationScanProvider) {
        final Transformation<RowData> transformation = ((TransformationScanProvider) provider).createTransformation(createProviderContext());
        meta.fill(transformation);
        transformation.setOutputType(outputTypeInfo);
        return transformation;
    } else {
        throw new UnsupportedOperationException(provider.getClass().getSimpleName() + " is unsupported now.");
    }
}
Also used: TransformationMetadata(org.apache.flink.table.planner.plan.nodes.exec.utils.TransformationMetadata) LegacySourceTransformation(org.apache.flink.streaming.api.transformations.LegacySourceTransformation) Transformation(org.apache.flink.api.dag.Transformation) TransformationScanProvider(org.apache.flink.table.planner.connectors.TransformationScanProvider) InputFormatProvider(org.apache.flink.table.connector.source.InputFormatProvider) SourceFunctionProvider(org.apache.flink.table.connector.source.SourceFunctionProvider) SourceProvider(org.apache.flink.table.connector.source.SourceProvider) ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) RowData(org.apache.flink.table.data.RowData) InputFormat(org.apache.flink.api.common.io.InputFormat) DataStreamScanProvider(org.apache.flink.table.connector.source.DataStreamScanProvider) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
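
Of the provider branches above, DataStreamScanProvider is the most flexible: the connector builds the DataStream itself, and the planner then stamps the declared output type onto the resulting transformation. A minimal sketch of such a provider follows; the anonymous class and its dummy elements are illustrative, not Flink code.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.connector.ProviderContext;
import org.apache.flink.table.connector.source.DataStreamScanProvider;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;

DataStreamScanProvider provider =
        new DataStreamScanProvider() {
            @Override
            public DataStream<RowData> produceDataStream(
                    ProviderContext providerContext, StreamExecutionEnvironment execEnv) {
                // The type information attached here does not matter much:
                // translateToPlanInternal overwrites it afterwards via
                // transformation.setOutputType(outputTypeInfo).
                return execEnv.fromElements(
                        (RowData) GenericRowData.of(StringData.fromString("a")),
                        GenericRowData.of(StringData.fromString("b")));
            }

            @Override
            public boolean isBounded() {
                return true;
            }
        };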

Example 3 with ScanTableSource

Use of org.apache.flink.table.connector.source.ScanTableSource in project flink by apache.

From the class DynamicSourceUtils, method validateScanSourceForStreaming.

private static void validateScanSourceForStreaming(String tableDebugName, ResolvedSchema schema, ScanTableSource scanSource, ChangelogMode changelogMode, ReadableConfig config) {
    // sanity check for produced ChangelogMode
    final boolean hasUpdateBefore = changelogMode.contains(RowKind.UPDATE_BEFORE);
    final boolean hasUpdateAfter = changelogMode.contains(RowKind.UPDATE_AFTER);
    if (!hasUpdateBefore && hasUpdateAfter) {
        // only UPDATE_AFTER
        if (!schema.getPrimaryKey().isPresent()) {
            throw new TableException(String.format("Table '%s' produces a changelog stream that contains UPDATE_AFTER but no UPDATE_BEFORE. " + "This requires defining a primary key constraint on the table.", tableDebugName));
        }
    } else if (hasUpdateBefore && !hasUpdateAfter) {
        // only UPDATE_BEFORE
        throw new ValidationException(String.format("Invalid source for table '%s'. A %s doesn't support a changelog stream that contains " + "UPDATE_BEFORE but no UPDATE_AFTER. Please adapt the implementation of class '%s'.", tableDebugName, ScanTableSource.class.getSimpleName(), scanSource.getClass().getName()));
    } else if (!changelogMode.containsOnly(RowKind.INSERT)) {
        // CDC mode (non-upsert mode and non-insert-only mode)
        final boolean changeEventsDuplicate = config.get(ExecutionConfigOptions.TABLE_EXEC_SOURCE_CDC_EVENTS_DUPLICATE);
        if (changeEventsDuplicate && !schema.getPrimaryKey().isPresent()) {
            throw new TableException(String.format("Configuration '%s' is enabled which requires the changelog sources to define a PRIMARY KEY. " + "However, table '%s' doesn't have a primary key.", ExecutionConfigOptions.TABLE_EXEC_SOURCE_CDC_EVENTS_DUPLICATE.key(), tableDebugName));
        }
    }
}
Also used: ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) TableException(org.apache.flink.table.api.TableException) ValidationException(org.apache.flink.table.api.ValidationException)
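
The branches can be illustrated with concrete ChangelogMode values. A sketch, with illustrative variable names:

import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.types.RowKind;

// UPDATE_AFTER without UPDATE_BEFORE: an upsert stream. Accepted only if the
// schema defines a primary key.
ChangelogMode upsert = ChangelogMode.upsert();

// UPDATE_BEFORE without UPDATE_AFTER: always rejected with a ValidationException.
ChangelogMode invalid =
        ChangelogMode.newBuilder()
                .addContainedKind(RowKind.INSERT)
                .addContainedKind(RowKind.UPDATE_BEFORE)
                .build();

// Full CDC stream (both update kinds): accepted, but if
// 'table.exec.source.cdc-events-duplicate' is enabled, a primary key is
// required so that duplicate change events can be deduplicated.
ChangelogMode cdc = ChangelogMode.all();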

Example 4 with ScanTableSource

Use of org.apache.flink.table.connector.source.ScanTableSource in project flink by apache.

From the class DynamicSourceUtils, method isUpsertSource.

/**
 * Returns true if the table is an upsert source.
 */
public static boolean isUpsertSource(ResolvedSchema resolvedSchema, DynamicTableSource tableSource) {
    if (!(tableSource instanceof ScanTableSource)) {
        return false;
    }
    ChangelogMode mode = ((ScanTableSource) tableSource).getChangelogMode();
    boolean isUpsertMode = mode.contains(RowKind.UPDATE_AFTER) && !mode.contains(RowKind.UPDATE_BEFORE);
    boolean hasPrimaryKey = resolvedSchema.getPrimaryKey().isPresent();
    return isUpsertMode && hasPrimaryKey;
}
Also used: ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) ChangelogMode(org.apache.flink.table.connector.ChangelogMode)
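
A minimal sketch that replays the same two checks on hand-built inputs; the schema, column names, and constraint name are hypothetical:

import java.util.Arrays;
import java.util.Collections;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.catalog.Column;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.catalog.UniqueConstraint;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.types.RowKind;

// Hypothetical schema with a primary key on 'id'.
ResolvedSchema schema =
        new ResolvedSchema(
                Arrays.<Column>asList(
                        Column.physical("id", DataTypes.BIGINT().notNull()),
                        Column.physical("name", DataTypes.STRING())),
                Collections.emptyList(),
                UniqueConstraint.primaryKey("pk_id", Collections.singletonList("id")));

// ChangelogMode.upsert() contains UPDATE_AFTER but not UPDATE_BEFORE.
ChangelogMode mode = ChangelogMode.upsert();

boolean isUpsertMode =
        mode.contains(RowKind.UPDATE_AFTER) && !mode.contains(RowKind.UPDATE_BEFORE);
boolean hasPrimaryKey = schema.getPrimaryKey().isPresent();

// Mirrors isUpsertSource: both conditions hold, so this counts as an upsert source.
// isUpsertMode && hasPrimaryKey == true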

Example 5 with ScanTableSource

Use of org.apache.flink.table.connector.source.ScanTableSource in project flink by apache.

From the class DynamicSourceUtils, method isSourceChangeEventsDuplicate.

/**
 * Returns true if the table source produces duplicate change events.
 */
public static boolean isSourceChangeEventsDuplicate(ResolvedSchema resolvedSchema, DynamicTableSource tableSource, TableConfig tableConfig) {
    if (!(tableSource instanceof ScanTableSource)) {
        return false;
    }
    ChangelogMode mode = ((ScanTableSource) tableSource).getChangelogMode();
    boolean isCDCSource = !mode.containsOnly(RowKind.INSERT) && !isUpsertSource(resolvedSchema, tableSource);
    boolean changeEventsDuplicate = tableConfig.getConfiguration().getBoolean(ExecutionConfigOptions.TABLE_EXEC_SOURCE_CDC_EVENTS_DUPLICATE);
    boolean hasPrimaryKey = resolvedSchema.getPrimaryKey().isPresent();
    return isCDCSource && changeEventsDuplicate && hasPrimaryKey;
}
Also used: ScanTableSource(org.apache.flink.table.connector.source.ScanTableSource) ChangelogMode(org.apache.flink.table.connector.ChangelogMode)
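
The option read here is 'table.exec.source.cdc-events-duplicate', which defaults to false. A minimal snippet, assuming the flag is toggled programmatically on a TableConfig:

import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.config.ExecutionConfigOptions;

TableConfig tableConfig = TableConfig.getDefault();
// Declare that the CDC source may emit duplicate change events; the planner
// then requires a primary key and deduplicates the changelog.
tableConfig.getConfiguration()
        .setBoolean(ExecutionConfigOptions.TABLE_EXEC_SOURCE_CDC_EVENTS_DUPLICATE, true);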

Aggregations

ScanTableSource (org.apache.flink.table.connector.source.ScanTableSource): 5 uses
ChangelogMode (org.apache.flink.table.connector.ChangelogMode): 2 uses
InputFormat (org.apache.flink.api.common.io.InputFormat): 1 use
Transformation (org.apache.flink.api.dag.Transformation): 1 use
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 1 use
LegacySourceTransformation (org.apache.flink.streaming.api.transformations.LegacySourceTransformation): 1 use
TableException (org.apache.flink.table.api.TableException): 1 use
ValidationException (org.apache.flink.table.api.ValidationException): 1 use
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 1 use
DataStreamScanProvider (org.apache.flink.table.connector.source.DataStreamScanProvider): 1 use
InputFormatProvider (org.apache.flink.table.connector.source.InputFormatProvider): 1 use
SourceFunctionProvider (org.apache.flink.table.connector.source.SourceFunctionProvider): 1 use
SourceProvider (org.apache.flink.table.connector.source.SourceProvider): 1 use
RowData (org.apache.flink.table.data.RowData): 1 use
TransformationScanProvider (org.apache.flink.table.planner.connectors.TransformationScanProvider): 1 use
TransformationMetadata (org.apache.flink.table.planner.plan.nodes.exec.utils.TransformationMetadata): 1 use