Search in sources :

Example 1 with StreamReadMonitoringFunction

Use of org.apache.hudi.source.StreamReadMonitoringFunction in the Apache Hudi project.

The method getScanRuntimeProvider of the class HoodieTableSource.

/**
 * Creates the scan runtime provider for this table source.
 *
 * <p>The returned {@link DataStreamScanProvider} reports itself as bounded whenever
 * {@code FlinkOptions.READ_AS_STREAMING} is not enabled in {@code conf}. Its data stream is:
 * <ul>
 *   <li>streaming read: a {@link StreamReadMonitoringFunction} source (parallelism 1) followed by
 *       a "split_reader" operator running with {@code FlinkOptions.READ_TASKS} parallelism;</li>
 *   <li>otherwise: a single {@link InputFormatSourceFunction} source running with
 *       {@code FlinkOptions.READ_TASKS} parallelism.</li>
 * </ul>
 *
 * @param scanContext the scan context handed in by the planner (not read by this method)
 * @return a provider that builds the {@code DataStream<RowData>} for this source
 */
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
    return new DataStreamScanProvider() {

        @Override
        public boolean isBounded() {
            // Bounded exactly when streaming read is switched off.
            return !conf.getBoolean(FlinkOptions.READ_AS_STREAMING);
        }

        @Override
        public DataStream<RowData> produceDataStream(StreamExecutionEnvironment execEnv) {
            @SuppressWarnings("unchecked")
            TypeInformation<RowData> rowTypeInfo =
                (TypeInformation<RowData>) TypeInfoDataTypeConverter.fromDataTypeToTypeInfo(getProducedDataType());

            if (!conf.getBoolean(FlinkOptions.READ_AS_STREAMING)) {
                // Bounded read: wrap the input format in a plain source function.
                InputFormatSourceFunction<RowData> boundedFunction =
                    new InputFormatSourceFunction<>(getInputFormat(), rowTypeInfo);
                DataStreamSource<RowData> boundedSource =
                    execEnv.addSource(boundedFunction, asSummaryString(), rowTypeInfo);
                return boundedSource
                    .name(getSourceOperatorName("bounded_source"))
                    .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS));
            }

            // Streaming read: one monitor task emits input splits, reader tasks consume them.
            StreamReadMonitoringFunction splitMonitor = new StreamReadMonitoringFunction(
                conf,
                FilePathUtils.toFlinkPath(path),
                maxCompactionMemoryInBytes,
                getRequiredPartitionPaths());
            // NOTE(review): the cast below presumes getInputFormat(true) yields a
            // MergeOnReadInputFormat - confirm against getInputFormat.
            InputFormat<RowData, ?> streamingFormat = getInputFormat(true);
            OneInputStreamOperatorFactory<MergeOnReadInputSplit, RowData> readerFactory =
                StreamReadOperator.factory((MergeOnReadInputFormat) streamingFormat);

            // The monitor is pinned to a single task.
            DataStream<MergeOnReadInputSplit> splitStream = execEnv
                .addSource(splitMonitor, getSourceOperatorName("split_monitor"))
                .setParallelism(1);
            SingleOutputStreamOperator<RowData> readerStream = splitStream
                .transform("split_reader", rowTypeInfo, readerFactory)
                .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS));
            return new DataStreamSource<>(readerStream);
        }
    };
}
Also used : DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) InputFormatSourceFunction(org.apache.flink.streaming.api.functions.source.InputFormatSourceFunction) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) MergeOnReadInputSplit(org.apache.hudi.table.format.mor.MergeOnReadInputSplit) RowData(org.apache.flink.table.data.RowData) DataStreamScanProvider(org.apache.flink.table.connector.source.DataStreamScanProvider) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamReadMonitoringFunction(org.apache.hudi.source.StreamReadMonitoringFunction)

Aggregations

TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1 DataStreamSource (org.apache.flink.streaming.api.datastream.DataStreamSource)1 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)1 InputFormatSourceFunction (org.apache.flink.streaming.api.functions.source.InputFormatSourceFunction)1 DataStreamScanProvider (org.apache.flink.table.connector.source.DataStreamScanProvider)1 RowData (org.apache.flink.table.data.RowData)1 StreamReadMonitoringFunction (org.apache.hudi.source.StreamReadMonitoringFunction)1 MergeOnReadInputSplit (org.apache.hudi.table.format.mor.MergeOnReadInputSplit)1