Search in sources :

Example 1 with RichInputFormat

use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.

the class InputFormatSourceFunction method run.

/**
 * Reads the input splits supplied by {@code splitIterator} with the wrapped input format and
 * forwards every non-null record to the given source context.
 *
 * <p>The loop stops when {@code isRunning} becomes false or when the split iterator reports no
 * further split. A {@code null} returned by {@code nextRecord} ends the current split early.
 *
 * @param ctx the context that receives each record via {@code collect}
 * @throws Exception if opening, reading or closing the input format fails
 */
@Override
public void run(SourceContext<OUT> ctx) throws Exception {
    try {
        Counter completedSplitsCounter = getRuntimeContext().getMetricGroup().counter("numSplitsProcessed");
        if (isRunning && format instanceof RichInputFormat) {
            ((RichInputFormat) format).openInputFormat();
        }
        // The reuse object is tracked separately from the value returned by nextRecord, so a
        // null return (which ends a split) never becomes the reuse argument for the next split.
        OUT reuse = serializer.createInstance();
        while (isRunning) {
            format.open(splitIterator.next());
            // isRunning is re-checked per record so that a stop request takes effect mid-split
            while (isRunning && !format.reachedEnd()) {
                OUT returned = format.nextRecord(reuse);
                if (returned == null) {
                    break;
                }
                ctx.collect(returned);
                // keep feeding the most recently returned instance back in, as before
                reuse = returned;
            }
            format.close();
            completedSplitsCounter.inc();
            if (isRunning) {
                isRunning = splitIterator.hasNext();
            }
        }
    } finally {
        // Nested finally blocks guarantee that closeInputFormat() is still invoked and that
        // isRunning is still reset even when an earlier cleanup step throws.
        try {
            format.close();
        } finally {
            try {
                if (format instanceof RichInputFormat) {
                    ((RichInputFormat) format).closeInputFormat();
                }
            } finally {
                isRunning = false;
            }
        }
    }
}
Also used: Counter (org.apache.flink.metrics.Counter), RichInputFormat (org.apache.flink.api.common.io.RichInputFormat)

Example 2 with RichInputFormat

use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.

the class GenericDataSourceBase method executeOnCollections.

// --------------------------------------------------------------------------------------------
/**
 * Runs this data source locally: configures the user's input format, reads all records of all
 * splits and returns copies of them in a list.
 *
 * <p>Each opened split is closed in a {@code finally} block, and a rich input format is closed
 * via {@code closeInputFormat()} in a {@code finally} block, so a failure while reading does
 * not leave the format open.
 *
 * @param ctx the runtime context handed to the format if it is a {@link RichInputFormat}
 * @param executionConfig the configuration used to create the output type's serializer
 * @return the records produced by the input format; {@code null} records are skipped
 * @throws Exception if configuring, opening, reading or closing the input format fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    @SuppressWarnings("unchecked") InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
    // configure the input format
    inputFormat.configure(this.parameters);
    // open the input format
    if (inputFormat instanceof RichInputFormat) {
        ((RichInputFormat) inputFormat).setRuntimeContext(ctx);
        ((RichInputFormat) inputFormat).openInputFormat();
    }
    List<OUT> result = new ArrayList<OUT>();
    try {
        // a single request for splits with minimum count 1
        InputSplit[] splits = inputFormat.createInputSplits(1);
        TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);
        for (InputSplit split : splits) {
            inputFormat.open(split);
            try {
                while (!inputFormat.reachedEnd()) {
                    OUT next = inputFormat.nextRecord(serializer.createInstance());
                    if (next != null) {
                        // store a copy so the result does not alias an instance the format produced
                        result.add(serializer.copy(next));
                    }
                }
            } finally {
                // close the split even if reading failed
                inputFormat.close();
            }
        }
    } finally {
        // close the input format, also on failure
        if (inputFormat instanceof RichInputFormat) {
            ((RichInputFormat) inputFormat).closeInputFormat();
        }
    }
    return result;
}
Also used: RichInputFormat (org.apache.flink.api.common.io.RichInputFormat), InputFormat (org.apache.flink.api.common.io.InputFormat), ArrayList (java.util.ArrayList), InputSplit (org.apache.flink.core.io.InputSplit)

Example 3 with RichInputFormat

use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.

the class InputFormatSourceFunction method open.

/**
 * Prepares this source for reading: passes the runtime context to a rich input format,
 * configures the format, and initialises the split provider, the serializer and the split
 * iterator.
 *
 * <p>After this call {@code isRunning} reflects whether the split iterator has at least one
 * split.
 *
 * @param parameters the configuration passed on to the input format's {@code configure}
 * @throws Exception if any of the setup steps fails
 */
@Override
@SuppressWarnings("unchecked")
public void open(Configuration parameters) throws Exception {
    final StreamingRuntimeContext streamingContext = (StreamingRuntimeContext) getRuntimeContext();

    // only rich formats accept a runtime context
    final boolean richFormat = format instanceof RichInputFormat;
    if (richFormat) {
        final RichInputFormat rich = (RichInputFormat) format;
        rich.setRuntimeContext(streamingContext);
    }

    format.configure(parameters);

    provider = streamingContext.getInputSplitProvider();
    serializer = typeInfo.createSerializer(getRuntimeContext().getExecutionConfig());

    // the split iterator is created only after the provider field has been assigned
    splitIterator = getInputSplits();
    final boolean hasFirstSplit = splitIterator.hasNext();
    isRunning = hasFirstSplit;
}
Also used : StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) RichInputFormat(org.apache.flink.api.common.io.RichInputFormat)

Example 4 with RichInputFormat

use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.

the class DataSourceTask method invoke.

/**
 * Executes the data source task: initialises the input format and the outputs, then reads every
 * assigned input split and emits its records to the task's output.
 *
 * <p>Outline of the steps visible in this method:
 * <ol>
 *   <li>Initialise the input format and outputs; an output initialisation failure is rethrown
 *       wrapped in a {@link RuntimeException}.</li>
 *   <li>Create the runtime context and set up the metric counters.</li>
 *   <li>If the format is a {@link RichInputFormat}, hand it the runtime context and call
 *       {@code openInputFormat()}.</li>
 *   <li>For each split, while the task is not canceled: open the format, forward all non-null
 *       records through a {@code CountingCollector}, and close the format in a
 *       {@code finally} block.</li>
 *   <li>On an exception: close the format best-effort, cancel the chained tasks, and rethrow or
 *       drop the exception depending on its type and on {@code taskCanceled}.</li>
 *   <li>Always: clear the writers and, for a rich format, call {@code closeInputFormat()}.</li>
 * </ol>
 *
 * @throws Exception if initialisation fails or if reading fails while the task is not canceled
 */
@Override
public void invoke() throws Exception {
    // --------------------------------------------------------------------
    // Initialize
    // --------------------------------------------------------------------
    initInputFormat();
    LOG.debug(getLogString("Start registering input and output"));
    try {
        initOutputs(getUserCodeClassLoader());
    } catch (Exception ex) {
        throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + ex.getMessage(), ex);
    }
    LOG.debug(getLogString("Finished registering input and output"));
    // --------------------------------------------------------------------
    // Invoke
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Starting data source operator"));
    RuntimeContext ctx = createRuntimeContext();
    // counter incremented once per split that was read and closed without an exception
    Counter completedSplitsCounter = ctx.getMetricGroup().counter("numSplitsProcessed");
    ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseInputMetricsForTask();
    Counter numRecordsOut = ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().getNumRecordsOutCounter();
    // output metrics are only reused for the task when no stubs are chained to this source
    if (this.config.getNumberOfChainedStubs() == 0) {
        ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseOutputMetricsForTask();
    }
    // rich formats get the runtime context and are opened once, before any split is opened
    if (RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
        ((RichInputFormat) this.format).setRuntimeContext(ctx);
        LOG.debug(getLogString("Rich Source detected. Initializing runtime context."));
        ((RichInputFormat) this.format).openInputFormat();
        LOG.debug(getLogString("Rich Source detected. Opening the InputFormat."));
    }
    ExecutionConfig executionConfig = getExecutionConfig();
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    LOG.debug("DataSourceTask object reuse: " + (objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    final TypeSerializer<OT> serializer = this.serializerFactory.getSerializer();
    try {
        // start all chained tasks
        BatchTask.openChainedTasks(this.chainedTasks, this);
        // get input splits to read
        final Iterator<InputSplit> splitIterator = getInputSplits();
        // for each assigned input split
        while (!this.taskCanceled && splitIterator.hasNext()) {
            // fetch the next split to read
            final InputSplit split = splitIterator.next();
            LOG.debug(getLogString("Opening input split " + split.toString()));
            final InputFormat<OT, InputSplit> format = this.format;
            // open input format
            format.open(split);
            LOG.debug(getLogString("Starting to read input from split " + split.toString()));
            try {
                // wraps the task output so that numRecordsOut is passed along with every collector
                final Collector<OT> output = new CountingCollector<>(this.output, numRecordsOut);
                if (objectReuseEnabled) {
                    // one instance is created per split and passed to every nextRecord call
                    OT reuse = serializer.createInstance();
                    // as long as there is data to read
                    while (!this.taskCanceled && !format.reachedEnd()) {
                        OT returned;
                        // a null return is skipped, it does not end the loop
                        if ((returned = format.nextRecord(reuse)) != null) {
                            output.collect(returned);
                        }
                    }
                } else {
                    // as long as there is data to read
                    while (!this.taskCanceled && !format.reachedEnd()) {
                        OT returned;
                        // a fresh instance is created for every nextRecord call
                        if ((returned = format.nextRecord(serializer.createInstance())) != null) {
                            output.collect(returned);
                        }
                    }
                }
                if (LOG.isDebugEnabled() && !this.taskCanceled) {
                    LOG.debug(getLogString("Closing input split " + split.toString()));
                }
            } finally {
                // close. We close here such that a regular close throwing an exception marks a task as failed.
                format.close();
            }
            completedSplitsCounter.inc();
        }
        // end for all input splits
        // close the collector. if it is a chaining task collector, it will close its chained tasks
        this.output.close();
        // close all chained tasks letting them report failure
        BatchTask.closeChainedTasks(this.chainedTasks, this);
    } catch (Exception ex) {
        // close the input, but do not report any exceptions, since we already have another root cause
        try {
            this.format.close();
        } catch (Throwable ignored) {
        }
        BatchTask.cancelChainedTasks(this.chainedTasks);
        // replace ex with the result of exceptionUnwrap before deciding how to report it
        ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
        if (ex instanceof CancelTaskException) {
            // forward canceling exception
            throw ex;
        } else if (!this.taskCanceled) {
            // only reported while the task is not canceled; otherwise the exception is dropped
            BatchTask.logAndThrowException(ex, this);
        }
    } finally {
        BatchTask.clearWriters(eventualOutputs);
        // --------------------------------------------------------------------
        // counterpart of openInputFormat(); runs on success, failure and cancellation
        if (this.format != null && RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
            ((RichInputFormat) this.format).closeInputFormat();
            LOG.debug(getLogString("Rich Source detected. Closing the InputFormat."));
        }
    }
    if (!this.taskCanceled) {
        LOG.debug(getLogString("Finished data source operator"));
    } else {
        LOG.debug(getLogString("Data source operator cancelled"));
    }
}
Also used: RichInputFormat (org.apache.flink.api.common.io.RichInputFormat), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), ExceptionInChainedStubException (org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException), NoSuchElementException (java.util.NoSuchElementException), CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException), InputSplitProviderException (org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException), CountingCollector (org.apache.flink.runtime.operators.util.metrics.CountingCollector), Counter (org.apache.flink.metrics.Counter), RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext), InputSplit (org.apache.flink.core.io.InputSplit)

Aggregations

RichInputFormat (org.apache.flink.api.common.io.RichInputFormat)4 InputSplit (org.apache.flink.core.io.InputSplit)2 Counter (org.apache.flink.metrics.Counter)2 ArrayList (java.util.ArrayList)1 NoSuchElementException (java.util.NoSuchElementException)1 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)1 InputFormat (org.apache.flink.api.common.io.InputFormat)1 CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)1 InputSplitProviderException (org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException)1 ExceptionInChainedStubException (org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException)1 CountingCollector (org.apache.flink.runtime.operators.util.metrics.CountingCollector)1 StreamingRuntimeContext (org.apache.flink.streaming.api.operators.StreamingRuntimeContext)1