Search in sources :

Example 16 with CountingCollector

use of org.apache.flink.runtime.operators.util.metrics.CountingCollector in project flink by apache.

In the class CrossDriver, method runStreamedOuterFirst:

/**
 * Runs the cross as a nested loop in which the first input is streamed as the outer side and
 * the second input is wrapped in a {@link SpillingResettableMutableObjectIterator} and replayed
 * as the inner side for every outer record.
 *
 * <p>Both inputs are counted against the same "records in" counter; every result handed to the
 * collector is counted against the "records out" counter. Both loops are abandoned as soon as
 * {@code running} becomes {@code false}.
 *
 * @throws Exception propagated from the input iterators, the user function or the collector
 */
private void runStreamedOuterFirst() throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug(this.taskContext.formatLogString("Running Cross with Nested-Loops: " + "First input is outer side, second input is inner (spilling) side."));
    }

    // metric counters shared by the wrappers created below
    final Counter recordsInCounter = taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsInCounter();
    final Counter recordsOutCounter = taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsOutCounter();

    // counting views of the two inputs
    final MutableObjectIterator<T1> outerInput = new CountingMutableObjectIterator<>(this.taskContext.<T1>getInput(0), recordsInCounter);
    final MutableObjectIterator<T2> innerInput = new CountingMutableObjectIterator<>(this.taskContext.<T2>getInput(1), recordsInCounter);

    final TypeSerializer<T1> outerSerializer = this.taskContext.<T1>getInputSerializer(0).getSerializer();
    final TypeSerializer<T2> innerSerializer = this.taskContext.<T2>getInputSerializer(1).getSerializer();

    // resettable view of the inner side; also published through the spillIter field
    final SpillingResettableMutableObjectIterator<T2> innerSide = new SpillingResettableMutableObjectIterator<T2>(innerInput, innerSerializer, this.memManager, this.taskContext.getIOManager(), this.memPagesForSpillingSide, this.taskContext.getContainingTask());
    this.spillIter = innerSide;

    final CrossFunction<T1, T2, OT> crossFunction = this.taskContext.getStub();
    final Collector<OT> out = new CountingCollector<>(this.taskContext.getOutputCollector(), recordsOutCounter);

    if (objectReuseEnabled) {
        final T1 outerReuse = serializerInstance(outerSerializer);
        final T2 innerReuse = serializerInstance(innerSerializer);

        // outer loop: one pass per record of the first input
        while (this.running) {
            final T1 outerRecord = outerInput.next(outerReuse);
            if (outerRecord == null) {
                break;
            }
            // inner loop: replay every record of the spilling side
            while (this.running) {
                final T2 innerRecord = innerSide.next(innerReuse);
                if (innerRecord == null) {
                    break;
                }
                out.collect(crossFunction.cross(outerRecord, innerRecord));
            }
            innerSide.reset();
        }
    } else {
        // outer loop: one pass per record of the first input
        while (this.running) {
            final T1 outerRecord = outerInput.next();
            if (outerRecord == null) {
                break;
            }
            // inner loop: replay every record of the spilling side
            while (this.running) {
                final T2 innerRecord = innerSide.next();
                if (innerRecord == null) {
                    break;
                }
                // the outer record is copied for each call, as it is handed out repeatedly
                out.collect(crossFunction.cross(outerSerializer.copy(outerRecord), innerRecord));
            }
            innerSide.reset();
        }
    }
}

/** Creates a fresh instance from the given serializer. */
private static <X> X serializerInstance(TypeSerializer<X> serializer) {
    return serializer.createInstance();
}
Also used : SpillingResettableMutableObjectIterator(org.apache.flink.runtime.operators.resettable.SpillingResettableMutableObjectIterator) CountingCollector(org.apache.flink.runtime.operators.util.metrics.CountingCollector) Counter(org.apache.flink.metrics.Counter) CountingMutableObjectIterator(org.apache.flink.runtime.operators.util.metrics.CountingMutableObjectIterator)

Example 17 with CountingCollector

use of org.apache.flink.runtime.operators.util.metrics.CountingCollector in project flink by apache.

In the class DataSourceTask, method invoke:

/**
 * Executes the data source: initializes the input format and the outputs, wires up the metric
 * counters, then reads every assigned input split record by record and forwards each non-null
 * record to the output through a {@link CountingCollector}.
 *
 * <p>The read loops stop early once {@code taskCanceled} is set. On failure the input format is
 * closed on a best-effort basis, the chained tasks are cancelled, and the exception is either
 * rethrown (cancellation), passed to {@code BatchTask.logAndThrowException} (task not canceled),
 * or dropped (task already canceled).
 *
 * @throws Exception if initialization, reading, or closing fails; a failure in
 *     {@code initOutputs} is wrapped in a {@link RuntimeException}
 */
@Override
public void invoke() throws Exception {
    // --------------------------------------------------------------------
    // Initialize
    // --------------------------------------------------------------------
    initInputFormat();
    LOG.debug(getLogString("Start registering input and output"));
    try {
        initOutputs(getUserCodeClassLoader());
    } catch (Exception ex) {
        // wrap with context, keeping the original exception as the cause
        throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + ex.getMessage(), ex);
    }
    LOG.debug(getLogString("Finished registering input and output"));
    // --------------------------------------------------------------------
    // Invoke
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Starting data source operator"));
    RuntimeContext ctx = createRuntimeContext();
    // incremented once per split that was read and closed without an exception
    Counter completedSplitsCounter = ctx.getMetricGroup().counter("numSplitsProcessed");
    ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseInputMetricsForTask();
    Counter numRecordsOut = ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().getNumRecordsOutCounter();
    // the output metrics are only reused for the task when no stubs are chained to this source
    if (this.config.getNumberOfChainedStubs() == 0) {
        ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseOutputMetricsForTask();
    }
    // rich input formats additionally receive the runtime context and are opened once up front
    if (RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
        ((RichInputFormat) this.format).setRuntimeContext(ctx);
        LOG.debug(getLogString("Rich Source detected. Initializing runtime context."));
        ((RichInputFormat) this.format).openInputFormat();
        LOG.debug(getLogString("Rich Source detected. Opening the InputFormat."));
    }
    ExecutionConfig executionConfig = getExecutionConfig();
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    LOG.debug("DataSourceTask object reuse: " + (objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    final TypeSerializer<OT> serializer = this.serializerFactory.getSerializer();
    try {
        // start all chained tasks
        BatchTask.openChainedTasks(this.chainedTasks, this);
        // get input splits to read
        final Iterator<InputSplit> splitIterator = getInputSplits();
        // for each assigned input split
        while (!this.taskCanceled && splitIterator.hasNext()) {
            // fetch the next split to process
            final InputSplit split = splitIterator.next();
            LOG.debug(getLogString("Opening input split " + split.toString()));
            final InputFormat<OT, InputSplit> format = this.format;
            // open input format
            format.open(split);
            LOG.debug(getLogString("Starting to read input from split " + split.toString()));
            try {
                // a new counting wrapper around the task output is created for every split
                final Collector<OT> output = new CountingCollector<>(this.output, numRecordsOut);
                if (objectReuseEnabled) {
                    // a single instance is created per split and passed to every nextRecord call
                    OT reuse = serializer.createInstance();
                    // as long as there is data to read
                    while (!this.taskCanceled && !format.reachedEnd()) {
                        OT returned;
                        // null results from the format are skipped
                        if ((returned = format.nextRecord(reuse)) != null) {
                            output.collect(returned);
                        }
                    }
                } else {
                    // as long as there is data to read
                    while (!this.taskCanceled && !format.reachedEnd()) {
                        OT returned;
                        // a fresh instance is created for every record; null results are skipped
                        if ((returned = format.nextRecord(serializer.createInstance())) != null) {
                            output.collect(returned);
                        }
                    }
                }
                if (LOG.isDebugEnabled() && !this.taskCanceled) {
                    LOG.debug(getLogString("Closing input split " + split.toString()));
                }
            } finally {
                // close. We close here such that a regular close throwing an exception marks a task as failed.
                format.close();
            }
            completedSplitsCounter.inc();
        }
        // end for all input splits
        // close the collector. if it is a chaining task collector, it will close its chained tasks
        this.output.close();
        // close all chained tasks letting them report failure
        BatchTask.closeChainedTasks(this.chainedTasks, this);
    } catch (Exception ex) {
        // close the input, but do not report any exceptions, since we already have another root cause
        try {
            this.format.close();
        } catch (Throwable ignored) {
            // intentionally ignored: the exception caught above is the one that gets reported
        }
        BatchTask.cancelChainedTasks(this.chainedTasks);
        // replace the exception with the result of unwrapping chained-stub wrappers
        ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
        if (ex instanceof CancelTaskException) {
            // forward canceling exception
            throw ex;
        } else if (!this.taskCanceled) {
            // report the failure only while the task is not canceled;
            // if the task was canceled, the exception is dropped
            BatchTask.logAndThrowException(ex, this);
        }
    } finally {
        BatchTask.clearWriters(eventualOutputs);
        // --------------------------------------------------------------------
        // Closing
        // --------------------------------------------------------------------
        // counterpart of openInputFormat() above; runs on success and on failure
        if (this.format != null && RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
            ((RichInputFormat) this.format).closeInputFormat();
            LOG.debug(getLogString("Rich Source detected. Closing the InputFormat."));
        }
    }
    if (!this.taskCanceled) {
        LOG.debug(getLogString("Finished data source operator"));
    } else {
        LOG.debug(getLogString("Data source operator cancelled"));
    }
}
Also used : RichInputFormat(org.apache.flink.api.common.io.RichInputFormat) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExceptionInChainedStubException(org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException) NoSuchElementException(java.util.NoSuchElementException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) InputSplitProviderException(org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException) CountingCollector(org.apache.flink.runtime.operators.util.metrics.CountingCollector) Counter(org.apache.flink.metrics.Counter) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) InputSplit(org.apache.flink.core.io.InputSplit)

Example 18 with CountingCollector

use of org.apache.flink.runtime.operators.util.metrics.CountingCollector in project flink by apache.

In the class FlatMapDriver, method run:

/**
 * Pulls records from input 0 and passes each one to the flat-map function together with a
 * collector that counts emitted records.
 *
 * <p>The "records in" counter is incremented once per record read, before the function is
 * called. Reading stops when the input returns {@code null} or {@code running} becomes
 * {@code false}.
 *
 * @throws Exception propagated from the input iterator, the user function or the collector
 */
@Override
public void run() throws Exception {
    final Counter recordsInCounter = this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsInCounter();
    final Counter recordsOutCounter = this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsOutCounter();

    // keep the frequently used references in locals
    final MutableObjectIterator<IT> source = this.taskContext.getInput(0);
    final FlatMapFunction<IT, OT> flatMapper = this.taskContext.getStub();
    final Collector<OT> out = new CountingCollector<>(this.taskContext.getOutputCollector(), recordsOutCounter);

    if (!objectReuseEnabled) {
        // the iterator supplies the record instance on every call
        while (this.running) {
            final IT current = source.next();
            if (current == null) {
                break;
            }
            recordsInCounter.inc();
            flatMapper.flatMap(current, out);
        }
        return;
    }

    // object reuse: whatever next(...) returned last is passed back in as the reuse target
    IT current = this.taskContext.<IT>getInputSerializer(0).getSerializer().createInstance();
    while (this.running) {
        current = source.next(current);
        if (current == null) {
            break;
        }
        recordsInCounter.inc();
        flatMapper.flatMap(current, out);
    }
}
Also used : CountingCollector(org.apache.flink.runtime.operators.util.metrics.CountingCollector) Counter(org.apache.flink.metrics.Counter)

Aggregations

Counter (org.apache.flink.metrics.Counter): 18; CountingCollector (org.apache.flink.runtime.operators.util.metrics.CountingCollector): 18; CountingMutableObjectIterator (org.apache.flink.runtime.operators.util.metrics.CountingMutableObjectIterator): 6; SpillingResettableMutableObjectIterator (org.apache.flink.runtime.operators.resettable.SpillingResettableMutableObjectIterator): 4; BlockResettableMutableObjectIterator (org.apache.flink.runtime.operators.resettable.BlockResettableMutableObjectIterator): 2; NonReusingMutableToRegularIteratorWrapper (org.apache.flink.runtime.util.NonReusingMutableToRegularIteratorWrapper): 2; ReusingMutableToRegularIteratorWrapper (org.apache.flink.runtime.util.ReusingMutableToRegularIteratorWrapper): 2; NoSuchElementException (java.util.NoSuchElementException): 1; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 1; RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 1; RichInputFormat (org.apache.flink.api.common.io.RichInputFormat): 1; InputSplit (org.apache.flink.core.io.InputSplit): 1; CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException): 1; InputSplitProviderException (org.apache.flink.runtime.jobgraph.tasks.InputSplitProviderException): 1; ExceptionInChainedStubException (org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException): 1; NonReusingKeyGroupedIterator (org.apache.flink.runtime.util.NonReusingKeyGroupedIterator): 1; ReusingKeyGroupedIterator (org.apache.flink.runtime.util.ReusingKeyGroupedIterator): 1