Use of org.apache.flink.runtime.operators.shipping.OutputCollector in project flink by apache.
Class BatchTask, method initOutputs.
/**
* Creates a writer for each output. Creates an OutputCollector which forwards its input to all writers.
* The output collector applies the configured shipping strategy.
*/
@SuppressWarnings("unchecked")
public static <T> Collector<T> initOutputs(
        AbstractInvokable containingTask,
        ClassLoader cl,
        TaskConfig config,
        List<ChainedDriver<?, ?>> chainedTasksTarget,
        List<RecordWriter<?>> eventualOutputs,
        ExecutionConfig executionConfig,
        Map<String, Accumulator<?, ?>> accumulatorMap) throws Exception {

    final int numOutputs = config.getNumOutputs();

    // check whether we got any chained tasks
    final int numChained = config.getNumberOfChainedStubs();
    if (numChained > 0) {
        // got chained stubs. that means that this one may only have a single forward connection
        if (numOutputs != 1 || config.getOutputShipStrategy(0) != ShipStrategyType.FORWARD) {
            throw new RuntimeException("Plan Generation Bug: Found a chained stub that is not connected via an only forward connection.");
        }

        // instantiate each task
        @SuppressWarnings("rawtypes")
        Collector previous = null;
        for (int i = numChained - 1; i >= 0; --i) {
            // get the task first
            final ChainedDriver<?, ?> ct;
            try {
                Class<? extends ChainedDriver<?, ?>> ctc = config.getChainedTask(i);
                ct = ctc.newInstance();
            } catch (Exception ex) {
                throw new RuntimeException("Could not instantiate chained task driver.", ex);
            }

            // get the configuration for the task
            final TaskConfig chainedStubConf = config.getChainedStubConfig(i);
            final String taskName = config.getChainedTaskName(i);

            if (i == numChained - 1) {
                // last in chain, instantiate the output collector for this task
                previous = getOutputCollector(containingTask, chainedStubConf, cl, eventualOutputs, 0, chainedStubConf.getNumOutputs());
            }

            ct.setup(chainedStubConf, taskName, previous, containingTask, cl, executionConfig, accumulatorMap);
            chainedTasksTarget.add(0, ct);

            if (i == numChained - 1) {
                ct.getIOMetrics().reuseOutputMetricsForTask();
            }

            previous = ct;
        }
        // the collector of the first in the chain is the collector for the task
        return (Collector<T>) previous;
    }
    // instantiate the output collector the default way from this configuration
    return getOutputCollector(containingTask, config, cl, eventualOutputs, 0, numOutputs);
}
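To make the wiring order concrete: the loop above starts at the last chained driver (the only one connected to the real output collector) and works toward the head of the chain, so the collector returned to the task is the first driver. The following is a minimal, self-contained sketch of that back-to-front construction; MiniCollector, MiniChainedDriver, and ChainWiringSketch are invented stand-ins for illustration, not Flink classes.

    import java.util.List;

    interface MiniCollector<T> {
        void collect(T record);
    }

    // Stand-in for a ChainedDriver: runs its step, then forwards to the next collector.
    class MiniChainedDriver<T> implements MiniCollector<T> {
        private final String name;
        private final MiniCollector<T> output;

        MiniChainedDriver(String name, MiniCollector<T> output) {
            this.name = name;
            this.output = output;
        }

        @Override
        public void collect(T record) {
            // a real driver would invoke its user function here before forwarding
            System.out.println(name + " -> " + record);
            output.collect(record);
        }
    }

    public class ChainWiringSketch {
        // Mirrors the loop in initOutputs: walk the chain back to front so each driver
        // is wired to the collector already built for the drivers behind it.
        static <T> MiniCollector<T> wireChain(List<String> driverNames, MiniCollector<T> sink) {
            MiniCollector<T> previous = sink;   // last driver writes to the real output
            for (int i = driverNames.size() - 1; i >= 0; --i) {
                previous = new MiniChainedDriver<>(driverNames.get(i), previous);
            }
            return previous;                    // head of the chain = the task's collector
        }

        public static void main(String[] args) {
            MiniCollector<String> sink = r -> System.out.println("writer <- " + r);
            MiniCollector<String> head = wireChain(List.of("map", "filter"), sink);
            head.collect("record-1");
        }
    }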
Use of org.apache.flink.runtime.operators.shipping.OutputCollector in project flink by apache.
Class BatchTask, method getOutputCollector.
// --------------------------------------------------------------------------------------------
// Result Shipping and Chained Tasks
// --------------------------------------------------------------------------------------------
/**
* Creates the {@link Collector} for the given task, as described by the given configuration.
* The output collector contains the writers that forward the data to the different tasks that
* the given task is connected to. Each writer applies the partitioning as described in the
* configuration.
*
* @param task The task that the output collector is created for.
* @param config The configuration describing the output shipping strategies.
* @param cl The classloader used to load user defined types.
* @param eventualOutputs The output writers that this task forwards to the next task for each
* output.
* @param outputOffset The offset to start to get the writers for the outputs
* @param numOutputs The number of outputs described in the configuration.
* @return The OutputCollector that data produced in this task is submitted to.
*/
public static <T> Collector<T> getOutputCollector(
        AbstractInvokable task,
        TaskConfig config,
        ClassLoader cl,
        List<RecordWriter<?>> eventualOutputs,
        int outputOffset,
        int numOutputs) throws Exception {

    if (numOutputs == 0) {
        return null;
    }

    // get the factory for the serializer
    final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
    final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

    // create a writer for each output
    for (int i = 0; i < numOutputs; i++) {
        // create the OutputEmitter from output ship strategy
        final ShipStrategyType strategy = config.getOutputShipStrategy(i);
        final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
        final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);

        final ChannelSelector<SerializationDelegate<T>> oe;
        if (compFactory == null) {
            oe = new OutputEmitter<>(strategy, indexInSubtaskGroup);
        } else {
            final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
            final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);
            final TypeComparator<T> comparator = compFactory.createComparator();
            oe = new OutputEmitter<>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
        }

        final RecordWriter<SerializationDelegate<T>> recordWriter =
                new RecordWriterBuilder()
                        .setChannelSelector(oe)
                        .setTaskName(task.getEnvironment().getTaskInfo().getTaskNameWithSubtasks())
                        .build(task.getEnvironment().getWriter(outputOffset + i));
        recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());

        writers.add(recordWriter);
    }
    if (eventualOutputs != null) {
        eventualOutputs.addAll(writers);
    }
    return new OutputCollector<>(writers, serializerFactory.getSerializer());
}
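The OutputCollector returned here fans every collected record out to all of the writers built above; the writers are typed over SerializationDelegate<T>, so records are handed to them through the delegate created from the configured serializer. Below is a minimal, self-contained sketch of that fan-out behavior; MiniWriter, FanOutCollector, and FanOutSketch are invented stand-ins, not the real Flink classes.

    import java.util.List;

    interface MiniWriter<T> {
        void emit(T record);
    }

    class FanOutCollector<T> {
        private final List<MiniWriter<T>> writers;

        FanOutCollector(List<MiniWriter<T>> writers) {
            this.writers = writers;
        }

        // every collected record is handed to every writer; each writer then applies
        // its own channel selection (the ship strategy) when emitting downstream
        void collect(T record) {
            for (MiniWriter<T> writer : writers) {
                writer.emit(record);
            }
        }
    }

    public class FanOutSketch {
        public static void main(String[] args) {
            MiniWriter<String> w1 = r -> System.out.println("output 0 <- " + r);
            MiniWriter<String> w2 = r -> System.out.println("output 1 <- " + r);
            new FanOutCollector<>(List.of(w1, w2)).collect("record-1");
        }
    }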
Use of org.apache.flink.runtime.operators.shipping.OutputCollector in project flink by apache.
Class BatchTask, method initOutputs.
/**
* Creates a writer for each output. Creates an OutputCollector which forwards its input to all
* writers. The output collector applies the configured shipping strategy.
*/
@SuppressWarnings("unchecked")
public static <T> Collector<T> initOutputs(
        AbstractInvokable containingTask,
        UserCodeClassLoader cl,
        TaskConfig config,
        List<ChainedDriver<?, ?>> chainedTasksTarget,
        List<RecordWriter<?>> eventualOutputs,
        ExecutionConfig executionConfig,
        Map<String, Accumulator<?, ?>> accumulatorMap) throws Exception {

    final int numOutputs = config.getNumOutputs();

    // check whether we got any chained tasks
    final int numChained = config.getNumberOfChainedStubs();
    if (numChained > 0) {
        // got chained stubs. that means that this one may only have a single forward connection
        if (numOutputs != 1 || config.getOutputShipStrategy(0) != ShipStrategyType.FORWARD) {
            throw new RuntimeException("Plan Generation Bug: Found a chained stub that is not connected via an only forward connection.");
        }

        // instantiate each task
        @SuppressWarnings("rawtypes")
        Collector previous = null;
        for (int i = numChained - 1; i >= 0; --i) {
            // get the task first
            final ChainedDriver<?, ?> ct;
            try {
                Class<? extends ChainedDriver<?, ?>> ctc = config.getChainedTask(i);
                ct = ctc.newInstance();
            } catch (Exception ex) {
                throw new RuntimeException("Could not instantiate chained task driver.", ex);
            }

            // get the configuration for the task
            final TaskConfig chainedStubConf = config.getChainedStubConfig(i);
            final String taskName = config.getChainedTaskName(i);

            if (i == numChained - 1) {
                // last in chain, instantiate the output collector for this task
                previous = getOutputCollector(containingTask, chainedStubConf, cl.asClassLoader(), eventualOutputs, 0, chainedStubConf.getNumOutputs());
            }

            ct.setup(chainedStubConf, taskName, previous, containingTask, cl, executionConfig, accumulatorMap);
            chainedTasksTarget.add(0, ct);

            if (i == numChained - 1) {
                ct.getIOMetrics().reuseOutputMetricsForTask();
            }

            previous = ct;
        }
        // the collector of the first in the chain is the collector for the task
        return (Collector<T>) previous;
    }
    // instantiate the output collector the default way from this configuration
    return getOutputCollector(containingTask, config, cl.asClassLoader(), eventualOutputs, 0, numOutputs);
}
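This second variant differs from the first only in its classloader parameter: it accepts a UserCodeClassLoader and calls asClassLoader() wherever a plain ClassLoader is required (the chained drivers still receive the wrapper itself in setup(...)). The following hypothetical wrapper only illustrates that unwrapping pattern; it is not the Flink implementation.

    // Invented stand-in for a user-code classloader wrapper, for illustration only.
    class SimpleUserCodeClassLoader {
        private final ClassLoader delegate;

        SimpleUserCodeClassLoader(ClassLoader delegate) {
            this.delegate = delegate;
        }

        // getOutputCollector only needs a plain ClassLoader, so callers unwrap here
        ClassLoader asClassLoader() {
            return delegate;
        }
    }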