use of org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder in project flink by apache.
the class BatchTask method getOutputCollector.
// --------------------------------------------------------------------------------------------
// Result Shipping and Chained Tasks
// --------------------------------------------------------------------------------------------
/**
 * Creates the {@link Collector} for the given task, as described by the given configuration.
 * The output collector contains the writers that forward the data to the different tasks that
 * the given task is connected to. Each writer applies the partitioning as described in the
 * configuration.
 *
 * <p>For every output {@code i} in {@code [0, numOutputs)} one {@link RecordWriter} is built on
 * top of the environment's writer at index {@code outputOffset + i} and registered with the
 * task's IO metric group.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs The output writers that this task forwards to the next task for each
 *     output. May be {@code null}; otherwise every writer created here is appended to it.
 * @param outputOffset The offset to start to get the writers for the outputs
 * @param numOutputs The number of outputs described in the configuration.
 * @param <T> The type of the records handed to the collector.
 * @return The OutputCollector that data produced in this task is submitted to, or {@code null}
 *     if {@code numOutputs} is zero.
 * @throws Exception If any of the configuration lookups or writer set-up steps fails.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl, List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception {
    if (numOutputs == 0) {
        // callers receive null (not an empty collector) when there is nothing to write to
        return null;
    }

    // get the factory for the serializer
    final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
    final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

    // both values are the same for every output, so they are looked up once
    final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
    final String taskName = task.getEnvironment().getTaskInfo().getTaskNameWithSubtasks();

    // create a writer for each output
    for (int i = 0; i < numOutputs; i++) {
        // create the OutputEmitter from output ship strategy
        final ShipStrategyType strategy = config.getOutputShipStrategy(i);
        final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);

        final ChannelSelector<SerializationDelegate<T>> oe;
        if (compFactory == null) {
            oe = new OutputEmitter<>(strategy, indexInSubtaskGroup);
        } else {
            // a comparator is configured: also pass on the data distribution and partitioner
            final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
            final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);
            final TypeComparator<T> comparator = compFactory.createComparator();
            oe = new OutputEmitter<>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
        }

        // explicit type arguments instead of a raw builder, so that the resulting
        // RecordWriter is checked by the compiler rather than converted unchecked
        final RecordWriter<SerializationDelegate<T>> recordWriter =
                new RecordWriterBuilder<SerializationDelegate<T>>()
                        .setChannelSelector(oe)
                        .setTaskName(taskName)
                        .build(task.getEnvironment().getWriter(outputOffset + i));
        recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());
        writers.add(recordWriter);
    }

    if (eventualOutputs != null) {
        eventualOutputs.addAll(writers);
    }
    return new OutputCollector<>(writers, serializerFactory.getSerializer());
}
use of org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder in project flink by apache.
the class StreamTask method createRecordWriter.
/**
 * Builds the {@link RecordWriter} that serves one output edge of a stream task.
 *
 * <p>The edge's partitioner is cloned, optionally configured with the number of target key
 * groups reported by the result partition writer, and then used as the channel selector of the
 * new record writer. The writer is registered with the environment's IO metric group before it
 * is returned.
 *
 * @param edge the stream edge whose partitioner is cloned for this writer
 * @param outputIndex index of the result partition writer to fetch from the environment
 * @param environment the environment providing class loader, writers and metric group
 * @param taskNameWithSubtask task name handed to the builder and used in the debug log line
 * @param bufferTimeout timeout value handed to the builder
 * @param <OUT> element type of the stream records written through the returned writer
 * @return the fully configured record writer
 */
@SuppressWarnings("unchecked")
private static <OUT> RecordWriter<SerializationDelegate<StreamRecord<OUT>>> createRecordWriter(StreamEdge edge, int outputIndex, Environment environment, String taskNameWithSubtask, long bufferTimeout) {
    // Work on a private copy of the edge's partitioner, cloned through the user-code class
    // loader; see https://issues.apache.org/jira/browse/FLINK-14087 for a case behind this.
    StreamPartitioner<OUT> partitionerCopy = null;
    try {
        final StreamPartitioner<OUT> edgePartitioner = (StreamPartitioner<OUT>) edge.getPartitioner();
        final ClassLoader userClassLoader = environment.getUserCodeClassLoader().asClassLoader();
        partitionerCopy = InstantiationUtil.clone(edgePartitioner, userClassLoader);
    } catch (Exception cloneFailure) {
        ExceptionUtils.rethrow(cloneFailure);
    }

    LOG.debug("Using partitioner {} for output {} of task {}", partitionerCopy, outputIndex, taskNameWithSubtask);

    final ResultPartitionWriter partitionWriter = environment.getWriter(outputIndex);

    // configurable partitioners learn the number of key groups (aka max. parallelism) here,
    // but only when the writer reports a positive value
    if (partitionerCopy instanceof ConfigurableStreamPartitioner) {
        final int targetKeyGroups = partitionWriter.getNumTargetKeyGroups();
        if (targetKeyGroups > 0) {
            ((ConfigurableStreamPartitioner) partitionerCopy).configure(targetKeyGroups);
        }
    }

    final RecordWriter<SerializationDelegate<StreamRecord<OUT>>> writer =
            new RecordWriterBuilder<SerializationDelegate<StreamRecord<OUT>>>()
                    .setChannelSelector(partitionerCopy)
                    .setTimeout(bufferTimeout)
                    .setTaskName(taskNameWithSubtask)
                    .build(partitionWriter);
    writer.setMetricGroup(environment.getMetricGroup().getIOMetricGroup());
    return writer;
}
use of org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder in project flink by apache.
the class StreamNetworkPointToPointBenchmark method setUp.
/**
 * Initializes the point-to-point benchmark: sets up a fresh benchmark environment, builds one
 * record writer on top of result partition writer 0 and creates the receiver.
 *
 * @param flushTimeout output flushing interval of the {@link
 *     org.apache.flink.runtime.io.network.api.writer.RecordWriter}'s output flusher thread
 * @param config configuration forwarded to the benchmark environment's set-up
 * @throws Exception if the environment or any of the created components fails to set up
 */
public void setUp(long flushTimeout, Configuration config) throws Exception {
    // NOTE(review): the names below reflect the presumed parameter roles of
    // StreamNetworkBenchmarkEnvironment#setUp - confirm against its signature.
    final int writerCount = 1;
    final int channelCount = 1;
    final boolean localMode = false;
    final int senderBufferPoolSize = -1;
    final int receiverBufferPoolSize = -1;

    environment = new StreamNetworkBenchmarkEnvironment<>();
    environment.setUp(writerCount, channelCount, localMode, senderBufferPoolSize, receiverBufferPoolSize, config);

    final ResultPartitionWriter partitionWriter = environment.createResultPartitionWriter(0);
    recordWriter = new RecordWriterBuilder().setTimeout(flushTimeout).build(partitionWriter);

    receiver = environment.createReceiver();
}
use of org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder in project flink by apache.
the class StreamNetworkThroughputBenchmark method setUp.
/**
 * Initializes the throughput benchmark with the given parameters: sets up the benchmark
 * environment, starts one writer thread per record writer and creates the receiver.
 *
 * @param recordWriters number of senders, i.e. {@link
 *     org.apache.flink.runtime.io.network.api.writer.RecordWriter} instances
 * @param channels number of outgoing channels / receivers
 * @param flushTimeout timeout value handed to every record writer builder
 * @param broadcastMode forwarded to the channel selector set-up and to each writer thread
 * @param localMode forwarded to the benchmark environment's set-up
 * @param senderBufferPoolSize forwarded to the benchmark environment's set-up
 * @param receiverBufferPoolSize forwarded to the benchmark environment's set-up
 * @param config configuration forwarded to the benchmark environment's set-up
 * @throws Exception if the environment or any of the created components fails to set up
 */
public void setUp(int recordWriters, int channels, int flushTimeout, boolean broadcastMode, boolean localMode, int senderBufferPoolSize, int receiverBufferPoolSize, Configuration config) throws Exception {
    environment = new StreamNetworkBenchmarkEnvironment<>();
    environment.setUp(recordWriters, channels, localMode, senderBufferPoolSize, receiverBufferPoolSize, config);

    writerThreads = new LongRecordWriterThread[recordWriters];
    for (int i = 0; i < recordWriters; i++) {
        final ResultPartitionWriter partitionWriter = environment.createResultPartitionWriter(i);

        final RecordWriterBuilder builder = new RecordWriterBuilder().setTimeout(flushTimeout);
        setChannelSelector(builder, broadcastMode);

        // each thread is started as soon as it has been created and stored
        final LongRecordWriterThread thread = new LongRecordWriterThread(builder.build(partitionWriter), broadcastMode);
        writerThreads[i] = thread;
        thread.start();
    }

    receiver = environment.createReceiver();
}
use of org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder in project flink by apache.
the class IterationHeadTask method initOutputs.
/**
 * Sets up the outputs of the iteration head: the regular outputs (via the superclass), the
 * collector for the final solution, and finally the writer for the sync gate.
 *
 * @throws Exception if the superclass set-up or the collector creation fails, or if the number
 *     of created writers does not match the configured index of the sync output
 */
@Override
protected void initOutputs() throws Exception {
    // the regular outputs (the ones into the step function) come first
    super.initOutputs();

    // next, the outputs for the final solution; their writers are collected separately
    final List<RecordWriter<?>> finalResultWriters = new ArrayList<>();
    final TaskConfig finalResultConfig = this.config.getIterationHeadFinalOutputConfig();
    final ClassLoader classLoader = getUserCodeClassLoader();
    this.finalOutputCollector = BatchTask.getOutputCollector(
            this,
            finalResultConfig,
            classLoader,
            finalResultWriters,
            config.getNumOutputs(),
            finalResultConfig.getNumOutputs());

    // sanity check: the sync gate index must equal the number of writers created so far
    final int stepFunctionWriterCount = this.eventualOutputs.size();
    final int finalResultWriterCount = finalResultWriters.size();
    final int syncGateIndex = this.config.getIterationHeadIndexOfSyncOutput();
    final boolean gateMappingConsistent = stepFunctionWriterCount + finalResultWriterCount == syncGateIndex;
    if (!gateMappingConsistent) {
        throw new Exception("Error: Inconsistent head task setup - wrong mapping of output gates.");
    }

    // the sync gate can be instantiated only after the check above has passed
    this.toSync = new RecordWriterBuilder<>().build(getEnvironment().getWriter(syncGateIndex));
    this.toSyncPartitionId = getEnvironment().getWriter(syncGateIndex).getPartitionId();
}
Aggregations