use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.
the class RequestedGlobalPropertiesFilteringTest method testRangePartitioningPreserved3.
/**
 * Verifies that a requested range partitioning is kept by
 * {@code filterBySemanticProperties} when all ordering fields are covered by the
 * semantic property string {@code "7->3;1->1;2->6"}: the ordering requested on fields
 * (3, 1, 6) must come back on fields (7, 1, 2) with unchanged types, sort directions
 * and data distribution.
 */
@Test
public void testRangePartitioningPreserved3() {
    final SingleInputSemanticProperties semanticProps = new SingleInputSemanticProperties();
    SemanticPropUtil.getSemanticPropsSingleFromString(
            semanticProps, new String[] { "7->3;1->1;2->6" }, null, null, tupleInfo, tupleInfo);

    final DataDistribution distribution = new MockDistribution();

    // the ordering that is requested before filtering
    final Ordering requestedOrdering = new Ordering();
    requestedOrdering.appendOrdering(3, LongValue.class, Order.DESCENDING);
    requestedOrdering.appendOrdering(1, IntValue.class, Order.ASCENDING);
    requestedOrdering.appendOrdering(6, ByteValue.class, Order.DESCENDING);

    final RequestedGlobalProperties requested = new RequestedGlobalProperties();
    requested.setRangePartitioned(requestedOrdering, distribution);

    final RequestedGlobalProperties filtered = requested.filterBySemanticProperties(semanticProps, 0);

    assertNotNull(filtered);
    assertEquals(PartitioningProperty.RANGE_PARTITIONED, filtered.getPartitioning());

    final Ordering filteredOrdering = filtered.getOrdering();
    assertNotNull(filteredOrdering);
    assertEquals(3, filteredOrdering.getNumberOfFields());

    // field indexes are rewritten, position by position
    final int[] expectedFields = { 7, 1, 2 };
    for (int pos = 0; pos < expectedFields.length; pos++) {
        assertEquals(expectedFields[pos], filteredOrdering.getFieldNumber(pos).intValue());
    }

    // key types are carried over unchanged
    final Class<?>[] expectedTypes = { LongValue.class, IntValue.class, ByteValue.class };
    for (int pos = 0; pos < expectedTypes.length; pos++) {
        assertEquals(expectedTypes[pos], filteredOrdering.getType(pos));
    }

    // sort directions are carried over unchanged
    final Order[] expectedOrders = { Order.DESCENDING, Order.ASCENDING, Order.DESCENDING };
    for (int pos = 0; pos < expectedOrders.length; pos++) {
        assertEquals(expectedOrders[pos], filteredOrdering.getOrder(pos));
    }

    final DataDistribution filteredDistribution = filtered.getDataDistribution();
    assertNotNull(filteredDistribution);
    assertEquals(distribution, filteredDistribution);

    // range partitioning must not leave any other partitioning information behind
    assertNull(filtered.getPartitionedFields());
    assertNull(filtered.getCustomPartitioner());
}
use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.
the class JobGraphGenerator method connectJobVertices.
// ------------------------------------------------------------------------
// Connecting Vertices
// ------------------------------------------------------------------------
/**
 * Connects the source vertex to the target vertex as described by the given channel, writes the
 * channel's ship strategy settings into the source task configuration, registers the input in the
 * target task configuration, and attaches descriptive strategy names to the created job edge.
 *
 * <p>NOTE: The channel for global and local strategies are different if we connect a union. The
 * global strategy channel is then the channel into the union node, the local strategy channel the
 * one from the union to the actual target operator.
 *
 * @param channel The channel describing ship strategy, data exchange mode, and local strategy.
 * @param inputNumber The input group number under which the input is registered at the target.
 * @param sourceVertex The vertex producing the data.
 * @param sourceConfig The task configuration of the producing vertex.
 * @param targetVertex The vertex consuming the data.
 * @param targetConfig The task configuration of the consuming vertex.
 * @param isBroadcast Whether the input is registered as a broadcast input at the target.
 *
 * @return The distribution pattern that was used to connect the two vertices.
 *
 * @throws CompilerException If the ship strategy is custom partitioning, but the channel has no partitioner.
 */
private DistributionPattern connectJobVertices(Channel channel, int inputNumber, final JobVertex sourceVertex, final TaskConfig sourceConfig, final JobVertex targetVertex, final TaskConfig targetConfig, boolean isBroadcast) throws CompilerException {
    final ShipStrategyType shipStrategyType = channel.getShipStrategy();

    // ------------ connect the vertices to the job graph --------------

    // only FORWARD keeps a point-wise connection, every other known strategy connects all-to-all
    final DistributionPattern distributionPattern;
    switch (shipStrategyType) {
        case FORWARD:
            distributionPattern = DistributionPattern.POINTWISE;
            break;
        case PARTITION_RANDOM:
        case BROADCAST:
        case PARTITION_HASH:
        case PARTITION_CUSTOM:
        case PARTITION_RANGE:
        case PARTITION_FORCED_REBALANCE:
            distributionPattern = DistributionPattern.ALL_TO_ALL;
            break;
        default:
            throw new RuntimeException("Unknown runtime ship strategy: " + shipStrategyType);
    }

    final ResultPartitionType resultType;
    switch (channel.getDataExchangeMode()) {
        case PIPELINED:
            resultType = ResultPartitionType.PIPELINED;
            break;
        case BATCH:
            // BLOCKING results are currently not supported in closed loop iterations
            //
            // See https://issues.apache.org/jira/browse/FLINK-1713 for details
            if (channel.getSource().isOnDynamicPath()) {
                resultType = ResultPartitionType.PIPELINED;
            } else {
                resultType = ResultPartitionType.BLOCKING;
            }
            break;
        case PIPELINE_WITH_BATCH_FALLBACK:
            throw new UnsupportedOperationException("Data exchange mode " + channel.getDataExchangeMode() + " currently not supported.");
        default:
            throw new UnsupportedOperationException("Unknown data exchange mode.");
    }

    final JobEdge edge = targetVertex.connectNewDataSetAsInput(sourceVertex, distributionPattern, resultType);

    // -------------- configure the source task's ship strategy in the task config --------------

    // the index of the new output is the number of outputs registered so far
    final int outputIndex = sourceConfig.getNumOutputs();
    sourceConfig.addOutputShipStrategy(shipStrategyType);

    // the output serializer is only set together with the first output
    if (outputIndex == 0) {
        sourceConfig.setOutputSerializer(channel.getSerializer());
    }

    if (channel.getShipStrategyComparator() != null) {
        sourceConfig.setOutputComparator(channel.getShipStrategyComparator(), outputIndex);
    }

    if (shipStrategyType == ShipStrategyType.PARTITION_RANGE) {
        final DataDistribution dataDistribution = channel.getDataDistribution();
        if (dataDistribution == null) {
            // NOTE(review): the analogous custom-partitioner check below throws CompilerException;
            // consider aligning the exception type once the class hierarchy is confirmed.
            throw new RuntimeException("Range partitioning requires data distribution.");
        }
        sourceConfig.setOutputDataDistribution(dataDistribution, outputIndex);
    }

    if (shipStrategyType == ShipStrategyType.PARTITION_CUSTOM) {
        if (channel.getPartitioner() == null) {
            throw new CompilerException("The ship strategy was set to custom partitioning, but no partitioner was set.");
        }
        sourceConfig.setOutputPartitioner(channel.getPartitioner(), outputIndex);
    }

    // ---------------- configure the receiver -------------------
    if (isBroadcast) {
        targetConfig.addBroadcastInputToGroup(inputNumber);
    } else {
        targetConfig.addInputToGroup(inputNumber);
    }

    // ---------------- attach the additional infos to the job edge -------------------

    // ship strategy name, followed by the keys (or the key ordering, if a sort order is known)
    String shipStrategyDescription = JsonMapper.getShipStrategyString(shipStrategyType);
    if (channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
        final String shipKeys;
        if (channel.getShipStrategySortOrder() == null) {
            shipKeys = channel.getShipStrategyKeys().toString();
        } else {
            shipKeys = Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString();
        }
        shipStrategyDescription += " on " + shipKeys;
    }

    // local strategy name with the same key suffix; stays null if there is no local strategy
    String localStrategyDescription = null;
    final LocalStrategy localStrategyType = channel.getLocalStrategy();
    if (localStrategyType != null && localStrategyType != LocalStrategy.NONE) {
        localStrategyDescription = JsonMapper.getLocalStrategyString(localStrategyType);
        if (localStrategyDescription != null && channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
            final String localKeys;
            if (channel.getLocalStrategySortOrder() == null) {
                localKeys = channel.getLocalStrategyKeys().toString();
            } else {
                localKeys = Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString();
            }
            localStrategyDescription += " on " + localKeys;
        }
    }

    // caching description is omitted when the channel does not use a temp mode
    final TempMode tempMode = channel.getTempMode();
    final String cachingDescription;
    if (tempMode == TempMode.NONE) {
        cachingDescription = null;
    } else {
        cachingDescription = tempMode.toString();
    }

    edge.setShipStrategyName(shipStrategyDescription);
    edge.setPreProcessingOperationName(localStrategyDescription);
    edge.setOperatorLevelCachingDescription(cachingDescription);

    return distributionPattern;
}
use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.
the class BatchTask method getOutputCollector.
// --------------------------------------------------------------------------------------------
// Result Shipping and Chained Tasks
// --------------------------------------------------------------------------------------------
/**
 * Builds the {@link Collector} through which the given task emits its results. One
 * {@link RecordWriter} is created per configured output; each writer gets an {@link OutputEmitter}
 * that is set up from the ship strategy (and, when a comparator is configured for that output,
 * from the comparator, partitioner, and data distribution) stored in the configuration.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs If not null, all created writers are appended to this list.
 * @param outputOffset The offset added to the output index when fetching the writer from the task's environment.
 * @param numOutputs The number of outputs described in the configuration.
 *
 * @return The OutputCollector that data produced in this task is submitted to, or null if
 *         {@code numOutputs} is zero.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl, List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception {
    // without outputs there is nothing to collect into
    if (numOutputs == 0) {
        return null;
    }

    // the serializer factory is shared by all outputs of the task
    final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);

    final List<RecordWriter<SerializationDelegate<T>>> recordWriters = new ArrayList<>(numOutputs);

    for (int outputIndex = 0; outputIndex < numOutputs; outputIndex++) {
        final ShipStrategyType shipStrategy = config.getOutputShipStrategy(outputIndex);
        final int subtaskIndex = task.getIndexInSubtaskGroup();
        final TypeComparatorFactory<T> comparatorFactory = config.getOutputComparator(outputIndex, cl);

        // pick the channel selector: the comparator-based variant needs the extra partitioning info
        final ChannelSelector<SerializationDelegate<T>> channelSelector;
        if (comparatorFactory != null) {
            final DataDistribution distribution = config.getOutputDataDistribution(outputIndex, cl);
            final Partitioner<?> customPartitioner = config.getOutputPartitioner(outputIndex, cl);
            final TypeComparator<T> keyComparator = comparatorFactory.createComparator();
            channelSelector = new OutputEmitter<T>(shipStrategy, subtaskIndex, keyComparator, customPartitioner, distribution);
        } else {
            channelSelector = new OutputEmitter<T>(shipStrategy, subtaskIndex);
        }

        final RecordWriter<SerializationDelegate<T>> writer = new RecordWriter<SerializationDelegate<T>>(
                task.getEnvironment().getWriter(outputOffset + outputIndex), channelSelector);
        writer.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());

        recordWriters.add(writer);
    }

    // hand the writers to the caller as well, if it asked for them
    if (eventualOutputs != null) {
        eventualOutputs.addAll(recordWriters);
    }

    return new OutputCollector<T>(recordWriters, serializerFactory.getSerializer());
}
use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.
the class TaskConfig method getOutputDataDistribution.
/**
 * Restores the data distribution stored for the given output.
 *
 * <p>The distribution's class name is read from the {@code OUTPUT_DATA_DISTRIBUTION_CLASS} entry
 * (which is not indexed by output number), while the serialized state is read from the
 * {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum} entry. The class is instantiated first and
 * then populated from the serialized state.
 *
 * @param outputNum The number of the output whose serialized distribution state is read.
 * @param cl The classloader used to load the data distribution class.
 *
 * @return The restored data distribution, or null if no data distribution class is configured.
 *
 * @throws ClassNotFoundException If the configured class cannot be loaded through the given classloader.
 * @throws CorruptConfigurationException If the configured class is not a {@link DataDistribution},
 *         or if a class is configured but no serialized state exists for the output.
 * @throws RuntimeException If reading the serialized state into the distribution fails.
 */
public DataDistribution getOutputDataDistribution(int outputNum, final ClassLoader cl) throws ClassNotFoundException {
    final String distributionClassName = this.config.getString(OUTPUT_DATA_DISTRIBUTION_CLASS, null);
    if (distributionClassName == null) {
        // no distribution configured at all
        return null;
    }

    final Class<? extends DataDistribution> distributionClass;
    try {
        distributionClass = Class.forName(distributionClassName, true, cl).asSubclass(DataDistribution.class);
    } catch (ClassCastException e) {
        throw new CorruptConfigurationException(
                "The class noted in the configuration as the data distribution "
                + "is no subclass of DataDistribution.");
    }

    // the instance is created before the state lookup, so instantiation problems surface first
    final DataDistribution restored = InstantiationUtil.instantiate(distributionClass, DataDistribution.class);

    final byte[] serializedState = this.config.getBytes(OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum, null);
    if (serializedState == null) {
        throw new CorruptConfigurationException("The configuration contained the data distribution type, but no serialized state.");
    }

    final DataInputViewStreamWrapper stateView =
            new DataInputViewStreamWrapper(new ByteArrayInputStream(serializedState));
    try {
        restored.read(stateView);
    } catch (Exception e) {
        // append the cause's message only when there is one
        final String cause = e.getMessage();
        final String suffix;
        if (cause == null) {
            suffix = ".";
        } else {
            suffix = ": " + cause;
        }
        throw new RuntimeException("The deserialization of the encoded data distribution state caused an error" + suffix, e);
    }
    return restored;
}
Aggregations