Search in sources :

Example 1 with DataDistribution

use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.

The class RequestedGlobalPropertiesFilteringTest, method testRangePartitioningPreserved3.

/**
 * Checks that a requested range partitioning is kept when it is filtered through
 * semantic properties that forward fields 7->3, 1->1 and 2->6: the ordering requested
 * on fields (3, 1, 6) is expected to come back on fields (7, 1, 2) with the same key
 * types, the same sort orders and the same data distribution.
 */
@Test
public void testRangePartitioningPreserved3() {
    // requested ordering, expressed on the output fields of the operator
    Ordering requestedOrdering = new Ordering();
    requestedOrdering.appendOrdering(3, LongValue.class, Order.DESCENDING);
    requestedOrdering.appendOrdering(1, IntValue.class, Order.ASCENDING);
    requestedOrdering.appendOrdering(6, ByteValue.class, Order.DESCENDING);
    DataDistribution distribution = new MockDistribution();

    // forwarded fields of the operator
    SingleInputSemanticProperties semanticProps = new SingleInputSemanticProperties();
    SemanticPropUtil.getSemanticPropsSingleFromString(semanticProps, new String[] { "7->3;1->1;2->6" }, null, null, tupleInfo, tupleInfo);

    RequestedGlobalProperties requested = new RequestedGlobalProperties();
    requested.setRangePartitioned(requestedOrdering, distribution);
    RequestedGlobalProperties filtered = requested.filterBySemanticProperties(semanticProps, 0);

    assertNotNull(filtered);
    assertEquals(PartitioningProperty.RANGE_PARTITIONED, filtered.getPartitioning());
    Ordering filteredOrdering = filtered.getOrdering();
    assertNotNull(filteredOrdering);

    // expectations per ordering position, expressed on the input fields
    final int[] expectedFields = { 7, 1, 2 };
    final Class<?>[] expectedTypes = { LongValue.class, IntValue.class, ByteValue.class };
    final Order[] expectedOrders = { Order.DESCENDING, Order.ASCENDING, Order.DESCENDING };

    assertEquals(expectedFields.length, filteredOrdering.getNumberOfFields());
    for (int pos = 0; pos < expectedFields.length; pos++) {
        assertEquals(expectedFields[pos], filteredOrdering.getFieldNumber(pos).intValue());
    }
    for (int pos = 0; pos < expectedTypes.length; pos++) {
        assertEquals(expectedTypes[pos], filteredOrdering.getType(pos));
    }
    for (int pos = 0; pos < expectedOrders.length; pos++) {
        assertEquals(expectedOrders[pos], filteredOrdering.getOrder(pos));
    }

    assertNotNull(filtered.getDataDistribution());
    assertEquals(distribution, filtered.getDataDistribution());
    // range partitioning carries neither plain partition fields nor a custom partitioner
    assertNull(filtered.getPartitionedFields());
    assertNull(filtered.getCustomPartitioner());
}
Also used : DataDistribution(org.apache.flink.api.common.distributions.DataDistribution) Ordering(org.apache.flink.api.common.operators.Ordering) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties) Test(org.junit.Test)

Example 2 with DataDistribution

use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.

The class JobGraphGenerator, method connectJobVertices.

// ------------------------------------------------------------------------
// Connecting Vertices
// ------------------------------------------------------------------------
/**
 * Connects the source vertex to the target vertex with a new job edge and records the
 * shipping configuration of the channel: the ship strategy (plus comparator, data
 * distribution or partitioner where applicable) goes into the source task config, the
 * input registration goes into the target task config, and human-readable descriptions
 * of the ship strategy, local strategy and caching are attached to the edge.
 *
 * <p>NOTE: The channel for global and local strategies are different if we connect a union.
 * The global strategy channel is then the channel into the union node, the local strategy
 * channel the one from the union to the actual target operator.
 *
 * @param channel The channel describing ship strategy, data exchange mode and local strategy.
 * @param inputNumber The input of the target that is added to the input group.
 * @param sourceVertex The vertex producing the data.
 * @param sourceConfig The task configuration of the producing vertex.
 * @param targetVertex The vertex consuming the data.
 * @param targetConfig The task configuration of the consuming vertex.
 * @param isBroadcast Whether the input is registered as a broadcast input on the target.
 * @return The distribution pattern that was chosen for the new edge.
 * @throws CompilerException If custom partitioning is requested but the channel carries no partitioner.
 */
private DistributionPattern connectJobVertices(Channel channel, int inputNumber, final JobVertex sourceVertex, final TaskConfig sourceConfig, final JobVertex targetVertex, final TaskConfig targetConfig, boolean isBroadcast) throws CompilerException {
    final ShipStrategyType shipType = channel.getShipStrategy();

    // ------------ derive the edge properties from the channel --------------
    final DistributionPattern distributionPattern;
    switch(shipType) {
        case FORWARD:
            distributionPattern = DistributionPattern.POINTWISE;
            break;
        case PARTITION_RANDOM:
        case BROADCAST:
        case PARTITION_HASH:
        case PARTITION_CUSTOM:
        case PARTITION_RANGE:
        case PARTITION_FORCED_REBALANCE:
            distributionPattern = DistributionPattern.ALL_TO_ALL;
            break;
        default:
            throw new RuntimeException("Unknown runtime ship strategy: " + shipType);
    }

    final ResultPartitionType resultType;
    switch(channel.getDataExchangeMode()) {
        case PIPELINED:
            resultType = ResultPartitionType.PIPELINED;
            break;
        case BATCH:
            // BLOCKING results are currently not supported in closed loop iterations,
            // so sources on the dynamic path stay pipelined.
            //
            // See https://issues.apache.org/jira/browse/FLINK-1713 for details
            if (channel.getSource().isOnDynamicPath()) {
                resultType = ResultPartitionType.PIPELINED;
            } else {
                resultType = ResultPartitionType.BLOCKING;
            }
            break;
        case PIPELINE_WITH_BATCH_FALLBACK:
            throw new UnsupportedOperationException("Data exchange mode " + channel.getDataExchangeMode() + " currently not supported.");
        default:
            throw new UnsupportedOperationException("Unknown data exchange mode.");
    }

    // ------------ connect the vertices to the job graph --------------
    final JobEdge edge = targetVertex.connectNewDataSetAsInput(sourceVertex, distributionPattern, resultType);

    // -------------- configure the source task's ship strategy in the task config --------------
    // the index must be read before the new ship strategy is registered
    final int outputIndex = sourceConfig.getNumOutputs();
    sourceConfig.addOutputShipStrategy(shipType);

    // the serializer is only set together with the first output
    if (outputIndex == 0) {
        sourceConfig.setOutputSerializer(channel.getSerializer());
    }
    if (channel.getShipStrategyComparator() != null) {
        sourceConfig.setOutputComparator(channel.getShipStrategyComparator(), outputIndex);
    }

    if (shipType == ShipStrategyType.PARTITION_RANGE) {
        final DataDistribution dataDistribution = channel.getDataDistribution();
        if (dataDistribution == null) {
            throw new RuntimeException("Range partitioning requires data distribution.");
        }
        sourceConfig.setOutputDataDistribution(dataDistribution, outputIndex);
    }

    if (shipType == ShipStrategyType.PARTITION_CUSTOM) {
        if (channel.getPartitioner() == null) {
            throw new CompilerException("The ship strategy was set to custom partitioning, but no partitioner was set.");
        }
        sourceConfig.setOutputPartitioner(channel.getPartitioner(), outputIndex);
    }

    // ---------------- configure the receiver -------------------
    if (isBroadcast) {
        targetConfig.addBroadcastInputToGroup(inputNumber);
    } else {
        targetConfig.addInputToGroup(inputNumber);
    }

    // ---------------- attach the additional infos to the job edge -------------------
    String shipStrategy = JsonMapper.getShipStrategyString(shipType);
    if (channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
        final String shipKeys;
        if (channel.getShipStrategySortOrder() == null) {
            shipKeys = channel.getShipStrategyKeys().toString();
        } else {
            shipKeys = Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString();
        }
        shipStrategy = shipStrategy + " on " + shipKeys;
    }

    // stays null when the channel has no local strategy (or LocalStrategy.NONE)
    String localStrategy = null;
    if (channel.getLocalStrategy() != null && channel.getLocalStrategy() != LocalStrategy.NONE) {
        localStrategy = JsonMapper.getLocalStrategyString(channel.getLocalStrategy());
        if (localStrategy != null && channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
            final String localKeys;
            if (channel.getLocalStrategySortOrder() == null) {
                localKeys = channel.getLocalStrategyKeys().toString();
            } else {
                localKeys = Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString();
            }
            localStrategy = localStrategy + " on " + localKeys;
        }
    }

    final String caching;
    if (channel.getTempMode() == TempMode.NONE) {
        caching = null;
    } else {
        caching = channel.getTempMode().toString();
    }

    edge.setShipStrategyName(shipStrategy);
    edge.setPreProcessingOperationName(localStrategy);
    edge.setOperatorLevelCachingDescription(caching);
    return distributionPattern;
}
Also used : ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) DataDistribution(org.apache.flink.api.common.distributions.DataDistribution) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) CompilerException(org.apache.flink.optimizer.CompilerException)

Example 3 with DataDistribution

use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.

The class BatchTask, method getOutputCollector.

// --------------------------------------------------------------------------------------------
//                             Result Shipping and Chained Tasks
// --------------------------------------------------------------------------------------------
/**
 * Builds the {@link Collector} through which the given task emits its results. One record
 * writer is created per configured output; each writer is paired with a channel selector
 * built from that output's ship strategy and, if a comparator is configured for the output,
 * from its comparator, partitioner and data distribution as well.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs If not null, every created writer is additionally appended to this list.
 * @param outputOffset The offset added to the output index when looking up the environment's writers.
 * @param numOutputs The number of outputs described in the configuration.
 *
 * @return The OutputCollector that data produced in this task is submitted to,
 *         or {@code null} if {@code numOutputs} is zero.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl, List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception {
    // no outputs configured: there is nothing to collect into
    if (numOutputs == 0) {
        return null;
    }

    // factory for the serializer shared by all writers of this collector
    final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
    final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

    for (int outputIdx = 0; outputIdx < numOutputs; outputIdx++) {
        final ShipStrategyType strategy = config.getOutputShipStrategy(outputIdx);
        final int subtaskIndex = task.getIndexInSubtaskGroup();
        final TypeComparatorFactory<T> comparatorFactory = config.getOutputComparator(outputIdx, cl);

        // pick the channel selector: the comparator-based variant only if a comparator is configured
        final ChannelSelector<SerializationDelegate<T>> selector;
        if (comparatorFactory != null) {
            final DataDistribution distribution = config.getOutputDataDistribution(outputIdx, cl);
            final Partitioner<?> partitioner = config.getOutputPartitioner(outputIdx, cl);
            selector = new OutputEmitter<T>(strategy, subtaskIndex, comparatorFactory.createComparator(), partitioner, distribution);
        } else {
            selector = new OutputEmitter<T>(strategy, subtaskIndex);
        }

        final RecordWriter<SerializationDelegate<T>> writer = new RecordWriter<SerializationDelegate<T>>(task.getEnvironment().getWriter(outputOffset + outputIdx), selector);
        writer.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());
        writers.add(writer);
    }

    // hand the writers back to the caller as well, if it asked for them
    if (eventualOutputs != null) {
        eventualOutputs.addAll(writers);
    }
    return new OutputCollector<T>(writers, serializerFactory.getSerializer());
}
Also used : OutputCollector(org.apache.flink.runtime.operators.shipping.OutputCollector) ArrayList(java.util.ArrayList) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) RecordWriter(org.apache.flink.runtime.io.network.api.writer.RecordWriter) DataDistribution(org.apache.flink.api.common.distributions.DataDistribution)

Example 4 with DataDistribution

use of org.apache.flink.api.common.distributions.DataDistribution in project flink by apache.

The class TaskConfig, method getOutputDataDistribution.

/**
 * Restores the data distribution stored in this configuration for the given output.
 * The distribution class is looked up by the name stored under
 * {@code OUTPUT_DATA_DISTRIBUTION_CLASS}, instantiated, and then populated from the
 * bytes stored under {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param outputNum The number of the output whose serialized distribution state is read.
 * @param cl The class loader used to resolve the distribution class.
 * @return The restored data distribution, or {@code null} if no distribution class
 *         name is stored in the configuration.
 * @throws ClassNotFoundException If the stored class name cannot be resolved with the given class loader.
 * @throws CorruptConfigurationException If the stored class is not a {@code DataDistribution},
 *         or if a class name is stored but the serialized state is missing.
 */
public DataDistribution getOutputDataDistribution(int outputNum, final ClassLoader cl) throws ClassNotFoundException {
    final String distributionClassName = this.config.getString(OUTPUT_DATA_DISTRIBUTION_CLASS, null);
    if (distributionClassName == null) {
        // no distribution was configured
        return null;
    }

    final Class<? extends DataDistribution> distributionClass;
    try {
        distributionClass = Class.forName(distributionClassName, true, cl).asSubclass(DataDistribution.class);
    } catch (ClassCastException ccex) {
        throw new CorruptConfigurationException("The class noted in the configuration as the data distribution " + "is no subclass of DataDistribution.");
    }
    final DataDistribution distribution = InstantiationUtil.instantiate(distributionClass, DataDistribution.class);

    final byte[] serializedState = this.config.getBytes(OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum, null);
    if (serializedState == null) {
        throw new CorruptConfigurationException("The configuration contained the data distribution type, but no serialized state.");
    }

    // let the fresh instance read its own state back from the stored bytes
    final DataInputViewStreamWrapper stateView = new DataInputViewStreamWrapper(new ByteArrayInputStream(serializedState));
    try {
        distribution.read(stateView);
    } catch (Exception ex) {
        final String detail;
        if (ex.getMessage() == null) {
            detail = ".";
        } else {
            detail = ": " + ex.getMessage();
        }
        throw new RuntimeException("The deserialization of the encoded data distribution state caused an error" + detail, ex);
    }
    return distribution;
}
Also used : DataDistribution(org.apache.flink.api.common.distributions.DataDistribution) ByteArrayInputStream(java.io.ByteArrayInputStream) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) IOException(java.io.IOException)

Aggregations

DataDistribution (org.apache.flink.api.common.distributions.DataDistribution)4 ByteArrayInputStream (java.io.ByteArrayInputStream)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Ordering (org.apache.flink.api.common.operators.Ordering)1 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)1 DataInputViewStreamWrapper (org.apache.flink.core.memory.DataInputViewStreamWrapper)1 CompilerException (org.apache.flink.optimizer.CompilerException)1 RecordWriter (org.apache.flink.runtime.io.network.api.writer.RecordWriter)1 ResultPartitionType (org.apache.flink.runtime.io.network.partition.ResultPartitionType)1 DistributionPattern (org.apache.flink.runtime.jobgraph.DistributionPattern)1 JobEdge (org.apache.flink.runtime.jobgraph.JobEdge)1 OutputCollector (org.apache.flink.runtime.operators.shipping.OutputCollector)1 ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType)1 SerializationDelegate (org.apache.flink.runtime.plugable.SerializationDelegate)1 Test (org.junit.Test)1