Search in sources:

Example 1 with ManagedMemoryUseCase

Use of org.apache.flink.core.memory.ManagedMemoryUseCase in the Apache Flink project.

From the class PythonOperatorChainingOptimizer, method createChainedTransformation.

/**
 * Builds a single transformation that stands in for {@code upTransform} followed by
 * {@code downTransform}.
 *
 * <p>The down operator's Python function info chain is re-rooted onto the up operator's function
 * info, the up operator is copied with that combined info, and a new one-input or two-input
 * transformation is created around the copied operator. Inputs, state key settings, uid, uid
 * hash, parallelism, max parallelism, chaining strategy, co-location key and slot sharing group
 * are taken from {@code upTransform}; the output type is taken from {@code downTransform};
 * names, descriptions, managed memory declarations, buffer timeout and resources are combined
 * from both.
 *
 * @param upTransform the upstream transformation; its operator is cast to
 *     {@code AbstractDataStreamPythonFunctionOperator}
 * @param downTransform the downstream transformation; its operator is cast to
 *     {@code PythonProcessOperator}
 * @return the newly created chained transformation
 */
private static Transformation<?> createChainedTransformation(Transformation<?> upTransform, Transformation<?> downTransform) {
    final AbstractDataStreamPythonFunctionOperator<?> upOperator = (AbstractDataStreamPythonFunctionOperator<?>) ((SimpleOperatorFactory<?>) getOperatorFactory(upTransform)).getOperator();
    final PythonProcessOperator<?, ?> downOperator = (PythonProcessOperator<?, ?>) ((SimpleOperatorFactory<?>) getOperatorFactory(downTransform)).getOperator();

    // Operate on the objects returned by copy() rather than on the operators' own function infos.
    final DataStreamPythonFunctionInfo upFunctionInfo = upOperator.getPythonFunctionInfo().copy();
    final DataStreamPythonFunctionInfo downFunctionInfo = downOperator.getPythonFunctionInfo().copy();

    // Follow the first input of each function info until one without inputs is reached,
    // then hook the upstream function info in as its only input.
    DataStreamPythonFunctionInfo chainHead = downFunctionInfo;
    while (chainHead.getInputs().length > 0) {
        chainHead = (DataStreamPythonFunctionInfo) chainHead.getInputs()[0];
    }
    chainHead.setInputs(new DataStreamPythonFunctionInfo[] { upFunctionInfo });

    final AbstractDataStreamPythonFunctionOperator<?> fusedOperator = upOperator.copy(downFunctionInfo, downOperator.getProducedType());
    // The fused operator carries the partition custom flag if either side had it.
    final boolean hasPartitionCustom = downOperator.containsPartitionCustom() || upOperator.containsPartitionCustom();
    fusedOperator.setContainsPartitionCustom(hasPartitionCustom);

    final String fusedName = upTransform.getName() + ", " + downTransform.getName();
    final PhysicalTransformation<?> fused;
    if (upOperator instanceof AbstractOneInputPythonFunctionOperator) {
        final OneInputTransformation<?, ?> oneInput = new OneInputTransformation(upTransform.getInputs().get(0), fusedName, (OneInputStreamOperator<?, ?>) fusedOperator, downTransform.getOutputType(), upTransform.getParallelism());
        // Raw view of the upstream transformation so its key selector can be handed over as-is.
        final OneInputTransformation upOneInput = (OneInputTransformation) upTransform;
        oneInput.setStateKeySelector(upOneInput.getStateKeySelector());
        oneInput.setStateKeyType(upOneInput.getStateKeyType());
        fused = oneInput;
    } else {
        final TwoInputTransformation<?, ?, ?> twoInput = new TwoInputTransformation(upTransform.getInputs().get(0), upTransform.getInputs().get(1), fusedName, (TwoInputStreamOperator<?, ?, ?>) fusedOperator, downTransform.getOutputType(), upTransform.getParallelism());
        // Raw view of the upstream transformation so its key selectors can be handed over as-is.
        final TwoInputTransformation upTwoInput = (TwoInputTransformation) upTransform;
        twoInput.setStateKeySelectors(upTwoInput.getStateKeySelector1(), upTwoInput.getStateKeySelector2());
        twoInput.setStateKeyType(upTwoInput.getStateKeyType());
        fused = twoInput;
    }

    fused.setUid(upTransform.getUid());
    final String userProvidedHash = upTransform.getUserProvidedNodeHash();
    if (userProvidedHash != null) {
        fused.setUidHash(userProvidedHash);
    }

    // Slot scope use cases: declare everything either side declared.
    upTransform.getManagedMemorySlotScopeUseCases().forEach(useCase -> fused.declareManagedMemoryUseCaseAtSlotScope(useCase));
    downTransform.getManagedMemorySlotScopeUseCases().forEach(useCase -> fused.declareManagedMemoryUseCaseAtSlotScope(useCase));

    // Operator scope use cases: start with the upstream weights, then add each downstream
    // weight on top of whatever is already declared for the same use case.
    upTransform.getManagedMemoryOperatorScopeUseCaseWeights().forEach((useCase, weight) -> fused.declareManagedMemoryUseCaseAtOperatorScope(useCase, weight));
    downTransform.getManagedMemoryOperatorScopeUseCaseWeights().forEach((useCase, weight) -> fused.declareManagedMemoryUseCaseAtOperatorScope(useCase, weight + fused.getManagedMemoryOperatorScopeUseCaseWeights().getOrDefault(useCase, 0)));

    fused.setBufferTimeout(Math.min(upTransform.getBufferTimeout(), downTransform.getBufferTimeout()));
    final int upMaxParallelism = upTransform.getMaxParallelism();
    if (upMaxParallelism > 0) {
        fused.setMaxParallelism(upMaxParallelism);
    }
    fused.setChainingStrategy(getOperatorFactory(upTransform).getChainingStrategy());
    fused.setCoLocationGroupKey(upTransform.getCoLocationGroupKey());
    fused.setResources(upTransform.getMinResources().merge(downTransform.getMinResources()), upTransform.getPreferredResources().merge(downTransform.getPreferredResources()));
    upTransform.getSlotSharingGroup().ifPresent(group -> fused.setSlotSharingGroup(group));

    // Description: join both when both exist, otherwise use whichever one is present.
    final String upDescription = upTransform.getDescription();
    final String downDescription = downTransform.getDescription();
    if (upDescription == null) {
        if (downDescription != null) {
            fused.setDescription(downDescription);
        }
    } else if (downDescription == null) {
        fused.setDescription(upDescription);
    } else {
        fused.setDescription(upDescription + ", " + downDescription);
    }
    return fused;
}
Also used : AbstractOneInputPythonFunctionOperator(org.apache.flink.streaming.api.operators.python.AbstractOneInputPythonFunctionOperator) DataStreamPythonFunctionInfo(org.apache.flink.streaming.api.functions.python.DataStreamPythonFunctionInfo) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) PythonProcessOperator(org.apache.flink.streaming.api.operators.python.PythonProcessOperator) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) HashMap(java.util.HashMap) Map(java.util.Map) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) AbstractDataStreamPythonFunctionOperator(org.apache.flink.streaming.api.operators.python.AbstractDataStreamPythonFunctionOperator)

Example 2 with ManagedMemoryUseCase

Use of org.apache.flink.core.memory.ManagedMemoryUseCase in the Apache Flink project.

From the class BatchExecutionUtils, method applyBatchExecutionSettings.

/**
 * Applies batch execution settings to the stream node of the given transformation.
 *
 * <p>The node's chaining strategy is always adjusted. When {@code ExecutionOptions.SORT_INPUTS}
 * is enabled in the graph generator config, the given input requirements are registered on the
 * node by position, and the node's managed memory weights are replaced with a single
 * {@code ManagedMemoryUseCase.OPERATOR} weight and an empty set of slot scope use cases.
 *
 * @param transformationId id used to look up the stream node in the context's stream graph
 * @param context translator context providing the stream graph and the graph generator config
 * @param inputRequirements requirement for each input; the array index is used as the input index
 * @throws IllegalStateException presumably raised by {@code checkState} when sorting is enabled
 *     and the node is input selectable (confirm against the imported {@code checkState})
 */
static void applyBatchExecutionSettings(int transformationId, TransformationTranslator.Context context, StreamConfig.InputRequirement... inputRequirements) {
    final StreamNode streamNode = context.getStreamGraph().getStreamNode(transformationId);
    final boolean sortingEnabled = context.getGraphGeneratorConfig().get(ExecutionOptions.SORT_INPUTS);
    final boolean inputSelectable = isInputSelectable(streamNode);
    adjustChainingStrategy(streamNode);
    // Sorting inputs and an InputSelectable operator must not occur together.
    checkState(!(inputSelectable && sortingEnabled), "Batch state backend and sorting inputs are not supported in graphs with an InputSelectable operator.");
    if (!sortingEnabled) {
        return;
    }
    LOG.debug("Applying sorting/pass-through input requirements for operator {}.", streamNode);
    int inputIndex = 0;
    for (StreamConfig.InputRequirement requirement : inputRequirements) {
        streamNode.addInputRequirement(inputIndex, requirement);
        inputIndex++;
    }
    final Map<ManagedMemoryUseCase, Integer> operatorWeights = new HashMap<>();
    operatorWeights.put(ManagedMemoryUseCase.OPERATOR, deriveMemoryWeight(context.getGraphGeneratorConfig()));
    streamNode.setManagedMemoryUseCaseWeights(operatorWeights, Collections.emptySet());
}
Also used : HashMap(java.util.HashMap) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase)

Example 3 with ManagedMemoryUseCase

Use of org.apache.flink.core.memory.ManagedMemoryUseCase in the Apache Flink project.

From the class StreamingJobGraphGeneratorTest, method createJobGraphForManagedMemoryFractionTest.

/**
 * Builds a job graph with a source and three map operators for managed memory fraction tests.
 *
 * <p>Each of the four operators (source, map1, map2, map3, in that order) gets the resource spec,
 * operator scope use case weights and slot scope use cases found at its index in the given
 * lists, so every list needs at least four entries.
 *
 * @param resourceSpecs resource spec per operator, applied through the reflectively obtained
 *     set-resources method
 * @param operatorScopeUseCaseWeights operator scope managed memory weights per operator
 * @param slotScopeUseCases slot scope managed memory use cases per operator
 * @return the job graph created from the environment's stream graph
 * @throws Exception if the reflective invocation or the graph generation fails
 */
private JobGraph createJobGraphForManagedMemoryFractionTest(final List<ResourceSpec> resourceSpecs, final List<Map<ManagedMemoryUseCase, Integer>> operatorScopeUseCaseWeights, final List<Set<ManagedMemoryUseCase>> slotScopeUseCases) throws Exception {
    final Method setResources = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // A source that does nothing; only its place in the graph matters here.
    final ParallelSourceFunction<Integer> noOpSource = new ParallelSourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) {
        }

        @Override
        public void cancel() {
        }
    };
    final DataStream<Integer> source = env.addSource(noOpSource);
    setResources.invoke(source, resourceSpecs.get(0));

    // CHAIN(source -> map1) in default slot sharing group
    final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
    setResources.invoke(map1, resourceSpecs.get(1));

    // CHAIN(map2) in default slot sharing group
    final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
    setResources.invoke(map2, resourceSpecs.get(2));

    // CHAIN(map3) in test slot sharing group
    final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
    setResources.invoke(map3, resourceSpecs.get(3));

    // Declare the managed memory use cases operator by operator, in creation order.
    final List<DataStream<Integer>> operators = Arrays.asList(source, map1, map2, map3);
    for (int index = 0; index < operators.size(); index++) {
        declareManagedMemoryUseCaseForTranformation(operators.get(index).getTransformation(), operatorScopeUseCaseWeights.get(index), slotScopeUseCases.get(index));
    }
    return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TypeSerializerInputFormat(org.apache.flink.api.java.io.TypeSerializerInputFormat) YieldingOperatorFactory(org.apache.flink.streaming.api.operators.YieldingOperatorFactory) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) UserCodeWrapper(org.apache.flink.api.common.operators.util.UserCodeWrapper) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Map(java.util.Map) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) Set(java.util.Set) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Assert.assertFalse(org.junit.Assert.assertFalse) StreamingJobGraphGenerator.areOperatorsChainable(org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.areOperatorsChainable) Boundedness(org.apache.flink.api.connector.source.Boundedness) OneInputStreamOperatorFactory(org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) NumberSequenceSource(org.apache.flink.api.connector.source.lib.NumberSequenceSource) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) Collector(org.apache.flink.util.Collector) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Types(org.apache.flink.api.common.typeinfo.Types) 
DataStreamSink(org.apache.flink.streaming.api.datastream.DataStreamSink) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) MailboxExecutor(org.apache.flink.api.common.operators.MailboxExecutor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Assert.assertNull(org.junit.Assert.assertNull) Matcher(org.hamcrest.Matcher) Transformation(org.apache.flink.api.dag.Transformation) Assert(org.junit.Assert) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Assert.assertEquals(org.junit.Assert.assertEquals) CoreMatchers.is(org.hamcrest.CoreMatchers.is) PipelineOptions(org.apache.flink.configuration.PipelineOptions) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) TestLogger(org.apache.flink.util.TestLogger) InputFormat(org.apache.flink.api.common.io.InputFormat) Assert.fail(org.junit.Assert.fail) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Method(java.lang.reflect.Method) OutputFormat(org.apache.flink.api.common.io.OutputFormat) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) Collectors(java.util.stream.Collectors) 
ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) SimpleOperatorFactory(org.apache.flink.streaming.api.operators.SimpleOperatorFactory) List(java.util.List) MultipleInputStreamTask(org.apache.flink.streaming.runtime.tasks.MultipleInputStreamTask) SerializedValue(org.apache.flink.util.SerializedValue) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) InputOutputFormatVertex(org.apache.flink.runtime.jobgraph.InputOutputFormatVertex) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) JobType(org.apache.flink.runtime.jobgraph.JobType) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) MockSource(org.apache.flink.api.connector.source.mocks.MockSource) SourceOperatorStreamTask(org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask) StreamMap(org.apache.flink.streaming.api.operators.StreamMap) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) 
CoordinatedOperatorFactory(org.apache.flink.streaming.api.operators.CoordinatedOperatorFactory) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) InputFormatSourceFunction(org.apache.flink.streaming.api.functions.source.InputFormatSourceFunction) DataStream(org.apache.flink.streaming.api.datastream.DataStream) RescalePartitioner(org.apache.flink.streaming.runtime.partitioner.RescalePartitioner) FeatureMatcher(org.hamcrest.FeatureMatcher) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) TestAnyModeReadingStreamOperator(org.apache.flink.streaming.util.TestAnyModeReadingStreamOperator) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) InputOutputFormatContainer(org.apache.flink.runtime.jobgraph.InputOutputFormatContainer) Comparator(java.util.Comparator) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Collections(java.util.Collections) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Method(java.lang.reflect.Method)

Example 4 with ManagedMemoryUseCase

Use of org.apache.flink.core.memory.ManagedMemoryUseCase in the Apache Flink project.

From the class StreamGraphGeneratorBatchExecutionTest, method testManagedMemoryWeights.

/**
 * Checks the managed memory settings of a keyed process node in a batch-mode stream graph built
 * without extra configuration: the operator scope weights must consist of exactly one
 * {@code OPERATOR} entry equal to the default of {@code ExecutionOptions.SORTED_INPUTS_MEMORY}
 * in mebibytes, and the slot scope use cases must be exactly {@code STATE_BACKEND}.
 */
@Test
public void testManagedMemoryWeights() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final SingleOutputStreamOperator<Integer> process = env.fromElements(1, 2).keyBy(Integer::intValue).process(DUMMY_PROCESS_FUNCTION);
    final StreamGraph batchGraph = getStreamGraphInBatchMode(process.addSink(new DiscardingSink<>()));
    final StreamNode node = batchGraph.getStreamNode(process.getId());

    final int defaultWeight = ExecutionOptions.SORTED_INPUTS_MEMORY.defaultValue().getMebiBytes();
    final Map<ManagedMemoryUseCase, Integer> expectedWeights = Collections.singletonMap(ManagedMemoryUseCase.OPERATOR, defaultWeight);

    assertThat(node.getManagedMemoryOperatorScopeUseCaseWeights(), equalTo(expectedWeights));
    assertThat(node.getManagedMemorySlotScopeUseCases(), equalTo(Collections.singleton(ManagedMemoryUseCase.STATE_BACKEND)));
}
Also used : HashMap(java.util.HashMap) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Test(org.junit.Test)

Example 5 with ManagedMemoryUseCase

Use of org.apache.flink.core.memory.ManagedMemoryUseCase in the Apache Flink project.

From the class StreamGraphGeneratorBatchExecutionTest, method testCustomManagedMemoryWeights.

/**
 * Checks the managed memory settings of a keyed process node in a batch-mode stream graph when
 * {@code ExecutionOptions.SORTED_INPUTS_MEMORY} is set to 42 MiB: the operator scope weights
 * must consist of exactly one {@code OPERATOR} entry with value 42, and the slot scope use cases
 * must be exactly {@code STATE_BACKEND}.
 */
@Test
public void testCustomManagedMemoryWeights() {
    final Configuration customConfig = new Configuration();
    customConfig.set(ExecutionOptions.SORTED_INPUTS_MEMORY, MemorySize.ofMebiBytes(42));

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final SingleOutputStreamOperator<Integer> process = env.fromElements(1, 2).keyBy(Integer::intValue).process(DUMMY_PROCESS_FUNCTION);
    final DataStreamSink<Integer> discardingSink = process.addSink(new DiscardingSink<>());

    final StreamGraph batchGraph = getStreamGraphInBatchMode(discardingSink, customConfig);
    final StreamNode node = batchGraph.getStreamNode(process.getId());

    final Map<ManagedMemoryUseCase, Integer> expectedWeights = Collections.singletonMap(ManagedMemoryUseCase.OPERATOR, 42);

    assertThat(node.getManagedMemoryOperatorScopeUseCaseWeights(), equalTo(expectedWeights));
    assertThat(node.getManagedMemorySlotScopeUseCases(), equalTo(Collections.singleton(ManagedMemoryUseCase.STATE_BACKEND)));
}
Also used : Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Test(org.junit.Test)

Aggregations

ManagedMemoryUseCase (org.apache.flink.core.memory.ManagedMemoryUseCase)9 HashMap (java.util.HashMap)7 Test (org.junit.Test)5 Map (java.util.Map)4 Configuration (org.apache.flink.configuration.Configuration)4 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2 Comparator (java.util.Comparator)2 IdentityHashMap (java.util.IdentityHashMap)2 List (java.util.List)2 Set (java.util.Set)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Collectors (java.util.stream.Collectors)2 ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)2 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)2 IOException (java.io.IOException)1 Method (java.lang.reflect.Method)1