Search in sources :

Example 11 with SourceTransformation

Usage of org.apache.flink.streaming.api.transformations.SourceTransformation in the Apache Flink project.

From class PythonOperatorChainingOptimizerTest, method testChainedTransformationPropertiesCorrectlySet:

/**
 * Builds source -> keyed process ("f1") -> process ("f2"), runs the chaining optimizer, and
 * checks the properties of the transformation found at index 1 of the optimized list.
 */
@Test
public void testChainedTransformationPropertiesCorrectlySet() {
    // Operators under test.
    PythonKeyedProcessOperator<?> keyedOp =
            createKeyedProcessOperator(
                    "f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOp =
            createProcessOperator("f2", Types.STRING(), Types.STRING());

    Transformation<?> source = mock(SourceTransformation.class);

    // Upstream (keyed) transformation with a full set of properties.
    OneInputTransformation<?, ?> keyedTransform =
            new OneInputTransformation(
                    source, "keyedProcess", keyedOp, keyedOp.getProducedType(), 2);
    keyedTransform.setUid("uid");
    keyedTransform.setSlotSharingGroup("group");
    keyedTransform.setCoLocationGroupKey("col");
    keyedTransform.setMaxParallelism(64);
    keyedTransform.declareManagedMemoryUseCaseAtOperatorScope(ManagedMemoryUseCase.OPERATOR, 5);
    keyedTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    keyedTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.STATE_BACKEND);
    keyedTransform.setBufferTimeout(1000L);
    keyedTransform.setChainingStrategy(ChainingStrategy.HEAD);

    // Downstream transformation, fed by the keyed one.
    Transformation<?> processTransform =
            new OneInputTransformation(
                    keyedTransform, "process", processOp, processOp.getProducedType(), 2);
    processTransform.setSlotSharingGroup("group");
    processTransform.declareManagedMemoryUseCaseAtOperatorScope(ManagedMemoryUseCase.OPERATOR, 10);
    processTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    processTransform.setMaxParallelism(64);
    processTransform.setBufferTimeout(500L);

    List<Transformation<?>> input = new ArrayList<>();
    input.add(source);
    input.add(keyedTransform);
    input.add(processTransform);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(input);

    // Three transformations go in, two are expected to come out.
    assertEquals(2, result.size());

    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) result.get(1);
    assertEquals(2, chained.getParallelism());
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(processOp.getProducedType(), chained.getOutputType());
    assertEquals(keyedTransform.getUid(), chained.getUid());

    String slotSharingGroupName = chained.getSlotSharingGroup().get().getName();
    assertEquals("group", slotSharingGroupName);
    assertEquals("col", chained.getCoLocationGroupKey());
    assertEquals(64, chained.getMaxParallelism());
    assertEquals(500L, chained.getBufferTimeout());

    // Operator-scope weights of 5 and 10 were declared above; 15 is expected here.
    int operatorScopeWeight =
            chained.getManagedMemoryOperatorScopeUseCaseWeights()
                    .getOrDefault(ManagedMemoryUseCase.OPERATOR, 0);
    assertEquals(15, operatorScopeWeight);
    assertEquals(ChainingStrategy.HEAD, chained.getOperatorFactory().getChainingStrategy());
    assertTrue(chained.getManagedMemorySlotScopeUseCases().contains(ManagedMemoryUseCase.PYTHON));
    assertTrue(
            chained.getManagedMemorySlotScopeUseCases()
                    .contains(ManagedMemoryUseCase.STATE_BACKEND));

    OneInputStreamOperator<?, ?> chainedOp = chained.getOperator();
    assertTrue(chainedOp instanceof PythonKeyedProcessOperator);
    PythonKeyedProcessOperator<?> chainedKeyedOp = (PythonKeyedProcessOperator<?>) chainedOp;
    validateChainedPythonFunctions(chainedKeyedOp.getPythonFunctionInfo(), "f2", "f1");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 12 with SourceTransformation

Usage of org.apache.flink.streaming.api.transformations.SourceTransformation in the Apache Flink project.

From class PythonOperatorChainingOptimizerTest, method testMultipleChainedOperators:

/**
 * Builds a linear pipeline of five Python operators ("f1" to "f5") behind a mocked source and
 * checks that the optimizer returns three transformations, the last two being chained keyed
 * process operators.
 */
@Test
public void testMultipleChainedOperators() {
    PythonKeyedProcessOperator<?> keyedOp1 =
            createKeyedProcessOperator(
                    "f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> procOp1 =
            createProcessOperator(
                    "f2", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> procOp2 =
            createProcessOperator(
                    "f3", new RowTypeInfo(Types.INT(), Types.INT()), Types.LONG());
    PythonKeyedProcessOperator<?> keyedOp2 =
            createKeyedProcessOperator(
                    "f4", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> procOp3 =
            createProcessOperator(
                    "f5", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());

    Transformation<?> source = mock(SourceTransformation.class);

    // Each transformation takes the previous one as its input.
    OneInputTransformation<?, ?> keyedTx1 =
            new OneInputTransformation(
                    source, "keyedProcess", keyedOp1, keyedOp1.getProducedType(), 2);
    Transformation<?> procTx1 =
            new OneInputTransformation(
                    keyedTx1, "process", procOp1, procOp1.getProducedType(), 2);
    Transformation<?> procTx2 =
            new OneInputTransformation(
                    procTx1, "process", procOp2, procOp2.getProducedType(), 2);
    OneInputTransformation<?, ?> keyedTx2 =
            new OneInputTransformation(
                    procTx2, "keyedProcess", keyedOp2, keyedOp2.getProducedType(), 2);
    Transformation<?> procTx3 =
            new OneInputTransformation(
                    keyedTx2, "process", procOp3, procOp3.getProducedType(), 2);

    List<Transformation<?>> input = new ArrayList<>();
    input.add(source);
    input.add(keyedTx1);
    input.add(procTx1);
    input.add(procTx2);
    input.add(keyedTx2);
    input.add(procTx3);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(input);
    assertEquals(3, result.size());

    // First chain: input type of the source, output type of the "f3" operator.
    OneInputTransformation<?, ?> firstChain = (OneInputTransformation<?, ?>) result.get(1);
    assertEquals(source.getOutputType(), firstChain.getInputType());
    assertEquals(procOp2.getProducedType(), firstChain.getOutputType());

    // Second chain: picks up where the first one ends, output type of the "f5" operator.
    OneInputTransformation<?, ?> secondChain = (OneInputTransformation<?, ?>) result.get(2);
    assertEquals(procOp2.getProducedType(), secondChain.getInputType());
    assertEquals(procOp3.getProducedType(), secondChain.getOutputType());

    OneInputStreamOperator<?, ?> firstOp = firstChain.getOperator();
    assertTrue(firstOp instanceof PythonKeyedProcessOperator);
    PythonKeyedProcessOperator<?> firstKeyedOp = (PythonKeyedProcessOperator<?>) firstOp;
    validateChainedPythonFunctions(firstKeyedOp.getPythonFunctionInfo(), "f3", "f2", "f1");

    OneInputStreamOperator<?, ?> secondOp = secondChain.getOperator();
    assertTrue(secondOp instanceof PythonKeyedProcessOperator);
    PythonKeyedProcessOperator<?> secondKeyedOp = (PythonKeyedProcessOperator<?>) secondOp;
    validateChainedPythonFunctions(secondKeyedOp.getPythonFunctionInfo(), "f5", "f4");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 13 with SourceTransformation

Usage of org.apache.flink.streaming.api.transformations.SourceTransformation in the Apache Flink project.

From class PythonOperatorChainingOptimizerTest, method testChainingUnorderedTransformations:

/**
 * Builds source -> keyed process ("f1") -> process ("f2") -> process ("f3"), hands the
 * transformations to the optimizer in an order that differs from the pipeline order, and
 * checks that two transformations are returned with the second one being a chained keyed
 * process operator.
 */
@Test
public void testChainingUnorderedTransformations() {
    PythonKeyedProcessOperator<?> keyedOp =
            createKeyedProcessOperator(
                    "f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> procOp1 =
            createProcessOperator("f2", Types.STRING(), Types.LONG());
    PythonProcessOperator<?, ?> procOp2 =
            createProcessOperator("f3", Types.LONG(), Types.INT());

    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> keyedTx =
            new OneInputTransformation(
                    source, "keyedProcess", keyedOp, keyedOp.getProducedType(), 2);
    Transformation<?> procTx1 =
            new OneInputTransformation(
                    keyedTx, "process", procOp1, procOp1.getProducedType(), 2);
    Transformation<?> procTx2 =
            new OneInputTransformation(
                    procTx1, "process", procOp2, procOp2.getProducedType(), 2);

    // Downstream transformations are deliberately listed before their inputs.
    List<Transformation<?>> input = new ArrayList<>();
    input.add(source);
    input.add(procTx2);
    input.add(procTx1);
    input.add(keyedTx);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(input);
    assertEquals(2, result.size());

    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) result.get(1);
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(procOp2.getProducedType(), chained.getOutputType());

    OneInputStreamOperator<?, ?> chainedOp = chained.getOperator();
    assertTrue(chainedOp instanceof PythonKeyedProcessOperator);
    PythonKeyedProcessOperator<?> chainedKeyedOp = (PythonKeyedProcessOperator<?>) chainedOp;
    validateChainedPythonFunctions(chainedKeyedOp.getPythonFunctionInfo(), "f3", "f2", "f1");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Aggregations

SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation): 13; Test (org.junit.Test): 12; ArrayList (java.util.ArrayList): 10; Transformation (org.apache.flink.api.dag.Transformation): 9; RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo): 7; OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 7; TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation): 7; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 5; PythonKeyedProcessOperator (org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator): 5; Arrays (java.util.Arrays): 2; Collections (java.util.Collections): 2; HashMap (java.util.HashMap): 2; Map (java.util.Map): 2; Optional (java.util.Optional): 2; Properties (java.util.Properties): 2; Consumer (java.util.function.Consumer): 2; DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema): 2; SerializationSchema (org.apache.flink.api.common.serialization.SerializationSchema): 2; Sink (org.apache.flink.api.connector.sink2.Sink): 2; DeliveryGuarantee (org.apache.flink.connector.base.DeliveryGuarantee): 2