Search in sources:

Example 1 with ReduceFunction

Use of org.apache.flink.api.common.functions.ReduceFunction in the project flink by apache.

From the class WordCountSubclassInterfacePOJOITCase, method testProgram.

/**
 * Runs a word count over the text file at {@code textPath} and writes the result as text to
 * {@code resultPath}.
 *
 * <p>Tokens are grouped by the {@code "word"} field, the counts held in {@code secretCount} are
 * summed per group, and the summed value is then copied into the {@code count} field before the
 * records are written out.
 *
 * @throws Exception propagated from building or executing the job
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> text = env.readTextFile(textPath);
    DataSet<WCBase> counts = text.flatMap(new Tokenizer()).groupBy("word").reduce(new ReduceFunction<WCBase>() {

        private static final long serialVersionUID = 1L;

        @Override
        public WCBase reduce(WCBase value1, WCBase value2) {
            // NOTE(review): assumes every record in the group is a WC instance — confirm against Tokenizer.
            WC wc1 = (WC) value1;
            WC wc2 = (WC) value2;
            // Accumulate into the first record and hand it back as the combined value.
            int c = wc1.secretCount.getCount() + wc2.secretCount.getCount();
            wc1.secretCount.setCount(c);
            return wc1;
        }
    }).map(new MapFunction<WCBase, WCBase>() {

        @Override
        public WCBase map(WCBase value) throws Exception {
            // Publish the aggregated count through the plain 'count' field of the record.
            WC wc = (WC) value;
            wc.count = wc.secretCount.getCount();
            return wc;
        }
    });
    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction)

Example 2 with ReduceFunction

Use of org.apache.flink.api.common.functions.ReduceFunction in the project flink by apache.

From the class JobManagerHAProcessFailureBatchRecoveryITCase, method testJobManagerFailure.

/**
 * Test program with JobManager failure.
 *
 * <p>Sums a generated sequence on a remote environment configured for ZooKeeper high
 * availability. Every mapper subtask touches a file named {@code READY_MARKER_FILE_PREFIX + index}
 * on its first record and then sleeps 100 ms per record until the {@code PROCEED_MARKER_FILE}
 * exists in the coordination directory. After the reduce, the sum is asserted and a file named
 * {@code FINISH_MARKER_FILE_PREFIX + index} is touched.
 *
 * @param zkQuorum ZooKeeper quorum to connect to
 * @param coordinateDir Coordination directory
 * @throws Exception propagated from setting up or executing the job
 */
public void testJobManagerFailure(String zkQuorum, final File coordinateDir) throws Exception {
    final Configuration haConfig = new Configuration();
    haConfig.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    haConfig.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zkQuorum);

    final ExecutionEnvironment remoteEnv = ExecutionEnvironment.createRemoteEnvironment("leader", 1, haConfig);
    remoteEnv.setParallelism(PARALLELISM);
    remoteEnv.setNumberOfExecutionRetries(1);
    remoteEnv.getConfig().setExecutionMode(executionMode);
    remoteEnv.getConfig().disableSysoutLogging();

    final long elementCount = 100000L;

    // Stage 1: pass every element through unchanged, but slowly until the proceed file shows up.
    final DataSet<Long> throttled = remoteEnv.generateSequence(1, elementCount).rebalance().map(new RichMapFunction<Long, Long>() {

        private final File proceedFile = new File(coordinateDir, PROCEED_MARKER_FILE);

        private boolean markerCreated = false;

        private boolean checkForProceedFile = true;

        @Override
        public Long map(Long value) throws Exception {
            // announce this subtask once, on the first record it sees
            if (!markerCreated) {
                final File readyMarker = new File(coordinateDir, READY_MARKER_FILE_PREFIX + getRuntimeContext().getIndexOfThisSubtask());
                AbstractTaskManagerProcessFailureRecoveryTest.touchFile(readyMarker);
                markerCreated = true;
            }
            // once the proceed file has been observed there is nothing left to wait for
            if (!checkForProceedFile) {
                return value;
            }
            if (proceedFile.exists()) {
                checkForProceedFile = false;
            } else {
                // not yet allowed to proceed: wait so that we make slow progress
                Thread.sleep(100);
            }
            return value;
        }
    });

    // Stage 2: add everything up.
    final DataSet<Long> summed = throttled.reduce(new ReduceFunction<Long>() {

        @Override
        public Long reduce(Long value1, Long value2) {
            return value1 + value2;
        }
    });

    // Stage 3: check the sum and leave a finish marker; nothing is emitted downstream.
    final DataSet<Long> result = summed.flatMap(new RichFlatMapFunction<Long, Long>() {

        @Override
        public void flatMap(Long value, Collector<Long> out) throws Exception {
            assertEquals(elementCount * (elementCount + 1L) / 2L, (long) value);
            final File finishMarker = new File(coordinateDir, FINISH_MARKER_FILE_PREFIX + getRuntimeContext().getIndexOfThisSubtask());
            AbstractTaskManagerProcessFailureRecoveryTest.touchFile(finishMarker);
        }
    });

    result.output(new DiscardingOutputFormat<Long>());
    remoteEnv.execute();
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) IOException(java.io.IOException) File(java.io.File)

Example 3 with ReduceFunction

Use of org.apache.flink.api.common.functions.ReduceFunction in the project flink by apache.

From the class WindowedStream, method createFastTimeOperatorIfValid.

/**
 * Creates the specialized "fast" aligned processing-time window operator for a reduce function,
 * if this windowed stream is eligible for it.
 *
 * <p>The operator is only created when neither a trigger nor an evictor is set and the window
 * assigner's class is exactly {@code SlidingAlignedProcessingTimeWindows} or
 * {@code TumblingAlignedProcessingTimeWindows}. For tumbling windows the slide is equal to the
 * window size.
 *
 * @param function the reduce function; it is cast (unchecked) to {@code ReduceFunction<T>}
 * @param resultType type information passed to the transformation as its output type
 * @param functionName name appended to the generated operator name
 * @param <R> element type of the returned stream
 * @return the transformed stream, or {@code null} when the fast operator does not apply
 */
private <R> SingleOutputStreamOperator<R> createFastTimeOperatorIfValid(ReduceFunction<?> function, TypeInformation<R> resultType, String functionName) {
    // Resolve the assigner class first, exactly as the original condition did.
    final Class<?> assignerClass = windowAssigner.getClass();
    if (trigger != null || evictor != null) {
        return null;
    }

    // Only the window geometry differs between the two supported assigners.
    final long windowLength;
    final long windowSlide;
    if (assignerClass == SlidingAlignedProcessingTimeWindows.class) {
        SlidingAlignedProcessingTimeWindows timeWindows = (SlidingAlignedProcessingTimeWindows) windowAssigner;
        windowLength = timeWindows.getSize();
        windowSlide = timeWindows.getSlide();
    } else if (assignerClass == TumblingAlignedProcessingTimeWindows.class) {
        TumblingAlignedProcessingTimeWindows timeWindows = (TumblingAlignedProcessingTimeWindows) windowAssigner;
        windowLength = timeWindows.getSize();
        // tumbling windows advance by their full size
        windowSlide = windowLength;
    } else {
        return null;
    }

    String opName = "Fast " + windowAssigner + " of " + functionName;
    @SuppressWarnings("unchecked") ReduceFunction<T> reducer = (ReduceFunction<T>) function;
    @SuppressWarnings("unchecked") OneInputStreamOperator<T, R> op = (OneInputStreamOperator<T, R>) new AggregatingProcessingTimeWindowOperator<>(reducer, input.getKeySelector(), input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()), input.getType().createSerializer(getExecutionEnvironment().getConfig()), windowLength, windowSlide);
    return input.transform(opName, resultType, op);
}
Also used : ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) TumblingAlignedProcessingTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingAlignedProcessingTimeWindows) AggregatingProcessingTimeWindowOperator(org.apache.flink.streaming.runtime.operators.windowing.AggregatingProcessingTimeWindowOperator) SlidingAlignedProcessingTimeWindows(org.apache.flink.streaming.api.windowing.assigners.SlidingAlignedProcessingTimeWindows)

Example 4 with ReduceFunction

Use of org.apache.flink.api.common.functions.ReduceFunction in the project flink by apache.

From the class ReducingStateDescriptorTest, method testReducingStateDescriptor.

/**
 * Checks that a {@code ReducingStateDescriptor} reports the name, serializer and reduce function
 * it was constructed with, and that the name and serializer are still reported by the copy
 * obtained from {@code CommonTestUtils.createCopySerializable}.
 */
@Test
public void testReducingStateDescriptor() throws Exception {
    final String stateName = "testName";
    final ReduceFunction<String> keepFirst = (first, second) -> first;
    final TypeSerializer<String> kryoSerializer = new KryoSerializer<>(String.class, new ExecutionConfig());

    final ReducingStateDescriptor<String> descriptor = new ReducingStateDescriptor<>(stateName, keepFirst, kryoSerializer);

    // the freshly built descriptor
    assertEquals(stateName, descriptor.getName());
    assertNotNull(descriptor.getSerializer());
    assertEquals(kryoSerializer, descriptor.getSerializer());
    assertEquals(keepFirst, descriptor.getReduceFunction());

    // the descriptor after a copy round trip
    final ReducingStateDescriptor<String> roundTripped = CommonTestUtils.createCopySerializable(descriptor);
    assertEquals(stateName, roundTripped.getName());
    assertNotNull(roundTripped.getSerializer());
    assertEquals(kryoSerializer, roundTripped.getSerializer());
}
Also used : TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) Assert.assertNotNull(org.junit.Assert.assertNotNull) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) TestLogger(org.apache.flink.util.TestLogger) Test(org.junit.Test) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) CommonTestUtils(org.apache.flink.core.testutils.CommonTestUtils) Assert.assertEquals(org.junit.Assert.assertEquals) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) Test(org.junit.Test)

Example 5 with ReduceFunction

Use of org.apache.flink.api.common.functions.ReduceFunction in the project flink by apache.

From the class ReduceOperatorTest, method testReduceCollectionWithRuntimeContext.

/**
 * Executes a keyed reduce on an in-memory collection with a rich reduce function and checks the
 * result as well as the function's lifecycle calls.
 *
 * <p>The operator is run twice on the same input, first with object reuse disabled and then with
 * object reuse enabled; both runs must produce {@code ("foo", 4)} and {@code ("bar", 6)}. Inside
 * {@code open} the runtime context is checked for subtask index 0, parallelism 1 and the
 * configured task name.
 */
@Test
public void testReduceCollectionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();
        final ReduceFunction<Tuple2<String, Integer>> reducer = new RichReduceFunction<Tuple2<String, Integer>>() {

            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                // Build a new tuple rather than mutating either input.
                return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };
        // Reduce on field 0 (the String) of the tuples.
        ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op = new ReduceOperatorBase<>(reducer, new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE), new int[] { 0 }, "TestReducer");
        List<Tuple2<String, Integer>> input = new ArrayList<>(asList(new Tuple2<>("foo", 1), new Tuple2<>("foo", 3), new Tuple2<>("bar", 2), new Tuple2<>("bar", 4)));
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();
        // First run: object reuse disabled.
        executionConfig.disableObjectReuse();
        List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()), executionConfig);
        // Second run: object reuse enabled.
        executionConfig.enableObjectReuse();
        List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()), executionConfig);
        // Compare as sets so that the order of the result lists does not matter.
        Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
        Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);
        Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(new Tuple2<>("foo", 4), new Tuple2<>("bar", 6)));
        assertEquals(expectedResult, resultSetMutableSafe);
        assertEquals(expectedResult, resultSetRegular);
        // NOTE: the flags are not reset between the two runs, so this only shows that
        // open/close were invoked at least once overall.
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        // Fail with the original exception attached as the cause, so its stack trace reaches the
        // test report and a null exception message cannot hide what went wrong.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction)26 Test (org.junit.Test)16 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)8 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)8 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)8 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)7 Configuration (org.apache.flink.configuration.Configuration)6 File (java.io.File)5 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)5 KryoSerializer (org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer)5 ArrayList (java.util.ArrayList)4 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)4 RichReduceFunction (org.apache.flink.api.common.functions.RichReduceFunction)4 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)4 KeySelector (org.apache.flink.api.java.functions.KeySelector)4 DataStream (org.apache.flink.streaming.api.datastream.DataStream)4 HashMap (java.util.HashMap)3 List (java.util.List)3 TaskInfo (org.apache.flink.api.common.TaskInfo)3 MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)3