Search in sources:

Example 6 with ReduceFunction

use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.

In the class ReduceDriverTest, method testReduceDriverImmutableEmpty:

/**
 * Runs a {@link ReduceDriver} with the {@code SORTED_REDUCE} strategy over an empty input
 * iterator and checks that the gathering collector received no records.
 */
@Test
public void testReduceDriverImmutableEmpty() {
    try {
        TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> taskContext =
                new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>();

        // The sample records are only used to derive the tuple type information.
        List<Tuple2<String, Integer>> sampleRecords = DriverTestData.createReduceImmutableData();
        TupleTypeInfo<Tuple2<String, Integer>> tupleType =
                (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(sampleRecords.get(0));

        MutableObjectIterator<Tuple2<String, Integer>> emptyInput = EmptyMutableObjectIterator.get();
        taskContext.setDriverStrategy(DriverStrategy.SORTED_REDUCE);

        TypeComparator<Tuple2<String, Integer>> keyComparator =
                tupleType.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<String, Integer>> collected =
                new GatheringCollector<Tuple2<String, Integer>>(tupleType.createSerializer(new ExecutionConfig()));

        // Wire input, comparator and output into the task context.
        taskContext.setInput1(emptyInput, tupleType.createSerializer(new ExecutionConfig()));
        taskContext.setComparator1(keyComparator);
        taskContext.setCollector(collected);

        ReduceDriver<Tuple2<String, Integer>> reduceDriver = new ReduceDriver<Tuple2<String, Integer>>();
        reduceDriver.setup(taskContext);
        reduceDriver.prepare();
        reduceDriver.run();

        // An empty input must produce an empty output.
        Assert.assertEquals(0, collected.getList().size());
    } catch (Exception ex) {
        // Report the failure on stderr and fail the test with the exception message.
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
Also used : RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ReduceDriver(org.apache.flink.runtime.operators.ReduceDriver) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 7 with ReduceFunction

use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.

In the class WordCountSubclassInterfacePOJOITCase, method testProgram:

/**
 * Builds and executes a word-count job over the text file at {@code textPath}.
 *
 * <p>The lines are tokenized with {@code Tokenizer}, grouped by the {@code "word"} field and
 * reduced by summing the {@code secretCount} values of the grouped records. A final map step
 * copies the summed value into the {@code count} field. The result is written as text to
 * {@code resultPath}.
 *
 * @throws Exception if reading the input, building the plan or executing the job fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> text = env.readTextFile(textPath);
    DataSet<WCBase> counts = text.flatMap(new Tokenizer()).groupBy("word").reduce(new ReduceFunction<WCBase>() {

        private static final long serialVersionUID = 1L;

        // Adds the count of value2 to value1 in place and returns value1.
        @Override
        public WCBase reduce(WCBase value1, WCBase value2) {
            // The records are handled through the WCBase type but are cast to WC here.
            WC wc1 = (WC) value1;
            WC wc2 = (WC) value2;
            int c = wc1.secretCount.getCount() + wc2.secretCount.getCount();
            wc1.secretCount.setCount(c);
            return wc1;
        }
    }).map(new MapFunction<WCBase, WCBase>() {

        // Copies the accumulated secretCount value into the count field.
        @Override
        public WCBase map(WCBase value) throws Exception {
            WC wc = (WC) value;
            wc.count = wc.secretCount.getCount();
            return wc;
        }
    });
    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction)

Example 8 with ReduceFunction

use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.

In the class ReduceOperatorTest, method testReduceCollectionWithRuntimeContext:

/**
 * Executes a {@link RichReduceFunction} on an in-memory collection, once with object reuse
 * disabled and once with it enabled.
 *
 * <p>Verifies the per-key sums of both runs, the subtask index, parallelism and task name
 * seen through the runtime context in {@code open()}, and that {@code open()} and
 * {@code close()} were invoked.
 */
@Test
public void testReduceCollectionWithRuntimeContext() {
    try {
        final String expectedTaskName = "Test Task";
        final AtomicBoolean openCalled = new AtomicBoolean();
        final AtomicBoolean closeCalled = new AtomicBoolean();

        final ReduceFunction<Tuple2<String, Integer>> summingReducer = new RichReduceFunction<Tuple2<String, Integer>>() {

            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                // Keep the key of the first record and add up the two counts.
                int sum = value1.f1 + value2.f1;
                return new Tuple2<String, Integer>(value1.f0, sum);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                openCalled.set(true);
                RuntimeContext runtimeContext = getRuntimeContext();
                assertEquals(0, runtimeContext.getIndexOfThisSubtask());
                assertEquals(1, runtimeContext.getNumberOfParallelSubtasks());
                assertEquals(expectedTaskName, runtimeContext.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closeCalled.set(true);
            }
        };

        // Input and output of the reducer share the same tuple type.
        final String tupleTypeString = "Tuple2<String, Integer>";
        UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>> operatorInfo =
                new UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>>(
                        TypeInfoParser.<Tuple2<String, Integer>>parse(tupleTypeString),
                        TypeInfoParser.<Tuple2<String, Integer>>parse(tupleTypeString));
        ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> reduceOperator =
                new ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>>(
                        summingReducer, operatorInfo, new int[] { 0 }, "TestReducer");

        List<Tuple2<String, Integer>> records = new ArrayList<Tuple2<String, Integer>>(asList(
                new Tuple2<String, Integer>("foo", 1),
                new Tuple2<String, Integer>("foo", 3),
                new Tuple2<String, Integer>("bar", 2),
                new Tuple2<String, Integer>("bar", 4)));

        final TaskInfo taskInfo = new TaskInfo(expectedTaskName, 1, 0, 1, 0);
        ExecutionConfig config = new ExecutionConfig();

        // First run: object reuse disabled.
        config.disableObjectReuse();
        RuntimeUDFContext noReuseContext = new RuntimeUDFContext(
                taskInfo,
                null,
                config,
                new HashMap<String, Future<Path>>(),
                new HashMap<String, Accumulator<?, ?>>(),
                new UnregisteredMetricsGroup());
        List<Tuple2<String, Integer>> noReuseOutput = reduceOperator.executeOnCollections(records, noReuseContext, config);

        // Second run: object reuse enabled, with a fresh runtime context.
        config.enableObjectReuse();
        RuntimeUDFContext reuseContext = new RuntimeUDFContext(
                taskInfo,
                null,
                config,
                new HashMap<String, Future<Path>>(),
                new HashMap<String, Accumulator<?, ?>>(),
                new UnregisteredMetricsGroup());
        List<Tuple2<String, Integer>> reuseOutput = reduceOperator.executeOnCollections(records, reuseContext, config);

        // Compare as sets so that the order of the output records does not matter.
        Set<Tuple2<String, Integer>> noReuseSet = new HashSet<Tuple2<String, Integer>>(noReuseOutput);
        Set<Tuple2<String, Integer>> reuseSet = new HashSet<Tuple2<String, Integer>>(reuseOutput);
        Set<Tuple2<String, Integer>> expectedSums = new HashSet<Tuple2<String, Integer>>(asList(
                new Tuple2<String, Integer>("foo", 4),
                new Tuple2<String, Integer>("bar", 6)));

        assertEquals(expectedSums, noReuseSet);
        assertEquals(expectedSums, reuseSet);
        assertTrue(openCalled.get());
        assertTrue(closeCalled.get());
    } catch (Exception ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 9 with ReduceFunction

use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.

In the class ReduceOperatorTest, method testReduceCollection:

/**
 * Executes a plain {@link ReduceFunction} on an in-memory collection without a runtime
 * context, once with object reuse disabled and once with it enabled, and verifies that both
 * runs produce the expected per-key sums.
 */
@Test
public void testReduceCollection() {
    try {
        final ReduceFunction<Tuple2<String, Integer>> summingReducer = new ReduceFunction<Tuple2<String, Integer>>() {

            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                // Keep the key of the first record and add up the two counts.
                int sum = value1.f1 + value2.f1;
                return new Tuple2<String, Integer>(value1.f0, sum);
            }
        };

        // Input and output of the reducer share the same tuple type.
        final String tupleTypeString = "Tuple2<String, Integer>";
        UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>> operatorInfo =
                new UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>>(
                        TypeInfoParser.<Tuple2<String, Integer>>parse(tupleTypeString),
                        TypeInfoParser.<Tuple2<String, Integer>>parse(tupleTypeString));
        ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> reduceOperator =
                new ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>>(
                        summingReducer, operatorInfo, new int[] { 0 }, "TestReducer");

        List<Tuple2<String, Integer>> records = new ArrayList<Tuple2<String, Integer>>(asList(
                new Tuple2<String, Integer>("foo", 1),
                new Tuple2<String, Integer>("foo", 3),
                new Tuple2<String, Integer>("bar", 2),
                new Tuple2<String, Integer>("bar", 4)));

        ExecutionConfig config = new ExecutionConfig();

        // First run: object reuse disabled.
        config.disableObjectReuse();
        List<Tuple2<String, Integer>> noReuseOutput = reduceOperator.executeOnCollections(records, null, config);

        // Second run: object reuse enabled.
        config.enableObjectReuse();
        List<Tuple2<String, Integer>> reuseOutput = reduceOperator.executeOnCollections(records, null, config);

        // Compare as sets so that the order of the output records does not matter.
        Set<Tuple2<String, Integer>> noReuseSet = new HashSet<Tuple2<String, Integer>>(noReuseOutput);
        Set<Tuple2<String, Integer>> reuseSet = new HashSet<Tuple2<String, Integer>>(reuseOutput);
        Set<Tuple2<String, Integer>> expectedSums = new HashSet<Tuple2<String, Integer>>(asList(
                new Tuple2<String, Integer>("foo", 4),
                new Tuple2<String, Integer>("bar", 6)));

        assertEquals(expectedSums, noReuseSet);
        assertEquals(expectedSums, reuseSet);
    } catch (Exception ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }
}
Also used : ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Tuple2(org.apache.flink.api.java.tuple.Tuple2) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with ReduceFunction

use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.

In the class DistinctOperator, method translateToDataFlow:

/**
 * Translates this distinct operation into a common-API single-input operator.
 *
 * <p>For expression keys a {@link ReduceOperatorBase} running a {@code DistinctFunction} is
 * created on the logical key positions and configured with the combine hint, the input and
 * the parallelism. If the type is a tuple type, every key field is registered as forwarded
 * to the same position. For selector-function keys the translation is delegated to
 * {@code translateSelectorFunctionDistinct}.
 *
 * @param input the operator that produces the input of the distinct operation
 * @return the operator implementing the distinct operation
 * @throws UnsupportedOperationException if the keys are neither expression keys nor
 *         selector-function keys
 */
@Override
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow(Operator<T> input) {
    final ReduceFunction<T> function = new DistinctFunction<>();
    // Fall back to a generated name when no explicit name was set.
    String name = getName() != null ? getName() : "Distinct at " + distinctLocationName;
    if (keys instanceof Keys.ExpressionKeys) {
        int[] logicalKeyPositions = keys.computeLogicalKeyPositions();
        UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        ReduceOperatorBase<T, ReduceFunction<T>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombineHint(hint);
        po.setInput(input);
        po.setParallelism(getParallelism());
        // make sure that distinct preserves the partitioning for the fields on which they operate
        if (getType().isTupleType()) {
            SingleInputSemanticProperties sProps = new SingleInputSemanticProperties();
            // Reuse the positions computed above instead of computing them a second time.
            for (int field : logicalKeyPositions) {
                sProps.addForwardedField(field, field);
            }
            po.setSemanticProperties(sProps);
        }
        return po;
    } else if (keys instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<T, ?> selectorKeys = (SelectorFunctionKeys<T, ?>) keys;
        return translateSelectorFunctionDistinct(selectorKeys, function, getResultType(), name, input, parallelism, hint);
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) CombineHint(org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties)

Aggregations

ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction): 18, Test (org.junit.Test): 11, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 7, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 7, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 6, ArrayList (java.util.ArrayList): 5, RichReduceFunction (org.apache.flink.api.common.functions.RichReduceFunction): 4, Configuration (org.apache.flink.configuration.Configuration): 3, HashSet (java.util.HashSet): 2, AtomicReference (java.util.concurrent.atomic.AtomicReference): 2, TaskInfo (org.apache.flink.api.common.TaskInfo): 2, FoldFunction (org.apache.flink.api.common.functions.FoldFunction): 2, RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 2, FoldingStateDescriptor (org.apache.flink.api.common.state.FoldingStateDescriptor): 2, ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 2, MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor): 2, ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor): 2, ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor): 2, TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 2, ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException): 2