Example 6 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class ComputeFunction method sendMessageToAllNeighbors.

/**
	 * Sends the given message to all vertices that are adjacent to the changed vertex.
	 * This method is mutually exclusive to the method {@link #getEdges()} and may be called only once.
	 * 
	 * @param m The message to send.
	 */
public final void sendMessageToAllNeighbors(Message m) {
    verifyEdgeUsage();
    outMsg.f1 = m;
    while (edges.hasNext()) {
        Tuple next = edges.next();
        outMsg.f0 = next.getField(1);
        out.collect(Either.Right(outMsg));
    }
}
Also used : Tuple(org.apache.flink.api.java.tuple.Tuple)
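
For orientation, here is a minimal, hypothetical sketch of a Gelly ComputeFunction subclass that calls sendMessageToAllNeighbors(); the class name, type parameters, and min-propagation logic are illustrative and not part of the example above.

import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.pregel.ComputeFunction;
import org.apache.flink.graph.pregel.MessageIterator;

// Hypothetical vertex-centric compute phase: propagate the minimum value seen so far.
public class MinPropagationCompute extends ComputeFunction<Long, Double, Double, Double> {

    @Override
    public void compute(Vertex<Long, Double> vertex, MessageIterator<Double> messages) {
        double min = vertex.getValue();
        for (Double msg : messages) {
            min = Math.min(min, msg);
        }
        // On the first superstep (no messages yet) or whenever the value improves,
        // update the vertex and forward the new value along every out-edge,
        // which is what the sendMessageToAllNeighbors() method above implements.
        if (getSuperstepNumber() == 1 || min < vertex.getValue()) {
            setNewVertexValue(min);
            sendMessageToAllNeighbors(min);
        }
    }
}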

Example 7 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class ScatterFunction method sendMessageToAllNeighbors.

/**
	 * Sends the given message to all vertices that are targets of an edge of the changed vertex.
	 * This method is mutually exclusive to the method {@link #getEdges()} and may be called only once.
	 * <p>
	 * If the {@link EdgeDirection} is OUT (default), the message will be sent to out-neighbors.
	 * If the {@link EdgeDirection} is IN, the message will be sent to in-neighbors.
	 * If the {@link EdgeDirection} is ALL, the message will be sent to all neighbors.
	 * 
	 * @param m The message to send.
	 */
public void sendMessageToAllNeighbors(Message m) {
    if (edgesUsed) {
        throw new IllegalStateException("Can use either 'getEdges()' or 'sendMessageToAllNeighbors()' exactly once.");
    }
    edgesUsed = true;
    outValue.f1 = m;
    while (edges.hasNext()) {
        Tuple next = (Tuple) edges.next();
        /*
         * When EdgeDirection is OUT, the edges iterator only has the out-edges
         * of the vertex, i.e. the ones where this vertex is src.
         * next.getField(1) gives the neighbor of the vertex running this ScatterFunction.
         */
        if (getDirection().equals(EdgeDirection.OUT)) {
            outValue.f0 = next.getField(1);
        }
        /*
         * When EdgeDirection is IN, the edges iterator only has the in-edges
         * of the vertex, i.e. the ones where this vertex is trg.
         * next.getField(0) gives the neighbor of the vertex running this ScatterFunction.
         */
        else if (getDirection().equals(EdgeDirection.IN)) {
            outValue.f0 = next.getField(0);
        }
        // When EdgeDirection is ALL, the edges iterator contains both in- and out- edges
        else if (getDirection().equals(EdgeDirection.ALL)) {
            if (next.getField(0).equals(vertexId)) {
                // send msg to the trg
                outValue.f0 = next.getField(1);
            } else {
                // send msg to the src
                outValue.f0 = next.getField(0);
            }
        }
        out.collect(outValue);
    }
}
Also used : Tuple(org.apache.flink.api.java.tuple.Tuple)
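
By contrast, a scatter-gather ScatterFunction typically calls the method above from sendMessages(); the following is a hedged sketch with made-up names and type parameters, not code from the Flink project.

import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.spargel.ScatterFunction;

// Hypothetical scatter phase: forward the current vertex value to every neighbor.
public class ForwardValueScatter extends ScatterFunction<Long, Double, Double, Double> {

    @Override
    public void sendMessages(Vertex<Long, Double> vertex) {
        // Delegates to sendMessageToAllNeighbors(), which walks the edge iterator shown
        // above and picks the neighbor id according to the configured EdgeDirection.
        sendMessageToAllNeighbors(vertex.getValue());
    }
}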

Example 8 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class WindowFoldITCase method testFoldProcessWindow.

@Test
public void testFoldProcessWindow() throws Exception {
    testResults = new ArrayList<>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);
    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));
        // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());
    source1.keyBy(0).window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS))).fold(Tuple2.of(0, "R:"), new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {

        @Override
        public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
            accumulator.f1 += value.f0;
            accumulator.f0 += value.f1;
            return accumulator;
        }
    }, new ProcessWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, Tuple, TimeWindow>() {

        @Override
        public void process(Tuple tuple, Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
            int i = 0;
            for (Tuple2<Integer, String> in : elements) {
                out.collect(new Tuple3<>(in.f1, in.f0, i++));
            }
        }
    }).addSink(new SinkFunction<Tuple3<String, Integer, Integer>>() {

        @Override
        public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
            testResults.add(value.toString());
        }
    });
    env.execute("Fold Process Window Test");
    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FoldFunction(org.apache.flink.api.common.functions.FoldFunction) ProcessWindowFunction(org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)
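
The test relies on a Tuple2TimestampExtractor defined elsewhere in WindowFoldITCase; a minimal sketch consistent with the event-time setup above, assuming the Integer field carries the event timestamp, could look like the following.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

// Sketch of a punctuated timestamp/watermark assigner for Tuple2<String, Integer> records.
class Tuple2TimestampExtractor implements AssignerWithPunctuatedWatermarks<Tuple2<String, Integer>> {

    private static final long serialVersionUID = 1L;

    @Override
    public long extractTimestamp(Tuple2<String, Integer> element, long previousTimestamp) {
        // Use the Integer field as the event-time timestamp.
        return element.f1;
    }

    @Override
    public Watermark checkAndGetNextWatermark(Tuple2<String, Integer> lastElement, long extractedTimestamp) {
        // Emit a watermark just behind the last seen timestamp.
        return new Watermark(lastElement.f1 - 1);
    }
}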

Example 9 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class DataSetUtilsITCase method testSummarize.

@Test
public void testSummarize() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    List<Tuple8<Short, Integer, Long, Float, Double, String, Boolean, DoubleValue>> data = new ArrayList<>();
    data.add(new Tuple8<>((short) 1, 1, 100L, 0.1f, 1.012376, "hello", false, new DoubleValue(50.0)));
    data.add(new Tuple8<>((short) 2, 2, 1000L, 0.2f, 2.003453, "hello", true, new DoubleValue(50.0)));
    data.add(new Tuple8<>((short) 4, 10, 10000L, 0.2f, 75.00005, "null", true, new DoubleValue(50.0)));
    data.add(new Tuple8<>((short) 10, 4, 100L, 0.9f, 79.5, "", true, new DoubleValue(50.0)));
    data.add(new Tuple8<>((short) 5, 5, 1000L, 0.2f, 10.0000001, "a", false, new DoubleValue(50.0)));
    data.add(new Tuple8<>((short) 6, 6, 10L, 0.1f, 0.0000000000023, "", true, new DoubleValue(100.0)));
    data.add(new Tuple8<>((short) 7, 7, 1L, 0.2f, Double.POSITIVE_INFINITY, "abcdefghijklmnop", true, new DoubleValue(100.0)));
    data.add(new Tuple8<>((short) 8, 8, -100L, 0.001f, Double.NaN, "abcdefghi", true, new DoubleValue(100.0)));
    Collections.shuffle(data);
    DataSet<Tuple8<Short, Integer, Long, Float, Double, String, Boolean, DoubleValue>> ds = env.fromCollection(data);
    // call method under test
    Tuple results = DataSetUtils.summarize(ds);
    Assert.assertEquals(8, results.getArity());
    NumericColumnSummary<Short> col0Summary = results.getField(0);
    Assert.assertEquals(8, col0Summary.getNonMissingCount());
    Assert.assertEquals(1, col0Summary.getMin().shortValue());
    Assert.assertEquals(10, col0Summary.getMax().shortValue());
    Assert.assertEquals(5.375, col0Summary.getMean().doubleValue(), 0.0);
    NumericColumnSummary<Integer> col1Summary = results.getField(1);
    Assert.assertEquals(1, col1Summary.getMin().intValue());
    Assert.assertEquals(10, col1Summary.getMax().intValue());
    Assert.assertEquals(5.375, col1Summary.getMean().doubleValue(), 0.0);
    NumericColumnSummary<Long> col2Summary = results.getField(2);
    Assert.assertEquals(-100L, col2Summary.getMin().longValue());
    Assert.assertEquals(10000L, col2Summary.getMax().longValue());
    NumericColumnSummary<Float> col3Summary = results.getField(3);
    Assert.assertEquals(8, col3Summary.getTotalCount());
    Assert.assertEquals(0.001000, col3Summary.getMin().doubleValue(), 0.0000001);
    Assert.assertEquals(0.89999999, col3Summary.getMax().doubleValue(), 0.0000001);
    Assert.assertEquals(0.2376249988883501, col3Summary.getMean().doubleValue(), 0.000000000001);
    Assert.assertEquals(0.0768965488108089, col3Summary.getVariance().doubleValue(), 0.00000001);
    Assert.assertEquals(0.27730226975415995, col3Summary.getStandardDeviation().doubleValue(), 0.000000000001);
    NumericColumnSummary<Double> col4Summary = results.getField(4);
    Assert.assertEquals(6, col4Summary.getNonMissingCount());
    Assert.assertEquals(2, col4Summary.getMissingCount());
    Assert.assertEquals(0.0000000000023, col4Summary.getMin().doubleValue(), 0.0);
    Assert.assertEquals(79.5, col4Summary.getMax().doubleValue(), 0.000000000001);
    StringColumnSummary col5Summary = results.getField(5);
    Assert.assertEquals(8, col5Summary.getTotalCount());
    Assert.assertEquals(0, col5Summary.getNullCount());
    Assert.assertEquals(8, col5Summary.getNonNullCount());
    Assert.assertEquals(2, col5Summary.getEmptyCount());
    Assert.assertEquals(0, col5Summary.getMinLength().intValue());
    Assert.assertEquals(16, col5Summary.getMaxLength().intValue());
    Assert.assertEquals(5.0, col5Summary.getMeanLength().doubleValue(), 0.0001);
    BooleanColumnSummary col6Summary = results.getField(6);
    Assert.assertEquals(8, col6Summary.getTotalCount());
    Assert.assertEquals(2, col6Summary.getFalseCount());
    Assert.assertEquals(6, col6Summary.getTrueCount());
    Assert.assertEquals(0, col6Summary.getNullCount());
    NumericColumnSummary<Double> col7Summary = results.getField(7);
    Assert.assertEquals(100.0, col7Summary.getMax().doubleValue(), 0.00001);
    Assert.assertEquals(50.0, col7Summary.getMin().doubleValue(), 0.00001);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ArrayList(java.util.ArrayList) StringColumnSummary(org.apache.flink.api.java.summarize.StringColumnSummary) BooleanColumnSummary(org.apache.flink.api.java.summarize.BooleanColumnSummary) Tuple8(org.apache.flink.api.java.tuple.Tuple8) DoubleValue(org.apache.flink.types.DoubleValue) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)
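
Outside of a test, the same API reads naturally: summarize() returns a Tuple with one column summary per field, and NaN or infinite values count as missing for numeric columns (which is why col4 above reports a missing count of 2). The following standalone sketch uses made-up data and names purely for illustration.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.summarize.NumericColumnSummary;
import org.apache.flink.api.java.summarize.StringColumnSummary;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.DataSetUtils;

public class SummarizeSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Illustrative data: one String column and one Double column containing a NaN.
        DataSet<Tuple2<String, Double>> measurements = env.fromElements(
            Tuple2.of("sensor-1", 0.5),
            Tuple2.of("sensor-2", 1.5),
            Tuple2.of("sensor-1", Double.NaN));

        Tuple summary = DataSetUtils.summarize(measurements);

        StringColumnSummary nameStats = summary.getField(0);
        NumericColumnSummary<Double> valueStats = summary.getField(1);

        System.out.println(nameStats.getMaxLength());         // 8
        System.out.println(valueStats.getNonMissingCount());  // 2 (the NaN is counted as missing)
        System.out.println(valueStats.getMean());             // 1.0
    }
}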

Example 10 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class AggregationFunctionTest method groupSumIntegerTest.

@Test
public void groupSumIntegerTest() throws Exception {
    // preparing expected outputs
    List<Tuple2<Integer, Integer>> expectedGroupSumList = new ArrayList<>();
    List<Tuple2<Integer, Integer>> expectedGroupMinList = new ArrayList<>();
    List<Tuple2<Integer, Integer>> expectedGroupMaxList = new ArrayList<>();
    int groupedSum0 = 0;
    int groupedSum1 = 0;
    int groupedSum2 = 0;
    for (int i = 0; i < 9; i++) {
        int groupedSum;
        switch(i % 3) {
            case 0:
                groupedSum = groupedSum0 += i;
                break;
            case 1:
                groupedSum = groupedSum1 += i;
                break;
            default:
                groupedSum = groupedSum2 += i;
                break;
        }
        expectedGroupSumList.add(new Tuple2<>(i % 3, groupedSum));
        expectedGroupMinList.add(new Tuple2<>(i % 3, i % 3));
        expectedGroupMaxList.add(new Tuple2<>(i % 3, i));
    }
    // some necessary boiler plate
    TypeInformation<Tuple2<Integer, Integer>> typeInfo = TypeExtractor.getForObject(new Tuple2<>(0, 0));
    ExecutionConfig config = new ExecutionConfig();
    KeySelector<Tuple2<Integer, Integer>, Tuple> keySelector = KeySelectorUtil.getSelectorForKeys(new Keys.ExpressionKeys<>(new int[] { 0 }, typeInfo), typeInfo, config);
    TypeInformation<Tuple> keyType = TypeExtractor.getKeySelectorTypes(keySelector, typeInfo);
    // aggregations tested
    ReduceFunction<Tuple2<Integer, Integer>> sumFunction = new SumAggregator<>(1, typeInfo, config);
    ReduceFunction<Tuple2<Integer, Integer>> minFunction = new ComparableAggregator<>(1, typeInfo, AggregationType.MIN, config);
    ReduceFunction<Tuple2<Integer, Integer>> maxFunction = new ComparableAggregator<>(1, typeInfo, AggregationType.MAX, config);
    List<Tuple2<Integer, Integer>> groupedSumList = MockContext.createAndExecuteForKeyedStream(new StreamGroupedReduce<>(sumFunction, typeInfo.createSerializer(config)), getInputList(), keySelector, keyType);
    List<Tuple2<Integer, Integer>> groupedMinList = MockContext.createAndExecuteForKeyedStream(new StreamGroupedReduce<>(minFunction, typeInfo.createSerializer(config)), getInputList(), keySelector, keyType);
    List<Tuple2<Integer, Integer>> groupedMaxList = MockContext.createAndExecuteForKeyedStream(new StreamGroupedReduce<>(maxFunction, typeInfo.createSerializer(config)), getInputList(), keySelector, keyType);
    assertEquals(expectedGroupSumList, groupedSumList);
    assertEquals(expectedGroupMinList, groupedMinList);
    assertEquals(expectedGroupMaxList, groupedMaxList);
}
Also used : ComparableAggregator(org.apache.flink.streaming.api.functions.aggregation.ComparableAggregator) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Keys(org.apache.flink.api.common.operators.Keys) SumAggregator(org.apache.flink.streaming.api.functions.aggregation.SumAggregator) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)
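
The helper getInputList() is defined elsewhere in AggregationFunctionTest; a hypothetical version consistent with the expected lists built above (key i % 3, value i, for i from 0 to 8) would be:

// Hypothetical input generator matching the expected sum/min/max lists above.
private List<Tuple2<Integer, Integer>> getInputList() {
    List<Tuple2<Integer, Integer>> inputList = new ArrayList<>();
    for (int i = 0; i < 9; i++) {
        inputList.add(Tuple2.of(i % 3, i));
    }
    return inputList;
}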

Aggregations

Tuple (org.apache.flink.api.java.tuple.Tuple): 59
Test (org.junit.Test): 38
AbstractTest (org.apache.flink.storm.util.AbstractTest): 17
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 14
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 13
Tuple5 (org.apache.flink.api.java.tuple.Tuple5): 10
ArrayList (java.util.ArrayList): 9
Configuration (org.apache.flink.configuration.Configuration): 8
SuccessException (org.apache.flink.test.util.SuccessException): 7
IOException (java.io.IOException): 6
HashMap (java.util.HashMap): 6
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 6
Fields (org.apache.storm.tuple.Fields): 6
Tuple4 (org.apache.flink.api.java.tuple.Tuple4): 5
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 5
Keys (org.apache.flink.api.common.operators.Keys): 4
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 4
ComparableAggregator (org.apache.flink.streaming.api.functions.aggregation.ComparableAggregator): 4
Values (org.apache.storm.tuple.Values): 4