Search in sources :

Example 51 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class DataSetUtils method summarize.

// --------------------------------------------------------------------------------------------
//  Summarize
// --------------------------------------------------------------------------------------------
/**
 * Computes single-pass summary statistics for every column of a DataSet of Tuples.
 *
 * <p>Each partition of the input is folded into its own {@code TupleSummaryAggregator}; the
 * per-partition aggregators are then combined pairwise, and the result of the first collected
 * aggregator is returned.
 *
 * <p>Example usage:
 * <pre>
 * {@code
 * DataSet<Tuple3<Double, String, Boolean>> input = // [...]
 * Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input)
 *
 * summary.f0.getStandardDeviation()
 * summary.f1.getMaxLength()
 * }
 * </pre>
 *
 * @param input the DataSet to summarize; its type information must describe a tuple type
 * @return the summary as a Tuple the same width as input rows
 * @throws IllegalArgumentException if the type of {@code input} is not a tuple type
 * @throws Exception if collecting the aggregated result fails
 */
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
    final boolean tupleInput = input.getType().isTupleType();
    if (!tupleInput) {
        throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
    }
    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) input.getType();

    // Stage 1: fold every partition into a single aggregator.
    DataSet<TupleSummaryAggregator<R>> perPartition = input.mapPartition(new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {

        @Override
        public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
            TupleSummaryAggregator<R> partitionAggregator = SummaryAggregatorFactory.create(tupleType);
            for (T row : values) {
                partitionAggregator.aggregate(row);
            }
            out.collect(partitionAggregator);
        }
    });

    // Stage 2: merge the per-partition aggregators into the left-hand one.
    DataSet<TupleSummaryAggregator<R>> merged = perPartition.reduce(new ReduceFunction<TupleSummaryAggregator<R>>() {

        @Override
        public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> left, TupleSummaryAggregator<R> right) throws Exception {
            left.combine(right);
            return left;
        }
    });

    return merged.collect().get(0).result();
}
Also used : RichMapPartitionFunction(org.apache.flink.api.common.functions.RichMapPartitionFunction) MapPartitionFunction(org.apache.flink.api.common.functions.MapPartitionFunction) TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase) Collector(org.apache.flink.util.Collector) Tuple(org.apache.flink.api.java.tuple.Tuple) TupleSummaryAggregator(org.apache.flink.api.java.summarize.aggregation.TupleSummaryAggregator)

Example 52 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class FieldsFromTupleTest method testUserSpecifiedOrder.

/**
 * Checks that {@code FieldsFromTuple} extracts fields in exactly the order the indexes are given.
 *
 * <p>A tuple is instantiated from {@code CLASSES[Tuple.MAX_ARITY - 1]} and field {@code i} is set
 * to {@code testDouble[i]}; extraction is then checked for an arbitrary order, for first/last in
 * both orders, and for a long index list containing repeated indexes.
 */
@Test
public void testUserSpecifiedOrder() throws InstantiationException, IllegalAccessException {
    final int lastIndex = Tuple.MAX_ARITY - 1;

    Tuple filledTuple = (Tuple) CLASSES[lastIndex].newInstance();
    for (int pos = 0; pos <= lastIndex; pos++) {
        filledTuple.setField(testDouble[pos], pos);
    }

    // Arbitrary, non-monotonic order.
    double[] arbitraryOrder = { testDouble[5], testDouble[3], testDouble[6], testDouble[7], testDouble[0] };
    arrayEqualityCheck(arbitraryOrder, new FieldsFromTuple(5, 3, 6, 7, 0).extract(filledTuple));

    // First and last field, ascending.
    double[] firstThenLast = { testDouble[0], testDouble[lastIndex] };
    arrayEqualityCheck(firstThenLast, new FieldsFromTuple(0, lastIndex).extract(filledTuple));

    // First and last field, descending.
    double[] lastThenFirst = { testDouble[lastIndex], testDouble[0] };
    arrayEqualityCheck(lastThenFirst, new FieldsFromTuple(lastIndex, 0).extract(filledTuple));

    // Long index list in which several indexes occur more than once.
    double[] withRepeats = {
        testDouble[13], testDouble[4], testDouble[5], testDouble[4], testDouble[2], testDouble[8],
        testDouble[6], testDouble[2], testDouble[8], testDouble[3], testDouble[5], testDouble[2],
        testDouble[16], testDouble[4], testDouble[3], testDouble[2], testDouble[6], testDouble[4],
        testDouble[7], testDouble[4], testDouble[2], testDouble[8], testDouble[7], testDouble[2]
    };
    arrayEqualityCheck(
        withRepeats,
        new FieldsFromTuple(13, 4, 5, 4, 2, 8, 6, 2, 8, 3, 5, 2, 16, 4, 3, 2, 6, 4, 7, 4, 2, 8, 7, 2).extract(filledTuple));
}
Also used : FieldsFromTuple(org.apache.flink.streaming.api.functions.windowing.delta.extractor.FieldsFromTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) FieldsFromTuple(org.apache.flink.streaming.api.functions.windowing.delta.extractor.FieldsFromTuple) Test(org.junit.Test)

Example 53 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class TestBaseUtils method compareResult.

/**
 * Asserts that the string form of {@code result} matches the newline-separated {@code expected} text.
 *
 * <p>Every element of {@code result} is rendered as one string: with {@code asTuples} the element
 * must be a {@code Tuple} and its fields are joined with {@code ','} (null fields become
 * {@code "null"}); otherwise {@code toString()} is used, with {@code "null"} for a null element.
 * The rendered strings are then compared position by position with the lines of {@code expected}.
 *
 * <p>An empty {@code expected} together with an empty {@code result} is treated as "zero elements
 * expected" and passes.
 *
 * @param result the produced elements
 * @param expected the expected elements, one per line
 * @param asTuples whether elements must be tuples rendered as comma-separated fields
 * @param sort whether both sides are sorted before comparison, making the check order-insensitive
 * @throws IllegalArgumentException if {@code asTuples} is set and an element is not a {@code Tuple}
 */
private static <T> void compareResult(List<T> result, String expected, boolean asTuples, boolean sort) {
    // "".split("\n") yields a single empty string rather than no elements, so without this guard
    // an empty result could never match an empty expectation.
    String[] expectedStrings = (expected.isEmpty() && result.isEmpty()) ? new String[0] : expected.split("\n");
    String[] resultStrings = new String[result.size()];
    for (int i = 0; i < resultStrings.length; i++) {
        T val = result.get(i);
        if (!asTuples) {
            resultStrings[i] = (val == null) ? "null" : val.toString();
        } else if (val instanceof Tuple) {
            Tuple t = (Tuple) val;
            // Start at position 0 and add the separator before every later field, so that no
            // field is read from a tuple whose arity is 0.
            StringBuilder bld = new StringBuilder();
            for (int pos = 0; pos < t.getArity(); pos++) {
                if (pos > 0) {
                    bld.append(',');
                }
                Object field = t.getField(pos);
                bld.append(field == null ? "null" : field.toString());
            }
            resultStrings[i] = bld.toString();
        } else {
            throw new IllegalArgumentException(val + " is no tuple");
        }
    }
    if (sort) {
        Arrays.sort(expectedStrings);
        Arrays.sort(resultStrings);
    }
    // Include content of both arrays to provide more context in case of a test failure
    String msg = String.format("Different elements in arrays: expected %d elements and received %d\n expected: %s\n received: %s", expectedStrings.length, resultStrings.length, Arrays.toString(expectedStrings), Arrays.toString(resultStrings));
    assertEquals(msg, expectedStrings.length, resultStrings.length);
    for (int i = 0; i < expectedStrings.length; i++) {
        assertEquals(msg, expectedStrings[i], resultStrings[i]);
    }
}
Also used : Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 54 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class AbstractEventTimeWindowCheckpointingITCase method testTumblingTimeWindow.

// ------------------------------------------------------------------------
/**
 * Runs a keyed tumbling event-time window job with checkpointing and a fixed-delay restart
 * strategy enabled, summing the values of each window and handing the sums to a
 * {@code ValidatingSink}.
 *
 * <p>Any exception thrown while building or executing the job fails the test.
 */
@Test
public void testTumblingTimeWindow() {
    final int elementsPerKey = numElementsPerKey();
    final int windowSizeMillis = windowSize();
    final int keyCount = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);

        env.addSource(new FailingSource(keyCount, elementsPerKey, elementsPerKey / 3))
                .rebalance()
                .keyBy(0)
                .timeWindow(Time.of(windowSizeMillis, MILLISECONDS))
                .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                    // set by open(); checked on every apply() call
                    private boolean opened = false;

                    @Override
                    public void open(Configuration parameters) {
                        assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                        opened = true;
                    }

                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                        // validate that the function has been opened properly
                        assertTrue(opened);
                        int total = 0;
                        long lastKey = -1;
                        for (Tuple2<Long, IntType> element : values) {
                            total += element.f1.value;
                            lastKey = element.f0;
                        }
                        // emit (key, window start, window end, sum of the window's values)
                        out.collect(new Tuple4<>(lastKey, window.getStart(), window.getEnd(), new IntType(total)));
                    }
                })
                .addSink(new ValidatingSink(keyCount, elementsPerKey / windowSizeMillis))
                .setParallelism(1);

        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)

Example 55 with Tuple

use of org.apache.flink.api.java.tuple.Tuple in project flink by apache.

the class AbstractEventTimeWindowCheckpointingITCase method testSlidingTimeWindow.

/**
 * Runs a keyed sliding event-time window job with checkpointing and a fixed-delay restart
 * strategy enabled, summing the values of each window and handing the sums to a
 * {@code ValidatingSink}.
 *
 * <p>The window is defined by {@code windowSize()} and {@code windowSlide()}, both passed to
 * {@code Time.of} as milliseconds. Any exception thrown while building or executing the job
 * fails the test.
 */
@Test
public void testSlidingTimeWindow() {
    final int elementsPerKey = numElementsPerKey();
    final int windowSizeMillis = windowSize();
    final int windowSlideMillis = windowSlide();
    final int keyCount = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setMaxParallelism(2 * PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);

        env.addSource(new FailingSource(keyCount, elementsPerKey, elementsPerKey / 3))
                .rebalance()
                .keyBy(0)
                .timeWindow(Time.of(windowSizeMillis, MILLISECONDS), Time.of(windowSlideMillis, MILLISECONDS))
                .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                    // set by open(); checked on every apply() call
                    private boolean opened = false;

                    @Override
                    public void open(Configuration parameters) {
                        assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                        opened = true;
                    }

                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                        // validate that the function has been opened properly
                        assertTrue(opened);
                        int total = 0;
                        long lastKey = -1;
                        for (Tuple2<Long, IntType> element : values) {
                            total += element.f1.value;
                            lastKey = element.f0;
                        }
                        // emit (key, window start, window end, sum of the window's values)
                        out.collect(new Tuple4<>(lastKey, window.getStart(), window.getEnd(), new IntType(total)));
                    }
                })
                .addSink(new ValidatingSink(keyCount, elementsPerKey / windowSlideMillis))
                .setParallelism(1);

        // Job name matches the window type under test (it previously reused the tumbling test's name).
        tryExecute(env, "Sliding Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)

Aggregations

Tuple (org.apache.flink.api.java.tuple.Tuple)59 Test (org.junit.Test)38 AbstractTest (org.apache.flink.storm.util.AbstractTest)17 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)14 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)14 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)13 Tuple5 (org.apache.flink.api.java.tuple.Tuple5)10 ArrayList (java.util.ArrayList)9 Configuration (org.apache.flink.configuration.Configuration)8 SuccessException (org.apache.flink.test.util.SuccessException)7 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6 Fields (org.apache.storm.tuple.Fields)6 Tuple4 (org.apache.flink.api.java.tuple.Tuple4)5 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)5 Keys (org.apache.flink.api.common.operators.Keys)4 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)4 ComparableAggregator (org.apache.flink.streaming.api.functions.aggregation.ComparableAggregator)4 Values (org.apache.storm.tuple.Values)4