Example 86 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class ValueCollectionDataSets, method getGroupSortedNestedTupleDataSet.

public static DataSet<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> getGroupSortedNestedTupleDataSet(ExecutionEnvironment env) {
    List<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> data = new ArrayList<>();
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(3)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(1), new IntValue(2)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(1)), new StringValue("a")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(2), new IntValue(2)), new StringValue("b")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(3)), new StringValue("c")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(3), new IntValue(6)), new StringValue("c")));
    data.add(new Tuple2<>(new Tuple2<>(new IntValue(4), new IntValue(9)), new StringValue("c")));
    TupleTypeInfo<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> type =
            new TupleTypeInfo<>(
                    new TupleTypeInfo<Tuple2<IntValue, IntValue>>(
                            ValueTypeInfo.INT_VALUE_TYPE_INFO,
                            ValueTypeInfo.INT_VALUE_TYPE_INFO),
                    ValueTypeInfo.STRING_VALUE_TYPE_INFO);
    return env.fromCollection(data, type);
}
Also used: Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArrayList(java.util.ArrayList) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo)
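
The nested layout means that grouping and in-group sorting keys on this dataset are addressed with nested field expressions such as "f0.f0" and "f0.f1". A minimal sketch of consuming the dataset that way, assuming ValueCollectionDataSets is on the classpath; the group-reduce body and class name are illustrative, not taken from the Flink test suite:

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.types.IntValue;
import org.apache.flink.types.StringValue;
import org.apache.flink.util.Collector;

public class NestedTupleGroupingSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> ds =
                ValueCollectionDataSets.getGroupSortedNestedTupleDataSet(env);

        // Group on the first nested field and sort each group descending on
        // the second nested field, then concatenate the sorted inner values.
        ds.groupBy("f0.f0")
                .sortGroup("f0.f1", Order.DESCENDING)
                .reduceGroup(new GroupReduceFunction<Tuple2<Tuple2<IntValue, IntValue>, StringValue>, String>() {
                    @Override
                    public void reduce(Iterable<Tuple2<Tuple2<IntValue, IntValue>, StringValue>> values,
                            Collector<String> out) {
                        StringBuilder sb = new StringBuilder();
                        for (Tuple2<Tuple2<IntValue, IntValue>, StringValue> value : values) {
                            sb.append(value.f0.f1).append(" ");
                        }
                        out.collect(sb.toString().trim());
                    }
                })
                .print();
    }
}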

Example 87 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class AbstractQueryableStateITCase, method testFoldingState.

/**
	 * Tests a simple queryable state instance backed by folding state. Each
	 * source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
	 * which are then queried. The folding state sums the emitted values and
	 * maps the sum to a String. The test succeeds once every subtask index
	 * has been queried and the returned value is n*(n+1)/2 (as a String).
	 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState = new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
        QueryableStateStream<Integer, String> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("pumba", foldingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                    key,
                    queryableState.getKeySerializer(),
                    VoidNamespace.INSTANCE,
                    VoidNamespaceSerializer.INSTANCE);
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(
                        client,
                        jobId,
                        queryableState.getQueryableStateName(),
                        key,
                        serializedKey,
                        QUERY_RETRY_DELAY,
                        false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used: Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
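
The SumFold helper referenced above is defined elsewhere in AbstractQueryableStateITCase. A plausible sketch, assuming it folds each tuple's long value into a running sum represented as a String (consistent with the descriptor's "0" initial value and StringSerializer); this is an illustration, not the verbatim Flink source:

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Illustrative sketch: fold each element's value (f1) into a running sum
// that is kept as a String.
public class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {

    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) throws Exception {
        return Long.toString(Long.parseLong(accumulator) + value.f1);
    }
}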

Example 88 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class AbstractQueryableStateITCase, method testReducingState.

/**
	 * Tests a simple queryable state instance backed by reducing state. Each
	 * source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
	 * which are then queried. The reducing state sums the emitted values. The
	 * test succeeds once every subtask index has been queried and the
	 * returned value is n*(n+1)/2.
	 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("jungle", reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        long expected = numElements * (numElements + 1) / 2;
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used: ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
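
As with SumFold above, the SumReduce helper is defined elsewhere in the test class. A plausible sketch, assuming it keeps the key and sums the values so that the reducing state for key k converges to (k, n*(n+1)/2); again an illustration, not the verbatim Flink source:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Illustrative sketch: keep the key (f0) and sum the values (f1).
public class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> a, Tuple2<Integer, Long> b) throws Exception {
        return new Tuple2<>(a.f0, a.f1 + b.f1);
    }
}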

Example 89 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class PartitionITCase, method testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders.

@Test
public void testRangePartitionerWithKeySelectorOnSequenceNestedDataWithOrders() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple2<ComparablePojo, Long>> dataSet = env.generateSequence(0, 10000).map(new MapFunction<Long, Tuple2<ComparablePojo, Long>>() {

        @Override
        public Tuple2<ComparablePojo, Long> map(Long value) throws Exception {
            return new Tuple2<>(new ComparablePojo(value / 5000, value % 5000), value);
        }
    });
    final List<Tuple2<ComparablePojo, ComparablePojo>> collected = dataSet
            .partitionByRange(new KeySelector<Tuple2<ComparablePojo, Long>, ComparablePojo>() {

                @Override
                public ComparablePojo getKey(Tuple2<ComparablePojo, Long> value) throws Exception {
                    return value.f0;
                }
            })
            .withOrders(Order.ASCENDING)
            .mapPartition(new MinMaxSelector<>(new ComparablePojoComparator()))
            .mapPartition(new ExtractComparablePojo())
            .collect();
    final Comparator<Tuple2<ComparablePojo, ComparablePojo>> pojoComparator = new Comparator<Tuple2<ComparablePojo, ComparablePojo>>() {

        @Override
        public int compare(Tuple2<ComparablePojo, ComparablePojo> o1, Tuple2<ComparablePojo, ComparablePojo> o2) {
            // Order elements by each partition's minimum, so that
            // compare(x, x) == 0 and the Comparator contract holds.
            return o1.f0.compareTo(o2.f0);
        }
    };
    Collections.sort(collected, pojoComparator);
    ComparablePojo previousMax = null;
    for (Tuple2<ComparablePojo, ComparablePojo> element : collected) {
        assertTrue("Min element in each partition should be smaller than max.", element.f0.compareTo(element.f1) <= 0);
        if (previousMax == null) {
            previousMax = element.f1;
        } else {
            assertTrue("Partitions overlap. Previous max should be smaller than current min.", previousMax.compareTo(element.f0) < 0);
            if (previousMax.first.equals(element.f0.first)) {
                assertEquals("Ordering on the second field should be continous.", previousMax.second - 1, element.f0.second.longValue());
            }
            previousMax = element.f1;
        }
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Comparator(java.util.Comparator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)
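
The partitionByRange(KeySelector) plus withOrders combination used above is not specific to the test's ComparablePojo; it works for any comparable key. A minimal standalone sketch on plain Long keys (the modulo key and the count() sink are illustrative choices):

import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;

public class RangePartitionSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Range-partition a sequence by an extracted key with an explicit
        // ascending order; each resulting partition holds a disjoint,
        // contiguous key range.
        DataSet<Long> partitioned = env.generateSequence(0, 10000)
                .partitionByRange(new KeySelector<Long, Long>() {
                    @Override
                    public Long getKey(Long value) throws Exception {
                        return value % 100;
                    }
                })
                .withOrders(Order.ASCENDING);

        System.out.println(partitioned.count());
    }
}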

Example 90 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class PartitionITCase, method testRangePartitionInIteration.

@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);
    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {

        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });
    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);
    DataSet<Tuple2<Long, String>> body = it.getWorkset()
            // Verify that range partitioning is not allowed inside an iteration
            .partitionByRange(1)
            .join(it.getSolutionSet()).where(0).equalTo(0)
            .projectFirst(0).projectSecond(1);
    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);
    // should fail
    result.collect();
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)
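
Because the optimizer rejects range partitioning inside an iteration, a common workaround is to range-partition the input before entering the delta iteration. A minimal sketch under that assumption (illustrative, not from the Flink tests):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class RangePartitionBeforeIterationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Long, String>> tuples = env.generateSequence(0, 10000)
                .map(new MapFunction<Long, Tuple2<Long, String>>() {
                    @Override
                    public Tuple2<Long, String> map(Long v) throws Exception {
                        return new Tuple2<>(v, Long.toString(v));
                    }
                });

        // Range-partition BEFORE the iteration; the iteration body is then
        // just the join, which the optimizer accepts.
        DataSet<Tuple2<Long, String>> partitioned = tuples.partitionByRange(0);

        DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it =
                partitioned.iterateDelta(partitioned, 10, 0);

        DataSet<Tuple2<Long, String>> body = it.getWorkset()
                .join(it.getSolutionSet()).where(0).equalTo(0)
                .projectFirst(0).projectSecond(1);

        it.closeWith(body, body).collect();
    }
}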

Aggregations

Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1159
Test (org.junit.Test): 871
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 486
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 266
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 195
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 137
ArrayList (java.util.ArrayList): 136
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 103
Plan (org.apache.flink.api.common.Plan): 103
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 103
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 99
Configuration (org.apache.flink.configuration.Configuration): 87
List (java.util.List): 82
IOException (java.io.IOException): 79
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 77
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 74
HashMap (java.util.HashMap): 72
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 66
Collection (java.util.Collection): 61
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 60