
Example 41 with HashSet

Use of java.util.HashSet in project flink by apache.

In class StreamGraphHasherV2, method traverseStreamGraphAndGenerateHashes.

/**
	 * Returns a map with a hash for each {@link StreamNode} of the {@link
	 * StreamGraph}. The hash is used as the {@link JobVertexID} in order to
	 * identify nodes across job submissions if they didn't change.
	 *
	 * <p>The complete {@link StreamGraph} is traversed. The hash is either
	 * computed from the transformation's user-specified id (see
	 * {@link StreamTransformation#getUid()}) or generated in a deterministic way.
	 *
	 * <p>The generated hash is deterministic with respect to:
	 * <ul>
	 * <li>node-local properties (like parallelism, UDF, node ID),
	 * <li>chained output nodes, and
	 * <li>input node hashes
	 * </ul>
	 *
	 * @return A map from {@link StreamNode#id} to hash as 16-byte array.
	 */
@Override
public Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) {
    // The hash function used to generate the hash
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();
    Set<Integer> visited = new HashSet<>();
    Queue<StreamNode> remaining = new ArrayDeque<>();
    // We need to make the source order deterministic. The source IDs are
    // not guaranteed to be returned in the same order, so submitting the
    // same program twice might traverse the graph differently, which would
    // break the deterministic hash assignment.
    List<Integer> sources = new ArrayList<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        sources.add(sourceNodeId);
    }
    Collections.sort(sources);
    // Start with source nodes
    for (Integer sourceNodeId : sources) {
        remaining.add(streamGraph.getStreamNode(sourceNodeId));
        visited.add(sourceNodeId);
    }
    StreamNode currentNode;
    while ((currentNode = remaining.poll()) != null) {
        // generate the hash code.
        if (generateNodeHash(currentNode, hashFunction, hashes, streamGraph.isChainingEnabled())) {
            // Add the child nodes
            for (StreamEdge outEdge : currentNode.getOutEdges()) {
                StreamNode child = outEdge.getTargetVertex();
                if (!visited.contains(child.getId())) {
                    remaining.add(child);
                    visited.add(child.getId());
                }
            }
        } else {
            // The hash could not be generated yet (an input hash is still
            // missing), so unmark the node and revisit it later.
            visited.remove(currentNode.getId());
        }
    }
    return hashes;
}
Also used : HashFunction(com.google.common.hash.HashFunction) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayDeque(java.util.ArrayDeque) HashSet(java.util.HashSet)
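
The Javadoc above pins down exactly what the hash must be deterministic with respect to. The following is a minimal sketch of that contract, using the same Guava murmur3 function; generateDeterministicHash and its parameters are hypothetical simplifications for illustration, not Flink's actual generateNodeHash:

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;

import java.util.List;

final class NodeHashSketch {

    // Same hash function and seed as in traverseStreamGraphAndGenerateHashes.
    private static final HashFunction HASH_FUNCTION = Hashing.murmur3_128(0);

    // Hypothetical: mixes node-local properties with the already-computed
    // input hashes, so any upstream change also changes this node's hash.
    static byte[] generateDeterministicHash(int nodeId, int chainedOutputs, List<byte[]> inputHashes) {
        Hasher hasher = HASH_FUNCTION.newHasher();
        // Node-local properties (a real implementation would also include
        // e.g. parallelism and the UDF).
        hasher.putInt(nodeId);
        hasher.putInt(chainedOutputs);
        byte[] hash = hasher.hash().asBytes();
        // XOR in the input hashes (16 bytes each, the murmur3_128 output
        // length), so a change anywhere upstream propagates downstream.
        for (byte[] inputHash : inputHashes) {
            for (int i = 0; i < hash.length; i++) {
                hash[i] = (byte) (hash[i] ^ inputHash[i]);
            }
        }
        return hash;
    }
}

Because only these inputs feed the hasher, resubmitting an unchanged program reproduces the same hashes, and therefore the same JobVertexIDs.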

Example 42 with HashSet

Use of java.util.HashSet in project flink by apache.

In class UnorderedStreamElementQueueTest, method testCompletionOrder.

/**
	 * Tests that only elements before the oldest watermark are returned if they are completed.
	 */
@Test
public void testCompletionOrder() throws Exception {
    OperatorActions operatorActions = mock(OperatorActions.class);
    final UnorderedStreamElementQueue queue = new UnorderedStreamElementQueue(8, executor, operatorActions);
    StreamRecordQueueEntry<Integer> record1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L));
    StreamRecordQueueEntry<Integer> record2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L));
    WatermarkQueueEntry watermark1 = new WatermarkQueueEntry(new Watermark(2L));
    StreamRecordQueueEntry<Integer> record3 = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 3L));
    StreamRecordQueueEntry<Integer> record4 = new StreamRecordQueueEntry<>(new StreamRecord<>(4, 4L));
    WatermarkQueueEntry watermark2 = new WatermarkQueueEntry(new Watermark(5L));
    StreamRecordQueueEntry<Integer> record5 = new StreamRecordQueueEntry<>(new StreamRecord<>(5, 6L));
    StreamRecordQueueEntry<Integer> record6 = new StreamRecordQueueEntry<>(new StreamRecord<>(6, 7L));
    List<StreamElementQueueEntry<?>> entries = Arrays.asList(record1, record2, watermark1, record3, record4, watermark2, record5, record6);
    // The queue should look like R1, R2, W1, R3, R4, W2, R5, R6
    for (StreamElementQueueEntry<?> entry : entries) {
        queue.put(entry);
    }
    Assert.assertEquals(8, queue.size());
    Future<AsyncResult> firstPoll = FlinkFuture.supplyAsync(new Callable<AsyncResult>() {

        @Override
        public AsyncResult call() throws Exception {
            return queue.poll();
        }
    }, executor);
    // Completing R3 should not complete the pending poll, because R3 is behind W1
    record3.collect(Collections.<Integer>emptyList());
    Thread.sleep(10L);
    Assert.assertFalse(firstPoll.isDone());
    record2.collect(Collections.<Integer>emptyList());
    Assert.assertEquals(record2, firstPoll.get());
    Future<AsyncResult> secondPoll = FlinkFuture.supplyAsync(new Callable<AsyncResult>() {

        @Override
        public AsyncResult call() throws Exception {
            return queue.poll();
        }
    }, executor);
    record6.collect(Collections.<Integer>emptyList());
    record4.collect(Collections.<Integer>emptyList());
    Thread.sleep(10L);
    // The future should not be completed because R1 has not been completed yet
    Assert.assertFalse(secondPoll.isDone());
    record1.collect(Collections.<Integer>emptyList());
    Assert.assertEquals(record1, secondPoll.get());
    // Now W1, R3, R4 and W2 are completed and should be pollable
    Assert.assertEquals(watermark1, queue.poll());
    // The order of R3 and R4 is not specified
    Set<AsyncResult> expected = new HashSet<>(2);
    expected.add(record3);
    expected.add(record4);
    Set<AsyncResult> actual = new HashSet<>(2);
    actual.add(queue.poll());
    actual.add(queue.poll());
    Assert.assertEquals(expected, actual);
    Assert.assertEquals(watermark2, queue.poll());
    // since R6 has been completed before and W2 has been consumed, we should be able to poll
    // this record as well
    Assert.assertEquals(record6, queue.poll());
    // only R5 left in the queue
    Assert.assertEquals(1, queue.size());
    Future<AsyncResult> thirdPoll = FlinkFuture.supplyAsync(new Callable<AsyncResult>() {

        @Override
        public AsyncResult call() throws Exception {
            return queue.poll();
        }
    }, executor);
    Thread.sleep(10L);
    Assert.assertFalse(thirdPoll.isDone());
    record5.collect(Collections.<Integer>emptyList());
    Assert.assertEquals(record5, thirdPoll.get());
    Assert.assertTrue(queue.isEmpty());
    verify(operatorActions, never()).failOperator(any(Exception.class));
}
Also used : OperatorActions(org.apache.flink.streaming.api.operators.async.OperatorActions) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashSet(java.util.HashSet) Test(org.junit.Test)
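
The ordering rule the test exercises is that a watermark acts as a barrier: records between two watermarks may complete and be emitted in any order (hence the HashSet comparison for R3 and R4), but never across one. Below is a hypothetical model of that segmentation, one HashSet per watermark-delimited segment; UnorderedStreamElementQueue's real bookkeeping is more involved:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

// Hypothetical model: elements are grouped into segments delimited by
// watermarks. Only elements in the oldest segment may be emitted, so a
// completed record never overtakes a preceding watermark.
final class SegmentedQueueSketch {

    private final Deque<Set<String>> segments = new ArrayDeque<>();

    SegmentedQueueSketch() {
        segments.add(new HashSet<>());
    }

    void putRecord(String record) {
        // Records join the newest (last) segment.
        segments.getLast().add(record);
    }

    void putWatermark() {
        // A watermark closes the current segment; later records go into a
        // fresh one and can never be emitted before the watermark.
        segments.add(new HashSet<>());
    }

    /** Emits a completed record only if it sits in the oldest segment. */
    boolean tryEmit(String completedRecord) {
        return segments.getFirst().remove(completedRecord);
    }

    /** Once the oldest segment is drained, the watermark can be emitted
     *  and the next segment becomes pollable. */
    boolean tryAdvanceWatermark() {
        if (segments.size() > 1 && segments.getFirst().isEmpty()) {
            segments.removeFirst();
            return true;
        }
        return false;
    }
}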

Example 43 with HashSet

Use of java.util.HashSet in project flink by apache.

In class InternalWindowFunctionTest, method testInternalAggregateProcessWindowFunction.

@SuppressWarnings("unchecked")
@Test
public void testInternalAggregateProcessWindowFunction() throws Exception {
    AggregateProcessWindowFunctionMock mock = mock(AggregateProcessWindowFunctionMock.class);
    InternalAggregateProcessWindowFunction<Long, Set<Long>, Map<Long, Long>, String, Long, TimeWindow> windowFunction = new InternalAggregateProcessWindowFunction<>(new AggregateFunction<Long, Set<Long>, Map<Long, Long>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Set<Long> createAccumulator() {
            return new HashSet<>();
        }

        @Override
        public void add(Long value, Set<Long> accumulator) {
            accumulator.add(value);
        }

        @Override
        public Map<Long, Long> getResult(Set<Long> accumulator) {
            Map<Long, Long> result = new HashMap<>();
            for (Long in : accumulator) {
                result.put(in, in);
            }
            return result;
        }

        @Override
        public Set<Long> merge(Set<Long> a, Set<Long> b) {
            a.addAll(b);
            return a;
        }
    }, mock);
    // check setOutputType
    TypeInformation<String> stringType = BasicTypeInfo.STRING_TYPE_INFO;
    ExecutionConfig execConf = new ExecutionConfig();
    execConf.setParallelism(42);
    StreamingFunctionUtils.setOutputType(windowFunction, stringType, execConf);
    verify(mock).setOutputType(stringType, execConf);
    // check open
    Configuration config = new Configuration();
    windowFunction.open(config);
    verify(mock).open(config);
    // check setRuntimeContext
    RuntimeContext rCtx = mock(RuntimeContext.class);
    windowFunction.setRuntimeContext(rCtx);
    verify(mock).setRuntimeContext(rCtx);
    // check apply
    TimeWindow w = mock(TimeWindow.class);
    Collector<String> c = (Collector<String>) mock(Collector.class);
    List<Long> args = new LinkedList<>();
    args.add(23L);
    args.add(24L);
    windowFunction.apply(42L, w, args, c);
    verify(mock).process(eq(42L), (AggregateProcessWindowFunctionMock.Context) anyObject(), (Iterable) argThat(containsInAnyOrder(allOf(hasEntry(is(23L), is(23L)), hasEntry(is(24L), is(24L))))), eq(c));
    // check close
    windowFunction.close();
    verify(mock).close();
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) Configuration(org.apache.flink.configuration.Configuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) InternalAggregateProcessWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalAggregateProcessWindowFunction) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) LinkedList(java.util.LinkedList) Collector(org.apache.flink.util.Collector) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
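
The anonymous AggregateFunction is the part worth isolating: a HashSet accumulator deduplicates the input values before getResult builds the identity map that the verification matches with hasEntry. A standalone walk-through of the same lifecycle in plain Java (the driver class is ours, for illustration only):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Walks through the accumulator lifecycle implemented by the anonymous
// AggregateFunction above: createAccumulator, add, merge, getResult.
public class AccumulatorLifecycleSketch {

    public static void main(String[] args) {
        // createAccumulator(): start with empty sets.
        Set<Long> a = new HashSet<>();
        Set<Long> b = new HashSet<>();

        // add(): the HashSet deduplicates repeated values.
        a.add(23L);
        a.add(23L);
        b.add(24L);

        // merge(): union the two partial accumulators.
        a.addAll(b);

        // getResult(): build the identity map the test asserts on.
        Map<Long, Long> result = new HashMap<>();
        for (Long in : a) {
            result.put(in, in);
        }
        System.out.println(result); // {23=23, 24=24}
    }
}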

Example 44 with HashSet

Use of java.util.HashSet in project flink by apache.

In class StateInitializationContextImplTest, method getOperatorStateStore.

@Test
public void getOperatorStateStore() throws Exception {
    Set<Integer> readStatesCount = new HashSet<>();
    for (StatePartitionStreamProvider statePartitionStreamProvider : initializationContext.getRawOperatorStateInputs()) {
        Assert.assertNotNull(statePartitionStreamProvider);
        try (InputStream is = statePartitionStreamProvider.getStream()) {
            DataInputView div = new DataInputViewStreamWrapper(is);
            Assert.assertTrue(readStatesCount.add(div.readInt()));
        }
    }
    Assert.assertEquals(writtenOperatorStates, readStatesCount);
}
Also used : StatePartitionStreamProvider(org.apache.flink.runtime.state.StatePartitionStreamProvider) KeyGroupStatePartitionStreamProvider(org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) InputStream(java.io.InputStream) DataInputView(org.apache.flink.core.memory.DataInputView) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) HashSet(java.util.HashSet) Test(org.junit.Test)
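
The test leans on the return value of Set.add, which is false when the element is already present, so a single assertTrue per stream doubles as a duplicate-read check. The idiom in isolation (plain ints stand in for the state values):

import java.util.HashSet;
import java.util.Set;

public class AddReturnValueSketch {

    public static void main(String[] args) {
        Set<Integer> seen = new HashSet<>();
        // add() returns true only for the first occurrence, so it can
        // assert "each state is read exactly once" in a single call.
        System.out.println(seen.add(7)); // true  - first read
        System.out.println(seen.add(7)); // false - duplicate read
    }
}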

Example 45 with HashSet

Use of java.util.HashSet in project flink by apache.

In class HeapInternalTimerServiceTest, method testTimerAssignmentToKeyGroups.

@Test
public void testTimerAssignmentToKeyGroups() {
    int totalNoOfTimers = 100;
    int totalNoOfKeyGroups = 100;
    int startKeyGroupIdx = 0;
    // we have 0 to 99
    int endKeyGroupIdx = totalNoOfKeyGroups - 1;
    @SuppressWarnings("unchecked") Set<InternalTimer<Integer, String>>[] expectedNonEmptyTimerSets = new HashSet[totalNoOfKeyGroups];
    TestKeyContext keyContext = new TestKeyContext();
    HeapInternalTimerService<Integer, String> timerService = new HeapInternalTimerService<>(totalNoOfKeyGroups, new KeyGroupRange(startKeyGroupIdx, endKeyGroupIdx), keyContext, new TestProcessingTimeService());
    timerService.startTimerService(IntSerializer.INSTANCE, StringSerializer.INSTANCE, mock(Triggerable.class));
    for (int i = 0; i < totalNoOfTimers; i++) {
        // create the timer to be registered
        InternalTimer<Integer, String> timer = new InternalTimer<>(10 + i, i, "hello_world_" + i);
        int keyGroupIdx = KeyGroupRangeAssignment.assignToKeyGroup(timer.getKey(), totalNoOfKeyGroups);
        // Add it to the expected set of timers for its key group
        Set<InternalTimer<Integer, String>> timerSet = expectedNonEmptyTimerSets[keyGroupIdx];
        if (timerSet == null) {
            timerSet = new HashSet<>();
            expectedNonEmptyTimerSets[keyGroupIdx] = timerSet;
        }
        timerSet.add(timer);
        // register the timer as both processing and event time one
        keyContext.setCurrentKey(timer.getKey());
        timerService.registerEventTimeTimer(timer.getNamespace(), timer.getTimestamp());
        timerService.registerProcessingTimeTimer(timer.getNamespace(), timer.getTimestamp());
    }
    Set<InternalTimer<Integer, String>>[] eventTimeTimers = timerService.getEventTimeTimersPerKeyGroup();
    Set<InternalTimer<Integer, String>>[] processingTimeTimers = timerService.getProcessingTimeTimersPerKeyGroup();
    // Finally, verify that the actual per-key-group timer sets match the expected ones.
    for (int i = 0; i < expectedNonEmptyTimerSets.length; i++) {
        Set<InternalTimer<Integer, String>> expected = expectedNonEmptyTimerSets[i];
        Set<InternalTimer<Integer, String>> actualEvent = eventTimeTimers[i];
        Set<InternalTimer<Integer, String>> actualProcessing = processingTimeTimers[i];
        if (expected == null) {
            Assert.assertNull(actualEvent);
            Assert.assertNull(actualProcessing);
        } else {
            // Compare as sets: HashSet iteration order is unspecified, so
            // comparing toArray() results would make the test fragile.
            Assert.assertEquals(expected, actualEvent);
            Assert.assertEquals(expected, actualProcessing);
        }
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) Test(org.junit.Test)
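
The distribution being verified comes from KeyGroupRangeAssignment.assignToKeyGroup, which buckets a key into one of totalNoOfKeyGroups key groups. Below is a hypothetical stand-in showing the bucketing-plus-expected-sets pattern of the test; Flink's real implementation additionally scrambles the key's hashCode before taking the modulus, which this sketch skips:

import java.util.HashSet;
import java.util.Set;

// Hypothetical stand-in for KeyGroupRangeAssignment.assignToKeyGroup:
// bucket keys by hash into a fixed number of key groups and build the
// expected per-group sets, as the test does.
public class KeyGroupBucketingSketch {

    static int assignToKeyGroup(Object key, int totalKeyGroups) {
        // floorMod keeps the result in [0, totalKeyGroups) even for
        // negative hash codes.
        return Math.floorMod(key.hashCode(), totalKeyGroups);
    }

    public static void main(String[] args) {
        int totalKeyGroups = 100;
        @SuppressWarnings("unchecked")
        Set<Integer>[] perGroup = new HashSet[totalKeyGroups];
        for (int key = 0; key < 100; key++) {
            int idx = assignToKeyGroup(key, totalKeyGroups);
            if (perGroup[idx] == null) {
                perGroup[idx] = new HashSet<>();
            }
            perGroup[idx].add(key);
        }
        // Each key landed in exactly one group.
        int total = 0;
        for (Set<Integer> group : perGroup) {
            if (group != null) {
                total += group.size();
            }
        }
        System.out.println(total); // 100
    }
}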

Aggregations

HashSet (java.util.HashSet): 12137
Set (java.util.Set): 2609
ArrayList (java.util.ArrayList): 2318
HashMap (java.util.HashMap): 2096
Test (org.junit.Test): 2060
Map (java.util.Map): 1198
Iterator (java.util.Iterator): 979
IOException (java.io.IOException): 934
List (java.util.List): 911
File (java.io.File): 607
LinkedHashSet (java.util.LinkedHashSet): 460
Test (org.testng.annotations.Test): 460
TreeSet (java.util.TreeSet): 271
Collection (java.util.Collection): 233
LinkedList (java.util.LinkedList): 224
Region (org.apache.geode.cache.Region): 202
SSOException (com.iplanet.sso.SSOException): 188
Date (java.util.Date): 180
LinkedHashMap (java.util.LinkedHashMap): 169
PartitionedRegion (org.apache.geode.internal.cache.PartitionedRegion): 166