use of java.util.HashSet in project flink by apache.
the class StreamGraphHasherV2 method traverseStreamGraphAndGenerateHashes.
/**
 * Returns a map with a hash for each {@link StreamNode} of the {@link
 * StreamGraph}. The hash is used as the {@link JobVertexID} in order to
 * identify nodes across job submissions if they didn't change.
 *
 * <p>The complete {@link StreamGraph} is traversed. The hash is either
 * computed from the transformation's user-specified id (see
 * {@link StreamTransformation#getUid()}) or generated in a deterministic way.
 *
 * <p>The generated hash is deterministic with respect to:
 * <ul>
 *     <li>node-local properties (like parallelism, UDF, node ID),
 *     <li>chained output nodes, and
 *     <li>input node hashes.
 * </ul>
 *
 * @return A map from {@link StreamNode#id} to the node's hash as a 16-byte array.
 */
@Override
public Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) {
    // The hash function used to generate the hash
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();

    Set<Integer> visited = new HashSet<>();
    Queue<StreamNode> remaining = new ArrayDeque<>();

    // We need to make the source order deterministic. The source IDs are
    // not returned in the same order, which means that submitting the same
    // program twice might result in a different traversal, which breaks the
    // deterministic hash assignment.
    List<Integer> sources = new ArrayList<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        sources.add(sourceNodeId);
    }
    Collections.sort(sources);

    // Start with the source nodes
    for (Integer sourceNodeId : sources) {
        remaining.add(streamGraph.getStreamNode(sourceNodeId));
        visited.add(sourceNodeId);
    }

    StreamNode currentNode;
    while ((currentNode = remaining.poll()) != null) {
        // Generate the hash code.
        if (generateNodeHash(currentNode, hashFunction, hashes, streamGraph.isChainingEnabled())) {
            // Add the child nodes
            for (StreamEdge outEdge : currentNode.getOutEdges()) {
                StreamNode child = outEdge.getTargetVertex();
                if (!visited.contains(child.getId())) {
                    remaining.add(child);
                    visited.add(child.getId());
                }
            }
        } else {
            // We will revisit this node later.
            visited.remove(currentNode.getId());
        }
    }

    return hashes;
}
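The interplay between the HashSet and the else branch is the subtle part: generateNodeHash only succeeds once all of a node's inputs have hashes, so a node that is polled too early is un-marked and re-enqueued later, when the traversal follows the out-edge of whichever input was still missing. Below is a minimal, Flink-free sketch of that deferral pattern, assuming a graph given as adjacency maps; the class and method names are invented for illustration, and a running counter stands in for the real murmur3 hash.

import java.util.ArrayDeque;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

public class DeferringTraversal {

    static Map<Integer, Integer> traverse(
            Map<Integer, List<Integer>> outEdges,
            Map<Integer, List<Integer>> inEdges,
            List<Integer> sources) {

        Map<Integer, Integer> hashes = new HashMap<>();
        Set<Integer> visited = new HashSet<>(sources);
        Queue<Integer> remaining = new ArrayDeque<>(sources);

        Integer current;
        while ((current = remaining.poll()) != null) {
            // A node can only be "hashed" once all of its inputs have been hashed.
            boolean inputsReady = hashes.keySet().containsAll(
                inEdges.getOrDefault(current, Collections.emptyList()));

            if (inputsReady) {
                hashes.put(current, hashes.size()); // counter stands in for the murmur3 hash
                for (Integer child : outEdges.getOrDefault(current, Collections.emptyList())) {
                    if (visited.add(child)) { // add() is false if the child is already queued
                        remaining.add(child);
                    }
                }
            } else {
                // Un-mark the node; hashing one of its inputs later re-discovers it.
                visited.remove(current);
            }
        }
        return hashes;
    }
}

Removing the node from visited instead of re-adding it to the queue directly keeps the queue free of duplicates while still guaranteeing the node is processed after all of its inputs.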
use of java.util.HashSet in project flink by apache.
the class UnorderedStreamElementQueueTest method testCompletionOrder.
/**
 * Tests that only elements before the oldest watermark are returned if they are completed.
 */
@Test
public void testCompletionOrder() throws Exception {
    OperatorActions operatorActions = mock(OperatorActions.class);
    final UnorderedStreamElementQueue queue = new UnorderedStreamElementQueue(8, executor, operatorActions);

    StreamRecordQueueEntry<Integer> record1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L));
    StreamRecordQueueEntry<Integer> record2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L));
    WatermarkQueueEntry watermark1 = new WatermarkQueueEntry(new Watermark(2L));
    StreamRecordQueueEntry<Integer> record3 = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 3L));
    StreamRecordQueueEntry<Integer> record4 = new StreamRecordQueueEntry<>(new StreamRecord<>(4, 4L));
    WatermarkQueueEntry watermark2 = new WatermarkQueueEntry(new Watermark(5L));
    StreamRecordQueueEntry<Integer> record5 = new StreamRecordQueueEntry<>(new StreamRecord<>(5, 6L));
    StreamRecordQueueEntry<Integer> record6 = new StreamRecordQueueEntry<>(new StreamRecord<>(6, 7L));

    List<StreamElementQueueEntry<?>> entries = Arrays.asList(
        record1, record2, watermark1, record3, record4, watermark2, record5, record6);

    // The queue should look like R1, R2, W1, R3, R4, W2, R5, R6
    for (StreamElementQueueEntry<?> entry : entries) {
        queue.put(entry);
    }

    Assert.assertTrue(8 == queue.size());

    Future<AsyncResult> firstPoll = FlinkFuture.supplyAsync(new Callable<AsyncResult>() {
        @Override
        public AsyncResult call() throws Exception {
            return queue.poll();
        }
    }, executor);

    // this should not fulfill the poll, because R3 is behind W1
    record3.collect(Collections.<Integer>emptyList());

    Thread.sleep(10L);
    Assert.assertFalse(firstPoll.isDone());

    record2.collect(Collections.<Integer>emptyList());
    Assert.assertEquals(record2, firstPoll.get());

    Future<AsyncResult> secondPoll = FlinkFuture.supplyAsync(new Callable<AsyncResult>() {
        @Override
        public AsyncResult call() throws Exception {
            return queue.poll();
        }
    }, executor);

    record6.collect(Collections.<Integer>emptyList());
    record4.collect(Collections.<Integer>emptyList());

    Thread.sleep(10L);

    // The future should not be completed because R1 has not been completed yet
    Assert.assertFalse(secondPoll.isDone());

    record1.collect(Collections.<Integer>emptyList());
    Assert.assertEquals(record1, secondPoll.get());

    // Now W1, R3, R4 and W2 are completed and should be pollable
    Assert.assertEquals(watermark1, queue.poll());

    // The order of R3 and R4 is not specified
    Set<AsyncResult> expected = new HashSet<>(2);
    expected.add(record3);
    expected.add(record4);

    Set<AsyncResult> actual = new HashSet<>(2);
    actual.add(queue.poll());
    actual.add(queue.poll());

    Assert.assertEquals(expected, actual);
    Assert.assertEquals(watermark2, queue.poll());

    // since R6 has been completed before and W2 has been consumed, we should be able to poll
    // this record as well
    Assert.assertEquals(record6, queue.poll());

    // only R5 left in the queue
    Assert.assertTrue(1 == queue.size());

    Future<AsyncResult> thirdPoll = FlinkFuture.supplyAsync(new Callable<AsyncResult>() {
        @Override
        public AsyncResult call() throws Exception {
            return queue.poll();
        }
    }, executor);

    Thread.sleep(10L);
    Assert.assertFalse(thirdPoll.isDone());

    record5.collect(Collections.<Integer>emptyList());
    Assert.assertEquals(record5, thirdPoll.get());

    Assert.assertTrue(queue.isEmpty());

    verify(operatorActions, never()).failOperator(any(Exception.class));
}
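The two HashSets in the middle of the test are the interesting use of java.util.HashSet here: the queue leaves the completion order of R3 and R4 unspecified, and set equality ignores insertion order, which is exactly the property the assertion needs. A tiny standalone illustration of the idiom (class name and values are invented):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class UnorderedAssertionDemo {
    public static void main(String[] args) {
        // HashSet equality ignores insertion order, so either poll order passes.
        Set<String> expected = new HashSet<>(Arrays.asList("R3", "R4"));
        Set<String> pollOrderA = new HashSet<>(Arrays.asList("R3", "R4"));
        Set<String> pollOrderB = new HashSet<>(Arrays.asList("R4", "R3"));

        System.out.println(expected.equals(pollOrderA)); // true
        System.out.println(expected.equals(pollOrderB)); // true
    }
}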
use of java.util.HashSet in project flink by apache.
the class InternalWindowFunctionTest method testInternalAggregateProcessWindowFunction.
@SuppressWarnings("unchecked")
@Test
public void testInternalAggregateProcessWindowFunction() throws Exception {
    AggregateProcessWindowFunctionMock mock = mock(AggregateProcessWindowFunctionMock.class);

    InternalAggregateProcessWindowFunction<Long, Set<Long>, Map<Long, Long>, String, Long, TimeWindow> windowFunction =
        new InternalAggregateProcessWindowFunction<>(
            new AggregateFunction<Long, Set<Long>, Map<Long, Long>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Set<Long> createAccumulator() {
                    return new HashSet<>();
                }

                @Override
                public void add(Long value, Set<Long> accumulator) {
                    accumulator.add(value);
                }

                @Override
                public Map<Long, Long> getResult(Set<Long> accumulator) {
                    Map<Long, Long> result = new HashMap<>();
                    for (Long in : accumulator) {
                        result.put(in, in);
                    }
                    return result;
                }

                @Override
                public Set<Long> merge(Set<Long> a, Set<Long> b) {
                    a.addAll(b);
                    return a;
                }
            },
            mock);

    // check setOutputType
    TypeInformation<String> stringType = BasicTypeInfo.STRING_TYPE_INFO;
    ExecutionConfig execConf = new ExecutionConfig();
    execConf.setParallelism(42);

    StreamingFunctionUtils.setOutputType(windowFunction, stringType, execConf);
    verify(mock).setOutputType(stringType, execConf);

    // check open
    Configuration config = new Configuration();
    windowFunction.open(config);
    verify(mock).open(config);

    // check setRuntimeContext
    RuntimeContext rCtx = mock(RuntimeContext.class);
    windowFunction.setRuntimeContext(rCtx);
    verify(mock).setRuntimeContext(rCtx);

    // check apply
    TimeWindow w = mock(TimeWindow.class);
    Collector<String> c = (Collector<String>) mock(Collector.class);

    List<Long> args = new LinkedList<>();
    args.add(23L);
    args.add(24L);

    windowFunction.apply(42L, w, args, c);
    verify(mock).process(
        eq(42L),
        (AggregateProcessWindowFunctionMock.Context) anyObject(),
        (Iterable) argThat(containsInAnyOrder(allOf(
            hasEntry(is(23L), is(23L)),
            hasEntry(is(24L), is(24L))))),
        eq(c));

    // check close
    windowFunction.close();
    verify(mock).close();
}
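In this example the HashSet is the accumulator of the anonymous AggregateFunction: createAccumulator starts with an empty set, add inserts values (so duplicates collapse), merge unions two sets, and getResult turns the set into an identity map. A hedged, Flink-free sketch of that accumulator life cycle, driving the same steps by hand (the class name is invented and nothing here calls the Flink API):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class SetAccumulatorDemo {
    public static void main(String[] args) {
        // createAccumulator()
        Set<Long> accumulator = new HashSet<>();

        // add(value, accumulator) for each input; duplicates collapse in the Set
        accumulator.add(23L);
        accumulator.add(24L);
        accumulator.add(23L);

        // getResult(accumulator): identity map, as in the test's aggregate
        Map<Long, Long> result = new HashMap<>();
        for (Long in : accumulator) {
            result.put(in, in);
        }
        System.out.println(result); // {23=23, 24=24} (iteration order may vary)
    }
}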
use of java.util.HashSet in project flink by apache.
the class StateInitializationContextImplTest method getOperatorStateStore.
@Test
public void getOperatorStateStore() throws Exception {
    Set<Integer> readStatesCount = new HashSet<>();

    for (StatePartitionStreamProvider statePartitionStreamProvider : initializationContext.getRawOperatorStateInputs()) {
        Assert.assertNotNull(statePartitionStreamProvider);

        try (InputStream is = statePartitionStreamProvider.getStream()) {
            DataInputView div = new DataInputViewStreamWrapper(is);
            Assert.assertTrue(readStatesCount.add(div.readInt()));
        }
    }

    Assert.assertEquals(writtenOperatorStates, readStatesCount);
}
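The test packs two checks into one line by using the return value of HashSet.add, which is false when the element is already present: each readInt must be a previously unseen state. The idiom in isolation (class name invented):

import java.util.HashSet;
import java.util.Set;

public class FirstSeenDemo {
    public static void main(String[] args) {
        Set<Integer> seen = new HashSet<>();
        System.out.println(seen.add(7)); // true: first occurrence, recorded
        System.out.println(seen.add(7)); // false: duplicate detected
    }
}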
use of java.util.HashSet in project flink by apache.
the class HeapInternalTimerServiceTest method testTimerAssignmentToKeyGroups.
@Test
public void testTimerAssignmentToKeyGroups() {
    int totalNoOfTimers = 100;

    int totalNoOfKeyGroups = 100;
    int startKeyGroupIdx = 0;
    // we have key groups 0 to 99
    int endKeyGroupIdx = totalNoOfKeyGroups - 1;

    @SuppressWarnings("unchecked")
    Set<InternalTimer<Integer, String>>[] expectedNonEmptyTimerSets = new HashSet[totalNoOfKeyGroups];

    TestKeyContext keyContext = new TestKeyContext();
    HeapInternalTimerService<Integer, String> timerService = new HeapInternalTimerService<>(
        totalNoOfKeyGroups,
        new KeyGroupRange(startKeyGroupIdx, endKeyGroupIdx),
        keyContext,
        new TestProcessingTimeService());

    timerService.startTimerService(IntSerializer.INSTANCE, StringSerializer.INSTANCE, mock(Triggerable.class));

    for (int i = 0; i < totalNoOfTimers; i++) {
        // create the timer to be registered
        InternalTimer<Integer, String> timer = new InternalTimer<>(10 + i, i, "hello_world_" + i);
        int keyGroupIdx = KeyGroupRangeAssignment.assignToKeyGroup(timer.getKey(), totalNoOfKeyGroups);

        // add it to the expected set of timers for that key group
        Set<InternalTimer<Integer, String>> timerSet = expectedNonEmptyTimerSets[keyGroupIdx];
        if (timerSet == null) {
            timerSet = new HashSet<>();
            expectedNonEmptyTimerSets[keyGroupIdx] = timerSet;
        }
        timerSet.add(timer);

        // register the timer as both a processing time and an event time timer
        keyContext.setCurrentKey(timer.getKey());
        timerService.registerEventTimeTimer(timer.getNamespace(), timer.getTimestamp());
        timerService.registerProcessingTimeTimer(timer.getNamespace(), timer.getTimestamp());
    }

    Set<InternalTimer<Integer, String>>[] eventTimeTimers = timerService.getEventTimeTimersPerKeyGroup();
    Set<InternalTimer<Integer, String>>[] processingTimeTimers = timerService.getProcessingTimeTimersPerKeyGroup();

    // finally, verify that the actual sets of timers per key group are the expected ones
    for (int i = 0; i < expectedNonEmptyTimerSets.length; i++) {
        Set<InternalTimer<Integer, String>> expected = expectedNonEmptyTimerSets[i];
        Set<InternalTimer<Integer, String>> actualEvent = eventTimeTimers[i];
        Set<InternalTimer<Integer, String>> actualProcessing = processingTimeTimers[i];

        if (expected == null) {
            Assert.assertNull(actualEvent);
            Assert.assertNull(actualProcessing);
        } else {
            Assert.assertArrayEquals(expected.toArray(), actualEvent.toArray());
            Assert.assertArrayEquals(expected.toArray(), actualProcessing.toArray());
        }
    }
}
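The expected timers are collected in a lazily initialized array of HashSets, one slot per key group; slots that never receive a timer stay null, which is what lets the verification loop above distinguish unused key groups (assertNull) from populated ones. A minimal sketch of that bucket pattern, with a simple modulo standing in for KeyGroupRangeAssignment.assignToKeyGroup (all names invented):

import java.util.HashSet;
import java.util.Set;

public class LazyBucketsDemo {
    public static void main(String[] args) {
        int numBuckets = 16;

        @SuppressWarnings("unchecked")
        Set<Integer>[] buckets = new HashSet[numBuckets];

        for (int value = 0; value < 10; value++) {
            int idx = value % numBuckets; // hypothetical stand-in for assignToKeyGroup
            Set<Integer> bucket = buckets[idx];
            if (bucket == null) {
                bucket = new HashSet<>();
                buckets[idx] = bucket;
            }
            bucket.add(value);
        }

        // Buckets that never received a value stay null, mirroring the
        // assertNull branch for empty key groups in the test above.
        System.out.println(buckets[0]);  // [0]
        System.out.println(buckets[15]); // null
    }
}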