Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in the Apache Flink project.
In class LocalStreamingFileSinkTest, method testScalingDownAndMergingOfStates.
/**
 * Writes elements through two sink subtasks (parallelism 2), merges their snapshots into a
 * single state, restores that state into one sink subtask (parallelism 1) and verifies the part
 * files found in the output directory before and after a completed checkpoint.
 */
@Test
public void testScalingDownAndMergingOfStates() throws Exception {
    final File outDir = TEMP_FOLDER.newFolder();

    OperatorSubtaskState mergedSnapshot;

    // Per the original author's note, the sink is created with a small file size so that the
    // part file rolls on every element.
    try (OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> firstSubtask =
                    TestUtils.createRescalingTestSink(outDir, 2, 0, 100L, 10L);
            OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> secondSubtask =
                    TestUtils.createRescalingTestSink(outDir, 2, 1, 100L, 10L)) {

        firstSubtask.setup();
        firstSubtask.open();

        secondSubtask.setup();
        secondSubtask.open();

        firstSubtask.processElement(new StreamRecord<>(Tuple2.of("test1", 0), 0L));
        TestUtils.checkLocalFs(outDir, 1, 0);

        secondSubtask.processElement(new StreamRecord<>(Tuple2.of("test1", 1), 1L));
        secondSubtask.processElement(new StreamRecord<>(Tuple2.of("test2", 1), 1L));

        // three files exist at this point and all of them are still in-progress
        TestUtils.checkLocalFs(outDir, 3, 0);

        int inProgressMatches = 0;
        for (Map.Entry<File, String> entry : TestUtils.getFileContentByPath(outDir).entrySet()) {
            final File partFile = entry.getKey();
            final String bucketName = partFile.getParentFile().getName();
            final String partFileName = partFile.getName();

            final boolean inBucketTest1 =
                    bucketName.equals("test1")
                            && (partFileName.contains(".part-0-0.inprogress")
                                    || partFileName.contains(".part-1-0.inprogress"));
            final boolean inBucketTest2 =
                    bucketName.equals("test2") && partFileName.contains(".part-1-1.inprogress");

            if (inBucketTest1 || inBucketTest2) {
                inProgressMatches++;
            }
        }
        Assert.assertEquals(3L, inProgressMatches);

        // snapshot both subtasks (first, then second) and merge the two partial states
        final OperatorSubtaskState firstSnapshot = firstSubtask.snapshot(1L, 0L);
        final OperatorSubtaskState secondSnapshot = secondSubtask.snapshot(1L, 0L);
        mergedSnapshot =
                AbstractStreamOperatorTestHarness.repackageState(firstSnapshot, secondSnapshot);
    }

    // reshape the merged state for a restore that goes from parallelism 2 down to 1, subtask 0
    final OperatorSubtaskState initState =
            AbstractStreamOperatorTestHarness.repartitionOperatorState(
                    mergedSnapshot, TestUtils.MAX_PARALLELISM, 2, 1, 0);

    try (OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> restoredSink =
            TestUtils.createRescalingTestSink(outDir, 1, 0, 100L, 10L)) {

        restoredSink.setup();
        restoredSink.initializeState(initState);
        restoredSink.open();

        // everything is still reported as in-progress; per the original author's note, the
        // in-progress file of the previous task 1 should now have been moved to pending
        TestUtils.checkLocalFs(outDir, 3, 0);

        restoredSink.snapshot(2L, 2L);
        restoredSink.notifyOfCompletedCheckpoint(2L);

        int matchedFiles = 0;
        for (Map.Entry<File, String> entry : TestUtils.getFileContentByPath(outDir).entrySet()) {
            final File partFile = entry.getKey();
            final String bucketName = partFile.getParentFile().getName();
            final String partFileName = partFile.getName();
            final String content = entry.getValue();

            if (bucketName.equals("test1")) {
                // files of bucket "test1": either still in-progress or already finalized
                // (name ending in "-0"); each one holds exactly one of the two test1 records
                final boolean isFirstPartFile =
                        partFileName.contains("-0.inprogress") || partFileName.endsWith("-0");
                if (isFirstPartFile) {
                    matchedFiles++;
                    final boolean holdsTest1Record =
                            content.equals("test1@1\n") || content.equals("test1@0\n");
                    Assert.assertTrue(holdsTest1Record);
                }
            } else if (bucketName.equals("test2")
                    && partFileName.contains(".part-1-1.inprogress")) {
                matchedFiles++;
                Assert.assertEquals("test2@1\n", content);
            }
        }
        Assert.assertEquals(3L, matchedFiles);
    }
}
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in the Apache Flink project.
In class AbstractStreamOperatorTestHarness, method repackageState.
/**
 * Merges the {@link OperatorSubtaskState}s obtained by calling {@link #snapshot(long, long)} on
 * several {@link AbstractStreamOperatorTestHarness} instances (one instance per subtask) into a
 * single {@link OperatorSubtaskState}. This allows a test to change its parallelism freely,
 * i.e. to scale both up and down.
 *
 * <p>The merged state still has to be reshaped with {@link
 * #repartitionOperatorState(OperatorSubtaskState, int, int, int, int)} so that it contains only
 * the key-group states of the local key-group range and the operator states that would be
 * assigned to the local subtask. For a parallelism greater than one, the harness has to be
 * created with the constructor {@link
 * #AbstractStreamOperatorTestHarness(StreamOperator, int, int, int)}.
 *
 * <p><b>NOTE: </b> every element of {@code handles} is assumed to come from a single task of a
 * single operator (i.e. a chain length of one).
 *
 * <p>See {@link AbstractStreamOperatorTest#testStateAndTimerStateShufflingScalingDown()} for a
 * usage example.
 *
 * @param handles the partial states to merge.
 * @return {@code null} if {@code handles} is empty, the sole element itself if exactly one
 *     state is given, otherwise a new state holding the concatenation of all managed/raw
 *     operator and keyed state handles in argument order.
 */
public static OperatorSubtaskState repackageState(OperatorSubtaskState... handles) throws Exception {
    final int partialStateCount = handles.length;

    // nothing to merge
    if (partialStateCount == 0) {
        return null;
    }
    // a single partial state is returned as is, without copying
    if (partialStateCount == 1) {
        return handles[0];
    }

    final List<OperatorStateHandle> allManagedOperatorState = new ArrayList<>(partialStateCount);
    final List<OperatorStateHandle> allRawOperatorState = new ArrayList<>(partialStateCount);
    final List<KeyedStateHandle> allManagedKeyedState = new ArrayList<>(partialStateCount);
    final List<KeyedStateHandle> allRawKeyedState = new ArrayList<>(partialStateCount);

    // concatenate the four kinds of state handles, preserving the argument order
    for (int i = 0; i < partialStateCount; i++) {
        final OperatorSubtaskState partialState = handles[i];
        allManagedOperatorState.addAll(partialState.getManagedOperatorState());
        allRawOperatorState.addAll(partialState.getRawOperatorState());
        allManagedKeyedState.addAll(partialState.getManagedKeyedState());
        allRawKeyedState.addAll(partialState.getRawKeyedState());
    }

    return OperatorSubtaskState.builder()
            .setManagedOperatorState(new StateObjectCollection<>(allManagedOperatorState))
            .setRawOperatorState(new StateObjectCollection<>(allRawOperatorState))
            .setManagedKeyedState(new StateObjectCollection<>(allManagedKeyedState))
            .setRawKeyedState(new StateObjectCollection<>(allRawKeyedState))
            .build();
}
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in the Apache Flink project.
In class WindowOperatorTest, method testEventTimeTumblingWindowsWithEarlyAndLateFirings.
/**
 * Drives a window operator built with a 3 second tumbling event-time window, early firings
 * every 1 second of processing time, late firings on every element, an allowed lateness of 3
 * seconds and update production enabled. The operator is snapshotted, closed and restored in a
 * fresh harness half-way through, and the emitted records and watermarks are compared against
 * the expected output after each step.
 */
@Test
@SuppressWarnings("unchecked")
public void testEventTimeTumblingWindowsWithEarlyAndLateFirings() throws Exception {
// reset the shared close counter that is checked at the very end of the test
closeCalled.set(0);
WindowOperator operator = WindowOperatorBuilder.builder().withInputFields(inputFieldTypes).withShiftTimezone(shiftTimeZone).tumble(Duration.ofSeconds(3)).withEventTime(2).triggering(EventTimeTriggers.afterEndOfWindow().withEarlyFirings(ProcessingTimeTriggers.every(Duration.ofSeconds(1))).withLateFirings(ElementTriggers.every())).withAllowedLateness(Duration.ofSeconds(3)).produceUpdates().aggregate(new SumAndCountAggTimeWindow(), equaliser, accTypes, aggResultTypes, windowTypes).build();
OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
testHarness.open();
testHarness.setProcessingTime(0L);
// add elements out-of-order
testHarness.processElement(insertRecord("key2", 1, 3999L));
testHarness.processElement(insertRecord("key2", 1, 3000L));
testHarness.setProcessingTime(1L);
testHarness.processElement(insertRecord("key1", 1, 20L));
testHarness.processElement(insertRecord("key1", 1, 0L));
testHarness.processElement(insertRecord("key1", 1, 999L));
testHarness.processElement(insertRecord("key2", 1, 1998L));
testHarness.processElement(insertRecord("key2", 1, 1999L));
testHarness.processElement(insertRecord("key2", 1, 1000L));
// advancing processing time to 1000 is expected to produce an early result for the
// key2 [3000, 6000) window, which holds the two elements added at processing time 0
testHarness.setProcessingTime(1000);
expectedOutput.add(insertRecord("key2", 2L, 2L, localMills(3000L), localMills(6000L), localMills(5999L)));
testHarness.processWatermark(new Watermark(999));
expectedOutput.add(new Watermark(999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// advancing processing time to 1001 is expected to produce early results for the
// key1 and key2 [0, 3000) windows (three elements each)
testHarness.setProcessingTime(1001);
expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L)));
expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L)));
testHarness.processWatermark(new Watermark(1999));
testHarness.setProcessingTime(2001);
expectedOutput.add(new Watermark(1999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// do a snapshot, close and restore again
OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
testHarness.close();
// the restored harness starts with an empty output, so the expectation is reset as well
expectedOutput.clear();
// create a new test harness for the same operator and restore it from the snapshot
testHarness = createTestHarness(operator);
testHarness.setup();
testHarness.initializeState(snapshot);
testHarness.open();
testHarness.setProcessingTime(3001);
testHarness.processWatermark(new Watermark(2999));
// on time fire key1 & key2 [0 ~ 3000) window, but because of early firing, on time result
// is ignored
expectedOutput.add(new Watermark(2999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.processElement(insertRecord("key2", 1, 4999L));
testHarness.processWatermark(new Watermark(3999));
// the next early firing is expected to update the key2 [3000, 6000) result from 2 to 3
testHarness.setProcessingTime(4001);
expectedOutput.add(new Watermark(3999));
expectedOutput.add(updateBeforeRecord("key2", 2L, 2L, localMills(3000L), localMills(6000L), localMills(5999L)));
expectedOutput.add(updateAfterRecord("key2", 3L, 3L, localMills(3000L), localMills(6000L), localMills(5999L)));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// late arrival for the key2 [0, 3000) window: expected to fire immediately and update the
// result from 3 to 4
testHarness.processElement(insertRecord("key2", 1, 2001L));
expectedOutput.add(updateBeforeRecord("key2", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L)));
expectedOutput.add(updateAfterRecord("key2", 4L, 4L, localMills(0L), localMills(3000L), localMills(2999L)));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// late arrival for the key1 [0, 3000) window: expected to fire immediately and update the
// result from 3 to 4
testHarness.processElement(insertRecord("key1", 1, 2030L));
expectedOutput.add(updateBeforeRecord("key1", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L)));
expectedOutput.add(updateAfterRecord("key1", 4L, 4L, localMills(0L), localMills(3000L), localMills(2999L)));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.setProcessingTime(5100);
testHarness.processElement(insertRecord("key2", 1, 5122L));
testHarness.processWatermark(new Watermark(4999));
expectedOutput.add(new Watermark(4999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// watermark 5999 reaches the end of the key2 [3000, 6000) window: the result is expected
// to be updated from 3 to 4
testHarness.processWatermark(new Watermark(5999));
expectedOutput.add(updateBeforeRecord("key2", 3L, 3L, localMills(3000L), localMills(6000L), localMills(5999L)));
expectedOutput.add(updateAfterRecord("key2", 4L, 4L, localMills(3000L), localMills(6000L), localMills(5999L)));
expectedOutput.add(new Watermark(5999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// advancing processing time further is expected to emit nothing new
testHarness.setProcessingTime(6001);
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// these watermarks are only forwarded; no window result is expected for them
testHarness.processWatermark(new Watermark(6999));
testHarness.processWatermark(new Watermark(7999));
expectedOutput.add(new Watermark(6999));
expectedOutput.add(new Watermark(7999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// late arrival, but too late, drop: the expected output stays unchanged
testHarness.processElement(insertRecord("key2", 1, 2877L));
testHarness.processElement(insertRecord("key1", 1, 2899L));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.close();
// the counter was reset to 0 at the start and the harness has been closed twice (once
// before the restore, once here), so 2 is expected
// NOTE(review): presumably each harness close increments closeCalled once - confirm
assertEquals("Close was not called.", 2, closeCalled.get());
}
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in the Apache Flink project.
In class WindowOperatorTest, method testEventTimeTumblingWindowsWithEarlyFiring.
/**
 * Drives a window operator built with a 3 second tumbling event-time window, early firings
 * every 1 second of processing time and update production enabled; no late firings and no
 * allowed lateness are configured. The operator is snapshotted, closed and restored in a fresh
 * harness half-way through, and the emitted records and watermarks are compared against the
 * expected output after each step.
 */
@Test
@SuppressWarnings("unchecked")
public void testEventTimeTumblingWindowsWithEarlyFiring() throws Exception {
// reset the shared close counter that is checked at the very end of the test
closeCalled.set(0);
WindowOperator operator = WindowOperatorBuilder.builder().withInputFields(inputFieldTypes).tumble(Duration.ofSeconds(3)).withShiftTimezone(shiftTimeZone).withEventTime(2).triggering(EventTimeTriggers.afterEndOfWindow().withEarlyFirings(ProcessingTimeTriggers.every(Duration.ofSeconds(1)))).produceUpdates().aggregate(new SumAndCountAggTimeWindow(), equaliser, accTypes, aggResultTypes, windowTypes).build();
OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
testHarness.open();
testHarness.setProcessingTime(0L);
// add elements out-of-order
testHarness.processElement(insertRecord("key2", 1, 3999L));
testHarness.processElement(insertRecord("key2", 1, 3000L));
testHarness.setProcessingTime(1L);
testHarness.processElement(insertRecord("key1", 1, 20L));
testHarness.processElement(insertRecord("key1", 1, 0L));
testHarness.processElement(insertRecord("key1", 1, 999L));
testHarness.processElement(insertRecord("key2", 1, 1998L));
testHarness.processElement(insertRecord("key2", 1, 1999L));
testHarness.processElement(insertRecord("key2", 1, 1000L));
// advancing processing time to 1000 is expected to produce an early result for the
// key2 [3000, 6000) window, which holds the two elements added at processing time 0
testHarness.setProcessingTime(1000);
expectedOutput.add(insertRecord("key2", 2L, 2L, localMills(3000L), localMills(6000L), localMills(5999L)));
testHarness.processWatermark(new Watermark(999));
expectedOutput.add(new Watermark(999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// advancing processing time to 1001 is expected to produce early results for the
// key1 and key2 [0, 3000) windows (three elements each)
testHarness.setProcessingTime(1001);
expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L)));
expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L)));
testHarness.processWatermark(new Watermark(1999));
testHarness.setProcessingTime(2001);
expectedOutput.add(new Watermark(1999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// do a snapshot, close and restore again
OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
testHarness.close();
// the restored harness starts with an empty output, so the expectation is reset as well
expectedOutput.clear();
// create a new test harness for the same operator and restore it from the snapshot
testHarness = createTestHarness(operator);
testHarness.setup();
testHarness.initializeState(snapshot);
testHarness.open();
testHarness.setProcessingTime(3001);
testHarness.processWatermark(new Watermark(2999));
// on time fire key1 & key2 [0 ~ 3000) window, but because of early firing, on time result
// is ignored
expectedOutput.add(new Watermark(2999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.processElement(insertRecord("key2", 1, 4999L));
testHarness.processWatermark(new Watermark(3999));
// the next early firing is expected to update the key2 [3000, 6000) result from 2 to 3
testHarness.setProcessingTime(4001);
expectedOutput.add(new Watermark(3999));
expectedOutput.add(updateBeforeRecord("key2", 2L, 2L, localMills(3000L), localMills(6000L), localMills(5999L)));
expectedOutput.add(updateAfterRecord("key2", 3L, 3L, localMills(3000L), localMills(6000L), localMills(5999L)));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// late arrivals for the [0, 3000) window, which the watermark (3999) has already passed
testHarness.processElement(insertRecord("key2", 1, 2001L));
testHarness.processElement(insertRecord("key1", 1, 2030L));
// late elements are dropped: the expected output stays unchanged
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.setProcessingTime(5100);
testHarness.processElement(insertRecord("key2", 1, 5122L));
testHarness.processWatermark(new Watermark(4999));
expectedOutput.add(new Watermark(4999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// watermark 5999 reaches the end of the key2 [3000, 6000) window: the result is expected
// to be updated from 3 to 4
testHarness.processWatermark(new Watermark(5999));
expectedOutput.add(updateBeforeRecord("key2", 3L, 3L, localMills(3000L), localMills(6000L), localMills(5999L)));
expectedOutput.add(updateAfterRecord("key2", 4L, 4L, localMills(3000L), localMills(6000L), localMills(5999L)));
expectedOutput.add(new Watermark(5999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// advancing processing time further is expected to emit nothing new
testHarness.setProcessingTime(6001);
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// these watermarks are only forwarded; no window result is expected for them
testHarness.processWatermark(new Watermark(6999));
testHarness.processWatermark(new Watermark(7999));
expectedOutput.add(new Watermark(6999));
expectedOutput.add(new Watermark(7999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// late arrival, drop: the expected output stays unchanged
testHarness.processElement(insertRecord("key2", 1, 2877L));
testHarness.processElement(insertRecord("key1", 1, 2899L));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.close();
// the counter was reset to 0 at the start and the harness has been closed twice (once
// before the restore, once here), so 2 is expected
// NOTE(review): presumably each harness close increments closeCalled once - confirm
assertEquals("Close was not called.", 2, closeCalled.get());
}
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in the Apache Flink project.
In class WindowOperatorTest, method testEventTimeSlidingWindows.
/**
 * Drives a window operator built with a sliding event-time window of 3 seconds size and 1
 * second slide. Elements are added out of order, watermarks are advanced step by step, and the
 * operator is snapshotted, closed and restored in a fresh harness half-way through. After each
 * watermark the emitted records and watermarks are compared against the expected output.
 */
@Test
public void testEventTimeSlidingWindows() throws Exception {
// reset the shared close counter that is checked at the very end of the test
closeCalled.set(0);
WindowOperator operator = WindowOperatorBuilder.builder().withInputFields(inputFieldTypes).withShiftTimezone(shiftTimeZone).sliding(Duration.ofSeconds(3), Duration.ofSeconds(1)).withEventTime(2).aggregateAndBuild(getTimeWindowAggFunction(), equaliser, accTypes, aggResultTypes, windowTypes);
OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
testHarness.open();
// process elements
ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
// add elements out-of-order
testHarness.processElement(insertRecord("key2", 1, 3999L));
testHarness.processElement(insertRecord("key2", 1, 3000L));
testHarness.processElement(insertRecord("key1", 1, 20L));
testHarness.processElement(insertRecord("key1", 1, 0L));
testHarness.processElement(insertRecord("key1", 1, 999L));
testHarness.processElement(insertRecord("key2", 1, 1998L));
testHarness.processElement(insertRecord("key2", 1, 1999L));
testHarness.processElement(insertRecord("key2", 1, 1000L));
// watermark 999: the [-2000, 1000) window is expected to fire for key1
// NOTE(review): doubleRecord(isTableAggregate, ...) presumably duplicates the expected
// record when the table-aggregate variant is under test - confirm against the helper
testHarness.processWatermark(new Watermark(999));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key1", 3L, 3L, localMills(-2000L), localMills(1000L), localMills(999L))));
expectedOutput.add(new Watermark(999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// watermark 1999: the [-1000, 2000) window is expected to fire for key1 and key2
testHarness.processWatermark(new Watermark(1999));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key1", 3L, 3L, localMills(-1000L), localMills(2000L), localMills(1999L))));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 3L, 3L, localMills(-1000L), localMills(2000L), localMills(1999L))));
expectedOutput.add(new Watermark(1999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// watermark 2999: the [0, 3000) window is expected to fire for key1 and key2
testHarness.processWatermark(new Watermark(2999));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L))));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 3L, 3L, localMills(0L), localMills(3000L), localMills(2999L))));
expectedOutput.add(new Watermark(2999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// do a snapshot, close and restore again
OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
testHarness.close();
// the restored harness starts with an empty output, so the expectation is reset as well
expectedOutput.clear();
// create a new test harness for the same operator and restore it from the snapshot
testHarness = createTestHarness(operator);
testHarness.setup();
testHarness.initializeState(snapshot);
testHarness.open();
// watermark 3999: the [1000, 4000) window is expected to fire for key2 with all five
// key2 elements
testHarness.processWatermark(new Watermark(3999));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 5L, 5L, localMills(1000L), localMills(4000L), localMills(3999L))));
expectedOutput.add(new Watermark(3999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// watermark 4999: the [2000, 5000) window is expected to fire for key2 with two elements
testHarness.processWatermark(new Watermark(4999));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 2L, 2L, localMills(2000L), localMills(5000L), localMills(4999L))));
expectedOutput.add(new Watermark(4999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// watermark 5999: the [3000, 6000) window is expected to fire for key2 with two elements
testHarness.processWatermark(new Watermark(5999));
expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 2L, 2L, localMills(3000L), localMills(6000L), localMills(5999L))));
expectedOutput.add(new Watermark(5999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
// these watermarks are only forwarded; no window result is expected for them
testHarness.processWatermark(new Watermark(6999));
testHarness.processWatermark(new Watermark(7999));
expectedOutput.add(new Watermark(6999));
expectedOutput.add(new Watermark(7999));
assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.close();
// the counter was reset to 0 at the start and the harness has been closed twice (once
// before the restore, once here), so 2 is expected
// NOTE(review): presumably each harness close increments closeCalled once - confirm
assertEquals("Close was not called.", 2, closeCalled.get());
}
Aggregations