Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
From the class ContinuousFileProcessingTest, method testFunctionRestore.
@Test
public void testFunctionRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    org.apache.hadoop.fs.Path path = null;
    long fileModTime = Long.MIN_VALUE;
    for (int i = 0; i < 1; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
                createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        path = file.f0;
        fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
    }

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));

    final ContinuousFileMonitoringFunction<String> monitoringFunction =
            createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
            new StreamSource<>(monitoringFunction);

    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
            new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();

    final Throwable[] error = new Throwable[1];
    final OneShotLatch latch = new OneShotLatch();

    final DummySourceContext sourceContext = new DummySourceContext() {
        @Override
        public void collect(TimestampedFileInputSplit element) {
            latch.trigger();
        }
    };

    // run the source asynchronously
    Thread runner = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.run(sourceContext);
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();

    // first condition for the source to have updated its state: emit at least one element
    if (!latch.isTriggered()) {
        latch.await();
    }
    // second condition for the source to have updated its state: it is no longer holding
    // the checkpoint lock, which means it has processed all the splits and updated its state.
    synchronized (sourceContext.getCheckpointLock()) {
    }
    OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    monitoringFunction.cancel();
    runner.join();

    testHarness.close();

    final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy =
            createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy =
            new StreamSource<>(monitoringFunctionCopy);

    AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy =
            new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();

    Assert.assertNull(error[0]);
    Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());

    hdfs.delete(path, false);
}
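Stripped of the file-monitoring specifics, the snapshot/restore round-trip above follows a fixed skeleton that recurs in every example on this page. A minimal sketch, where mySource and restoredSource are hypothetical placeholders for any checkpointed SourceFunction<String>:

    // "mySource" / "restoredSource" are placeholders, not real Flink classes.
    // Wrap the source in a test harness and drive it until it has state.
    AbstractStreamOperatorTestHarness<String> harness =
            new AbstractStreamOperatorTestHarness<>(new StreamSource<>(mySource), 1, 1, 0);
    harness.open();
    // ... run the function and wait until its state is worth checkpointing ...

    // Capture the operator state as an OperatorSubtaskState.
    OperatorSubtaskState snapshot = harness.snapshot(0L, 0L);
    harness.close();

    // Feed the snapshot to a fresh harness around a new function instance;
    // initializeState must be called before open().
    AbstractStreamOperatorTestHarness<String> restoredHarness =
            new AbstractStreamOperatorTestHarness<>(new StreamSource<>(restoredSource), 1, 1, 0);
    restoredHarness.initializeState(snapshot);
    restoredHarness.open();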
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
From the class ContinuousFileProcessingMigrationTest, method writeMonitoringSourceSnapshot.
/**
* Manually run this to write binary snapshot data. Remove @Ignore to run.
*/
@Ignore
@Test
public void writeMonitoringSourceSnapshot() throws Exception {
    File testFolder = tempFolder.newFolder();

    long fileModTime = Long.MIN_VALUE;
    for (int i = 0; i < 1; i++) {
        Tuple2<File, String> file = createFileAndFillWithData(testFolder, "file", i, "This is test line.");
        fileModTime = file.f0.lastModified();
    }

    TextInputFormat format = new TextInputFormat(new Path(testFolder.getAbsolutePath()));

    final ContinuousFileMonitoringFunction<String> monitoringFunction =
            new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
            new StreamSource<>(monitoringFunction);

    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
            new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();

    final Throwable[] error = new Throwable[1];
    final OneShotLatch latch = new OneShotLatch();

    // run the source asynchronously
    Thread runner = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.run(new DummySourceContext() {
                    @Override
                    public void collect(TimestampedFileInputSplit element) {
                        latch.trigger();
                    }

                    @Override
                    public void markAsTemporarilyIdle() {
                    }
                });
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();

    if (!latch.isTriggered()) {
        latch.await();
    }

    final OperatorSubtaskState snapshot;
    synchronized (testHarness.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0L, 0L);
    }

    OperatorSnapshotUtil.writeStateHandle(
            snapshot,
            "src/test/resources/monitoring-function-migration-test-"
                    + fileModTime
                    + "-flink"
                    + flinkGenerateSavepointVersion
                    + "-snapshot");

    monitoringFunction.cancel();
    runner.join();

    testHarness.close();
}
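writeMonitoringSourceSnapshot only produces the file; the matching migration test later reads it back and restores from it. A hedged sketch of that restore side, assuming a freshly created testHarness as in the examples above and a flinkVersion placeholder for the version under test:

    // Read the OperatorSubtaskState written by writeMonitoringSourceSnapshot
    // and restore a fresh harness from it before opening it.
    // "flinkVersion" is a placeholder; the concrete file name is test-specific.
    OperatorSubtaskState restoredSnapshot = OperatorSnapshotUtil.readStateHandle(
            "src/test/resources/monitoring-function-migration-test-" + fileModTime + "-flink" + flinkVersion + "-snapshot");
    testHarness.initializeState(restoredSnapshot);
    testHarness.open();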
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
From the class ArrowSourceFunctionTestBase, method testRestore.
@Test
public void testRestore() throws Exception {
    Tuple2<List<RowData>, Integer> testData = getTestData();
    final ArrowSourceFunction arrowSourceFunction = createTestArrowSourceFunction(testData.f0, testData.f1);

    final AbstractStreamOperatorTestHarness<RowData> testHarness =
            new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction), 1, 1, 0);
    testHarness.open();

    final Throwable[] error = new Throwable[1];
    final MultiShotLatch latch = new MultiShotLatch();
    final AtomicInteger numOfEmittedElements = new AtomicInteger(0);
    final List<RowData> results = new ArrayList<>();

    final DummySourceContext<RowData> sourceContext = new DummySourceContext<RowData>() {
        @Override
        public void collect(RowData element) {
            if (numOfEmittedElements.get() == 2) {
                latch.trigger();
                // fail the source function once two elements have been emitted
                throw new RuntimeException("Fail the arrow source");
            }
            results.add(typeSerializer.copy(element));
            numOfEmittedElements.incrementAndGet();
        }
    };
    // run the source asynchronously
    Thread runner = new Thread(() -> {
        try {
            arrowSourceFunction.run(sourceContext);
        } catch (Throwable t) {
            if (!t.getMessage().equals("Fail the arrow source")) {
                error[0] = t;
            }
        }
    });
    runner.start();

    if (!latch.isTriggered()) {
        latch.await();
    }

    OperatorSubtaskState snapshot;
    synchronized (sourceContext.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0, 0);
    }
    runner.join();
    testHarness.close();

    final ArrowSourceFunction arrowSourceFunction2 = createTestArrowSourceFunction(testData.f0, testData.f1);
    AbstractStreamOperatorTestHarness<RowData> testHarnessCopy =
            new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction2), 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();

    // run the source asynchronously
    Thread runner2 = new Thread(() -> {
        try {
            arrowSourceFunction2.run(new DummySourceContext<RowData>() {
                @Override
                public void collect(RowData element) {
                    results.add(typeSerializer.copy(element));
                    if (numOfEmittedElements.incrementAndGet() == testData.f0.size()) {
                        latch.trigger();
                    }
                }
            });
        } catch (Throwable t) {
            error[0] = t;
        }
    });
    runner2.start();

    if (!latch.isTriggered()) {
        latch.await();
    }
    runner2.join();

    Assert.assertNull(error[0]);
    Assert.assertEquals(testData.f0.size(), numOfEmittedElements.get());
    checkElementsEquals(results, testData.f0);
}
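All three source tests above extend a DummySourceContext helper that is not shown here. A plausible minimal version (an assumption for illustration, not the actual Flink test utility) implements SourceFunction.SourceContext, ignores timestamps and watermarks, and supplies the checkpoint lock that the snapshots synchronize on:

    // Hypothetical minimal SourceContext for tests: subclasses override
    // collect(), everything else is a no-op around a private lock.
    abstract static class DummySourceContext<T> implements SourceFunction.SourceContext<T> {

        private final Object lock = new Object();

        @Override
        public void collectWithTimestamp(T element, long timestamp) {
        }

        @Override
        public void emitWatermark(Watermark mark) {
        }

        @Override
        public void markAsTemporarilyIdle() {
        }

        @Override
        public Object getCheckpointLock() {
            return lock;
        }

        @Override
        public void close() {
        }
    }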
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
From the class CEPOperatorTest, method testCEPOperatorComparatorProcessTime.
@Test
public void testCEPOperatorComparatorProcessTime() throws Exception {
    Event startEvent1 = new Event(42, "start", 1.0);
    Event startEvent2 = new Event(42, "start", 2.0);
    SubEvent middleEvent1 = new SubEvent(42, "foo1", 3.0, 10.0);
    SubEvent middleEvent2 = new SubEvent(42, "foo2", 4.0, 10.0);
    Event endEvent1 = new Event(42, "end", 1.0);

    Event startEventK2 = new Event(43, "start", 1.0);

    CepOperator<Event, Integer, Map<String, List<Event>>> operator = getKeyedCepOperatorWithComparator(true);
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness =
            CepOperatorTestUtilities.getCepTestHarness(operator);

    try {
        harness.open();

        harness.setProcessingTime(0L);

        harness.processElement(new StreamRecord<>(startEvent1, 0L));
        harness.processElement(new StreamRecord<>(startEventK2, 0L));
        harness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 0L));
        harness.processElement(new StreamRecord<>(new SubEvent(42, "barfoo", 1.0, 5.0), 0L));

        assertFalse(operator.hasNonEmptySharedBuffer(42));
        assertFalse(operator.hasNonEmptySharedBuffer(43));

        harness.setProcessingTime(3L);

        assertTrue(operator.hasNonEmptySharedBuffer(42));
        assertTrue(operator.hasNonEmptySharedBuffer(43));

        harness.processElement(new StreamRecord<>(middleEvent2, 3L));
        harness.processElement(new StreamRecord<>(middleEvent1, 3L));
        harness.processElement(new StreamRecord<>(startEvent2, 3L));

        OperatorSubtaskState snapshot = harness.snapshot(0L, 0L);
        harness.close();

        CepOperator<Event, Integer, Map<String, List<Event>>> operator2 = getKeyedCepOperatorWithComparator(true);
        harness = CepOperatorTestUtilities.getCepTestHarness(operator2);
        harness.setup();
        harness.initializeState(snapshot);
        harness.open();

        harness.setProcessingTime(4L);
        harness.processElement(new StreamRecord<>(endEvent1, 5L));
        harness.setProcessingTime(5L);

        verifyPattern(harness.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        verifyPattern(harness.getOutput().poll(), startEvent1, middleEvent2, endEvent1);
        verifyPattern(harness.getOutput().poll(), startEvent2, middleEvent1, endEvent1);
        verifyPattern(harness.getOutput().poll(), startEvent2, middleEvent2, endEvent1);
    } finally {
        harness.close();
    }
}
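The interplay of setProcessingTime and the assertions is the crux of this test: elements are buffered until a processing-time timer fires, and only advancing the harness clock runs them through the NFA. Repeating just the relevant lines from the test above:

    harness.setProcessingTime(0L);
    harness.processElement(new StreamRecord<>(startEvent1, 0L)); // buffered, not yet processed
    assertFalse(operator.hasNonEmptySharedBuffer(42));           // shared buffer still empty

    harness.setProcessingTime(3L);                               // timer fires, buffered elements are processed
    assertTrue(operator.hasNonEmptySharedBuffer(42));            // NFA state now exists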
Use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.
From the class CEPOperatorTest, method testKeyedCEPOperatorCheckpointing.
@Test
public void testKeyedCEPOperatorCheckpointing() throws Exception {
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness = getCepTestHarness(false);

    try {
        harness.open();

        Event startEvent = new Event(42, "start", 1.0);
        SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
        Event endEvent = new Event(42, "end", 1.0);

        harness.processElement(new StreamRecord<>(startEvent, 1L));
        harness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 2L));

        // simulate snapshot/restore with some elements in internal sorting queue
        OperatorSubtaskState snapshot = harness.snapshot(0L, 0L);
        harness.close();

        harness = getCepTestHarness(false);
        harness.setup();
        harness.initializeState(snapshot);
        harness.open();

        harness.processWatermark(new Watermark(Long.MIN_VALUE));
        harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3L));

        // if element timestamps are not correctly checkpointed/restored this will lead to
        // a pruning time underflow exception in NFA
        harness.processWatermark(new Watermark(2L));

        harness.processElement(new StreamRecord<Event>(middleEvent, 3L));
        harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4L));
        harness.processElement(new StreamRecord<>(endEvent, 5L));

        // simulate snapshot/restore with empty element queue but NFA state
        OperatorSubtaskState snapshot2 = harness.snapshot(1L, 1L);
        harness.close();

        harness = getCepTestHarness(false);
        harness.setup();
        harness.initializeState(snapshot2);
        harness.open();

        harness.processWatermark(new Watermark(Long.MAX_VALUE));

        // get and verify the output
        Queue<Object> result = harness.getOutput();
        assertEquals(2, result.size());

        verifyPattern(result.poll(), startEvent, middleEvent, endEvent);
        verifyWatermark(result.poll(), Long.MAX_VALUE);
    } finally {
        harness.close();
    }
}
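Both CEP tests reduce to the same discipline: checkpoint while the NFA holds a partial match, rebuild the operator from scratch, restore, and let the match complete. Distilled from the code above, with getCepTestHarness standing in for either harness factory used in these tests:

    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness = getCepTestHarness(false);
    harness.open();
    harness.processElement(new StreamRecord<>(startEvent, 1L));

    // Checkpoint mid-pattern: the partial match lives in the shared buffer.
    OperatorSubtaskState snapshot = harness.snapshot(0L, 0L);
    harness.close();

    // A new harness around a new operator instance resumes the match.
    harness = getCepTestHarness(false);
    harness.setup();
    harness.initializeState(snapshot);
    harness.open();
    harness.processElement(new StreamRecord<>(endEvent, 5L));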