Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class FlinkKinesisConsumerTest, method testSourceSynchronization.
@Test
public void testSourceSynchronization() throws Exception {
    final String streamName = "fakeStreamName";
    final Time maxOutOfOrderness = Time.milliseconds(5);
    final long autoWatermarkInterval = 1_000;
    final long watermarkSyncInterval = autoWatermarkInterval + 1;
    TestWatermarkTracker.WATERMARK.set(0);
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    final KinesisDeserializationSchema<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new OpenCheckingStringSchema());

    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    props.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(watermarkSyncInterval));
    props.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(5));

    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Collections.singletonList(shard1));

    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
            new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
                @Override
                protected KinesisDataFetcher<String> createFetcher(
                        List<String> streams,
                        SourceFunction.SourceContext<String> sourceContext,
                        RuntimeContext runtimeContext,
                        Properties configProps,
                        KinesisDeserializationSchema<String> deserializationSchema) {
                    KinesisDataFetcher<String> fetcher =
                            new KinesisDataFetcher<String>(
                                    streams,
                                    sourceContext,
                                    sourceContext.getCheckpointLock(),
                                    runtimeContext,
                                    configProps,
                                    deserializationSchema,
                                    getShardAssigner(),
                                    getPeriodicWatermarkAssigner(),
                                    getWatermarkTracker(),
                                    new AtomicReference<>(),
                                    new ArrayList<>(),
                                    subscribedStreamsToLastDiscoveredShardIds,
                                    (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                                    null) {
                                @Override
                                protected void emitWatermark() {
                                    // before the watermark timer callback is triggered
                                    synchronized (sourceContext.getCheckpointLock()) {
                                        super.emitWatermark();
                                    }
                                }
                            };
                    return fetcher;
                }
            };

    sourceFunc.setShardAssigner(
            (streamShardHandle, i) -> {
                // shardId-000000000000
                return Integer.parseInt(
                        streamShardHandle.getShard().getShardId().substring("shardId-".length()));
            });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    sourceFunc.setWatermarkTracker(new TestWatermarkTracker());

    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness =
            new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();

    final ConcurrentLinkedQueue<Object> results = testHarness.getOutput();
    final AtomicBoolean throwOnCollect = new AtomicBoolean();

    @SuppressWarnings("unchecked")
    SourceFunction.SourceContext<String> sourceContext =
            new CollectingSourceContext(testHarness.getCheckpointLock(), results) {
                @Override
                public void markAsTemporarilyIdle() {}

                @Override
                public void collect(Serializable element) {
                    if (throwOnCollect.get()) {
                        throw new RuntimeException("expected");
                    }
                    super.collect(element);
                }

                @Override
                public void emitWatermark(Watermark mark) {
                    results.add(mark);
                }
            };

    final AtomicReference<Exception> sourceThreadError = new AtomicReference<>();
    new Thread(
            () -> {
                try {
                    sourceFunc.run(sourceContext);
                } catch (InterruptedException e) {
                    // expected on cancel
                } catch (Exception e) {
                    sourceThreadError.set(e);
                }
            })
            .start();

    ArrayList<Object> expectedResults = new ArrayList<>();
    final long record1 = 1;
    shard1.put(Long.toString(record1));
    expectedResults.add(Long.toString(record1));
    awaitRecordCount(results, expectedResults.size());

    // at this point we know the fetcher was initialized
    final KinesisDataFetcher fetcher =
            org.powermock.reflect.Whitebox.getInternalState(sourceFunc, "fetcher");

    // trigger watermark emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(-4));
    // verify watermark
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(0, TestWatermarkTracker.WATERMARK.get());

    // trigger sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    TestWatermarkTracker.assertGlobalWatermark(-4);

    final long record2 = record1 + (watermarkSyncInterval * 3) + 1;
    shard1.put(Long.toString(record2));

    // wait for the record to be buffered in the emitter
    final RecordEmitter<?> emitter =
            org.powermock.reflect.Whitebox.getInternalState(fetcher, "recordEmitter");
    RecordEmitter.RecordQueue emitterQueue = emitter.getQueue(0);
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && emitterQueue.getSize() < 1) {
        Thread.sleep(10);
    }
    assertEquals("first record received", 1, emitterQueue.getSize());

    // Advance the watermark. Since the new record is past global watermark + threshold,
    // it won't be emitted and the watermark does not advance
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(
            3000L,
            (long) org.powermock.reflect.Whitebox.getInternalState(fetcher, "nextWatermark"));
    TestWatermarkTracker.assertGlobalWatermark(-4);

    // Trigger global watermark sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    expectedResults.add(Long.toString(record2));
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    TestWatermarkTracker.assertGlobalWatermark(3000);

    // Trigger watermark update and emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(3000));
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));

    // verify exception propagation
    Assert.assertNull(sourceThreadError.get());
    throwOnCollect.set(true);
    shard1.put(Long.toString(record2 + 1));
    deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && sourceThreadError.get() == null) {
        Thread.sleep(10);
    }
    Assert.assertNotNull(sourceThreadError.get());
Assert.assertNotNull("expected", sourceThreadError.get().getMessage());
    sourceFunc.cancel();
    testHarness.close();
}
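
The awaitRecordCount helper (not shown here), the emitter-queue wait, and the error-propagation wait are all instances of the same poll-under-Deadline pattern: check a condition, sleep briefly, and give up once hasTimeLeft() turns false. A minimal sketch of that pattern as a reusable helper (the class name, method name, and Supplier-based signature are illustrative, not part of the Flink test):

import java.time.Duration;
import java.util.function.Supplier;
import org.apache.flink.api.common.time.Deadline;

public final class PollUntil {

    private PollUntil() {}

    // Polls the condition every pollMillis until it holds or the deadline expires.
    // Returns true if the condition became true in time, false on timeout.
    public static boolean pollUntil(Supplier<Boolean> condition, Duration timeout, long pollMillis)
            throws InterruptedException {
        Deadline deadline = Deadline.fromNow(timeout);
        while (deadline.hasTimeLeft()) {
            if (condition.get()) {
                return true;
            }
            Thread.sleep(pollMillis);
        }
        return condition.get();
    }
}

With such a helper, the emitter wait above collapses to pollUntil(() -> emitterQueue.getSize() >= 1, Duration.ofSeconds(10), 10).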
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class SQLClientSchemaRegistryITCase, method getAllVersions.
private List<Integer> getAllVersions(String behaviourSubject) throws Exception {
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(120));
    Exception ex =
            new IllegalStateException("Could not query schema registry. Negative deadline provided.");
    while (deadline.hasTimeLeft()) {
        try {
            return registryClient.getAllVersions(behaviourSubject);
        } catch (RestClientException e) {
            ex = e;
        }
    }
    throw ex;
}
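
Here the Deadline bounds a retry loop rather than a poll: transient RestClientExceptions are swallowed and the call is retried until time runs out, at which point the most recent failure is rethrown. A hedged sketch generalizing the idea, with a small backoff added so the registry is not hammered in a tight loop (the ThrowingSupplier interface and helper name are invented for illustration; the original loop retries without sleeping):

import java.time.Duration;
import org.apache.flink.api.common.time.Deadline;

public final class RetryUntilDeadline {

    private RetryUntilDeadline() {}

    // A throwing supplier; not part of Flink, declared here to keep the sketch self-contained.
    @FunctionalInterface
    public interface ThrowingSupplier<T> {
        T get() throws Exception;
    }

    // Retries the call until it succeeds or the deadline expires,
    // rethrowing the most recent failure on timeout.
    public static <T> T retry(ThrowingSupplier<T> call, Duration timeout, long backoffMillis)
            throws Exception {
        Deadline deadline = Deadline.fromNow(timeout);
        Exception last = new IllegalStateException("Negative deadline provided.");
        while (deadline.hasTimeLeft()) {
            try {
                return call.get();
            } catch (Exception e) {
                last = e;
                Thread.sleep(backoffMillis); // back off instead of retrying immediately
            }
        }
        throw last;
    }
}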
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class GlueSchemaRegistryJsonKinesisITCase, method testGSRJsonGenericFormatWithFlink.
@Test
public void testGSRJsonGenericFormatWithFlink() throws Exception {
    List<JsonDataWithSchema> messages = getGenericRecords();
    for (JsonDataWithSchema msg : messages) {
        kinesisClient.sendMessage(msg.getSchema(), INPUT_STREAM, msg);
    }
    log.info("generated records");

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataStream<JsonDataWithSchema> input = env.addSource(createSource());
    input.addSink(createSink());
    env.executeAsync();

    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(60));
    List<Object> results = kinesisClient.readAllMessages(OUTPUT_STREAM);
    while (deadline.hasTimeLeft() && results.size() < messages.size()) {
        log.info("waiting for results..");
        Thread.sleep(1000);
        results = kinesisClient.readAllMessages(OUTPUT_STREAM);
    }
    log.info("results: {}", results);
    assertThat(results).containsExactlyInAnyOrderElementsOf(messages);
}
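
Since env.executeAsync() returns before the pipeline finishes, the Deadline caps how long the test re-reads the output stream while waiting for all messages to arrive. When the blocking read itself accepts a timeout, Deadline.timeLeft() can be handed to it directly so the overall bound still holds; a short sketch, where fetchResults(Duration) and expectedCount are hypothetical, not Flink or Kinesis APIs:

// Sketch: pass the shrinking remainder of the deadline to a blocking read.
Deadline deadline = Deadline.fromNow(Duration.ofSeconds(60));
List<Object> results = new ArrayList<>();
while (deadline.hasTimeLeft() && results.size() < expectedCount) {
    results = fetchResults(deadline.timeLeft()); // block for at most the time remaining
}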
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class ApplicationDispatcherBootstrapITCase, method testDirtyJobResultRecoveryInApplicationMode.
@Test
public void testDirtyJobResultRecoveryInApplicationMode() throws Exception {
    final Deadline deadline = Deadline.fromNow(TIMEOUT);
    final Configuration configuration = new Configuration();
    configuration.set(HighAvailabilityOptions.HA_MODE, HighAvailabilityMode.ZOOKEEPER.name());
    configuration.set(DeploymentOptions.TARGET, EmbeddedExecutor.NAME);
    configuration.set(ClientOptions.CLIENT_RETRY_PERIOD, Duration.ofMillis(100));
    final TestingMiniClusterConfiguration clusterConfiguration =
            TestingMiniClusterConfiguration.newBuilder().setConfiguration(configuration).build();

    // having a dirty entry in the JobResultStore should make the
    // ApplicationDispatcherBootstrap implementation fail to submit the job
    final JobResultStore jobResultStore = new EmbeddedJobResultStore();
    jobResultStore.createDirtyResult(
            new JobResultEntry(
                    TestingJobResultStore.createSuccessfulJobResult(
                            ApplicationDispatcherBootstrap.ZERO_JOB_ID)));
    final EmbeddedHaServicesWithLeadershipControl haServices =
            new EmbeddedHaServicesWithLeadershipControl(TestingUtils.defaultExecutor()) {
                @Override
                public JobResultStore getJobResultStore() {
                    return jobResultStore;
                }
            };

    final TestingMiniCluster.Builder clusterBuilder =
            TestingMiniCluster.newBuilder(clusterConfiguration)
                    .setHighAvailabilityServicesSupplier(() -> haServices)
                    .setDispatcherResourceManagerComponentFactorySupplier(
                            createApplicationModeDispatcherResourceManagerComponentFactorySupplier(
                                    clusterConfiguration.getConfiguration(),
                                    ErrorHandlingSubmissionJob.createPackagedProgram()));
    try (final MiniCluster cluster = clusterBuilder.build()) {
        // start mini cluster and submit the job
        cluster.start();
        // the cluster should shut down automatically once the application completes
        awaitClusterStopped(cluster, deadline);
    }
    FlinkAssertions.assertThatChainOfCauses(ErrorHandlingSubmissionJob.getSubmissionException())
            .as("The job's main method shouldn't have succeeded due to a DuplicateJobSubmissionException.")
            .hasAtLeastOneElementOfType(DuplicateJobSubmissionException.class);
    assertThat(jobResultStore.hasDirtyJobResultEntry(ApplicationDispatcherBootstrap.ZERO_JOB_ID)).isFalse();
    assertThat(jobResultStore.hasCleanJobResultEntry(ApplicationDispatcherBootstrap.ZERO_JOB_ID)).isTrue();
}
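
The awaitClusterStopped helper is defined elsewhere in the test class and is not shown in this excerpt. A plausible sketch, assuming it simply polls MiniCluster#isRunning under the shared deadline (an assumption about the helper, not the verbatim Flink code):

private static void awaitClusterStopped(MiniCluster cluster, Deadline deadline) throws Exception {
    // Poll until the MiniCluster reports it is no longer running or the deadline expires.
    while (deadline.hasTimeLeft() && cluster.isRunning()) {
        Thread.sleep(50);
    }
    assertThat(cluster.isRunning()).isFalse();
}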
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class ApplicationDispatcherBootstrapITCase, method testSubmitFailedJobOnApplicationError.
@Test
public void testSubmitFailedJobOnApplicationError() throws Exception {
    final Deadline deadline = Deadline.fromNow(TIMEOUT);
    final JobID jobId = new JobID();
    final Configuration configuration = new Configuration();
    configuration.set(HighAvailabilityOptions.HA_MODE, HighAvailabilityMode.ZOOKEEPER.name());
    configuration.set(DeploymentOptions.TARGET, EmbeddedExecutor.NAME);
    configuration.set(ClientOptions.CLIENT_RETRY_PERIOD, Duration.ofMillis(100));
    configuration.set(DeploymentOptions.SHUTDOWN_ON_APPLICATION_FINISH, false);
    configuration.set(DeploymentOptions.SUBMIT_FAILED_JOB_ON_APPLICATION_ERROR, true);
    configuration.set(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID, jobId.toHexString());
    final TestingMiniClusterConfiguration clusterConfiguration =
            TestingMiniClusterConfiguration.newBuilder().setConfiguration(configuration).build();
    final EmbeddedHaServicesWithLeadershipControl haServices =
            new EmbeddedHaServicesWithLeadershipControl(TestingUtils.defaultExecutor());
    final TestingMiniCluster.Builder clusterBuilder =
            TestingMiniCluster.newBuilder(clusterConfiguration)
                    .setHighAvailabilityServicesSupplier(() -> haServices)
                    .setDispatcherResourceManagerComponentFactorySupplier(
                            createApplicationModeDispatcherResourceManagerComponentFactorySupplier(
                                    clusterConfiguration.getConfiguration(), FailingJob.getProgram()));
    try (final MiniCluster cluster = clusterBuilder.build()) {
        // start mini cluster and submit the job
        cluster.start();
        // wait until the failed job has been submitted
        awaitJobStatus(cluster, jobId, JobStatus.FAILED, deadline);
        final ArchivedExecutionGraph graph = cluster.getArchivedExecutionGraph(jobId).get();
        assertThat(graph.getJobID()).isEqualTo(jobId);
        assertThat(graph.getJobName()).isEqualTo(ApplicationDispatcherBootstrap.FAILED_JOB_NAME);
        assertThat(graph.getFailureInfo())
                .isNotNull()
                .extracting(ErrorInfo::getException)
                .extracting(e -> e.deserializeError(Thread.currentThread().getContextClassLoader()))
                .satisfies(
                        e ->
                                assertThat(e)
                                        .isInstanceOf(ProgramInvocationException.class)
                                        .hasRootCauseInstanceOf(RuntimeException.class)
                                        .hasRootCauseMessage(FailingJob.EXCEPTION_MESSAGE));
    }
}
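
awaitJobStatus is likewise defined outside this excerpt. A hedged sketch of such a helper, polling MiniCluster#getJobStatus until the target state is reached (an assumption, not the actual Flink helper, which may handle the window before the job exists differently):

private static void awaitJobStatus(MiniCluster cluster, JobID jobId, JobStatus status, Deadline deadline)
        throws Exception {
    while (deadline.hasTimeLeft()) {
        try {
            if (cluster.getJobStatus(jobId).get() == status) {
                return;
            }
        } catch (ExecutionException e) {
            // The job may not have been submitted yet; keep polling until the deadline expires.
        }
        Thread.sleep(50);
    }
    throw new IllegalStateException(
            "Job " + jobId + " did not reach status " + status + " before the deadline expired.");
}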