Use of org.apache.flink.contrib.streaming.state.RocksDBStateBackend in project flink by apache.
The class EventTimeWindowCheckpointingITCase, method setupRocksDB.
private void setupRocksDB(Configuration config, int fileSizeThreshold, boolean incrementalCheckpoints) throws IOException {
// Configure the managed memory size as 64 MB per slot for the RocksDB state backend.
config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.ofMebiBytes(PARALLELISM / NUM_OF_TASK_MANAGERS * 64));
final String rocksDb = tempFolder.newFolder().getAbsolutePath();
final File backups = tempFolder.newFolder().getAbsoluteFile();
// We use the fs backend with a small threshold here to test the behaviour with file
// references, not self-contained byte handles.
RocksDBStateBackend rdb = new RocksDBStateBackend(new FsStateBackend(Path.fromLocalFile(backups).toUri(), fileSizeThreshold), incrementalCheckpoints);
rdb.setDbStoragePath(rocksDb);
this.stateBackend = rdb;
}
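For reference, a minimal sketch of the same setup against the newer API, assuming Flink 1.13 or later, where the legacy RocksDBStateBackend/FsStateBackend pair is split into EmbeddedRocksDBStateBackend plus a separate checkpoint storage location (the checkpoint path below is a placeholder, not a value from the test):
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Keyed state lives in embedded RocksDB instances; the flag enables incremental checkpoints,
// mirroring the 'incrementalCheckpoints' argument above.
env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
// Checkpoint data (the part the FsStateBackend handled above) goes to a file system URI.
env.getCheckpointConfig().setCheckpointStorage("file:///tmp/flink-checkpoints");
env.enableCheckpointing(1000);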
Use of org.apache.flink.contrib.streaming.state.RocksDBStateBackend in project flink by apache.
The class LegacyStatefulJobSavepointMigrationITCase, method testSavepointRestore.
@Test
public void testSavepointRestore() throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRestartStrategy(RestartStrategies.noRestart());
switch(testStateBackend) {
case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
break;
case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
env.setStateBackend(new MemoryStateBackend());
break;
default:
throw new UnsupportedOperationException();
}
env.enableChangelogStateBackend(false);
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
env.addSource(new CheckingRestoringSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
        .flatMap(new CheckingRestoringFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
        .keyBy(0)
        .flatMap(new CheckingRestoringFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
        .keyBy(0)
        .flatMap(new CheckingKeyedStateFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
        .keyBy(0)
        .transform("custom_operator", new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
                new CheckingRestoringUdfOperator(new CheckingRestoringFlatMapWithKeyedStateInOperator()))
        .uid("LegacyCheckpointedOperator")
        .keyBy(0)
        .transform("timely_stateful_operator", new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
                new CheckingTimelyStatefulOperator())
        .uid("TimelyStatefulOperator")
        .addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());
restoreAndExecute(
        env,
        getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
        new Tuple2<>(CheckingRestoringSource.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 1),
        new Tuple2<>(CheckingRestoringFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingRestoringFlatMapWithKeyedState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingKeyedStateFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingRestoringUdfOperator.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingRestoringFlatMapWithKeyedStateInOperator.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESS_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_EVENT_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESSING_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
        new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
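The expected (accumulator, count) pairs passed to restoreAndExecute are driven by accumulators that the checking functions register once their restored state has been verified. A minimal sketch of that pattern (hypothetical class and accumulator names, not the actual test utilities):
import org.apache.flink.api.common.accumulators.IntCounter;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public class RestoreCheckingFlatMap extends RichFlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {

    public static final String SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR = "successful-restore-checks";

    private transient IntCounter successfulChecks;

    @Override
    public void open(Configuration parameters) {
        successfulChecks = new IntCounter();
        getRuntimeContext().addAccumulator(SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, successfulChecks);
    }

    @Override
    public void flatMap(Tuple2<Long, Long> value, Collector<Tuple2<Long, Long>> out) {
        // The real checking functions would first assert that the state restored from the
        // savepoint has the expected contents before counting the element as checked.
        successfulChecks.add(1);
        out.collect(value);
    }
}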
Use of org.apache.flink.contrib.streaming.state.RocksDBStateBackend in project flink by apache.
The class KeyedStateCheckpointingITCase, method testWithRocksDbBackendIncremental.
@Test
public void testWithRocksDbBackendIncremental() throws Exception {
RocksDBStateBackend incRocksDbBackend = new RocksDBStateBackend(new MemoryStateBackend(MAX_MEM_STATE_SIZE), true);
incRocksDbBackend.setDbStoragePath(tmpFolder.newFolder().getAbsolutePath());
testProgramWithBackend(incRocksDbBackend);
}
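Incremental checkpointing can also be switched on through configuration rather than the constructor flag. A minimal sketch, assuming Flink's standard configuration keys (the checkpoint directory is a placeholder):
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

Configuration config = new Configuration();
config.setString("state.backend", "rocksdb");
config.setBoolean("state.backend.incremental", true);
config.setString("state.checkpoints.dir", "file:///tmp/flink-checkpoints"); // placeholder path
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(config);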
Use of org.apache.flink.contrib.streaming.state.RocksDBStateBackend in project mangolaa-streams-processing-flink by Adsizzlerlabs.
The class StreamingJob, method main.
public static void main(String[] args) throws Exception {
val flinkEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// Using RocksDB backend.
flinkEnv.setStateBackend(new RocksDBStateBackend(ROCKS_DB_CHECKPOINT_URI, ENABLE_INCREMENTAL_CHECKPOINT));
// Deserializers
val bidReqGzipJsonDeserializer = new BidReqGzipJsonDeserializer();
val bidRespGzipJsonDeserializer = new BidRespGzipJsonDeserializer();
val winNotificationGzipJsonDeserializer = new WinNotificationGzipJsonDeserializer();
val impressionGzipJsonDeserializer = new ImpressionGzipJsonDeserializer();
val clickGzipJsonDeserializer = new ClickGzipJsonDeserializer();
val conversionGzipJsonDeserializer = new ConversionGzipJsonDeserializer();
val postbackGzipJsonDeserializer = new PostbackGzipJsonDeserializer();
// Kafka 0.11 is the source of the streams
val bidReqKafkaConsumer = new FlinkKafkaConsumer011<BidReq>(KafkaTopics.BID_REQ, bidReqGzipJsonDeserializer, kafkaProperties());
val bidRespKafkaConsumer = new FlinkKafkaConsumer011<BidResp>(KafkaTopics.BID_RESPONSE, bidRespGzipJsonDeserializer, kafkaProperties());
val winNotificationKafkaConsumer = new FlinkKafkaConsumer011<WinNotification>(KafkaTopics.WINS, winNotificationGzipJsonDeserializer, kafkaProperties());
val impressionsKafkaConsumer = new FlinkKafkaConsumer011<Impression>(KafkaTopics.IMPRESSIONS, impressionGzipJsonDeserializer, kafkaProperties());
val clicksKafkaConsumer = new FlinkKafkaConsumer011<Click>(KafkaTopics.CLICKS, clickGzipJsonDeserializer, kafkaProperties());
val conversionsKafkaConsumer = new FlinkKafkaConsumer011<Conversion>(KafkaTopics.CONVERSIONS, conversionGzipJsonDeserializer, kafkaProperties());
val postbacksKafkaConsumer = new FlinkKafkaConsumer011<Postback>(KafkaTopics.POSTBACKS, postbackGzipJsonDeserializer, kafkaProperties());
// Streams
val bidReqStream = flinkEnv.addSource(bidReqKafkaConsumer);
val bidRespStream = flinkEnv.addSource(bidRespKafkaConsumer);
val winNotificationStream = flinkEnv.addSource(winNotificationKafkaConsumer);
val impressionStream = flinkEnv.addSource(impressionsKafkaConsumer);
val clickStream = flinkEnv.addSource(clicksKafkaConsumer);
val conversionStream = flinkEnv.addSource(conversionsKafkaConsumer);
val postbackStream = flinkEnv.addSource(postbacksKafkaConsumer);
// Windowed Stream
val bidReqWindowedStream = bidReqStream.keyBy(new AggregatedBidReqKey()).timeWindow(Time.minutes(1));
val bidRespWindowedStream = bidRespStream.keyBy(new AggregatedBidRespKey()).timeWindow(Time.minutes(1));
val winNotificationWindowedStream = winNotificationStream.keyBy(new AggregatedWinNotificationKey()).timeWindow(Time.minutes(1));
val impressionWindowedStream = impressionStream.keyBy(new AggregatedImpressionKey()).timeWindow(Time.minutes(1));
val clickWindowedStream = clickStream.keyBy(new AggregatedClickKey()).timeWindow(Time.minutes(1));
val conversionWindowedStream = conversionStream.keyBy(new AggregatedConversionKey()).timeWindow(Time.minutes(1));
val postbackWindowedStream = postbackStream.keyBy(new AggregatedPostbackKey()).timeWindow(Time.minutes(1));
// Aggregated Streams
val aggregatedBidReqStream = bidReqWindowedStream.apply(new BidReqWindowCountFunction()).name("Count Bid Requests in a Windowed Stream");
val aggregatedBidRespStream = bidRespWindowedStream.apply(new BidRespWindowCountFunction()).name("Count Bid Responses in a Windowed Stream");
val aggregatedWinStream = winNotificationWindowedStream.apply(new WinNotificationCountFunction()).name("Counting WinNotifications in a Windowed Stream");
val aggregatedImpressionStream = impressionWindowedStream.apply(new ImpressionCountFunction()).name("Counting Impression in a Windowed Stream");
val aggregatedClickStream = clickWindowedStream.apply(new ClickCountFunction()).name("Counting Clicks in a Windowed Stream");
val aggregatedConversionStream = conversionWindowedStream.apply(new ConversionCountFunction()).name("Counting Conversions in a Windowed Stream");
val aggregatedPostbackStream = postbackWindowedStream.apply(new PostbackWindowCountFunction()).name("Counting Postback in a Windowed Stream");
// Serializers for Aggregated objects
val aggregatedBidReqJsonSerializer = new JsonSerializer<AggregatedBidReq>();
val aggregatedBidRespJsonSerializer = new JsonSerializer<AggregatedBidResp>();
val aggregatedWinNotificationJsonSerializer = new JsonSerializer<AggregatedWin>();
val aggregatedImpressionJsonSerializer = new JsonSerializer<AggregatedImpression>();
val aggregatedClickJsonSerializer = new JsonSerializer<AggregatedClick>();
val aggregatedConversionJsonSerializer = new JsonSerializer<AggregatedConversion>();
val aggregatedPostbackJsonSerializer = new JsonSerializer<AggregatedPostback>();
// Sinks for Aggregated objects
val aggregatedBidReqKafkaSink = new FlinkKafkaProducer011<AggregatedBidReq>(KafkaTopics.AGGREGATED_BID_REQ, aggregatedBidReqJsonSerializer, kafkaProperties());
val aggregatedBidRespKafkaSink = new FlinkKafkaProducer011<AggregatedBidResp>(KafkaTopics.AGGREGATED_BID_RESP, aggregatedBidRespJsonSerializer, kafkaProperties());
val aggregatedWinKafkaSink = new FlinkKafkaProducer011<AggregatedWin>(KafkaTopics.AGGREGATED_WINS, aggregatedWinNotificationJsonSerializer, kafkaProperties());
val aggregatedImpressionKafkaSink = new FlinkKafkaProducer011<AggregatedImpression>(KafkaTopics.AGGREGATED_IMPRESSIONS, aggregatedImpressionJsonSerializer, kafkaProperties());
val aggregatedClickKafkaSink = new FlinkKafkaProducer011<AggregatedClick>(KafkaTopics.AGGREGATED_CLICKS, aggregatedClickJsonSerializer, kafkaProperties());
val aggregatedConversionKafkaSink = new FlinkKafkaProducer011<AggregatedConversion>(KafkaTopics.AGGREGATED_CONVERSIONS, aggregatedConversionJsonSerializer, kafkaProperties());
val aggregatedPostbackKafkaSink = new FlinkKafkaProducer011<AggregatedPostback>(KafkaTopics.AGGREGATED_POSTBACKS, aggregatedPostbackJsonSerializer, kafkaProperties());
// Attach sink to aggregated streams
aggregatedBidReqStream.addSink(aggregatedBidReqKafkaSink);
aggregatedBidRespStream.addSink(aggregatedBidRespKafkaSink);
aggregatedWinStream.addSink(aggregatedWinKafkaSink);
aggregatedImpressionStream.addSink(aggregatedImpressionKafkaSink);
aggregatedClickStream.addSink(aggregatedClickKafkaSink);
aggregatedConversionStream.addSink(aggregatedConversionKafkaSink);
aggregatedPostbackStream.addSink(aggregatedPostbackKafkaSink);
// execute program
flinkEnv.execute("Count events in a time window for the Mangolaa platform");
}
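The kafkaProperties() helper is referenced above but not shown in this snippet. A minimal sketch of what such a helper typically contains (the broker address and group id are hypothetical placeholders, not values from the original project):
import java.util.Properties;

private static Properties kafkaProperties() {
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "localhost:9092"); // hypothetical broker list
    props.setProperty("group.id", "mangolaa-streams-processing"); // hypothetical consumer group
    return props;
}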
Use of org.apache.flink.contrib.streaming.state.RocksDBStateBackend in project flink by apache.
The class StatefulUDFSavepointMigrationITCase, method testCreateSavepointOnFlink11WithRocksDB.
/**
* This has to be manually executed to create the savepoint on Flink 1.1.
*/
@Test
@Ignore
public void testCreateSavepointOnFlink11WithRocksDB() throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
RocksDBStateBackend rocksBackend = new RocksDBStateBackend(new MemoryStateBackend());
// rocksBackend.enableFullyAsyncSnapshots();
env.setStateBackend(rocksBackend);
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
// create source
env.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
        .flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
        .keyBy(0)
        .flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
        .keyBy(0)
        .flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
        .keyBy(0)
        .transform("custom_operator", new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
                new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState()))
        .uid("LegacyCheckpointedOperator")
        .addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>(EXPECTED_ELEMENTS_ACCUMULATOR));
executeAndSavepoint(
        env,
        "src/test/resources/stateful-udf-migration-itcase-flink1.1-savepoint-rocksdb",
        new Tuple2<>(EXPECTED_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
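Beyond choosing the checkpoint stream backend, the legacy RocksDBStateBackend exposes a few tuning knobs that are often set alongside it. A minimal sketch (the local storage paths are hypothetical; assumes the imports used above plus org.apache.flink.contrib.streaming.state.PredefinedOptions):
RocksDBStateBackend rocksBackend = new RocksDBStateBackend(new MemoryStateBackend());
// Spread the local RocksDB working directories across several disks.
rocksBackend.setDbStoragePaths("/disk1/flink-rocksdb", "/disk2/flink-rocksdb");
// Apply a predefined RocksDB option profile, here one tuned for spinning disks with ample memory.
rocksBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
env.setStateBackend(rocksBackend);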