use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.
the class ConfigGeneratorTest method testStatefulBeamStoreConfig.
/**
 * Checks the {@code stores.beamStore.*} entries produced by
 * {@code SamzaPipelineTranslator.createConfig} for a small global-sum pipeline.
 *
 * <p>The config is generated twice with the same builder: once before
 * {@code setStateDurable(true)} is called on the options, where no changelog entry is expected,
 * and once afterwards, where the changelog entry is expected to be present.
 */
@Test
public void testStatefulBeamStoreConfig() {
  final SamzaPipelineOptions options =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestStoreConfig");
  options.setRunner(SamzaRunner.class);

  final Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);

  // First pass: generated before durable state is switched on.
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config initialConfig = configBuilder.build();

  final String expectedFactory = RocksDbKeyValueStorageEngineFactory.class.getName();
  assertEquals(expectedFactory, initialConfig.get("stores.beamStore.factory"));
  assertEquals("byteArraySerde", initialConfig.get("stores.beamStore.key.serde"));
  assertEquals("stateValueSerde", initialConfig.get("stores.beamStore.msg.serde"));
  assertNull(initialConfig.get("stores.beamStore.changelog"));

  // Second pass: same pipeline and builder, now with durable state requested.
  options.setStateDurable(true);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config durableConfig = configBuilder.build();

  final String expectedChangelog = "TestStoreConfig-1-beamStore-changelog";
  assertEquals(expectedChangelog, durableConfig.get("stores.beamStore.changelog"));
}
use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.
the class PipelineJsonRendererTest method testCompositePipeline.
/**
 * Renders a timestamped, fixed-windowed, per-key sum pipeline with
 * {@code PipelineJsonRenderer.toJsonString} and compares the parsed result with the parsed
 * contents of {@code src/test/resources/ExpectedDag.json}.
 *
 * @throws IOException if the expected DAG file cannot be read
 */
@Test
public void testCompositePipeline() throws IOException {
  final SamzaPipelineOptions options =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setRunner(SamzaRunner.class);

  final Pipeline p = Pipeline.create(options);
  p.apply(Create.timestamped(TimestampedValue.of(KV.of(1, 1), new Instant(1))))
      .apply(Window.into(FixedWindows.of(Duration.millis(10))))
      .apply(Sum.integersPerKey());

  // Expected DAG, read from the test resources as UTF-8 text.
  final String jsonDagFileName = "src/test/resources/ExpectedDag.json";
  final byte[] expectedBytes = Files.readAllBytes(Paths.get(jsonDagFileName));
  final String jsonDag = new String(expectedBytes, StandardCharsets.UTF_8);

  // Actual DAG, with platform line separators stripped before parsing.
  final String renderedDag =
      PipelineJsonRenderer.toJsonString(p).replaceAll(System.lineSeparator(), "");

  // Compare parsed JSON trees rather than raw strings.
  assertEquals(JsonParser.parseString(jsonDag), JsonParser.parseString(renderedDag));
}
use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.
the class ParDoBoundMultiTranslator method createConfig.
/**
 * Collects the configuration entries contributed by a multi-output {@code ParDo}.
 *
 * <p>Three sources are merged into the returned map, in this order:
 *
 * <ol>
 *   <li>the result of {@code ConfigBuilder.createRocksDBStoreConfig}, when the DoFn's
 *       process-element method observes the window;
 *   <li>one group of {@code stores.<stateId>.*} entries per state declaration, when the DoFn
 *       uses state;
 *   <li>whatever {@code doFnInvokerRegistrar.configFor} returns, when a registrar is set.
 * </ol>
 *
 * @param transform the ParDo whose DoFn signature is inspected
 * @param node the transform hierarchy node; not read by this method
 * @param ctx supplies the pipeline options and tracks the state ids seen so far
 * @return a new mutable map holding the generated entries
 * @throws IllegalStateException if {@code ctx.addStateId} rejects a state id as already seen
 */
@Override
public Map<String, String> createConfig(
    ParDo.MultiOutput<InT, OutT> transform, TransformHierarchy.Node node, ConfigContext ctx) {
  final Map<String, String> config = new HashMap<>();
  final DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
  final SamzaPipelineOptions options = ctx.getPipelineOptions();

  // A window-observing process-element method pulls in the RocksDB store config.
  final boolean observesWindow = signature.processElement().observesWindow();
  if (observesWindow) {
    config.putAll(ConfigBuilder.createRocksDBStoreConfig(options));
  }

  if (signature.usesState()) {
    // One store definition per declared user state.
    for (DoFnSignature.StateDeclaration declaration : signature.stateDeclarations().values()) {
      final String storeId = declaration.id();

      // TODO: remove validation after we support same state id in different ParDo.
      final boolean newlyRegistered = ctx.addStateId(storeId);
      if (!newlyRegistered) {
        throw new IllegalStateException(
            "Duplicate StateId " + storeId + " found in multiple ParDo.");
      }

      final String storeKeyPrefix = "stores." + storeId;
      config.put(
          storeKeyPrefix + ".factory",
          "org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory");
      config.put(storeKeyPrefix + ".key.serde", "byteArraySerde");
      config.put(storeKeyPrefix + ".msg.serde", "stateValueSerde");
      config.put(storeKeyPrefix + ".rocksdb.compression", "lz4");

      // A changelog entry is only written when durable state is requested.
      if (options.getStateDurable()) {
        config.put(
            storeKeyPrefix + ".changelog", ConfigBuilder.getChangelogTopic(options, storeId));
      }
    }
  }

  if (doFnInvokerRegistrar != null) {
    config.putAll(doFnInvokerRegistrar.configFor(transform.getFn()));
  }
  return config;
}
use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.
the class SamzaTimerInternalsFactoryTest method testNewTimersAreInsertedInOrder.
/**
 * Verifies that {@code removeReadyTimers()} hands back event-time timers in non-decreasing
 * timestamp order, including after some timers have fired and others have been set again.
 *
 * <p>The event timer buffer size is set to 5 while ten timers (timestamps 0 to 9) are
 * registered, so the test also checks the size of the in-memory event-time buffer after each
 * later watermark advance.
 *
 * <p>The store is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testNewTimersAreInsertedInOrder() {
  final SamzaPipelineOptions pipelineOptions =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  pipelineOptions.setEventTimerBufferSize(5);

  final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
  try {
    final SamzaTimerInternalsFactory<String> timerInternalsFactory =
        createTimerInternalsFactory(null, "timer", pipelineOptions, store);
    final StateNamespace nameSpace = StateNamespaces.global();
    final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");

    // Register timers "timer0" .. "timer9" with timestamps 0 - 9.
    for (int i = 0; i < 10; i++) {
      timerInternals.setTimer(
          nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
    }

    // Watermark 1: the two timers stamped 0 and 1 are expected to fire, in order.
    Collection<KeyedTimerData<String>> readyTimers;
    timerInternalsFactory.setInputWatermark(new Instant(1));
    long lastTimestamp = 0;
    readyTimers = timerInternalsFactory.removeReadyTimers();
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
      final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
      assertTrue(lastTimestamp <= currentTimeStamp);
      lastTimestamp = currentTimeStamp;
    }
    assertEquals(2, readyTimers.size());

    // Set "timer0" .. "timer2" again, with timestamps 0 - 2.
    for (int i = 0; i < 3; i++) {
      timerInternals.setTimer(
          nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
    }

    // Watermark 5: six timers are expected to fire in order, leaving four in the buffer.
    // NOTE(review): the original comments attribute this to the buffer being reloaded from the
    // store once during eviction -- confirm against SamzaTimerInternalsFactory.
    timerInternalsFactory.setInputWatermark(new Instant(5));
    lastTimestamp = 0;
    readyTimers = timerInternalsFactory.removeReadyTimers();
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
      final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
      assertTrue(lastTimestamp <= currentTimeStamp);
      lastTimestamp = currentTimeStamp;
    }
    assertEquals(6, readyTimers.size());
    assertEquals(4, timerInternalsFactory.getEventTimeBuffer().size());

    // Watermark 10: the remaining four timers are expected to fire. lastTimestamp is not reset
    // here, so ordering is also checked relative to the last timer of the previous batch.
    timerInternalsFactory.setInputWatermark(new Instant(10));
    readyTimers = timerInternalsFactory.removeReadyTimers();
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
      final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
      assertTrue(lastTimestamp <= currentTimeStamp);
      lastTimestamp = currentTimeStamp;
    }
    assertEquals(4, readyTimers.size());
    assertEquals(0, timerInternalsFactory.getEventTimeBuffer().size());
  } finally {
    // Close the store even when an assertion above fails.
    store.close();
  }
}
use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.
the class SamzaTimerInternalsFactoryTest method testAllTimersAreFiredWithReload.
/**
 * Verifies that every registered event-time timer fires even when more timers are set than the
 * configured event timer buffer size.
 *
 * <p>The buffer size is set to 2 and three timers (timestamps 0 to 2) are registered; after the
 * watermark advances to 3, all three are expected from {@code removeReadyTimers()}.
 *
 * <p>The store is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testAllTimersAreFiredWithReload() {
  final SamzaPipelineOptions pipelineOptions =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  pipelineOptions.setEventTimerBufferSize(2);

  final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
  try {
    final SamzaTimerInternalsFactory<String> timerInternalsFactory =
        createTimerInternalsFactory(null, "timer", pipelineOptions, store);
    final StateNamespace nameSpace = StateNamespaces.global();
    final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");

    // Register timers "timer0" .. "timer2" with timestamps 0 - 2.
    for (int i = 0; i < 3; i++) {
      timerInternals.setTimer(
          nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
    }

    // The number of event-time timers fired should equal the number of timers that were set.
    Collection<KeyedTimerData<String>> readyTimers;
    timerInternalsFactory.setInputWatermark(new Instant(3));
    readyTimers = timerInternalsFactory.removeReadyTimers();

    // Three timers exceed the buffer size of 2; all are still expected to be returned.
    assertEquals(3, readyTimers.size());
  } finally {
    // Close the store even when the assertion above fails.
    store.close();
  }
}
Aggregations