Search in sources :

Example 11 with SamzaPipelineOptions

use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.

the class ConfigGeneratorTest method testStatefulBeamStoreConfig.

/**
 * Checks the {@code stores.beamStore.*} entries generated for a pipeline that applies
 * {@code Sum.integersGlobally()}: first with default options, where no changelog entry is
 * expected, and then again after state durability is switched on, where a changelog topic
 * name is expected.
 */
@Test
public void testStatefulBeamStoreConfig() {
    final String changelogKey = "stores.beamStore.changelog";

    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setJobName("TestStoreConfig");
    pipelineOptions.setRunner(SamzaRunner.class);

    final Pipeline testPipeline = Pipeline.create(pipelineOptions);
    testPipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
    testPipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

    final Map<PValue, String> pValueIds = PViewToIdMapper.buildIdMap(testPipeline);
    final ConfigBuilder builder = new ConfigBuilder(pipelineOptions);

    // First pass: state durability has not been enabled on the options.
    SamzaPipelineTranslator.createConfig(testPipeline, pipelineOptions, pValueIds, builder);
    final Config nonDurableConfig = builder.build();
    assertEquals(
        RocksDbKeyValueStorageEngineFactory.class.getName(),
        nonDurableConfig.get("stores.beamStore.factory"));
    assertEquals("byteArraySerde", nonDurableConfig.get("stores.beamStore.key.serde"));
    assertEquals("stateValueSerde", nonDurableConfig.get("stores.beamStore.msg.serde"));
    assertNull(nonDurableConfig.get(changelogKey));

    // Second pass: the same builder is reused after enabling durable state.
    pipelineOptions.setStateDurable(true);
    SamzaPipelineTranslator.createConfig(testPipeline, pipelineOptions, pValueIds, builder);
    final Config durableConfig = builder.build();
    assertEquals("TestStoreConfig-1-beamStore-changelog", durableConfig.get(changelogKey));
}
Also used : ZkConfig(org.apache.samza.config.ZkConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) RocksDbKeyValueStorageEngineFactory(org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory) PValue(org.apache.beam.sdk.values.PValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 12 with SamzaPipelineOptions

use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.

the class PipelineJsonRendererTest method testCompositePipeline.

/**
 * Builds a pipeline with a timestamped element, a 10 ms fixed window and a per-key integer sum,
 * renders it with {@code PipelineJsonRenderer}, and compares the parsed result with the parsed
 * contents of {@code src/test/resources/ExpectedDag.json}.
 *
 * @throws IOException if the expected DAG file cannot be read
 */
@Test
public void testCompositePipeline() throws IOException {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setRunner(SamzaRunner.class);

    final Pipeline pipeline = Pipeline.create(pipelineOptions);
    pipeline
        .apply(Create.timestamped(TimestampedValue.of(KV.of(1, 1), new Instant(1))))
        .apply(Window.into(FixedWindows.of(Duration.millis(10))))
        .apply(Sum.integersPerKey());

    final byte[] expectedBytes = Files.readAllBytes(Paths.get("src/test/resources/ExpectedDag.json"));
    final String expectedDag = new String(expectedBytes, StandardCharsets.UTF_8);

    // Line separators are stripped from the rendered output before it is parsed, and the
    // comparison is done on parsed JSON rather than on raw text.
    assertEquals(
        JsonParser.parseString(expectedDag),
        JsonParser.parseString(
            PipelineJsonRenderer.toJsonString(pipeline).replaceAll(System.lineSeparator(), "")));
}
Also used : Instant(org.joda.time.Instant) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 13 with SamzaPipelineOptions

use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.

the class ParDoBoundMultiTranslator method createConfig.

/**
 * Assembles the configuration entries contributed by a multi-output {@code ParDo}.
 *
 * <p>Entries come from up to three places: the RocksDB store config when the DoFn's
 * process-element method observes the window, one set of {@code stores.<id>.*} entries per
 * declared state, and whatever the DoFn invoker registrar (when one is set) supplies.
 *
 * @param transform the ParDo whose DoFn is inspected
 * @param node the transform hierarchy node (not read by this method)
 * @param ctx supplies the pipeline options and tracks the state ids seen so far
 * @return a new mutable map holding the generated entries
 * @throws IllegalStateException if a state id was already registered with {@code ctx}
 */
@Override
public Map<String, String> createConfig(ParDo.MultiOutput<InT, OutT> transform, TransformHierarchy.Node node, ConfigContext ctx) {
    final Map<String, String> result = new HashMap<>();
    final DoFnSignature fnSignature = DoFnSignatures.getSignature(transform.getFn().getClass());
    final SamzaPipelineOptions pipelineOptions = ctx.getPipelineOptions();

    // A process-element method that observes the window gets the RocksDB store config.
    if (fnSignature.processElement().observesWindow()) {
        result.putAll(ConfigBuilder.createRocksDBStoreConfig(pipelineOptions));
    }

    if (fnSignature.usesState()) {
        // One store definition per user state declaration.
        for (DoFnSignature.StateDeclaration declaration : fnSignature.stateDeclarations().values()) {
            final String stateId = declaration.id();
            // TODO: remove validation after we support same state id in different ParDo.
            final boolean firstUse = ctx.addStateId(stateId);
            if (!firstUse) {
                throw new IllegalStateException("Duplicate StateId " + stateId + " found in multiple ParDo.");
            }

            final String storeKeyPrefix = "stores." + stateId;
            result.put(storeKeyPrefix + ".factory", "org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory");
            result.put(storeKeyPrefix + ".key.serde", "byteArraySerde");
            result.put(storeKeyPrefix + ".msg.serde", "stateValueSerde");
            result.put(storeKeyPrefix + ".rocksdb.compression", "lz4");
            // A changelog topic is only configured when durable state is requested.
            if (pipelineOptions.getStateDurable()) {
                result.put(storeKeyPrefix + ".changelog", ConfigBuilder.getChangelogTopic(pipelineOptions, stateId));
            }
        }
    }

    // The registrar may be absent; when present it adds its own entries for this DoFn.
    if (doFnInvokerRegistrar != null) {
        result.putAll(doFnInvokerRegistrar.configFor(transform.getFn()));
    }
    return result;
}
Also used : HashMap(java.util.HashMap) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions)

Example 14 with SamzaPipelineOptions

use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testNewTimersAreInsertedInOrder.

/**
 * Verifies that {@code removeReadyTimers()} hands back event-time timers in non-decreasing
 * timestamp order as the input watermark advances, including after some timers have been set a
 * second time, and that the expected number of timers is returned at each step.
 *
 * <p>The store is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testNewTimersAreInsertedInOrder() {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setEventTimerBufferSize(5);
    final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        final StateNamespace nameSpace = StateNamespaces.global();
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");

        // Set ten event-time timers, "timer0".."timer9", with timestamps 0..9.
        for (int i = 0; i < 10; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }

        // Watermark 1: two timers are expected to be ready, in timestamp order.
        Collection<KeyedTimerData<String>> readyTimers;
        timerInternalsFactory.setInputWatermark(new Instant(1));
        long lastTimestamp = 0;
        readyTimers = timerInternalsFactory.removeReadyTimers();
        for (KeyedTimerData<String> keyedTimerData : readyTimers) {
            final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
            assertTrue(lastTimestamp <= currentTimeStamp);
            lastTimestamp = currentTimeStamp;
        }
        assertEquals(2, readyTimers.size());

        // Set "timer0".."timer2" again with the same ids and timestamps 0..2.
        for (int i = 0; i < 3; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }

        // Watermark 5: six timers are expected to be ready, in timestamp order, and four are
        // expected to remain in the event-time buffer afterwards.
        timerInternalsFactory.setInputWatermark(new Instant(5));
        lastTimestamp = 0;
        readyTimers = timerInternalsFactory.removeReadyTimers();
        for (KeyedTimerData<String> keyedTimerData : readyTimers) {
            final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
            assertTrue(lastTimestamp <= currentTimeStamp);
            lastTimestamp = currentTimeStamp;
        }
        assertEquals(6, readyTimers.size());
        assertEquals(4, timerInternalsFactory.getEventTimeBuffer().size());

        // Watermark 10: the remaining four timers are expected to be ready and the buffer empty.
        // lastTimestamp is not reset before this check, so ordering is also checked against the
        // last timestamp seen in the previous batch.
        timerInternalsFactory.setInputWatermark(new Instant(10));
        readyTimers = timerInternalsFactory.removeReadyTimers();
        for (KeyedTimerData<String> keyedTimerData : readyTimers) {
            final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
            assertTrue(lastTimestamp <= currentTimeStamp);
            lastTimestamp = currentTimeStamp;
        }
        assertEquals(4, readyTimers.size());
        assertEquals(0, timerInternalsFactory.getEventTimeBuffer().size());
    } finally {
        // Close the store even when an assertion above fails.
        store.close();
    }
}
Also used : Instant(org.joda.time.Instant) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Example 15 with SamzaPipelineOptions

use of org.apache.beam.runners.samza.SamzaPipelineOptions in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testAllTimersAreFiredWithReload.

/**
 * Verifies that all three event-time timers are returned by {@code removeReadyTimers()} once
 * the watermark passes them, even though the event timer buffer size is set to 2, which is
 * smaller than the number of timers.
 *
 * <p>The store is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testAllTimersAreFiredWithReload() {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setEventTimerBufferSize(2);
    final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        final StateNamespace nameSpace = StateNamespaces.global();
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");

        // Set three event-time timers, "timer0".."timer2", with timestamps 0..2.
        for (int i = 0; i < 3; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }

        // Advance the watermark past every timer and collect what is ready.
        Collection<KeyedTimerData<String>> readyTimers;
        timerInternalsFactory.setInputWatermark(new Instant(3));
        readyTimers = timerInternalsFactory.removeReadyTimers();

        // All three timers are expected, one more than the configured buffer size.
        assertEquals(3, readyTimers.size());
    } finally {
        // Close the store even when the assertion above fails.
        store.close();
    }
}
Also used : TimerInternals(org.apache.beam.runners.core.TimerInternals) Instant(org.joda.time.Instant) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) StateNamespace(org.apache.beam.runners.core.StateNamespace) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Aggregations

SamzaPipelineOptions (org.apache.beam.runners.samza.SamzaPipelineOptions)24 Test (org.junit.Test)20 Instant (org.joda.time.Instant)12 StateNamespace (org.apache.beam.runners.core.StateNamespace)11 TimerInternals (org.apache.beam.runners.core.TimerInternals)11 ByteArray (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray)11 StateValue (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue)11 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)11 Pipeline (org.apache.beam.sdk.Pipeline)9 PValue (org.apache.beam.sdk.values.PValue)7 Config (org.apache.samza.config.Config)6 JobCoordinatorConfig (org.apache.samza.config.JobCoordinatorConfig)6 ZkConfig (org.apache.samza.config.ZkConfig)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 StateSpec (org.apache.beam.sdk.state.StateSpec)3 DoFn (org.apache.beam.sdk.transforms.DoFn)3 HashMap (java.util.HashMap)2 SamzaMetricsContainer (org.apache.beam.runners.samza.metrics.SamzaMetricsContainer)2 ValueState (org.apache.beam.sdk.state.ValueState)2 KV (org.apache.beam.sdk.values.KV)2