Search in sources :

Example 6 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class WindmillTimerInternals method windmillTimerToTimerData.

/**
 * Decodes the tag of a Windmill timer into a {@link TimerData}.
 *
 * @param prefix the prefix the tag must start with; its byte length marks where the namespace
 *     segment begins
 * @param timer the timer whose tag, timestamp and type are decoded
 * @param windowCoder handed to {@code StateNamespaces.fromString} to decode the namespace segment
 * @return the timer data built from the parsed id, family, namespace, firing timestamp, output
 *     timestamp and time domain
 * @throws IllegalArgumentException if the tag does not start with {@code prefix} (assuming
 *     {@code checkArgument} is Guava's {@code Preconditions.checkArgument} - confirm against the
 *     file's imports)
 * @throws RuntimeException wrapping the {@link IOException} raised when the output timestamp
 *     segment cannot be decoded as a VarInt
 */
public static TimerData windmillTimerToTimerData(WindmillNamespacePrefix prefix, Timer timer, Coder<? extends BoundedWindow> windowCoder) {
    // The tag is a path-structure string but cheaper to parse than a proper URI. It follows
    // this pattern, where no component but the ID can contain a slash
    //
    // prefix namespace '+' id [ '+' familyId [ '+' outputTimestamp ] ]
    //
    // prefix ::= '/' prefix_char
    // namespace ::= '/' | '/' window '/'
    // id ::= autogenerated_id | arbitrary_string
    // autogenerated_id ::= timedomain_ordinal ':' millis
    // outputTimestamp ::= VarInt-encoded millis
    //
    // Notes:
    //
    // - the slashes and whatnot in prefix and namespace are owned by that bit of code
    // - the prefix_char is always ASCII 'u' or 's' for "user" or "system"
    // - the namespace is generally a base64 encoding of the window passed through its coder, but:
    // - the GlobalWindow is currently encoded in zero bytes, so it becomes "//"
    // - the Global StateNamespace is different, and becomes "/"
    // - the id is totally arbitrary; currently unescaped though that could change. Note that
    //   the parser below treats the first '+' after the namespace as the end of the id.
    // - familyId and outputTimestamp are optional so that tags written without them still parse
    ByteString tag = timer.getTag();
    checkArgument(tag.startsWith(prefix.byteString()), "Expected timer tag %s to start with prefix %s", tag, prefix.byteString());
    Instant timestamp = WindmillTimeUtils.windmillToHarnessTimestamp(timer.getTimestamp());

    // Parse the namespace: drop the prefix, leave the begin slash.
    int namespaceStart = prefix.byteString().size();
    int namespaceEnd = endOfTagSegment(tag, namespaceStart);
    String namespaceString = tag.substring(namespaceStart, namespaceEnd).toStringUtf8();

    // Parse the timer id, which starts right after the '+' that closed the namespace.
    int timerIdStart = namespaceEnd + 1;
    int timerIdEnd = endOfTagSegment(tag, timerIdStart);
    String timerId = tag.substring(timerIdStart, timerIdEnd).toStringUtf8();

    // Parse the timer family.
    int timerFamilyStart = timerIdEnd + 1;
    int timerFamilyEnd = endOfTagSegment(tag, timerFamilyStart);
    // For backwards compatibility, handle the case where the timer family isn't present.
    String timerFamily = (timerFamilyStart < tag.size()) ? tag.substring(timerFamilyStart, timerFamilyEnd).toStringUtf8() : "";

    // Parse the output timestamp. Not using '+' as a terminator because the output timestamp is the
    // last segment in the tag and the timestamp encoding itself may contain '+'.
    int outputTimestampStart = timerFamilyEnd + 1;
    // For backwards compatibility, handle the case where the output timestamp isn't present:
    // the output timestamp then defaults to the firing timestamp.
    Instant outputTimestamp = timestamp;
    if (outputTimestampStart < tag.size()) {
        try {
            outputTimestamp = new Instant(VarInt.decodeLong(tag.substring(outputTimestampStart, tag.size()).newInput()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    StateNamespace namespace = StateNamespaces.fromString(namespaceString, windowCoder);
    return TimerData.of(timerId, timerFamily, namespace, timestamp, outputTimestamp, timerTypeToTimeDomain(timer.getType()));
}

/**
 * Returns the index of the first '+' byte in {@code tag} at or after {@code start}, or
 * {@code start} itself when it is already at or past the end of the tag, or {@code tag.size()}
 * when no '+' follows.
 */
private static int endOfTagSegment(ByteString tag, int start) {
    int end = start;
    while (end < tag.size() && tag.byteAt(end) != '+') {
        end++;
    }
    return end;
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Instant(org.joda.time.Instant) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException) StateNamespace(org.apache.beam.runners.core.StateNamespace)

Example 7 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testBufferRefilledAfterRestoreToNonFullState.

/**
 * Exercises the event time timer buffer with a configured size of 5: fires a first batch of
 * timers, sets more timers, and checks the number of ready timers and the buffer size after each
 * step, as well as the firing order of the second batch.
 */
@Test
public void testBufferRefilledAfterRestoreToNonFullState() {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setEventTimerBufferSize(5);
    final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        final StateNamespace nameSpace = StateNamespaces.global();
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");
        // set six event time timers, timestamped 0 - 5 (one more than the buffer size).
        for (int i = 0; i < 6; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }
        // watermark 4 makes the five timers timestamped 0 - 4 ready.
        Collection<KeyedTimerData<String>> readyTimers;
        timerInternalsFactory.setInputWatermark(new Instant(4));
        readyTimers = timerInternalsFactory.removeReadyTimers();
        assertEquals(5, readyTimers.size());
        // reloaded timer5
        assertEquals(1, timerInternalsFactory.getEventTimeBuffer().size());
        // set seven more timers, timestamped 6 - 12.
        for (int i = 6; i < 13; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }
        // timers should go into buffer not state
        assertEquals(5, timerInternalsFactory.getEventTimeBuffer().size());
        // watermark 10 comes, 6 timers will be evicted in order and 2 still in buffer.
        timerInternalsFactory.setInputWatermark(new Instant(10));
        readyTimers = timerInternalsFactory.removeReadyTimers();
        long lastTimestamp = 0;
        for (KeyedTimerData<String> keyedTimerData : readyTimers) {
            final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
            assertTrue(lastTimestamp <= currentTimeStamp);
            lastTimestamp = currentTimeStamp;
        }
        assertEquals(6, readyTimers.size());
        assertEquals(2, timerInternalsFactory.getEventTimeBuffer().size());
    } finally {
        // Close the store even when an assertion above fails, so it is not left open.
        store.close();
    }
}
Also used : Instant(org.joda.time.Instant) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Example 8 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testAllTimersAreFiredInOrder.

/**
 * Test the total number of event time timers reloaded into memory is aligned with the number of
 * the event time timers written to the store. Moreover, event time timers reloaded into memory
 * are maintained in order.
 */
@Test
public void testAllTimersAreFiredInOrder() {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setEventTimerBufferSize(5);
    final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        final StateNamespace nameSpace = StateNamespaces.global();
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");
        // timers in store now are timestamped from 0 - 7.
        for (int i = 0; i < 8; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }
        // fire the first 2 timers.
        // timers in memory now are timestamped from 2 - 4;
        // timers in store now are timestamped from 2 - 7.
        Collection<KeyedTimerData<String>> readyTimers;
        timerInternalsFactory.setInputWatermark(new Instant(1));
        long lastTimestamp = 0;
        readyTimers = timerInternalsFactory.removeReadyTimers();
        // ready timers must come back in non-decreasing timestamp order.
        for (KeyedTimerData<String> keyedTimerData : readyTimers) {
            final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
            assertTrue(lastTimestamp <= currentTimeStamp);
            lastTimestamp = currentTimeStamp;
        }
        assertEquals(2, readyTimers.size());
        // add timers timestamped 8 - 19; the total number of timers to fire is 18.
        for (int i = 8; i < 20; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }
        timerInternalsFactory.setInputWatermark(new Instant(20));
        lastTimestamp = 0;
        readyTimers = timerInternalsFactory.removeReadyTimers();
        for (KeyedTimerData<String> keyedTimerData : readyTimers) {
            final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
            assertTrue(lastTimestamp <= currentTimeStamp);
            lastTimestamp = currentTimeStamp;
        }
        assertEquals(18, readyTimers.size());
    } finally {
        // Close the store even when an assertion above fails, so it is not left open.
        store.close();
    }
}
Also used : Instant(org.joda.time.Instant) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Example 9 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testRestoreEventBufferSize.

/**
 * Sets one event time timer, closes the store, then builds a second factory over a newly created
 * store and checks that the timer shows up in its event time buffer, fires once the watermark
 * passes it, and that a timer set afterwards is buffered and fired in the same way.
 */
@Test
public void testRestoreEventBufferSize() throws Exception {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    final String key = "testKey";
    final StateNamespace nameSpace = StateNamespaces.global();

    // Phase 1: write a single timer, then close the store.
    KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey(key);
        final TimerInternals.TimerData timer1 = TimerInternals.TimerData.of("timer1", nameSpace, new Instant(10), new Instant(10), TimeDomain.EVENT_TIME);
        timerInternals.setTimer(timer1);
    } finally {
        // Close the store even if setup fails, so it is not left open.
        store.close();
    }

    // Phase 2: restore by creating a new instance
    store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> restoredFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        assertEquals(1, restoredFactory.getEventTimeBuffer().size());
        restoredFactory.setInputWatermark(new Instant(150));
        Collection<KeyedTimerData<String>> readyTimers = restoredFactory.removeReadyTimers();
        assertEquals(1, readyTimers.size());
        // Timer 1 should be evicted from buffer
        assertTrue(restoredFactory.getEventTimeBuffer().isEmpty());
        final TimerInternals restoredTimerInternals = restoredFactory.timerInternalsForKey(key);
        final TimerInternals.TimerData timer2 = TimerInternals.TimerData.of("timer2", nameSpace, new Instant(200), new Instant(200), TimeDomain.EVENT_TIME);
        restoredTimerInternals.setTimer(timer2);
        // Timer 2 should be added to the Event buffer
        assertEquals(1, restoredFactory.getEventTimeBuffer().size());
        // Timer 2 should not be ready
        readyTimers = restoredFactory.removeReadyTimers();
        assertEquals(0, readyTimers.size());
        restoredFactory.setInputWatermark(new Instant(250));
        // Timer 2 should be ready
        readyTimers = restoredFactory.removeReadyTimers();
        assertEquals(1, readyTimers.size());
        // Build the expected keyed timer from the coder-encoded key bytes.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        StringUtf8Coder.of().encode(key, baos);
        byte[] keyBytes = baos.toByteArray();
        assertEquals(readyTimers, Arrays.asList(new KeyedTimerData<>(keyBytes, key, timer2)));
    } finally {
        // Close the restored store even when an assertion above fails.
        store.close();
    }
}
Also used : Instant(org.joda.time.Instant) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteArrayOutputStream(java.io.ByteArrayOutputStream) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Example 10 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testProcessingTimeTimers.

/**
 * Sets three processing time timers (one of them with a timer family id), checks that they are
 * registered with the timer registry, then builds a second factory over a newly created store,
 * checks that the three timers are registered again, and removes them.
 */
@Test
public void testProcessingTimeTimers() throws IOException {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    final StateNamespace nameSpace = StateNamespaces.global();
    // The timers are declared up front because both phases below refer to them.
    final TimerInternals.TimerData timer1 = TimerInternals.TimerData.of("timer1", nameSpace, new Instant(10), new Instant(10), TimeDomain.PROCESSING_TIME);
    final TimerInternals.TimerData timer2 = TimerInternals.TimerData.of("timer2", nameSpace, new Instant(100), new Instant(100), TimeDomain.PROCESSING_TIME);
    final TimerInternals.TimerData timer3 = TimerInternals.TimerData.of("timer3", "timerFamilyId3", nameSpace, new Instant(100), new Instant(100), TimeDomain.PROCESSING_TIME);

    // Phase 1: set the timers, then close the store.
    KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        TestTimerRegistry timerRegistry = new TestTimerRegistry();
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(timerRegistry, "timer", pipelineOptions, store);
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");
        timerInternals.setTimer(timer1);
        timerInternals.setTimer(timer2);
        timerInternals.setTimer(timer3);
        assertEquals(3, timerRegistry.timers.size());
    } finally {
        // Close the store even when the assertion above fails, so it is not left open.
        store.close();
    }

    // Phase 2: restore by creating a new instance
    store = createStore();
    try {
        TestTimerRegistry restoredRegistry = new TestTimerRegistry();
        final SamzaTimerInternalsFactory<String> restoredFactory = createTimerInternalsFactory(restoredRegistry, "timer", pipelineOptions, store);
        assertEquals(3, restoredRegistry.timers.size());
        // Keyed timers carry the coder-encoded key bytes alongside the key itself.
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        StringUtf8Coder.of().encode("testKey", baos);
        final byte[] keyBytes = baos.toByteArray();
        restoredFactory.removeProcessingTimer(new KeyedTimerData<>(keyBytes, "testKey", timer1));
        restoredFactory.removeProcessingTimer(new KeyedTimerData<>(keyBytes, "testKey", timer2));
        restoredFactory.removeProcessingTimer(new KeyedTimerData<>(keyBytes, "testKey", timer3));
    } finally {
        // Close the restored store even when an assertion above fails.
        store.close();
    }
}
Also used : Instant(org.joda.time.Instant) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteArrayOutputStream(java.io.ByteArrayOutputStream) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Aggregations

StateNamespace (org.apache.beam.runners.core.StateNamespace)43 Test (org.junit.Test)30 Instant (org.joda.time.Instant)20 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)15 TimerInternals (org.apache.beam.runners.core.TimerInternals)15 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)13 SamzaPipelineOptions (org.apache.beam.runners.samza.SamzaPipelineOptions)11 ByteArray (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray)11 StateValue (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 BagState (org.apache.beam.sdk.state.BagState)10 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)5 ValueState (org.apache.beam.sdk.state.ValueState)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 ByteBuffer (java.nio.ByteBuffer)3 StateNamespaces (org.apache.beam.runners.core.StateNamespaces)3 WindowNamespace (org.apache.beam.runners.core.StateNamespaces.WindowNamespace)3 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)3 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)3 Coder (org.apache.beam.sdk.coders.Coder)3