Search in sources :

Example 1 with ValueWithRecordId

use of org.apache.beam.sdk.values.ValueWithRecordId in project beam by apache.

In class DedupingOperator, method processElement:

/**
 * Forwards the incoming element downstream only when {@code shouldOutput} accepts the current
 * key and key-group index. The forwarded element carries the inner value, with the
 * {@link ValueWithRecordId} wrapper removed.
 *
 * @param streamRecord the record holding a windowed value wrapped with its record id
 * @throws Exception if any of the called operations fails
 */
@Override
public void processElement(StreamRecord<WindowedValue<ValueWithRecordId<T>>> streamRecord) throws Exception {
    final ByteBuffer key = keyedStateBackend.getCurrentKey();
    final int keyGroup = keyedStateBackend.getCurrentKeyGroupIndex();
    if (!shouldOutput(keyGroup, key)) {
        // Nothing is emitted for this element.
        return;
    }
    final WindowedValue<ValueWithRecordId<T>> wrapped = streamRecord.getValue();
    // Drop the record-id wrapper; window metadata is kept by withValue.
    final T payload = wrapped.getValue().getValue();
    output.collect(streamRecord.replace(wrapped.withValue(payload)));
}
Also used : ByteBuffer(java.nio.ByteBuffer) ValueWithRecordId(org.apache.beam.sdk.values.ValueWithRecordId)

Example 2 with ValueWithRecordId

use of org.apache.beam.sdk.values.ValueWithRecordId in project beam by apache.

In class WorkerCustomSourcesTest, method testReadUnboundedReader:

/**
 * Reads a {@code TestCountingSource} through {@code WorkerCustomSources.create} over several
 * activations of a streaming execution context, feeding the source state produced by one
 * activation into the next.
 *
 * <p>For every activation it checks the element values, record ids, windows and timestamps,
 * the per-activation element and time limits, the reported source watermark, the finalize
 * ids, that a reader can be acquired from the reader cache, and the reported backlog bytes.
 */
@Test
public void testReadUnboundedReader() throws Exception {
    CounterSet counterSet = new CounterSet();
    StreamingModeExecutionStateRegistry executionStateRegistry = new StreamingModeExecutionStateRegistry(null);
    ReaderCache readerCache = new ReaderCache(Duration.standardMinutes(1), Runnable::run);
    StreamingModeExecutionContext context = new StreamingModeExecutionContext(counterSet, "computationId", readerCache, /*stateNameMap=*/
    ImmutableMap.of(), /*stateCache=*/
    null, StreamingStepMetricsContainer.createRegistry(), new DataflowExecutionStateTracker(ExecutionStateSampler.newForTest(), executionStateRegistry.getState(NameContext.forStage("stageName"), "other", null, NoopProfileScope.NOOP), counterSet, PipelineOptionsFactory.create(), "test-work-item-id"), executionStateRegistry, Long.MAX_VALUE);
    // NOTE(review): `options` is not declared in this method; presumably a field of the test class - confirm.
    options.setNumWorkers(5);
    // Upper bound on elements read per activation; also sizes the outer loop (10 * maxElements total).
    int maxElements = 10;
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    debugOptions.setUnboundedReaderMaxElements(maxElements);
    // Source state carried from one activation to the next; starts empty.
    ByteString state = ByteString.EMPTY;
    for (int i = 0; i < 10 * maxElements; ) /* Incremented in inner loop */
    {
        // Initialize streaming context with state from previous iteration.
        // NOTE(review): the trailing comments inside this call appear to label the argument
        // that follows on the next line (formatter artifact) - confirm against the signature.
        context.start("key", Windmill.WorkItem.newBuilder().setKey(// key is zero-padded index.
        ByteString.copyFromUtf8("0000000000000001")).setWorkToken(// Must be increasing across activations for cache to be used.
        i).setCacheToken(1).setSourceState(// Source state.
        Windmill.SourceState.newBuilder().setState(state).build()).build(), // input watermark
        new Instant(0), // output watermark
        null, // synchronized processing time
        null, // StateReader
        null, // StateFetcher
        null, Windmill.WorkItemCommitRequest.newBuilder());
        // The raw cast is needed because the created reader is assigned to a fully parameterized type.
        @SuppressWarnings({ "unchecked", "rawtypes" }) NativeReader<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>> reader = (NativeReader) WorkerCustomSources.create((CloudObject) serializeToCloudSource(new TestCountingSource(Integer.MAX_VALUE), options).getSpec(), options, context);
        // Verify data.
        Instant beforeReading = Instant.now();
        int numReadOnThisIteration = 0;
        for (WindowedValue<ValueWithRecordId<KV<Integer, Integer>>> value : ReaderUtils.readAllFromReader(reader)) {
            // The i-th element overall is expected to be KV(0, i), with the encoded KV as its
            // record id, in the global window, with a timestamp of i millis.
            assertEquals(KV.of(0, i), value.getValue().getValue());
            assertArrayEquals(encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), KV.of(0, i)), value.getValue().getId());
            assertThat(value.getWindows(), contains((BoundedWindow) GlobalWindow.INSTANCE));
            assertEquals(i, value.getTimestamp().getMillis());
            i++;
            numReadOnThisIteration++;
        }
        Instant afterReading = Instant.now();
        long maxReadSec = debugOptions.getUnboundedReaderMaxReadTimeSec();
        // Reading must stay within the configured max read time (one second of slack) and
        // must not exceed the configured max element count.
        assertThat(new Duration(beforeReading, afterReading).getStandardSeconds(), lessThanOrEqualTo(maxReadSec + 1));
        assertThat(numReadOnThisIteration, lessThanOrEqualTo(debugOptions.getUnboundedReaderMaxElements()));
        // Extract and verify state modifications.
        context.flushState();
        state = context.getOutputBuilder().getSourceStateUpdates().getState();
        // CountingSource's watermark is the last record + 1.  i is now one past the last record,
        // so the expected watermark is i millis.
        assertEquals(TimeUnit.MILLISECONDS.toMicros(i), context.getOutputBuilder().getSourceWatermark());
        assertEquals(1, context.getOutputBuilder().getSourceStateUpdates().getFinalizeIdsList().size());
        // A reader must be obtainable from the cache for the next work token.
        assertNotNull(readerCache.acquireReader(context.getComputationKey(), context.getWork().getCacheToken(), context.getWorkToken() + 1));
        // NOTE(review): 7L is presumably the backlog reported by TestCountingSource - confirm.
        assertEquals(7L, context.getBacklogBytes());
    }
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Instant(org.joda.time.Instant) StreamingModeExecutionStateRegistry(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionStateRegistry) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) ValueWithRecordId(org.apache.beam.sdk.values.ValueWithRecordId) NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) Test(org.junit.Test)

Example 3 with ValueWithRecordId

use of org.apache.beam.sdk.values.ValueWithRecordId in project beam by apache.

In class UnboundedSourceWrapper, method emitElement:

/**
 * Takes the reader's current element, pairs it with its record id, and passes it to the
 * source context. The reader is guaranteed to have data.
 *
 * @param ctx the source context that receives the emitted element
 * @param reader the reader whose current element, record id and timestamp are emitted
 */
private void emitElement(SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx, UnboundedSource.UnboundedReader<OutputT> reader) {
    // Reader state update and element emission must be atomic with respect to snapshots.
    // NOTE(review): no lock is taken in this method; presumably the caller provides it - confirm.
    final OutputT current = reader.getCurrent();
    final byte[] id = reader.getCurrentRecordId();
    final Instant eventTime = reader.getCurrentTimestamp();
    final ValueWithRecordId<OutputT> tagged = new ValueWithRecordId<>(current, id);
    // Emitted in the global window with PaneInfo.NO_FIRING.
    ctx.collect(WindowedValue.of(tagged, eventTime, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
}
Also used : Instant(org.joda.time.Instant) ValueWithRecordId(org.apache.beam.sdk.values.ValueWithRecordId)

Aggregations

ValueWithRecordId (org.apache.beam.sdk.values.ValueWithRecordId)3 Instant (org.joda.time.Instant)2 ByteBuffer (java.nio.ByteBuffer)1 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)1 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)1 DataflowExecutionStateTracker (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker)1 StreamingModeExecutionStateRegistry (org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionStateRegistry)1 CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet)1 TestCountingSource (org.apache.beam.runners.dataflow.worker.testing.TestCountingSource)1 NativeReader (org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader)1 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 KV (org.apache.beam.sdk.values.KV)1 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)1 Duration (org.joda.time.Duration)1 Test (org.junit.Test)1