Search in sources:

Example 1 with NativeReader

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader in the Apache Beam project.

From the class WorkerCustomSourcesTest, method testReadUnboundedReader.

/**
 * Reads from an unbounded {@code TestCountingSource} across repeated activations, carrying the
 * serialized checkpoint state from one activation to the next (as the streaming worker would),
 * and verifies the per-activation element cap, record contents, watermark advancement, reader
 * caching, and reported backlog.
 */
@Test
public void testReadUnboundedReader() throws Exception {
    CounterSet counterSet = new CounterSet();
    StreamingModeExecutionStateRegistry executionStateRegistry = new StreamingModeExecutionStateRegistry(null);
    // Cache readers for one minute; Runnable::run performs cache maintenance inline on the caller.
    ReaderCache readerCache = new ReaderCache(Duration.standardMinutes(1), Runnable::run);
    StreamingModeExecutionContext context = new StreamingModeExecutionContext(counterSet, "computationId", readerCache, /*stateNameMap=*/
    ImmutableMap.of(), /*stateCache=*/
    null, StreamingStepMetricsContainer.createRegistry(), new DataflowExecutionStateTracker(ExecutionStateSampler.newForTest(), executionStateRegistry.getState(NameContext.forStage("stageName"), "other", null, NoopProfileScope.NOOP), counterSet, PipelineOptionsFactory.create(), "test-work-item-id"), executionStateRegistry, Long.MAX_VALUE);
    // NOTE(review): `options` is a field declared elsewhere in this test class — not visible here.
    options.setNumWorkers(5);
    // Cap each activation at 10 elements so the source is forced to checkpoint and resume.
    int maxElements = 10;
    DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
    debugOptions.setUnboundedReaderMaxElements(maxElements);
    // Checkpoint state handed from one activation to the next; starts empty (fresh source).
    ByteString state = ByteString.EMPTY;
    // `i` counts total elements read across ALL activations; the inner read loop advances it,
    // so this outer loop runs until 10 * maxElements elements have been consumed in total.
    for (int i = 0; i < 10 * maxElements; ) /* Incremented in inner loop */
    {
        // Initialize streaming context with state from previous iteration.
        context.start("key", Windmill.WorkItem.newBuilder().setKey(// key is zero-padded index.
        ByteString.copyFromUtf8("0000000000000001")).setWorkToken(// Must be increasing across activations for cache to be used.
        i).setCacheToken(1).setSourceState(// Source state.
        Windmill.SourceState.newBuilder().setState(state).build()).build(), // input watermark
        new Instant(0), // output watermark
        null, // synchronized processing time
        null, // StateReader
        null, // StateFetcher
        null, Windmill.WorkItemCommitRequest.newBuilder());
        @SuppressWarnings({ "unchecked", "rawtypes" }) NativeReader<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>> reader = (NativeReader) WorkerCustomSources.create((CloudObject) serializeToCloudSource(new TestCountingSource(Integer.MAX_VALUE), options).getSpec(), options, context);
        // Verify data.
        Instant beforeReading = Instant.now();
        int numReadOnThisIteration = 0;
        // Each record is expected to be KV.of(0, i), with the record id being the encoded KV
        // and the timestamp equal to i millis, all in the global window.
        for (WindowedValue<ValueWithRecordId<KV<Integer, Integer>>> value : ReaderUtils.readAllFromReader(reader)) {
            assertEquals(KV.of(0, i), value.getValue().getValue());
            assertArrayEquals(encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), KV.of(0, i)), value.getValue().getId());
            assertThat(value.getWindows(), contains((BoundedWindow) GlobalWindow.INSTANCE));
            assertEquals(i, value.getTimestamp().getMillis());
            i++;
            numReadOnThisIteration++;
        }
        Instant afterReading = Instant.now();
        // Reading must respect the configured max read time (with one second of slack) and the
        // configured max-elements-per-activation cap.
        long maxReadSec = debugOptions.getUnboundedReaderMaxReadTimeSec();
        assertThat(new Duration(beforeReading, afterReading).getStandardSeconds(), lessThanOrEqualTo(maxReadSec + 1));
        assertThat(numReadOnThisIteration, lessThanOrEqualTo(debugOptions.getUnboundedReaderMaxElements()));
        // Extract and verify state modifications.
        context.flushState();
        state = context.getOutputBuilder().getSourceStateUpdates().getState();
        // CountingSource's watermark is the last record + 1.  i is now one past the last record,
        // so the expected watermark is i millis.
        assertEquals(TimeUnit.MILLISECONDS.toMicros(i), context.getOutputBuilder().getSourceWatermark());
        assertEquals(1, context.getOutputBuilder().getSourceStateUpdates().getFinalizeIdsList().size());
        // The reader should have been cached for reuse under the NEXT work token.
        assertNotNull(readerCache.acquireReader(context.getComputationKey(), context.getWork().getCacheToken(), context.getWorkToken() + 1));
        // NOTE(review): 7L appears to be the fixed backlog TestCountingSource reports — confirm
        // against TestCountingSource's getSplitBacklogBytes implementation.
        assertEquals(7L, context.getBacklogBytes());
    }
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Instant(org.joda.time.Instant) StreamingModeExecutionStateRegistry(org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionStateRegistry) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) ValueWithRecordId(org.apache.beam.sdk.values.ValueWithRecordId) NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) Test(org.junit.Test)

Example 2 with NativeReader

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader in the Apache Beam project.

From the class ConcatReaderFactoryTest, method testCreateConcatReaderWithManySubSources.

/** Verifies a concat reader built over many (15) in-memory sub-sources yields every element. */
@Test
public void testCreateConcatReaderWithManySubSources() throws Exception {
    // 15 sub-sources of 10 elements each.
    List<List<String>> sourceData = createInMemorySourceData(15, 10);
    Source concatSource = createSourcesWithInMemorySources(sourceData);
    @SuppressWarnings("unchecked")
    NativeReader<String> concatReader = (NativeReader<String>) ReaderRegistry.defaultRegistry().create(concatSource, null, null, null);
    assertNotNull(concatReader);
    // Flatten the per-source data into the complete list of expected elements.
    List<String> allExpected = new ArrayList<>();
    sourceData.forEach(allExpected::addAll);
    // The concat reader makes no ordering guarantee across sub-sources.
    assertThat(readAllFromReader(concatReader), containsInAnyOrder(allExpected.toArray()));
}
Also used : NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) ArrayList(java.util.ArrayList) Structs.addStringList(org.apache.beam.runners.dataflow.util.Structs.addStringList) ArrayList(java.util.ArrayList) List(java.util.List) Structs.addList(org.apache.beam.runners.dataflow.util.Structs.addList) Source(com.google.api.services.dataflow.model.Source) Test(org.junit.Test)

Example 3 with NativeReader

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader in the Apache Beam project.

From the class ConcatReaderFactoryTest, method testCreateConcatReaderWithOneSubSource.

/** Verifies the degenerate case: a concat reader over a single in-memory sub-source. */
@Test
public void testCreateConcatReaderWithOneSubSource() throws Exception {
    // A single sub-source holding 10 elements.
    List<List<String>> sourceData = createInMemorySourceData(1, 10);
    Source concatSource = createSourcesWithInMemorySources(sourceData);
    @SuppressWarnings("unchecked")
    NativeReader<String> concatReader = (NativeReader<String>) ReaderRegistry.defaultRegistry().create(concatSource, null, null, null);
    assertNotNull(concatReader);
    // Flatten the (single) sub-source's data into the expected element list.
    List<String> allExpected = new ArrayList<>();
    sourceData.forEach(allExpected::addAll);
    // Order is not asserted; the concat reader only guarantees the element multiset.
    assertThat(readAllFromReader(concatReader), containsInAnyOrder(allExpected.toArray()));
}
Also used : NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) ArrayList(java.util.ArrayList) Structs.addStringList(org.apache.beam.runners.dataflow.util.Structs.addStringList) ArrayList(java.util.ArrayList) List(java.util.List) Structs.addList(org.apache.beam.runners.dataflow.util.Structs.addList) Source(com.google.api.services.dataflow.model.Source) Test(org.junit.Test)

Aggregations

NativeReader (org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader)3 Test (org.junit.Test)3 Source (com.google.api.services.dataflow.model.Source)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Structs.addList (org.apache.beam.runners.dataflow.util.Structs.addList)2 Structs.addStringList (org.apache.beam.runners.dataflow.util.Structs.addStringList)2 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)1 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)1 DataflowExecutionStateTracker (org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker)1 StreamingModeExecutionStateRegistry (org.apache.beam.runners.dataflow.worker.StreamingModeExecutionContext.StreamingModeExecutionStateRegistry)1 CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet)1 TestCountingSource (org.apache.beam.runners.dataflow.worker.testing.TestCountingSource)1 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 KV (org.apache.beam.sdk.values.KV)1 ValueWithRecordId (org.apache.beam.sdk.values.ValueWithRecordId)1 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)1 Duration (org.joda.time.Duration)1 Instant (org.joda.time.Instant)1