Search in sources :

Example 96 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class GroupingShuffleReaderTest method runTestBytesReadCounterForOptions.

/**
 * Reads the supplied shuffle input through a {@link GroupingShuffleReader} and asserts on the
 * number of bytes counted as read.
 *
 * <p>If the {@code IntertransformIO} experiment is enabled in {@code options}, the expectation is
 * checked through {@code expectShuffleReadCounterEquals}; otherwise it is compared against the
 * reader's {@code perOperationPerDatasetBytesCounter}.
 *
 * @param options pipeline options used to build the execution context and the reader
 * @param input keyed groups of values to serialize as shuffle entries
 * @param useSecondaryKey forwarded both to the entry writer and to the reader
 * @param valuesToRead how much of each group's values the iteration helper consumes
 * @param expectedReadBytes byte count the selected counter must report
 */
private void runTestBytesReadCounterForOptions(PipelineOptions options, List<KV<Integer, List<KV<Integer, Integer>>>> input, boolean useSecondaryKey, ValuesToRead valuesToRead, long expectedReadBytes) throws Exception {
    // Serialize the input and load it into an in-memory shuffle reader.
    List<ShuffleEntry> entries = writeShuffleEntries(input, useSecondaryKey);
    TestShuffleReader shuffleReader = new TestShuffleReader();
    for (ShuffleEntry entry : entries) {
        shuffleReader.addEntry(entry);
    }
    TestShuffleReadCounterFactory readCounterFactory = new TestShuffleReadCounterFactory();

    // Assemble the element coder from the inside out: (int, int) pairs, grouped per int key.
    Coder<KV<Integer, Integer>> pairCoder = KvCoder.of(BigEndianIntegerCoder.of(), BigEndianIntegerCoder.of());
    Coder<KV<Integer, Iterable<KV<Integer, Integer>>>> groupCoder = KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(pairCoder));
    Coder<WindowedValue<KV<Integer, Iterable<KV<Integer, Integer>>>>> sourceElemCoder = WindowedValue.getFullCoder(groupCoder, IntervalWindow.getCoder());

    // Build the reader under test and attach the byte counter inspected below.
    BatchModeExecutionContext executionContext = BatchModeExecutionContext.forTesting(options, "testStage");
    TestOperationContext operationContext = TestOperationContext.create();
    GroupingShuffleReader<Integer, KV<Integer, Integer>> reader = new GroupingShuffleReader<>(options, null, null, null, sourceElemCoder, executionContext, operationContext, readCounterFactory, useSecondaryKey);
    CounterName readBytesCounterName = CounterName.named("dax-shuffle-test-wf-read-bytes");
    reader.perOperationPerDatasetBytesCounter = operationContext.counterFactory().longSum(readBytesCounterName);

    runIterationOverGroupingShuffleReader(executionContext, shuffleReader, reader, sourceElemCoder, valuesToRead);

    boolean intertransformIoEnabled = ExperimentContext.parseFrom(options).isEnabled(Experiment.IntertransformIO);
    if (!intertransformIoEnabled) {
        assertEquals(expectedReadBytes, (long) reader.perOperationPerDatasetBytesCounter.getAggregate());
        return;
    }
    expectShuffleReadCounterEquals(readCounterFactory, expectedReadBytes);
}
Also used : KV(org.apache.beam.sdk.values.KV) ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 97 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class GroupingShuffleReaderTest method runIterationOverGroupingShuffleReader.

/**
 * Iterates over every key group that {@code groupingShuffleReader} produces from
 * {@code shuffleReader}, asserting iterator invariants along the way, and returns what was read.
 *
 * <p>How much of each group's values is consumed depends on {@code valuesToRead}. The branches
 * below compare {@code ValuesToRead} constants by {@code ordinal()}, so they rely on the
 * declaration order of that enum. NOTE(review): presumably SKIP_VALUES, READ_ONE_VALUE,
 * READ_ALL_VALUES, READ_ALL_VALUES_TWICE in ascending order; confirm against the enum.
 *
 * @param context execution context whose current key is asserted to equal the key being read
 * @param shuffleReader underlying reader; asserted open on entry and closed on exit
 * @param groupingShuffleReader reader under test, from which the iterator is obtained
 * @param coder coder used to register a byte-size observer on each produced element
 * @param valuesToRead selects how many values of each group are read
 * @return one entry per key encountered, paired with the values collected on the first pass over
 *     that key's values (empty when the values were not read)
 * @throws Exception if the reader, iterator or coder throws
 */
@SuppressWarnings("ReturnValueIgnored")
private List<KV<Integer, List<KV<Integer, Integer>>>> runIterationOverGroupingShuffleReader(BatchModeExecutionContext context, TestShuffleReader shuffleReader, GroupingShuffleReader<Integer, KV<Integer, Integer>> groupingShuffleReader, Coder<WindowedValue<KV<Integer, Iterable<KV<Integer, Integer>>>>> coder, ValuesToRead valuesToRead) throws Exception {
    // Observer backed by a local counter; used only to check that size observation is lazy.
    CounterSet counterSet = new CounterSet();
    Counter<Long, ?> elementByteSizeCounter = counterSet.longSum(CounterName.named("element-byte-size-counter"));
    CounterBackedElementByteSizeObserver elementObserver = new CounterBackedElementByteSizeObserver(elementByteSizeCounter);
    List<KV<Integer, List<KV<Integer, Integer>>>> actual = new ArrayList<>();
    assertFalse(shuffleReader.isClosed());
    try (GroupingShuffleReaderIterator<Integer, KV<Integer, Integer>> iter = groupingShuffleReader.iterator(shuffleReader)) {
        // Values iterable/iterator of the previous key, kept to check that they can still be
        // touched without throwing after the outer iterator has advanced.
        Iterable<KV<Integer, Integer>> prevValuesIterable = null;
        Iterator<KV<Integer, Integer>> prevValuesIterator = null;
        for (boolean more = iter.start(); more; more = iter.advance()) {
            // Repeated getCurrent() calls on the same position should not fail.
            iter.getCurrent();
            iter.getCurrent();
            // safe co-variant cast from Reiterable to Iterable
            @SuppressWarnings({ // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
            "rawtypes", "unchecked" }) WindowedValue<KV<Integer, Iterable<KV<Integer, Integer>>>> windowedValue = (WindowedValue) iter.getCurrent();
            // Verify that the byte size observer is lazy for every value the GroupingShuffleReader
            // produces.
            coder.registerByteSizeObserver(windowedValue, elementObserver);
            assertTrue(elementObserver.getIsLazy());
            // Verify the value carries the minimum timestamp and is in no windows.
            assertEquals(BoundedWindow.TIMESTAMP_MIN_VALUE, windowedValue.getTimestamp());
            assertEquals(0, windowedValue.getWindows().size());
            KV<Integer, Iterable<KV<Integer, Integer>>> elem = windowedValue.getValue();
            Integer key = elem.getKey();
            List<KV<Integer, Integer>> values = new ArrayList<>();
            // Anything ordered after SKIP_VALUES touches this key's values iterable.
            if (valuesToRead.ordinal() > ValuesToRead.SKIP_VALUES.ordinal()) {
                if (prevValuesIterable != null) {
                    // Verifies that this does not throw.
                    prevValuesIterable.iterator();
                }
                if (prevValuesIterator != null) {
                    // Verifies that this does not throw.
                    prevValuesIterator.hasNext();
                }
                Iterable<KV<Integer, Integer>> valuesIterable = elem.getValue();
                Iterator<KV<Integer, Integer>> valuesIterator = valuesIterable.iterator();
                if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) {
                    while (valuesIterator.hasNext()) {
                        // hasNext() must stay true across repeated calls.
                        assertTrue(valuesIterator.hasNext());
                        assertTrue(valuesIterator.hasNext());
                        assertEquals("BatchModeExecutionContext key", key, context.getKey());
                        values.add(valuesIterator.next());
                        // In READ_ONE_VALUE mode stop after the first value of the group.
                        if (valuesToRead == ValuesToRead.READ_ONE_VALUE) {
                            break;
                        }
                    }
                    if (valuesToRead.ordinal() >= ValuesToRead.READ_ALL_VALUES.ordinal()) {
                        // The iterator is exhausted: hasNext() stays false and next() must throw.
                        assertFalse(valuesIterator.hasNext());
                        assertFalse(valuesIterator.hasNext());
                        try {
                            valuesIterator.next();
                            fail("Expected NoSuchElementException");
                        } catch (NoSuchElementException exn) {
                        // As expected.
                        }
                        // Verifies that this does not throw.
                        valuesIterable.iterator();
                    }
                }
                if (valuesToRead == ValuesToRead.READ_ALL_VALUES_TWICE) {
                    // Second pass with a fresh iterator; these values are not added to the result.
                    valuesIterator = valuesIterable.iterator();
                    while (valuesIterator.hasNext()) {
                        assertTrue(valuesIterator.hasNext());
                        assertTrue(valuesIterator.hasNext());
                        assertEquals("BatchModeExecutionContext key", key, context.getKey());
                        valuesIterator.next();
                    }
                    assertFalse(valuesIterator.hasNext());
                    assertFalse(valuesIterator.hasNext());
                    try {
                        valuesIterator.next();
                        fail("Expected NoSuchElementException");
                    } catch (NoSuchElementException exn) {
                    // As expected.
                    }
                }
                // Remember this key's iterable/iterator for the checks on the next key.
                prevValuesIterable = valuesIterable;
                prevValuesIterator = valuesIterator;
            }
            actual.add(KV.of(key, values));
        }
        // After the loop, advance() must keep returning false and getCurrent() must throw.
        assertFalse(iter.advance());
        assertFalse(iter.advance());
        try {
            iter.getCurrent();
            fail("Expected NoSuchElementException");
        } catch (NoSuchElementException exn) {
        // As expected.
        }
    }
    // Leaving the try-with-resources block is expected to have closed the underlying reader.
    assertTrue(shuffleReader.isClosed());
    return actual;
}
Also used : ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CounterBackedElementByteSizeObserver(org.apache.beam.runners.dataflow.worker.counters.CounterBackedElementByteSizeObserver) WindowedValue(org.apache.beam.sdk.util.WindowedValue) NoSuchElementException(java.util.NoSuchElementException)

Example 98 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class PubsubSinkTest method testWriteWith.

/**
 * Writes three timestamped elements ("e0", "e1", "e2") through a {@link PubsubSink} and asserts
 * that the commit request built by the mocked context contains the matching Pubsub bundle.
 *
 * @param formatFn value for the serialized-attributes-fn property, or {@code null} to leave the
 *     property unset; the expected bundle's {@code withAttributes} flag is true only when
 *     this is non-null
 */
private void testWriteWith(String formatFn) throws Exception {
    Windmill.WorkItemCommitRequest.Builder outputBuilder = Windmill.WorkItemCommitRequest.newBuilder().setKey(ByteString.copyFromUtf8("key")).setWorkToken(0);
    when(mockContext.getOutputBuilder()).thenReturn(outputBuilder);

    // Describe the sink as a cloud object spec.
    boolean withAttributes = formatFn != null;
    Map<String, Object> sinkProperties = new HashMap<>();
    sinkProperties.put(PropertyNames.OBJECT_TYPE_NAME, "");
    sinkProperties.put(PropertyNames.PUBSUB_TOPIC, "topic");
    sinkProperties.put(PropertyNames.PUBSUB_TIMESTAMP_ATTRIBUTE, "ts");
    sinkProperties.put(PropertyNames.PUBSUB_ID_ATTRIBUTE, "id");
    if (withAttributes) {
        sinkProperties.put(PropertyNames.PUBSUB_SERIALIZED_ATTRIBUTES_FN, formatFn);
    }
    CloudObject cloudSinkSpec = CloudObject.fromSpec(sinkProperties);
    PubsubSink.Factory sinkFactory = new PubsubSink.Factory();
    PubsubSink<String> sink = (PubsubSink<String>) sinkFactory.create(cloudSinkSpec, WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()), null, mockContext, null);

    // Element i is "e<i>" with timestamp i; every add is expected to return 2.
    Sink.SinkWriter<WindowedValue<String>> writer = sink.writer();
    for (int i = 0; i < 3; i++) {
        assertEquals(2, writer.add(WindowedValue.timestampedValueInGlobalWindow("e" + i, new Instant(i))));
    }
    writer.close();

    // Expected message timestamps are the element timestamps multiplied by 1000.
    Windmill.PubSubMessageBundle.Builder expectedBundle = Windmill.PubSubMessageBundle.newBuilder().setTopic("topic").setTimestampLabel("ts").setIdLabel("id");
    for (int i = 0; i < 3; i++) {
        expectedBundle.addMessages(Windmill.Message.newBuilder().setTimestamp(i * 1000).setData(ByteString.copyFromUtf8("e" + i)));
    }
    expectedBundle.setWithAttributes(withAttributes);
    Windmill.WorkItemCommitRequest expectedCommit = Windmill.WorkItemCommitRequest.newBuilder().setKey(ByteString.copyFromUtf8("key")).setWorkToken(0).addPubsubMessages(expectedBundle).build();
    assertEquals(expectedCommit, outputBuilder.build());
}
Also used : HashMap(java.util.HashMap) Instant(org.joda.time.Instant) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Sink(org.apache.beam.runners.dataflow.worker.util.common.worker.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 99 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class ReifyTimestampAndWindowsParDoFnFactoryTest method verifyReifiedIsInTheSameWindows.

/**
 * Runs one element through a ParDoFn created by {@link ReifyTimestampAndWindowsParDoFnFactory}
 * and checks the single received output against the input.
 *
 * <p>The output must keep the input's key, and its value must be a {@link WindowedValue} that
 * carries the input's value, timestamp, windows and pane.
 *
 * @param elem the keyed input element to reify
 */
private <K, V> void verifyReifiedIsInTheSameWindows(WindowedValue<KV<K, V>> elem) throws Exception {
    ParDoFn reifyFn = new ReifyTimestampAndWindowsParDoFnFactory().create(null, null, null, null, null, null, null);
    SingleValueReceiver<WindowedValue<KV<K, WindowedValue<V>>>> receiver = new SingleValueReceiver<>();
    reifyFn.startBundle(receiver);
    // The important thing to test that is not just a restatement of the ParDoFn is that
    // it only produces one element per input
    reifyFn.processElement(elem);

    KV<K, WindowedValue<V>> reifiedKv = receiver.reified.getValue();
    KV<K, V> inputKv = elem.getValue();
    assertThat(reifiedKv.getKey(), equalTo(inputKv.getKey()));

    // The nested windowed value must mirror the input's value and windowing metadata.
    WindowedValue<V> reifiedValue = reifiedKv.getValue();
    assertThat(reifiedValue.getValue(), equalTo(inputKv.getValue()));
    assertThat(reifiedValue.getTimestamp(), equalTo(elem.getTimestamp()));
    assertThat(reifiedValue.getWindows(), equalTo(elem.getWindows()));
    assertThat(reifiedValue.getPane(), equalTo(elem.getPane()));
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn)

Example 100 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class IsmSinkFactory method create.

/**
 * Creates an {@link IsmSink} writing to the file named in {@code spec}.
 *
 * <p>{@code options} and {@code coder} are required despite being declared nullable. The coder
 * must be a {@link WindowedValueCoder} whose value coder is an {@link IsmRecordCoder}; otherwise
 * the argument checks fail. The bloom filter size limit is derived from the worker cache size
 * and is never below {@code MIN_BLOOM_FILTER_SIZE_BYTES}.
 *
 * @param spec cloud object holding the output filename property
 * @param coder coder for the windowed Ism records to be written
 * @param options pipeline options, read as {@link DataflowWorkerHarnessOptions}
 * @param executionContext not used by this method
 * @param operationContext not used by this method
 * @return the configured sink
 */
@Override
public Sink<?> create(CloudObject spec, @Nullable Coder<?> coder, @Nullable PipelineOptions options, @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext) throws Exception {
    options = checkArgumentNotNull(options);
    coder = checkArgumentNotNull(coder);

    // The validity of this coder is checked in detail by the typed create, below
    @SuppressWarnings("unchecked") Coder<WindowedValue<IsmRecord<Object>>> elementCoder = (Coder<WindowedValue<IsmRecord<Object>>>) coder;
    String filename = getString(spec, WorkerPropertyNames.FILENAME);

    // Peel the coder one layer at a time, checking each layer's type before casting.
    checkArgument(elementCoder instanceof WindowedValueCoder, "%s only supports using %s but got %s.", IsmSink.class, WindowedValueCoder.class, elementCoder);
    WindowedValueCoder<IsmRecord<Object>> windowedValueCoder = (WindowedValueCoder<IsmRecord<Object>>) elementCoder;
    checkArgument(windowedValueCoder.getValueCoder() instanceof IsmRecordCoder, "%s only supports using %s but got %s.", IsmSink.class, IsmRecordCoder.class, windowedValueCoder.getValueCoder());
    @SuppressWarnings("unchecked") IsmRecordCoder<Object> recordCoder = (IsmRecordCoder<Object>) windowedValueCoder.getValueCoder();

    // Note the conversion from MiB to bytes
    double cacheDerivedLimitBytes = BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER * options.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb() * 1024 * 1024;
    long roundedLimitBytes = DoubleMath.roundToLong(cacheDerivedLimitBytes, RoundingMode.DOWN);
    long bloomFilterSizeLimitBytes = Math.max(MIN_BLOOM_FILTER_SIZE_BYTES, roundedLimitBytes);

    return new IsmSink<>(FileSystems.matchNewResource(filename, false), recordCoder, bloomFilterSizeLimitBytes);
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Coder(org.apache.beam.sdk.coders.Coder) IsmRecordCoder(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject)

Aggregations

WindowedValue (org.apache.beam.sdk.util.WindowedValue)304 Test (org.junit.Test)165 Instant (org.joda.time.Instant)102 KV (org.apache.beam.sdk.values.KV)98 ArrayList (java.util.ArrayList)93 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)76 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)70 HashMap (java.util.HashMap)61 TupleTag (org.apache.beam.sdk.values.TupleTag)53 Coder (org.apache.beam.sdk.coders.Coder)51 KvCoder (org.apache.beam.sdk.coders.KvCoder)49 List (java.util.List)39 Map (java.util.Map)36 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)35 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)34 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)31 PCollection (org.apache.beam.sdk.values.PCollection)31 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)30 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)30 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)27