use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class GroupingShuffleReaderTest method runTestBytesReadCounterForOptions.
/**
 * Feeds {@code input} through a {@link GroupingShuffleReader} and checks the number of bytes
 * reported as read.
 *
 * <p>Which counter is checked depends on {@code options}: when the {@code IntertransformIO}
 * experiment is enabled, the {@link TestShuffleReadCounterFactory} handed to the reader is
 * checked via {@code expectShuffleReadCounterEquals}; otherwise the aggregate of the reader's
 * {@code perOperationPerDatasetBytesCounter} is compared directly.
 *
 * @param options pipeline options used to build the execution context and the reader
 * @param input keyed groups of values to encode as shuffle entries
 * @param useSecondaryKey forwarded to {@code writeShuffleEntries} and to the reader
 * @param valuesToRead how much of each group's values the iteration helper consumes
 * @param expectedReadBytes byte count the selected counter must report
 * @throws Exception if writing the entries or reading them back fails
 */
private void runTestBytesReadCounterForOptions(PipelineOptions options, List<KV<Integer, List<KV<Integer, Integer>>>> input, boolean useSecondaryKey, ValuesToRead valuesToRead, long expectedReadBytes) throws Exception {
  // Load an in-memory shuffle reader with the encoded form of the input.
  List<ShuffleEntry> encodedEntries = writeShuffleEntries(input, useSecondaryKey);
  TestShuffleReader entryReader = new TestShuffleReader();
  for (ShuffleEntry encodedEntry : encodedEntries) {
    entryReader.addEntry(encodedEntry);
  }
  TestShuffleReadCounterFactory readCounterFactory = new TestShuffleReadCounterFactory();

  // Element coder: windowed KV of an integer key and an iterable of integer pairs.
  Coder<KV<Integer, Iterable<KV<Integer, Integer>>>> groupedCoder =
      KvCoder.of(
          BigEndianIntegerCoder.of(),
          IterableCoder.of(KvCoder.of(BigEndianIntegerCoder.of(), BigEndianIntegerCoder.of())));
  Coder<WindowedValue<KV<Integer, Iterable<KV<Integer, Integer>>>>> sourceElemCoder =
      WindowedValue.getFullCoder(groupedCoder, IntervalWindow.getCoder());

  // Wire up the reader under test and give it a dedicated bytes counter.
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(options, "testStage");
  TestOperationContext opContext = TestOperationContext.create();
  GroupingShuffleReader<Integer, KV<Integer, Integer>> readerUnderTest =
      new GroupingShuffleReader<>(
          options,
          null,
          null,
          null,
          sourceElemCoder,
          executionContext,
          opContext,
          readCounterFactory,
          useSecondaryKey);
  readerUnderTest.perOperationPerDatasetBytesCounter =
      opContext.counterFactory().longSum(CounterName.named("dax-shuffle-test-wf-read-bytes"));

  runIterationOverGroupingShuffleReader(
      executionContext, entryReader, readerUnderTest, sourceElemCoder, valuesToRead);

  boolean intertransformIoEnabled =
      ExperimentContext.parseFrom(options).isEnabled(Experiment.IntertransformIO);
  if (!intertransformIoEnabled) {
    // Without the experiment, the per-operation counter carries the byte count.
    long observedReadBytes =
        (long) readerUnderTest.perOperationPerDatasetBytesCounter.getAggregate();
    assertEquals(expectedReadBytes, observedReadBytes);
    return;
  }
  expectShuffleReadCounterEquals(readCounterFactory, expectedReadBytes);
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class GroupingShuffleReaderTest method runIterationOverGroupingShuffleReader.
/**
 * Iterates {@code groupingShuffleReader} over {@code shuffleReader}, asserting iterator
 * invariants at every step, and returns what was read.
 *
 * <p>For every element produced, the method checks that {@code getCurrent()} can be called
 * repeatedly, that {@code coder} registers a lazy byte-size observer for it, that its timestamp
 * is {@code BoundedWindow.TIMESTAMP_MIN_VALUE}, and that it has no windows. How far the values
 * of each key are consumed is selected by comparing {@code valuesToRead.ordinal()} against the
 * ordinals of {@code SKIP_VALUES}, {@code READ_ONE_VALUE} and {@code READ_ALL_VALUES}, plus an
 * equality check for {@code READ_ALL_VALUES_TWICE}.
 *
 * <p>NOTE(review): the ordinal comparisons depend on the declaration order of
 * {@code ValuesToRead}, which is not visible here - confirm before reordering that enum.
 *
 * @param context execution context whose {@code getKey()} must equal the key of the element
 *     whose values are being read
 * @param shuffleReader entry source; must be open on entry and is asserted closed on exit
 * @param groupingShuffleReader reader under test, from which the iterator is obtained
 * @param coder coder used to register the byte-size observer on each element
 * @param valuesToRead selects how much of each key's values is consumed
 * @return one {@code KV} per key in iteration order, holding the values added during the first
 *     pass over that key (empty when no values are read)
 * @throws Exception if the iterator or the coder throws
 */
@SuppressWarnings("ReturnValueIgnored")
private List<KV<Integer, List<KV<Integer, Integer>>>> runIterationOverGroupingShuffleReader(BatchModeExecutionContext context, TestShuffleReader shuffleReader, GroupingShuffleReader<Integer, KV<Integer, Integer>> groupingShuffleReader, Coder<WindowedValue<KV<Integer, Iterable<KV<Integer, Integer>>>>> coder, ValuesToRead valuesToRead) throws Exception {
// The byte-size observer is backed by a counter from a CounterSet local to this method; only
// its laziness flag is inspected below.
CounterSet counterSet = new CounterSet();
Counter<Long, ?> elementByteSizeCounter = counterSet.longSum(CounterName.named("element-byte-size-counter"));
CounterBackedElementByteSizeObserver elementObserver = new CounterBackedElementByteSizeObserver(elementByteSizeCounter);
List<KV<Integer, List<KV<Integer, Integer>>>> actual = new ArrayList<>();
assertFalse(shuffleReader.isClosed());
// try-with-resources closes the iterator; the reader is asserted closed right after the block.
try (GroupingShuffleReaderIterator<Integer, KV<Integer, Integer>> iter = groupingShuffleReader.iterator(shuffleReader)) {
// Values iterable/iterator of the previously visited key, kept so they can be probed again
// once the outer iterator has advanced.
Iterable<KV<Integer, Integer>> prevValuesIterable = null;
Iterator<KV<Integer, Integer>> prevValuesIterator = null;
for (boolean more = iter.start(); more; more = iter.advance()) {
// Should not fail.
// Called twice (and a third time below) to check getCurrent() is repeatable.
iter.getCurrent();
iter.getCurrent();
// safe co-variant cast from Reiterable to Iterable
@SuppressWarnings({ // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
"rawtypes", "unchecked" }) WindowedValue<KV<Integer, Iterable<KV<Integer, Integer>>>> windowedValue = (WindowedValue) iter.getCurrent();
// Verify that the byte size observer is lazy for every value the GroupingShuffleReader
// produces.
coder.registerByteSizeObserver(windowedValue, elementObserver);
assertTrue(elementObserver.getIsLazy());
// Verify the value carries the minimum timestamp and is in no windows.
assertEquals(BoundedWindow.TIMESTAMP_MIN_VALUE, windowedValue.getTimestamp());
assertEquals(0, windowedValue.getWindows().size());
KV<Integer, Iterable<KV<Integer, Integer>>> elem = windowedValue.getValue();
Integer key = elem.getKey();
// Values collected for this key; stays empty unless the first read loop below runs.
List<KV<Integer, Integer>> values = new ArrayList<>();
// Any mode ordered after SKIP_VALUES obtains the values iterable and an iterator over it.
if (valuesToRead.ordinal() > ValuesToRead.SKIP_VALUES.ordinal()) {
if (prevValuesIterable != null) {
// Verifies that this does not throw.
prevValuesIterable.iterator();
}
if (prevValuesIterator != null) {
// Verifies that this does not throw.
prevValuesIterator.hasNext();
}
Iterable<KV<Integer, Integer>> valuesIterable = elem.getValue();
Iterator<KV<Integer, Integer>> valuesIterator = valuesIterable.iterator();
// Modes at or after READ_ONE_VALUE actually pull values from the iterator.
if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) {
while (valuesIterator.hasNext()) {
// Repeated hasNext() calls must keep returning true without consuming a value.
assertTrue(valuesIterator.hasNext());
assertTrue(valuesIterator.hasNext());
// While values are being read, the execution context must report the current key.
assertEquals("BatchModeExecutionContext key", key, context.getKey());
values.add(valuesIterator.next());
if (valuesToRead == ValuesToRead.READ_ONE_VALUE) {
// Stop after the first value, leaving the rest of the group unread.
break;
}
}
// Modes at or after READ_ALL_VALUES have drained the iterator; check the exhausted state.
if (valuesToRead.ordinal() >= ValuesToRead.READ_ALL_VALUES.ordinal()) {
assertFalse(valuesIterator.hasNext());
assertFalse(valuesIterator.hasNext());
try {
valuesIterator.next();
fail("Expected NoSuchElementException");
} catch (NoSuchElementException exn) {
// As expected.
}
// Verifies that this does not throw.
valuesIterable.iterator();
}
}
if (valuesToRead == ValuesToRead.READ_ALL_VALUES_TWICE) {
// Create a new iterator for a second full pass; values from this pass are discarded
// rather than added to the result.
valuesIterator = valuesIterable.iterator();
while (valuesIterator.hasNext()) {
assertTrue(valuesIterator.hasNext());
assertTrue(valuesIterator.hasNext());
assertEquals("BatchModeExecutionContext key", key, context.getKey());
valuesIterator.next();
}
assertFalse(valuesIterator.hasNext());
assertFalse(valuesIterator.hasNext());
try {
valuesIterator.next();
fail("Expected NoSuchElementException");
} catch (NoSuchElementException exn) {
// As expected.
}
}
// Remember this key's iterable/iterator so the next round can probe them.
prevValuesIterable = valuesIterable;
prevValuesIterator = valuesIterator;
}
actual.add(KV.of(key, values));
}
// After the loop the iterator is exhausted: advance() keeps returning false and getCurrent()
// must throw.
assertFalse(iter.advance());
assertFalse(iter.advance());
try {
iter.getCurrent();
fail("Expected NoSuchElementException");
} catch (NoSuchElementException exn) {
// As expected.
}
}
assertTrue(shuffleReader.isClosed());
return actual;
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class PubsubSinkTest method testWriteWith.
/**
 * Writes three timestamped strings through a {@link PubsubSink} and verifies the commit request
 * accumulated in the mocked context's output builder.
 *
 * <p>The expected request contains a single message bundle for topic {@code "topic"} with
 * timestamp label {@code "ts"} and id label {@code "id"}. Each expected message timestamp is
 * 1000 times the millisecond value of the element's {@code Instant}, and the bundle's
 * {@code withAttributes} flag is set exactly when {@code formatFn} is non-null.
 *
 * @param formatFn value stored under {@code PUBSUB_SERIALIZED_ATTRIBUTES_FN} in the sink spec,
 *     or {@code null} to leave that property out
 * @throws Exception if creating the sink or writing to it fails
 */
private void testWriteWith(String formatFn) throws Exception {
  Windmill.WorkItemCommitRequest.Builder outputBuilder =
      Windmill.WorkItemCommitRequest.newBuilder()
          .setKey(ByteString.copyFromUtf8("key"))
          .setWorkToken(0);
  when(mockContext.getOutputBuilder()).thenReturn(outputBuilder);

  // Describe the sink the same way a cloud spec would.
  Map<String, Object> sinkProperties = new HashMap<>();
  sinkProperties.put(PropertyNames.OBJECT_TYPE_NAME, "");
  sinkProperties.put(PropertyNames.PUBSUB_TOPIC, "topic");
  sinkProperties.put(PropertyNames.PUBSUB_TIMESTAMP_ATTRIBUTE, "ts");
  sinkProperties.put(PropertyNames.PUBSUB_ID_ATTRIBUTE, "id");
  if (formatFn != null) {
    sinkProperties.put(PropertyNames.PUBSUB_SERIALIZED_ATTRIBUTES_FN, formatFn);
  }
  CloudObject cloudSinkSpec = CloudObject.fromSpec(sinkProperties);

  PubsubSink.Factory sinkFactory = new PubsubSink.Factory();
  PubsubSink<String> sink =
      (PubsubSink<String>)
          sinkFactory.create(
              cloudSinkSpec,
              WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()),
              null,
              mockContext,
              null);

  // Element i carries payloads[i] and an Instant of i milliseconds.
  String[] payloads = {"e0", "e1", "e2"};
  Sink.SinkWriter<WindowedValue<String>> writer = sink.writer();
  for (int i = 0; i < payloads.length; i++) {
    long eventMillis = i;
    assertEquals(
        2,
        writer.add(
            WindowedValue.timestampedValueInGlobalWindow(payloads[i], new Instant(eventMillis))));
  }
  writer.close();

  // Build the expected bundle: one message per payload, timestamp scaled by 1000.
  Windmill.PubSubMessageBundle.Builder expectedBundle =
      Windmill.PubSubMessageBundle.newBuilder()
          .setTopic("topic")
          .setTimestampLabel("ts")
          .setIdLabel("id");
  for (int i = 0; i < payloads.length; i++) {
    expectedBundle.addMessages(
        Windmill.Message.newBuilder()
            .setTimestamp(i * 1000L)
            .setData(ByteString.copyFromUtf8(payloads[i])));
  }
  expectedBundle.setWithAttributes(formatFn != null);

  Windmill.WorkItemCommitRequest expectedCommit =
      Windmill.WorkItemCommitRequest.newBuilder()
          .setKey(ByteString.copyFromUtf8("key"))
          .setWorkToken(0)
          .addPubsubMessages(expectedBundle)
          .build();
  assertEquals(expectedCommit, outputBuilder.build());
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class ReifyTimestampAndWindowsParDoFnFactoryTest method verifyReifiedIsInTheSameWindows.
/**
 * Runs {@code elem} through the ParDoFn created by {@link ReifyTimestampAndWindowsParDoFnFactory}
 * and checks that the single output keeps the input's key, while its value is a
 * {@link WindowedValue} carrying the input's value, timestamp, windows and pane.
 *
 * @param elem keyed input element to reify
 * @throws Exception if creating the ParDoFn or processing the element fails
 */
private <K, V> void verifyReifiedIsInTheSameWindows(WindowedValue<KV<K, V>> elem) throws Exception {
  ParDoFn reifyFn =
      new ReifyTimestampAndWindowsParDoFnFactory()
          .create(null, null, null, null, null, null, null);
  SingleValueReceiver<WindowedValue<KV<K, WindowedValue<V>>>> receiver =
      new SingleValueReceiver<>();
  reifyFn.startBundle(receiver);

  // The important thing to test that is not just a restatement of the ParDoFn is that
  // it only produces one element per input
  reifyFn.processElement(elem);

  // Name the pieces once instead of re-walking the getter chain in every assertion.
  KV<K, WindowedValue<V>> reifiedKv = receiver.reified.getValue();
  WindowedValue<V> reifiedValue = reifiedKv.getValue();
  KV<K, V> originalKv = elem.getValue();

  assertThat(reifiedKv.getKey(), equalTo(originalKv.getKey()));
  assertThat(reifiedValue.getValue(), equalTo(originalKv.getValue()));
  assertThat(reifiedValue.getTimestamp(), equalTo(elem.getTimestamp()));
  assertThat(reifiedValue.getWindows(), equalTo(elem.getWindows()));
  assertThat(reifiedValue.getPane(), equalTo(elem.getPane()));
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class IsmSinkFactory method create.
/**
 * Creates an {@link IsmSink} for the file named in {@code spec}.
 *
 * <p>{@code coder} must be a {@link WindowedValueCoder} whose value coder is an
 * {@link IsmRecordCoder}; otherwise {@code checkArgument} rejects it. The bloom filter size limit
 * is {@code BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER} times the worker cache size (MiB converted to
 * bytes), rounded down, and never less than {@code MIN_BLOOM_FILTER_SIZE_BYTES}.
 *
 * @param spec cloud object holding the {@code FILENAME} property
 * @param coder coder for the windowed ISM records; must not be null
 * @param options pipeline options, read as {@link DataflowWorkerHarnessOptions}; must not be null
 * @param executionContext not used by this method
 * @param operationContext not used by this method
 * @return the sink writing to the resolved file resource
 * @throws Exception declared by the factory contract
 */
@Override
public Sink<?> create(CloudObject spec, @Nullable Coder<?> coder, @Nullable PipelineOptions options, @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext) throws Exception {
  PipelineOptions requiredOptions = checkArgumentNotNull(options);
  Coder<?> requiredCoder = checkArgumentNotNull(coder);

  // The validity of this coder is checked in detail by the typed create, below
  @SuppressWarnings("unchecked")
  Coder<WindowedValue<IsmRecord<Object>>> typedCoder =
      (Coder<WindowedValue<IsmRecord<Object>>>) requiredCoder;
  String filename = getString(spec, WorkerPropertyNames.FILENAME);

  // Outer layer: must be a windowed-value coder.
  checkArgument(
      typedCoder instanceof WindowedValueCoder,
      "%s only supports using %s but got %s.",
      IsmSink.class,
      WindowedValueCoder.class,
      typedCoder);
  WindowedValueCoder<IsmRecord<Object>> windowedCoder =
      (WindowedValueCoder<IsmRecord<Object>>) typedCoder;

  // Inner layer: the value coder must be an ISM record coder.
  Coder<IsmRecord<Object>> valueCoder = windowedCoder.getValueCoder();
  checkArgument(
      valueCoder instanceof IsmRecordCoder,
      "%s only supports using %s but got %s.",
      IsmSink.class,
      IsmRecordCoder.class,
      valueCoder);
  @SuppressWarnings("unchecked")
  IsmRecordCoder<Object> ismCoder = (IsmRecordCoder<Object>) valueCoder;

  // Note the conversion from MiB to bytes
  double scaledCacheBytes =
      BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
          * requiredOptions.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb()
          * 1024
          * 1024;
  long roundedLimitBytes = DoubleMath.roundToLong(scaledCacheBytes, RoundingMode.DOWN);
  long bloomFilterSizeLimitBytes = Math.max(MIN_BLOOM_FILTER_SIZE_BYTES, roundedLimitBytes);

  return new IsmSink<>(
      FileSystems.matchNewResource(filename, false), ismCoder, bloomFilterSizeLimitBytes);
}
Aggregations