Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in project beam by apache.
In class ShuffleSinkTest, method runTestWriteUngroupingShuffleSink:
private void runTestWriteUngroupingShuffleSink(List<Integer> expected) throws Exception {
  Coder<WindowedValue<Integer>> windowedValueCoder =
      WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), new GlobalWindows().windowCoder());
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
  ShuffleSink<Integer> shuffleSink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          ShuffleSink.ShuffleKind.UNGROUPED,
          windowedValueCoder,
          executionContext,
          TestOperationContext.create());
  TestShuffleWriter shuffleWriter = new TestShuffleWriter();
  List<Long> actualSizes = new ArrayList<>();
  // Write every element through the ungrouped sink, recording the size reported per element.
  try (Sink.SinkWriter<WindowedValue<Integer>> shuffleSinkWriter =
      shuffleSink.writer(shuffleWriter, "dataset")) {
    for (Integer value : expected) {
      actualSizes.add(shuffleSinkWriter.add(WindowedValue.valueInGlobalWindow(value)));
    }
  }
  // Decode the written shuffle entries and verify the values round-trip.
  List<ShuffleEntry> records = shuffleWriter.getRecords();
  List<Integer> actual = new ArrayList<>();
  for (ShuffleEntry record : records) {
    // Ignore the key.
    byte[] valueBytes = record.getValue();
    WindowedValue<Integer> value = CoderUtils.decodeFromByteArray(windowedValueCoder, valueBytes);
    Assert.assertEquals(Lists.newArrayList(GlobalWindow.INSTANCE), value.getWindows());
    actual.add(value.getValue());
  }
  Assert.assertEquals(expected, actual);
  Assert.assertEquals(shuffleWriter.getSizes(), actualSizes);
}
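
For reference, a hypothetical JUnit caller of this helper might look like the sketch below; the test name and sample values are illustrative, assuming org.junit.Test and java.util.Arrays are imported.

  @Test
  public void testWriteUngroupingShuffleSink() throws Exception {
    // Any list of integers exercises the same write/decode round-trip.
    runTestWriteUngroupingShuffleSink(Arrays.asList(1, 2, 3, 4));
  }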
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in project beam by apache.
In class ShuffleSinkTest, method runTestWriteGroupingShuffleSink:
void runTestWriteGroupingShuffleSink(List<KV<Integer, String>> expected) throws Exception {
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
  ShuffleSink<KV<Integer, String>> shuffleSink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          ShuffleSink.ShuffleKind.GROUP_KEYS,
          WindowedValue.getFullCoder(
              KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), IntervalWindow.getCoder()),
          executionContext,
          TestOperationContext.create());
  TestShuffleWriter shuffleWriter = new TestShuffleWriter();
  List<Long> actualSizes = new ArrayList<>();
  // `timestamp` and `window` are fixtures defined elsewhere in the test class.
  try (SinkWriter<WindowedValue<KV<Integer, String>>> shuffleSinkWriter =
      shuffleSink.writer(shuffleWriter, "dataset")) {
    for (KV<Integer, String> kv : expected) {
      actualSizes.add(
          shuffleSinkWriter.add(
              WindowedValue.of(
                  KV.of(kv.getKey(), kv.getValue()), timestamp, Lists.newArrayList(window), PaneInfo.NO_FIRING)));
    }
  }
  List<ShuffleEntry> records = shuffleWriter.getRecords();
  List<KV<Integer, String>> actual = new ArrayList<>();
  for (ShuffleEntry record : records) {
    // For GROUP_KEYS, the primary key holds the encoded element key, the secondary key
    // holds the encoded timestamp, and the value holds the encoded element value.
    byte[] keyBytes = record.getKey();
    byte[] valueBytes = record.getValue();
    Assert.assertEquals(
        timestamp, CoderUtils.decodeFromByteArray(InstantCoder.of(), record.getSecondaryKey()));
    Integer key = CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), keyBytes);
    String valueElem = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), valueBytes);
    actual.add(KV.of(key, valueElem));
  }
  Assert.assertEquals(expected, actual);
  Assert.assertEquals(shuffleWriter.getSizes(), actualSizes);
}
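
A hypothetical invocation of this helper (the test name and key/value pairs are illustrative; it assumes the `timestamp` and `window` fixtures referenced above, org.junit.Test, and java.util.Arrays):

  @Test
  public void testWriteGroupingShuffleSink() throws Exception {
    // Repeated keys are deliberate: GROUP_KEYS mode shuffles equal keys to the same group.
    runTestWriteGroupingShuffleSink(
        Arrays.asList(KV.of(1, "in 1a"), KV.of(1, "in 1b"), KV.of(2, "in 2a"), KV.of(3, "in 3a")));
  }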
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in project beam by apache.
In class PartitioningShuffleReaderTest, method runTestReadFromShuffle:
private void runTestReadFromShuffle(List<WindowedValue<KV<Integer, String>>> expected)
    throws Exception {
  Coder<WindowedValue<KV<Integer, String>>> elemCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), IntervalWindow.getCoder());
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
  // Write to shuffle with PARTITION_KEYS ShuffleSink.
  ShuffleSink<KV<Integer, String>> shuffleSink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          ShuffleSink.ShuffleKind.PARTITION_KEYS,
          elemCoder,
          executionContext,
          TestOperationContext.create());
  TestShuffleWriter shuffleWriter = new TestShuffleWriter();
  List<Long> actualSizes = new ArrayList<>();
  try (Sink.SinkWriter<WindowedValue<KV<Integer, String>>> shuffleSinkWriter =
      shuffleSink.writer(shuffleWriter, "dataset")) {
    for (WindowedValue<KV<Integer, String>> value : expected) {
      actualSizes.add(shuffleSinkWriter.add(value));
    }
  }
  List<ShuffleEntry> records = shuffleWriter.getRecords();
  Assert.assertEquals(expected.size(), records.size());
  Assert.assertEquals(shuffleWriter.getSizes(), actualSizes);
  // Read from shuffle with PartitioningShuffleReader.
  PartitioningShuffleReader<Integer, String> partitioningShuffleReader =
      new PartitioningShuffleReader<>(
          PipelineOptionsFactory.create(),
          null,
          null,
          null,
          elemCoder,
          executionContext,
          TestOperationContext.create());
  ExecutorTestUtils.TestReaderObserver observer =
      new ExecutorTestUtils.TestReaderObserver(partitioningShuffleReader);
  TestShuffleReader shuffleReader = new TestShuffleReader();
  List<Integer> expectedSizes = new ArrayList<>();
  // Feed the written entries back through the reader and track their byte sizes.
  for (ShuffleEntry record : records) {
    expectedSizes.add(record.length());
    shuffleReader.addEntry(record);
  }
  List<WindowedValue<KV<Integer, String>>> actual = new ArrayList<>();
  Assert.assertFalse(shuffleReader.isClosed());
  try (PartitioningShuffleReaderIterator<Integer, String> iter =
      partitioningShuffleReader.iterator(shuffleReader)) {
    for (boolean more = iter.start(); more; more = iter.advance()) {
      actual.add(iter.getCurrent());
    }
    // Once exhausted, advance() stays false and getCurrent() must throw.
    Assert.assertFalse(iter.advance());
    try {
      iter.getCurrent();
      Assert.fail("should have failed");
    } catch (NoSuchElementException exn) {
      // As expected.
    }
  }
  Assert.assertTrue(shuffleReader.isClosed());
  Assert.assertEquals(expected, actual);
  Assert.assertEquals(expectedSizes, observer.getActualSizes());
}
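
A hypothetical caller builds the windowed KV values itself, since runTestReadFromShuffle writes them through unchanged; the timestamp, window bounds, and data below are illustrative only and assume Joda-Time's Instant and Duration (as used by Beam), org.junit.Test, and java.util.Arrays:

  @Test
  public void testReadFromShuffle() throws Exception {
    Instant timestamp = new Instant(123000L);
    IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(Duration.millis(1000)));
    runTestReadFromShuffle(
        Arrays.asList(
            WindowedValue.of(KV.of(1, "in 1a"), timestamp, Lists.newArrayList(window), PaneInfo.NO_FIRING),
            WindowedValue.of(KV.of(2, "in 2a"), timestamp, Lists.newArrayList(window), PaneInfo.NO_FIRING)));
  }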
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in project beam by apache.
In class TestShuffleWriter, method write:
@Override
public void write(byte[] chunk) throws IOException {
  if (closed) {
    throw new AssertionError("shuffle writer already closed");
  }
  // A chunk is a sequence of length-prefixed triples: key, secondary (sort) key, value.
  DataInputStream dais = new DataInputStream(new ByteArrayInputStream(chunk));
  while (dais.available() > 0) {
    byte[] key = new byte[dais.readInt()];
    dais.readFully(key);
    byte[] sortKey = new byte[dais.readInt()];
    dais.readFully(sortKey);
    byte[] value = new byte[dais.readInt()];
    dais.readFully(value);
    ShuffleEntry entry = new ShuffleEntry(key, sortKey, value);
    records.add(entry);
    long size = entry.length();
    sizes.add(size);
  }
}
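
The parsing loop above implies a simple chunk framing: each entry is three byte arrays (key, secondary key, value), each preceded by a 4-byte big-endian length. A minimal sketch of an encoder producing that layout, using only JDK streams and the ShuffleEntry accessors shown elsewhere in this section (an illustration of the format, not Beam's actual chunking writer):

  static byte[] encodeChunk(List<ShuffleEntry> entries) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    for (ShuffleEntry entry : entries) {
      // Each field is written as writeInt(length) followed by the raw bytes,
      // matching what write(byte[] chunk) reads back with readInt()/readFully().
      for (byte[] field : new byte[][] {entry.getKey(), entry.getSecondaryKey(), entry.getValue()}) {
        out.writeInt(field.length);
        out.write(field);
      }
    }
    out.flush();
    return bytes.toByteArray();
  }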
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in project beam by apache.
In class TestShuffleReaderTest, method readShuffleEntries:
private List<KV<String, KV<String, String>>> readShuffleEntries(Reiterator<ShuffleEntry> iter) {
  List<KV<String, KV<String, String>>> actual = new ArrayList<>();
  while (iter.hasNext()) {
    ShuffleEntry entry = iter.next();
    actual.add(
        KV.of(
            new String(entry.getKey(), StandardCharsets.UTF_8),
            KV.of(
                new String(entry.getSecondaryKey(), StandardCharsets.UTF_8),
                new String(entry.getValue(), StandardCharsets.UTF_8))));
  }
  return actual;
}
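
Since this helper decodes every field as UTF-8, entries fed to it can be built with the ShuffleEntry constructor seen in TestShuffleWriter.write above; a hypothetical builder (the name entryOf is illustrative, not part of the Beam sources):

  private static ShuffleEntry entryOf(String key, String secondaryKey, String value) {
    // Encode with UTF-8 so readShuffleEntries can decode the fields the same way.
    return new ShuffleEntry(
        key.getBytes(StandardCharsets.UTF_8),
        secondaryKey.getBytes(StandardCharsets.UTF_8),
        value.getBytes(StandardCharsets.UTF_8));
  }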