Search in sources :

Example 11 with ShuffleEntry

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in the Apache Beam project.

From the class ShuffleSinkTest, method runTestWriteUngroupingShuffleSink:

/**
 * Writes every element of {@code expected}, wrapped in the global window, through a
 * {@code ShuffleSink} of kind {@code UNGROUPED} that is backed by a {@code TestShuffleWriter},
 * then checks what the writer recorded.
 *
 * <p>Only the value bytes of each recorded entry are decoded; the entry key is not inspected.
 * The decoded values must equal {@code expected} in order, each must carry exactly the global
 * window, and the sizes returned by the sink writer must match the sizes the test writer saw.
 *
 * @param expected the integers to write and to expect back, in order
 * @throws Exception if writing, closing the sink writer, or decoding fails
 */
private void runTestWriteUngroupingShuffleSink(List<Integer> expected) throws Exception {
    Coder<WindowedValue<Integer>> coder = WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), new GlobalWindows().windowCoder());
    BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
    ShuffleSink<Integer> sink = new ShuffleSink<>(PipelineOptionsFactory.create(), null, ShuffleSink.ShuffleKind.UNGROUPED, coder, context, TestOperationContext.create());
    TestShuffleWriter recordingWriter = new TestShuffleWriter();

    // Sizes reported by the sink writer, one per element added.
    List<Long> reportedSizes = new ArrayList<>();
    try (Sink.SinkWriter<WindowedValue<Integer>> sinkWriter = sink.writer(recordingWriter, "dataset")) {
        for (Integer element : expected) {
            WindowedValue<Integer> windowed = WindowedValue.valueInGlobalWindow(element);
            reportedSizes.add(sinkWriter.add(windowed));
        }
    }

    // The sink writer has been closed by now; inspect what reached the test writer.
    List<ShuffleEntry> written = recordingWriter.getRecords();
    List<Integer> decoded = new ArrayList<>();
    for (ShuffleEntry entry : written) {
        // The key is deliberately not examined here.
        byte[] payload = entry.getValue();
        WindowedValue<Integer> windowed = CoderUtils.decodeFromByteArray(coder, payload);
        Assert.assertEquals(Lists.newArrayList(GlobalWindow.INSTANCE), windowed.getWindows());
        decoded.add(windowed.getValue());
    }

    Assert.assertEquals(expected, decoded);
    Assert.assertEquals(recordingWriter.getSizes(), reportedSizes);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) ArrayList(java.util.ArrayList) ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) Sink(org.apache.beam.runners.dataflow.worker.util.common.worker.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 12 with ShuffleEntry

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in the Apache Beam project.

From the class ShuffleSinkTest, method runTestWriteGroupingShuffleSink:

/**
 * Writes every pair of {@code expected} through a {@code ShuffleSink} of kind
 * {@code GROUP_KEYS} that is backed by a {@code TestShuffleWriter}, then checks what the writer
 * recorded.
 *
 * <p>Each pair is written as a windowed value built from {@code timestamp} and {@code window},
 * both of which are defined outside this method. For every recorded entry the secondary key must
 * decode (via {@code InstantCoder}) to {@code timestamp}, and the key and value bytes are decoded
 * separately with the integer and UTF-8 string coders. The decoded pairs must equal
 * {@code expected} in order, and the sizes returned by the sink writer must match the sizes the
 * test writer saw.
 *
 * @param expected the key/value pairs to write and to expect back, in order
 * @throws Exception if writing, closing the sink writer, or decoding fails
 */
void runTestWriteGroupingShuffleSink(List<KV<Integer, String>> expected) throws Exception {
    BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
    ShuffleSink<KV<Integer, String>> sink = new ShuffleSink<>(PipelineOptionsFactory.create(), null, ShuffleSink.ShuffleKind.GROUP_KEYS, WindowedValue.getFullCoder(KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), IntervalWindow.getCoder()), context, TestOperationContext.create());
    TestShuffleWriter recordingWriter = new TestShuffleWriter();

    // Sizes reported by the sink writer, one per pair added.
    List<Long> reportedSizes = new ArrayList<>();
    try (SinkWriter<WindowedValue<KV<Integer, String>>> sinkWriter = sink.writer(recordingWriter, "dataset")) {
        for (KV<Integer, String> pair : expected) {
            KV<Integer, String> copy = KV.of(pair.getKey(), pair.getValue());
            reportedSizes.add(sinkWriter.add(WindowedValue.of(copy, timestamp, Lists.newArrayList(window), PaneInfo.NO_FIRING)));
        }
    }

    // The sink writer has been closed by now; inspect what reached the test writer.
    List<ShuffleEntry> written = recordingWriter.getRecords();
    List<KV<Integer, String>> decoded = new ArrayList<>();
    for (ShuffleEntry entry : written) {
        byte[] rawKey = entry.getKey();
        byte[] rawValue = entry.getValue();
        // The secondary key is expected to hold the encoded timestamp.
        Assert.assertEquals(timestamp, CoderUtils.decodeFromByteArray(InstantCoder.of(), entry.getSecondaryKey()));
        Integer decodedKey = CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), rawKey);
        String decodedValue = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), rawValue);
        decoded.add(KV.of(decodedKey, decodedValue));
    }

    Assert.assertEquals(expected, decoded);
    Assert.assertEquals(recordingWriter.getSizes(), reportedSizes);
}
Also used : ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 13 with ShuffleEntry

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in the Apache Beam project.

From the class PartitioningShuffleReaderTest, method runTestReadFromShuffle:

/**
 * Round-trips {@code expected} through a {@code PARTITION_KEYS} {@code ShuffleSink} and a
 * {@code PartitioningShuffleReader}, using in-memory test doubles for the shuffle itself.
 *
 * <p>Write phase: every element is added to the sink; the number of recorded entries must equal
 * {@code expected.size()} and the sizes returned by the sink writer must match the sizes the test
 * writer saw.
 *
 * <p>Read phase: the recorded entries are fed to a {@code TestShuffleReader} and iterated. After
 * the iterator is exhausted a further {@code advance()} must return false and
 * {@code getCurrent()} must throw {@link NoSuchElementException}. The shuffle reader must be open
 * before iteration and closed afterwards, the elements read must equal {@code expected}, and the
 * sizes seen by the reader observer must equal the lengths of the recorded entries.
 *
 * @param expected the windowed key/value elements to write and to expect back, in order
 * @throws Exception if writing, reading, or closing either side fails
 */
private void runTestReadFromShuffle(List<WindowedValue<KV<Integer, String>>> expected) throws Exception {
    Coder<WindowedValue<KV<Integer, String>>> coder = WindowedValue.getFullCoder(KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), IntervalWindow.getCoder());
    BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");

    // ---- Write phase: PARTITION_KEYS ShuffleSink into the recording writer. ----
    ShuffleSink<KV<Integer, String>> sink = new ShuffleSink<>(PipelineOptionsFactory.create(), null, ShuffleSink.ShuffleKind.PARTITION_KEYS, coder, context, TestOperationContext.create());
    TestShuffleWriter recordingWriter = new TestShuffleWriter();
    List<Long> reportedSizes = new ArrayList<>();
    try (Sink.SinkWriter<WindowedValue<KV<Integer, String>>> sinkWriter = sink.writer(recordingWriter, "dataset")) {
        for (WindowedValue<KV<Integer, String>> element : expected) {
            reportedSizes.add(sinkWriter.add(element));
        }
    }
    List<ShuffleEntry> written = recordingWriter.getRecords();
    Assert.assertEquals(expected.size(), written.size());
    Assert.assertEquals(recordingWriter.getSizes(), reportedSizes);

    // ---- Read phase: PartitioningShuffleReader over the recorded entries. ----
    PartitioningShuffleReader<Integer, String> reader = new PartitioningShuffleReader<>(PipelineOptionsFactory.create(), null, null, null, coder, context, TestOperationContext.create());
    ExecutorTestUtils.TestReaderObserver sizeObserver = new ExecutorTestUtils.TestReaderObserver(reader);
    TestShuffleReader entrySource = new TestShuffleReader();
    List<Integer> entryLengths = new ArrayList<>();
    for (ShuffleEntry entry : written) {
        entryLengths.add(entry.length());
        entrySource.addEntry(entry);
    }

    List<WindowedValue<KV<Integer, String>>> readBack = new ArrayList<>();
    Assert.assertFalse(entrySource.isClosed());
    try (PartitioningShuffleReaderIterator<Integer, String> cursor = reader.iterator(entrySource)) {
        boolean hasCurrent = cursor.start();
        while (hasCurrent) {
            readBack.add(cursor.getCurrent());
            hasCurrent = cursor.advance();
        }
        // Once exhausted, the iterator must stay exhausted and refuse to yield an element.
        Assert.assertFalse(cursor.advance());
        try {
            cursor.getCurrent();
            Assert.fail("should have failed");
        } catch (NoSuchElementException ignored) {
            // As expected.
        }
    }

    Assert.assertTrue(entrySource.isClosed());
    Assert.assertEquals(expected, readBack);
    Assert.assertEquals(entryLengths, sizeObserver.getActualSizes());
}
Also used : ExecutorTestUtils(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils) ArrayList(java.util.ArrayList) ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) Sink(org.apache.beam.runners.dataflow.worker.util.common.worker.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) NoSuchElementException(java.util.NoSuchElementException)

Example 14 with ShuffleEntry

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in the Apache Beam project.

From the class TestShuffleWriter, method write:

/**
 * Parses {@code chunk} into shuffle entries and records each entry together with its length.
 *
 * <p>The chunk is read as a sequence of records, each consisting of three length-prefixed byte
 * arrays in this order: key, sort key, value. Every length prefix is read with
 * {@link DataInputStream#readInt()}. Parsing continues while the stream reports available bytes.
 *
 * @param chunk the serialized records to parse
 * @throws IOException if the chunk ends in the middle of a record
 * @throws AssertionError if this writer has already been closed
 */
@Override
public void write(byte[] chunk) throws IOException {
    if (closed) {
        throw new AssertionError("shuffle writer already closed");
    }
    ByteArrayInputStream rawBytes = new ByteArrayInputStream(chunk);
    DataInputStream input = new DataInputStream(rawBytes);
    while (input.available() > 0) {
        byte[] keyBytes = readLengthPrefixedBytes(input);
        byte[] sortKeyBytes = readLengthPrefixedBytes(input);
        byte[] valueBytes = readLengthPrefixedBytes(input);
        ShuffleEntry parsed = new ShuffleEntry(keyBytes, sortKeyBytes, valueBytes);
        records.add(parsed);
        long parsedLength = parsed.length();
        sizes.add(parsedLength);
    }
}

/** Reads one int length prefix from {@code input}, then exactly that many bytes. */
private byte[] readLengthPrefixedBytes(DataInputStream input) throws IOException {
    byte[] buffer = new byte[input.readInt()];
    input.readFully(buffer);
    return buffer;
}
Also used : ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) ByteArrayInputStream(java.io.ByteArrayInputStream) DataInputStream(java.io.DataInputStream)

Example 15 with ShuffleEntry

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry in the Apache Beam project.

From the class TestShuffleReaderTest, method readShuffleEntries:

/**
 * Drains {@code iter} and converts every entry into nested string pairs.
 *
 * <p>Each entry becomes {@code KV(key, KV(secondaryKey, value))}, where all three byte arrays
 * are decoded as UTF-8 strings.
 *
 * @param iter the entries to consume; it is advanced until {@code hasNext()} returns false
 * @return the decoded entries, in iteration order
 */
private List<KV<String, KV<String, String>>> readShuffleEntries(Reiterator<ShuffleEntry> iter) {
    List<KV<String, KV<String, String>>> decodedEntries = new ArrayList<>();
    while (iter.hasNext()) {
        ShuffleEntry current = iter.next();
        String primaryKey = new String(current.getKey(), StandardCharsets.UTF_8);
        String secondaryKey = new String(current.getSecondaryKey(), StandardCharsets.UTF_8);
        String payload = new String(current.getValue(), StandardCharsets.UTF_8);
        decodedEntries.add(KV.of(primaryKey, KV.of(secondaryKey, payload)));
    }
    return decodedEntries;
}
Also used : ShuffleEntry(org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry) ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV)

Aggregations

ShuffleEntry (org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry)15 ArrayList (java.util.ArrayList)9 WindowedValue (org.apache.beam.sdk.util.WindowedValue)7 KV (org.apache.beam.sdk.values.KV)7 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)6 Test (org.junit.Test)5 Sink (org.apache.beam.runners.dataflow.worker.util.common.worker.Sink)4 Base64.encodeBase64URLSafeString (com.google.api.client.util.Base64.encodeBase64URLSafeString)3 ByteArrayInputStream (java.io.ByteArrayInputStream)3 ExecutorTestUtils (org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils)3 Position (com.google.api.services.dataflow.model.Position)2 DataInputStream (java.io.DataInputStream)2 NoSuchElementException (java.util.NoSuchElementException)2 ReaderTestUtils.approximateSplitRequestAtPosition (org.apache.beam.runners.dataflow.worker.ReaderTestUtils.approximateSplitRequestAtPosition)2 ReaderTestUtils.splitRequestAtPosition (org.apache.beam.runners.dataflow.worker.ReaderTestUtils.splitRequestAtPosition)2 ByteArrayShufflePosition (org.apache.beam.runners.dataflow.worker.util.common.worker.ByteArrayShufflePosition)2 GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows)2 ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress)1 Closeable (java.io.Closeable)1 HashMap (java.util.HashMap)1