use of com.google.api.services.dataflow.model.Position in project beam by apache.
the class GroupingShuffleReaderTest method testReadFromShuffleAndDynamicSplit.
/**
 * Reads from a test shuffle holding two shards, requests a dynamic split at the start of the
 * second shard, and checks that the iterator then yields only the first shard's records.
 *
 * <p>Also checks that a split request carrying an empty {@code Position} is rejected, that
 * {@code getCurrent()} and {@code hasNext()} can be called repeatedly without changing the
 * outcome, and that the per-dataset bytes counter matches the records actually read.
 *
 * @throws Exception if any of the reader, coder or shuffle helpers used here throws
 */
@Test
public void testReadFromShuffleAndDynamicSplit() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(options, "testStage");
  TestOperationContext operationContext = TestOperationContext.create();
  GroupingShuffleReader<Integer, Integer> groupingShuffleReader =
      new GroupingShuffleReader<>(
          options,
          null,
          null,
          null,
          WindowedValue.getFullCoder(
              KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())),
              IntervalWindow.getCoder()),
          context,
          operationContext,
          ShuffleReadCounterFactory.INSTANCE,
          false);
  groupingShuffleReader.perOperationPerDatasetBytesCounter =
      operationContext
          .counterFactory()
          .longSum(CounterName.named("dax-shuffle-test-wf-read-bytes"));

  TestShuffleReader shuffleReader = new TestShuffleReader();
  final int kNumRecords = 10;
  final int kFirstShard = 0;
  final int kSecondShard = 1;

  // Set up two shards with kNumRecords records each. Keys are unique across both shards, so each
  // key has exactly one value (asserted below via j == 1) and every record is given its own
  // position, fabricated from the shard id and the encoded key.
  for (int i = 0; i < kNumRecords; ++i) {
    byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i);
    ShuffleEntry entry =
        new ShuffleEntry(
            fabricatePosition(kFirstShard, keyByte), keyByte, EMPTY_BYTE_ARRAY, keyByte);
    shuffleReader.addEntry(entry);
  }
  for (int i = kNumRecords; i < 2 * kNumRecords; ++i) {
    byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i);
    ShuffleEntry entry =
        new ShuffleEntry(
            fabricatePosition(kSecondShard, keyByte), keyByte, EMPTY_BYTE_ARRAY, keyByte);
    shuffleReader.addEntry(entry);
  }

  // Number of records consumed so far; also the key expected from the next advance().
  int i = 0;
  assertFalse(shuffleReader.isClosed());
  try (GroupingShuffleReaderIterator<Integer, Integer> iter =
      groupingShuffleReader.iterator(shuffleReader)) {
    // Poke the iterator so we can test dynamic splitting.
    assertTrue(iter.start());
    ++i;

    // A split request with an empty Position must be refused.
    assertNull(iter.requestDynamicSplit(splitRequestAtPosition(new Position())));

    // Split at the shard boundary
    NativeReader.DynamicSplitResult dynamicSplitResult =
        iter.requestDynamicSplit(
            splitRequestAtPosition(makeShufflePosition(kSecondShard, null)));
    assertNotNull(dynamicSplitResult);
    assertEquals(
        encodeBase64URLSafeString(fabricatePosition(kSecondShard).getPosition()),
        positionFromSplitResult(dynamicSplitResult).getShufflePosition());

    for (; iter.advance(); ++i) {
      // iter.getCurrent() is supposed to be side-effect-free and give the same result if called
      // repeatedly. Test that this is indeed the case.
      iter.getCurrent();
      iter.getCurrent();

      KV<Integer, Reiterable<Integer>> elem = iter.getCurrent().getValue();
      int key = elem.getKey();
      // JUnit order is (expected, actual): the loop counter is the expected key.
      assertEquals(i, key);

      Reiterable<Integer> valuesIterable = elem.getValue();
      Reiterator<Integer> valuesIterator = valuesIterable.iterator();

      int j = 0;
      while (valuesIterator.hasNext()) {
        // hasNext() is called repeatedly on purpose: it must not consume anything.
        assertTrue(valuesIterator.hasNext());
        assertTrue(valuesIterator.hasNext());

        int value = valuesIterator.next();
        assertEquals(i, value);
        ++j;
      }
      assertFalse(valuesIterator.hasNext());
      assertFalse(valuesIterator.hasNext());
      assertEquals(1, j);
    }
    assertFalse(iter.advance());
  }
  assertTrue(shuffleReader.isClosed());
  // Only the first shard's records are read once the split has been accepted.
  assertEquals(kNumRecords, i);
  // There are 10 Shuffle records that each encode an integer key (4 bytes) and integer value (4
  // bytes). We therefore expect to read 80 bytes.
  assertEquals(
      80L, (long) groupingShuffleReader.perOperationPerDatasetBytesCounter.getAggregate());
}
use of com.google.api.services.dataflow.model.Position in project beam by apache.
the class GroupingShuffleReaderTest method testGetApproximateProgress.
/**
 * Iterates over a test shuffle of ten single-value records and checks, for each record, that the
 * reported progress carries the shuffle position that record was added with. Once the input is
 * exhausted, a dynamic split request must be refused.
 *
 * @throws Exception if any of the reader, coder or shuffle helpers used here throws
 */
@Test
public void testGetApproximateProgress() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  BatchModeExecutionContext batchContext =
      BatchModeExecutionContext.forTesting(options, "testStage");
  TestOperationContext opContext = TestOperationContext.create();
  GroupingShuffleReader<Integer, Integer> reader =
      new GroupingShuffleReader<>(
          options,
          null,
          null,
          null,
          WindowedValue.getFullCoder(
              KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())),
              IntervalWindow.getCoder()),
          batchContext,
          opContext,
          ShuffleReadCounterFactory.INSTANCE,
          false);

  // Positions of every record handed to the shuffle, in insertion order.
  List<ByteArrayShufflePosition> expectedPositions = new ArrayList<>();
  TestShuffleReader shuffleReader = new TestShuffleReader();
  final int kNumRecords = 10;
  for (int recordIndex = 0; recordIndex < kNumRecords; ++recordIndex) {
    ByteArrayShufflePosition recordPosition = fabricatePosition(recordIndex);
    byte[] encodedKey = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), recordIndex);
    expectedPositions.add(recordPosition);
    shuffleReader.addEntry(
        new ShuffleEntry(recordPosition, encodedKey, EMPTY_BYTE_ARRAY, encodedKey));
  }

  assertFalse(shuffleReader.isClosed());
  try (GroupingShuffleReaderIterator<Integer, Integer> iter = reader.iterator(shuffleReader)) {
    // Kept boxed so the key comparison below resolves to assertEquals(Object, Object).
    Integer expectedKey = 0;
    boolean hasRecord = iter.start();
    while (hasRecord) {
      ApproximateReportedProgress reported = readerProgressToCloudProgress(iter.getProgress());
      assertNotNull(reported.getPosition().getShufflePosition());

      // The reported position must be the one this record was inserted with.
      assertEquals(
          expectedPositions.get(expectedKey).encodeBase64(),
          reported.getPosition().getShufflePosition());

      WindowedValue<KV<Integer, Reiterable<Integer>>> current = iter.getCurrent();
      assertEquals(expectedKey, current.getValue().getKey());

      expectedKey++;
      hasRecord = iter.advance();
    }
    assertFalse(iter.advance());

    // Everything has been consumed, so a split can no longer be honoured.
    Position splitProposal = new Position();
    String encodedStop = encodeBase64URLSafeString(fabricatePosition(0).getPosition());
    splitProposal.setShufflePosition(encodedStop);
    assertNull(
        iter.requestDynamicSplit(
            toDynamicSplitRequest(approximateSplitRequestAtPosition(splitProposal))));
  }
  assertTrue(shuffleReader.isClosed());
}
use of com.google.api.services.dataflow.model.Position in project beam by apache.
the class WorkItemStatusClientTest method populateSplitResultNativeReader.
/**
 * Checks that populating a work item status from a position-based split result sets the stop
 * position and leaves the dynamic source split unset.
 *
 * @throws Exception if the status client or test helpers throw
 */
@Test
public void populateSplitResultNativeReader() throws Exception {
  WorkItemStatus workItemStatus = new WorkItemStatus();
  statusClient.setWorker(worker, executionContext);

  // Build a split result that wraps a position at index 42.
  Position stopPosition = ReaderTestUtils.positionAtIndex(42L);
  DataflowReaderPosition readerPosition = new DataflowReaderPosition(stopPosition);
  DynamicSplitResult splitResult =
      new NativeReader.DynamicSplitResultWithPosition(readerPosition);

  statusClient.populateSplitResult(workItemStatus, splitResult);

  assertThat(workItemStatus.getStopPosition(), equalTo(stopPosition));
  assertThat(workItemStatus.getDynamicSourceSplit(), nullValue());
}
Aggregations