
Example 1 with ApproximateReportedProgress

Use of com.google.api.services.dataflow.model.ApproximateReportedProgress in project beam by apache.

From the class AvroByteReaderTest, method testGetProgress.

@Test
public void testGetProgress() throws Exception {
    // Ensure that AvroByteReader reports progress from the underlying AvroSource.
    // 4 blocks with 4 split points.
    List<List<String>> elements = generateInputBlocks(4, 10, 10);
    Coder<String> coder = StringUtf8Coder.of();
    AvroFileInfo<String> fileInfo = initInputFile(elements, coder);
    AvroByteReader<String> reader = new AvroByteReader<String>(fileInfo.filename, 0L, Long.MAX_VALUE, coder, PipelineOptionsFactory.create());
    AvroByteFileIterator iterator = reader.iterator();
    Assert.assertTrue(iterator.start());
    ApproximateReportedProgress progress = readerProgressToCloudProgress(iterator.getProgress());
    Assert.assertEquals(0.0, progress.getConsumedParallelism().getValue(), 1e-6);
    Assert.assertEquals(0.0, progress.getFractionConsumed(), 1e-6);
    Assert.assertNull(progress.getRemainingParallelism());
    Assert.assertTrue(iterator.advance());
    progress = readerProgressToCloudProgress(iterator.getProgress());
    Assert.assertEquals(1.0, progress.getConsumedParallelism().getValue(), 1e-6);
    Assert.assertNull(progress.getRemainingParallelism());
    // Advance to the end of last block and check consumed parallelism along the way.
    Assert.assertTrue(iterator.advance());
    progress = readerProgressToCloudProgress(iterator.getProgress());
    Assert.assertEquals(2.0, progress.getConsumedParallelism().getValue(), 1e-6);
    Assert.assertNull(progress.getRemainingParallelism());
    Assert.assertTrue(iterator.advance());
    progress = readerProgressToCloudProgress(iterator.getProgress());
    Assert.assertEquals(3.0, progress.getConsumedParallelism().getValue(), 1e-6);
    Assert.assertEquals(1.0, progress.getRemainingParallelism().getValue(), 1e-6);
    Assert.assertFalse(iterator.advance());
    progress = readerProgressToCloudProgress(iterator.getProgress());
    Assert.assertEquals(4.0, progress.getConsumedParallelism().getValue(), 1e-6);
    Assert.assertEquals(0.0, progress.getRemainingParallelism().getValue(), 1e-6);
    Assert.assertEquals(1.0, progress.getFractionConsumed(), 1e-6);
}
Also used: AvroByteFileIterator (org.apache.beam.runners.dataflow.worker.AvroByteReader.AvroByteFileIterator), ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress), ArrayList (java.util.ArrayList), List (java.util.List), Test (org.junit.Test)
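
The assertions in Example 1 read consumed and remaining parallelism off the converted cloud progress. For reference, here is a minimal, self-contained sketch of the same read pattern against a hand-built ApproximateReportedProgress. The values are illustrative, and the fluent setters of the generated ApproximateReportedProgress/ReportedParallelism model classes are assumed here rather than taken from the test above.

import com.google.api.services.dataflow.model.ApproximateReportedProgress;
import com.google.api.services.dataflow.model.ReportedParallelism;
import org.junit.Assert;
import org.junit.Test;

@Test
public void sketchReadingApproximateReportedProgress() {
    // Hypothetical snapshot: one split point consumed, remaining parallelism not yet known.
    ApproximateReportedProgress progress =
        new ApproximateReportedProgress()
            .setFractionConsumed(0.25)
            .setConsumedParallelism(new ReportedParallelism().setValue(1.0));
    // The same reads the test performs on the readerProgressToCloudProgress(...) output.
    Assert.assertEquals(1.0, progress.getConsumedParallelism().getValue(), 1e-6);
    Assert.assertEquals(0.25, progress.getFractionConsumed(), 1e-6);
    Assert.assertNull(progress.getRemainingParallelism());
}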

Example 2 with ApproximateReportedProgress

Use of com.google.api.services.dataflow.model.ApproximateReportedProgress in project beam by apache.

From the class ReadOperationTest, method testCheckpoint.

@Test
public void testCheckpoint() throws Exception {
    InMemoryReader<String> reader = new InMemoryReader<>(Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"), 0, 10, StringUtf8Coder.of());
    final ReadOperation[] readOperationHolder = new ReadOperation[1];
    OutputReceiver receiver = new OutputReceiver() {

        @Override
        public void process(Object elem) throws Exception {
            ReadOperation readOperation = readOperationHolder[0];
            if ("1".equals(elem)) {
                NativeReader.DynamicSplitResultWithPosition split = (NativeReader.DynamicSplitResultWithPosition) readOperation.requestCheckpoint();
                assertNotNull(split);
                assertEquals(positionAtIndex(2L), toCloudPosition(split.getAcceptedPosition()));
                // Check that the progress has been recomputed.
                ApproximateReportedProgress progress = readerProgressToCloudProgress(readOperation.getProgress());
                assertEquals(1, progress.getPosition().getRecordIndex().longValue());
            }
        }
    };
    ReadOperation readOperation = ReadOperation.forTest(reader, receiver, context);
    readOperation.setProgressUpdatePeriodMs(ReadOperation.UPDATE_ON_EACH_ITERATION);
    readOperationHolder[0] = readOperation;
    // An unstarted ReadOperation refuses checkpoint requests.
    assertNull(readOperation.requestCheckpoint());
    readOperation.start();
    readOperation.finish();
    // Operation is now finished. Check that it refuses a checkpoint request.
    assertNull(readOperation.requestCheckpoint());
}
Also used: InMemoryReader (org.apache.beam.runners.dataflow.worker.InMemoryReader), ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress), Test (org.junit.Test)

Example 3 with ApproximateReportedProgress

Use of com.google.api.services.dataflow.model.ApproximateReportedProgress in project beam by apache.

From the class ReadOperationTest, method testGetProgress.

@Test
public void testGetProgress() throws Exception {
    MockReaderIterator iterator = new MockReaderIterator(0, 5);
    MockOutputReceiver receiver = new MockOutputReceiver();
    ManualScheduler scheduler = new ManualScheduler();
    final ReadOperation readOperation = ReadOperation.forTest(new MockReader(iterator), receiver, scheduler.getExecutor(), context);
    Thread thread = runReadLoopInThread(readOperation);
    for (int i = 0; i < 5; ++i) {
        // Reader currently blocked in start()/advance().
        // Ensure that getProgress() doesn't block while the reader advances.
        ApproximateReportedProgress progress = readerProgressToCloudProgress(readOperation.getProgress());
        Long observedIndex = (progress == null) ? null : progress.getPosition().getRecordIndex().longValue();
        assertTrue("Actual: " + observedIndex + " instead of " + i, (i == 0 && progress == null) || i == observedIndex || i == observedIndex + 1);
        iterator.offerNext(i);
        // Now the reader is not blocked (instead the receiver is blocked): progress can be
        // updated. Wait for it to be updated.
        scheduler.runOnce();
        receiver.unblockProcess();
    }
    thread.join();
}
Also used: ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress), Matchers.anyLong (org.mockito.Matchers.anyLong), Test (org.junit.Test)

Example 4 with ApproximateReportedProgress

Use of com.google.api.services.dataflow.model.ApproximateReportedProgress in project beam by apache.

From the class WorkerCustomSourcesTest, method testGetReaderProgress.

@Test
public void testGetReaderProgress() {
    ApproximateReportedProgress progress = getReaderProgress(new TestBoundedReader(0.75, 1, 2));
    assertEquals(0.75, progress.getFractionConsumed(), 1e-6);
    assertEquals(1.0, progress.getConsumedParallelism().getValue(), 1e-6);
    assertEquals(2.0, progress.getRemainingParallelism().getValue(), 1e-6);
    progress = getReaderProgress(new TestBoundedReader(null, -1, 4));
    assertNull(progress.getFractionConsumed());
    assertNull(progress.getConsumedParallelism());
    assertEquals(4.0, progress.getRemainingParallelism().getValue(), 1e-6);
    progress = getReaderProgress(new TestBoundedReader(null, -1, -2));
    assertNull(progress.getFractionConsumed());
    assertNull(progress.getConsumedParallelism());
    assertNull(progress.getRemainingParallelism());
}
Also used: ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress), Test (org.junit.Test)
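
TestBoundedReader is a helper defined elsewhere in WorkerCustomSourcesTest and is not part of this listing. Judging by the assertions, it presumably just echoes back the three progress signals a BoundedSource.BoundedReader can expose. A hypothetical stand-in, assuming the Beam SDK's getFractionConsumed / getSplitPointsConsumed / getSplitPointsRemaining hooks (where a negative count plays the role of BoundedReader.SPLIT_POINTS_UNKNOWN), could look like this:

import java.util.NoSuchElementException;
import org.apache.beam.sdk.io.BoundedSource;

/** Hypothetical stand-in for TestBoundedReader: reports fixed progress signals, reads nothing. */
class FixedProgressReader extends BoundedSource.BoundedReader<Void> {
    private final Double fractionConsumed;
    private final long splitPointsConsumed;
    private final long splitPointsRemaining;

    FixedProgressReader(Double fractionConsumed, long splitPointsConsumed, long splitPointsRemaining) {
        this.fractionConsumed = fractionConsumed;
        this.splitPointsConsumed = splitPointsConsumed;
        this.splitPointsRemaining = splitPointsRemaining;
    }

    // The three signals that end up as fractionConsumed / consumedParallelism / remainingParallelism.
    @Override public Double getFractionConsumed() { return fractionConsumed; }
    @Override public long getSplitPointsConsumed() { return splitPointsConsumed; }
    @Override public long getSplitPointsRemaining() { return splitPointsRemaining; }

    // Minimal no-op reader plumbing so the class compiles on its own.
    @Override public boolean start() { return false; }
    @Override public boolean advance() { return false; }
    @Override public Void getCurrent() { throw new NoSuchElementException(); }
    @Override public void close() {}
    @Override public BoundedSource<Void> getCurrentSource() { return null; }
}

This is consistent with the assertions above, where the -1 and -2 inputs come back as null consumed/remaining parallelism once translated into ApproximateReportedProgress.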

Example 5 with ApproximateReportedProgress

Use of com.google.api.services.dataflow.model.ApproximateReportedProgress in project beam by apache.

From the class GroupingShuffleReaderTest, method testGetApproximateProgress.

@Test
public void testGetApproximateProgress() throws Exception {
    // Store the positions of all KVs returned.
    List<ByteArrayShufflePosition> positionsList = new ArrayList<>();
    PipelineOptions options = PipelineOptionsFactory.create();
    BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(options, "testStage");
    TestOperationContext operationContext = TestOperationContext.create();
    GroupingShuffleReader<Integer, Integer> groupingShuffleReader =
        new GroupingShuffleReader<>(
            options,
            null,
            null,
            null,
            WindowedValue.getFullCoder(
                KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())),
                IntervalWindow.getCoder()),
            context,
            operationContext,
            ShuffleReadCounterFactory.INSTANCE,
            false);
    TestShuffleReader shuffleReader = new TestShuffleReader();
    final int kNumRecords = 10;
    for (int i = 0; i < kNumRecords; ++i) {
        ByteArrayShufflePosition position = fabricatePosition(i);
        byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i);
        positionsList.add(position);
        ShuffleEntry entry = new ShuffleEntry(position, keyByte, EMPTY_BYTE_ARRAY, keyByte);
        shuffleReader.addEntry(entry);
    }
    assertFalse(shuffleReader.isClosed());
    try (GroupingShuffleReaderIterator<Integer, Integer> iter = groupingShuffleReader.iterator(shuffleReader)) {
        Integer i = 0;
        for (boolean more = iter.start(); more; more = iter.advance()) {
            ApproximateReportedProgress progress = readerProgressToCloudProgress(iter.getProgress());
            assertNotNull(progress.getPosition().getShufflePosition());
            // Compare returned position with the expected position.
            assertEquals(positionsList.get(i).encodeBase64(), progress.getPosition().getShufflePosition());
            WindowedValue<KV<Integer, Reiterable<Integer>>> elem = iter.getCurrent();
            assertEquals(i, elem.getValue().getKey());
            i++;
        }
        assertFalse(iter.advance());
        // Cannot split since all input was consumed.
        Position proposedSplitPosition = new Position();
        String stop = encodeBase64URLSafeString(fabricatePosition(0).getPosition());
        proposedSplitPosition.setShufflePosition(stop);
        assertNull(iter.requestDynamicSplit(toDynamicSplitRequest(approximateSplitRequestAtPosition(proposedSplitPosition))));
    }
    assertTrue(shuffleReader.isClosed());
}
Also used: ByteArrayShufflePosition (org.apache.beam.runners.dataflow.worker.util.common.worker.ByteArrayShufflePosition), ReaderTestUtils.approximateSplitRequestAtPosition (org.apache.beam.runners.dataflow.worker.ReaderTestUtils.approximateSplitRequestAtPosition), Position (com.google.api.services.dataflow.model.Position), ReaderTestUtils.splitRequestAtPosition (org.apache.beam.runners.dataflow.worker.ReaderTestUtils.splitRequestAtPosition), ArrayList (java.util.ArrayList), KV (org.apache.beam.sdk.values.KV), Base64.encodeBase64URLSafeString (com.google.api.client.util.Base64.encodeBase64URLSafeString), ShuffleEntry (org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry), PipelineOptions (org.apache.beam.sdk.options.PipelineOptions), ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress), Test (org.junit.Test)
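
The final assertion in Example 5 builds a split proposal through two helpers that are not shown in this listing (ReaderTestUtils.approximateSplitRequestAtPosition and toDynamicSplitRequest). As a rough sketch only, and assuming the generated ApproximateSplitRequest model class with a fluent setPosition setter, the proposal amounts to wrapping a cloud Position that carries the encoded shuffle position:

import com.google.api.services.dataflow.model.ApproximateSplitRequest;
import com.google.api.services.dataflow.model.Position;

/** Hypothetical helper: a split proposal that names only an encoded shuffle position. */
static ApproximateSplitRequest splitRequestAtShufflePosition(String encodedShufflePosition) {
    Position position = new Position();
    position.setShufflePosition(encodedShufflePosition);
    return new ApproximateSplitRequest().setPosition(position);
}

The reader then refuses the request (returns null) because, as the test notes, all input has already been consumed by the time the split is proposed.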

Aggregations

ApproximateReportedProgress (com.google.api.services.dataflow.model.ApproximateReportedProgress): 9
Test (org.junit.Test): 8
ArrayList (java.util.ArrayList): 2
InMemoryReader (org.apache.beam.runners.dataflow.worker.InMemoryReader): 2
Base64.encodeBase64URLSafeString (com.google.api.client.util.Base64.encodeBase64URLSafeString): 1
Position (com.google.api.services.dataflow.model.Position): 1
List (java.util.List): 1
AvroByteFileIterator (org.apache.beam.runners.dataflow.worker.AvroByteReader.AvroByteFileIterator): 1
ReaderTestUtils.approximateSplitRequestAtPosition (org.apache.beam.runners.dataflow.worker.ReaderTestUtils.approximateSplitRequestAtPosition): 1
ReaderTestUtils.splitRequestAtPosition (org.apache.beam.runners.dataflow.worker.ReaderTestUtils.splitRequestAtPosition): 1
ByteArrayShufflePosition (org.apache.beam.runners.dataflow.worker.util.common.worker.ByteArrayShufflePosition): 1
ShuffleEntry (org.apache.beam.runners.dataflow.worker.util.common.worker.ShuffleEntry): 1
PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 1
KV (org.apache.beam.sdk.values.KV): 1
Matchers.anyLong (org.mockito.Matchers.anyLong): 1