Search in sources :

Example 1 with DynamicSplitResult

use of org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult in project beam by apache.

the class ConcatReaderTest method runUpdateStopPositionTest.

/**
 * Exhaustively exercises dynamic split requests at concat positions against a ConcatReader
 * iterator.
 *
 * <p>A single ConcatReader is built from {@code readerSizes} (length 's'). For every split index
 * in the range [0, s] and for every read position 'p' the loops below generate, a fresh iterator
 * is opened, {@code start()}/{@code advance()} are called 'p' times in total, and a split at that
 * index is requested. Index 's' is included on purpose so that out-of-range requests are covered.
 *
 * <p>A {@code null} split result is expected when no record has been read, when
 * {@code readerIndex >= indexToSplit}, or when the index is out of range. In every other case the
 * concat index reported by the split result must equal the requested index.
 *
 * @param readerSizes sizes passed to {@code createConcatReadersOfSizes} to build the reader
 * @throws Exception if creating, reading, splitting or closing an iterator fails
 */
public void runUpdateStopPositionTest(int... readerSizes) throws Exception {
    ConcatReader<String> concatReader = createConcatReadersOfSizes(new ArrayList<String>(), readerSizes);
    // This includes the indexToSplit == readerSizes.length case to test out of range split requests.
    for (int indexToSplit = 0; indexToSplit <= readerSizes.length; indexToSplit++) {
        // indexToSplit never goes negative in this loop, so only the upper bound needs checking.
        boolean splitIndexInRange = indexToSplit < readerSizes.length;
        // Number of records to read from the ConcatReader before splitting.
        int recordsToRead = -1;
        for (int readerIndex = 0; readerIndex < readerSizes.length; readerIndex++) {
            for (int recordIndex = 0; recordIndex <= readerSizes[readerIndex]; recordIndex++) {
                if (readerIndex > 0 && recordIndex == 0) {
                    // With no records read yet from the reader at 'readerIndex', the current reader
                    // should be the reader at 'readerIndex - 1'. Skipping this combination keeps
                    // recordsToRead equal to the cumulative number of records read so far.
                    continue;
                }
                recordsToRead++;
                // Each combination gets its own iterator; close it so iterators do not pile up
                // across the (potentially large) number of combinations.
                try (NativeReader.NativeReaderIterator<String> iterator = concatReader.iterator()) {
                    for (int i = 0; i < recordsToRead; i++) {
                        if (i == 0) {
                            iterator.start();
                        } else {
                            iterator.advance();
                        }
                    }
                    DynamicSplitResult splitResult = iterator.requestDynamicSplit(ReaderTestUtils.splitRequestAtConcatPosition(indexToSplit, null));
                    if (recordsToRead == 0 || readerIndex >= indexToSplit || !splitIndexInRange) {
                        Assert.assertNull(splitResult);
                    } else {
                        Assert.assertEquals(indexToSplit, ReaderTestUtils.positionFromSplitResult(splitResult).getConcatPosition().getIndex().intValue());
                    }
                }
            }
        }
    }
}
Also used : NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) DynamicSplitResult(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult)

Example 2 with DynamicSplitResult

use of org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult in project beam by apache.

the class AvroByteReaderTest method testRequestDynamicSplitInternal.

/**
 * Requests a dynamic split on an {@code AvroByteReader} part-way through reading and checks that
 * the primary and residual parts together yield exactly what the unsplit reader yields.
 *
 * <p>The given reader is first read in full to obtain the expected elements. A new iterator is
 * then opened, {@code readBeforeSplit} items are read, a split at {@code splitAtFraction} is
 * requested, and the rest of the iterator is drained. If the split produced a result, a second
 * reader is created from the split byte offset to the original end position and read in full.
 *
 * @param <T> element type produced by the reader
 * @param reader the reader under test
 * @param splitAtFraction fraction passed to {@code ReaderTestUtils.splitRequestAtFraction}
 * @param readBeforeSplit number of items to read from the iterator before requesting the split
 * @param splitVerificationBehavior whether the split must succeed, must fail, or is not checked
 * @throws Exception if reading or splitting fails
 */
private <T> void testRequestDynamicSplitInternal(AvroByteReader<T> reader, float splitAtFraction, int readBeforeSplit, SplitVerificationBehavior splitVerificationBehavior) throws Exception {
    // Remember where the original reader ends; the residual reader is bounded by the same offset.
    Long originalEnd = reader.endPosition;
    // Baseline: everything the reader yields when it is read in full.
    List<T> baseline = readAllFromReader(reader);
    List<T> primary;
    List<T> residual;
    try (AvroByteReader<T>.AvroByteFileIterator it = reader.iterator()) {
        // Consume the requested number of items, then ask for the split.
        primary = readNItemsFromUnstartedIterator(it, readBeforeSplit);
        DynamicSplitResult split = it.requestDynamicSplit(ReaderTestUtils.splitRequestAtFraction(splitAtFraction));
        switch (splitVerificationBehavior) {
            case VERIFY_SUCCESS:
                Assert.assertNotNull(split);
                break;
            case VERIFY_FAILURE:
                Assert.assertNull(split);
                break;
            case DO_NOT_VERIFY:
                // Either outcome is acceptable; nothing to check here.
                break;
        }
        // Drain whatever the original iterator still yields after the split request.
        primary.addAll(readRemainingFromIterator(it, readBeforeSplit > 0));
        if (split == null) {
            // No split happened, so there is no residual part.
            residual = new ArrayList<>();
        } else {
            // Read the residual range [split offset, original end) with a separate reader.
            Long splitOffset = ReaderTestUtils.positionFromSplitResult(split).getByteOffset();
            AvroByteReader<T> residualReader = new AvroByteReader<T>(reader.avroSource.getFileOrPatternSpec(), splitOffset, originalEnd, reader.coder, reader.options);
            residual = readAllFromReader(residualReader);
        }
    }
    // Primary followed by residual must reproduce the baseline exactly.
    primary.addAll(residual);
    Assert.assertEquals(baseline, primary);
    if (splitVerificationBehavior == SplitVerificationBehavior.VERIFY_SUCCESS) {
        // A split that was required to succeed must leave something for the residual.
        Assert.assertNotEquals(0, residual.size());
    }
}
Also used : ArrayList(java.util.ArrayList) DynamicSplitResult(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult)

Example 3 with DynamicSplitResult

use of org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult in project beam by apache.

the class WorkItemStatusClientTest method populateSplitResultNativeReader.

/**
 * Verifies that populating a work item status from a position-based split result sets the
 * status's stop position to that position and leaves its dynamic source split unset.
 */
@Test
public void populateSplitResultNativeReader() throws Exception {
    statusClient.setWorker(worker, executionContext);

    // Build a split result that carries a plain position.
    Position stopPosition = ReaderTestUtils.positionAtIndex(42L);
    DataflowReaderPosition readerPosition = new DataflowReaderPosition(stopPosition);
    DynamicSplitResult splitResult = new NativeReader.DynamicSplitResultWithPosition(readerPosition);

    WorkItemStatus workItemStatus = new WorkItemStatus();
    statusClient.populateSplitResult(workItemStatus, splitResult);

    assertThat(workItemStatus.getStopPosition(), equalTo(stopPosition));
    assertThat(workItemStatus.getDynamicSourceSplit(), nullValue());
}
Also used : WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) DataflowReaderPosition(org.apache.beam.runners.dataflow.worker.SourceTranslationUtils.DataflowReaderPosition) Position(com.google.api.services.dataflow.model.Position) DynamicSplitResult(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult) Test(org.junit.Test)

Aggregations

DynamicSplitResult (org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader.DynamicSplitResult)3 Position (com.google.api.services.dataflow.model.Position)1 WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus)1 ArrayList (java.util.ArrayList)1 DataflowReaderPosition (org.apache.beam.runners.dataflow.worker.SourceTranslationUtils.DataflowReaderPosition)1 NativeReader (org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader)1 Test (org.junit.Test)1