Search in sources :

Example 16 with OffsetRange

use of org.apache.beam.sdk.io.range.OffsetRange in project beam by apache.

the class GrowableOffsetRangeTrackerTest method testSmallRangeWithLargeValue.

/**
 * Splits a tracker at 0.5 when the estimated range holds only two positions and the offsets
 * themselves are very large, for two different start offsets sharing one estimator.
 */
@Test
public void testSmallRangeWithLargeValue() throws Exception {
    SimpleEstimator estimator = new SimpleEstimator();
    long[] startOffsets = {123456789012345677L, 123456789012345681L};
    for (long startOffset : startOffsets) {
        GrowableOffsetRangeTracker rangeTracker = new GrowableOffsetRangeTracker(startOffset, estimator);
        assertTrue(rangeTracker.tryClaim(startOffset));
        // The estimated end is moved only after the first position has been claimed.
        estimator.setEstimateRangeEnd(startOffset + 2);
        SplitResult split = rangeTracker.trySplit(0.5);
        long splitPoint = startOffset + 1;
        assertEquals(new OffsetRange(startOffset, splitPoint), split.getPrimary());
        assertEquals(new OffsetRange(splitPoint, Long.MAX_VALUE), split.getResidual());
    }
}
Also used : OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) Test(org.junit.Test)

Example 17 with OffsetRange

use of org.apache.beam.sdk.io.range.OffsetRange in project beam by apache.

the class PCollectionViewsTest method testRandomRanges.

/**
 * Feeds growing lists of pseudo-random ranges to computeOverlappingRanges and checks the result
 * after every added range.
 */
@Test
public void testRandomRanges() {
    // The seed is arbitrary; fixing it makes this test deterministic.
    final Random rng = new Random(123892154890L);
    for (int trial = 0; trial < 1000; trial++) {
        final List<OffsetRange> accumulated = new ArrayList<>();
        for (int added = 0; added < 20; added++) {
            // Draw the start first and the length second so the random sequence is consumed
            // in a fixed order.
            final long from = rng.nextInt(10);
            final int length = rng.nextInt(10) + 1;
            accumulated.add(range(from, from + length));
            final Map<OffsetRange, Integer> computed = computeOverlappingRanges(accumulated);
            assertNonEmptyRangesAndPositions(accumulated, computed);
        }
    }
}
Also used : OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) Random(java.util.Random) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 18 with OffsetRange

use of org.apache.beam.sdk.io.range.OffsetRange in project beam by apache.

the class PCollectionViews method computeOverlappingRanges.

/**
 * Breaks a collection of possibly overlapping ranges into non-overlapping pieces and maps each
 * piece to a count of the ranges that span it.
 *
 * <p>Ranges whose 'from' equals their 'to' are ignored. An empty input yields an empty map.
 *
 * @param ranges the ranges to break up; they may overlap and may contain duplicates
 * @return a map built with {@code OffsetRangeComparator.INSTANCE} ordering, from each emitted
 *     piece to the number of pending ranges that reach at least as far as the end of that piece
 */
@VisibleForTesting
static SortedMap<OffsetRange, Integer> computeOverlappingRanges(Iterable<OffsetRange> ranges) {
    ImmutableSortedMap.Builder<OffsetRange, Integer> rval = ImmutableSortedMap.orderedBy(OffsetRangeComparator.INSTANCE);
    // Copy into a list so the input can be sorted without touching the caller's iterable
    List<OffsetRange> sortedRanges = Lists.newArrayList(ranges);
    if (sortedRanges.isEmpty()) {
        return rval.build();
    }
    Collections.sort(sortedRanges, OffsetRangeComparator.INSTANCE);
    // Stores ranges in smallest 'from' and then smallest 'to' order
    // e.g. [2, 7), [3, 4), [3, 5), [3, 5), [3, 6), [4, 0)
    // NOTE(review): this ordering is a property of OffsetRangeComparator, which is not visible
    // here; the logic below relies on it.
    PriorityQueue<OffsetRange> rangesWithSameFrom = new PriorityQueue<>(OffsetRangeComparator.INSTANCE);
    Iterator<OffsetRange> iterator = sortedRanges.iterator();
    // Stored in reverse sorted order so that when we iterate and re-add them back to
    // overlappingRanges they are stored in sorted order from smallest to largest range.to
    // NOTE(review): the list is sorted with OffsetRangeComparator.INSTANCE below (not reversed)
    // and no variable named overlappingRanges exists in this method; the comment above may be
    // stale. This list is a scratch buffer that is cleared after every use.
    List<OffsetRange> rangesToProcess = new ArrayList<>();
    while (iterator.hasNext()) {
        OffsetRange current = iterator.next();
        // Skip empty ranges
        if (current.getFrom() == current.getTo()) {
            continue;
        }
        // The queued ranges start at a different 'from' than current, so first emit the pieces
        // that lie in [rangesWithSameFrom.from, current.from)
        while (!rangesWithSameFrom.isEmpty() && rangesWithSameFrom.peek().getFrom() != current.getFrom()) {
            rangesToProcess.addAll(rangesWithSameFrom);
            Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
            rangesWithSameFrom.clear();
            int i = 0;
            // Start of the next piece to emit; begins at the 'from' of the first pending range
            long lastTo = rangesToProcess.get(i).getFrom();
            // Emit every piece that ends at or before current.from. The value stored for a
            // piece is the number of pending ranges from index i onward.
            for (; i < rangesToProcess.size(); ++i) {
                // This range (and every later one in sorted order) extends past current.from
                if (rangesToProcess.get(i).getTo() > current.getFrom()) {
                    break;
                }
                // Output only the first of any subsequent duplicate ranges
                if (i == 0 || rangesToProcess.get(i - 1).getTo() != rangesToProcess.get(i).getTo()) {
                    rval.put(new OffsetRange(lastTo, rangesToProcess.get(i).getTo()), rangesToProcess.size() - i);
                    lastTo = rangesToProcess.get(i).getTo();
                }
            }
            // Emit the remaining piece [lastTo, current.from) if it is non-empty and at least
            // one pending range extends past current.from
            if (lastTo < current.getFrom() && i != rangesToProcess.size()) {
                rval.put(new OffsetRange(lastTo, current.getFrom()), rangesToProcess.size() - i);
            }
            // The ranges from index i onward extend past current.from and therefore overlap
            // with current so add them back to rangesWithSameFrom with the updated 'from'
            for (; i < rangesToProcess.size(); ++i) {
                rangesWithSameFrom.add(new OffsetRange(current.getFrom(), rangesToProcess.get(i).getTo()));
            }
            rangesToProcess.clear();
        }
        rangesWithSameFrom.add(current);
    }
    // Process the last chunk of overlapping ranges
    while (!rangesWithSameFrom.isEmpty()) {
        // This range always represents the range with the smallest 'to'
        OffsetRange current = rangesWithSameFrom.remove();
        rangesToProcess.addAll(rangesWithSameFrom);
        Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
        rangesWithSameFrom.clear();
        // current plus every other queued range (duplicates of current included) is counted
        rval.put(current, rangesToProcess.size() + 1);
        // Shorten all the remaining ranges such that they start with current.to
        for (OffsetRange rangeWithDifferentFrom : rangesToProcess) {
            // Skip any duplicates of current
            if (rangeWithDifferentFrom.getTo() > current.getTo()) {
                rangesWithSameFrom.add(new OffsetRange(current.getTo(), rangeWithDifferentFrom.getTo()));
            }
        }
        rangesToProcess.clear();
    }
    return rval.build();
}
Also used : OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) ArrayList(java.util.ArrayList) ImmutableSortedMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSortedMap) PriorityQueue(java.util.PriorityQueue) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 19 with OffsetRange

use of org.apache.beam.sdk.io.range.OffsetRange in project beam by apache.

the class ReadChangeStreamPartitionRangeTrackerTest method testTryClaim.

/**
 * Checks which positions a tracker over the restriction [100, 200) accepts and which it
 * rejects.
 */
@Test
public void testTryClaim() {
    final OffsetRange restriction = new OffsetRange(100, 200);
    final PartitionMetadata partitionMetadata = mock(PartitionMetadata.class);
    final ReadChangeStreamPartitionRangeTracker rangeTracker = new ReadChangeStreamPartitionRangeTracker(partitionMetadata, restriction);
    assertEquals(restriction, rangeTracker.currentRestriction());
    // These claims are expected to succeed, including the repeated claim of position 100.
    final long[] acceptedPositions = {100L, 100L, 150L, 199L};
    for (long position : acceptedPositions) {
        assertTrue(rangeTracker.tryClaim(position));
    }
    // The upper bound of the restriction is expected to be rejected.
    assertFalse(rangeTracker.tryClaim(200L));
}
Also used : OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) PartitionMetadata(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata) Test(org.junit.Test)

Example 20 with OffsetRange

use of org.apache.beam.sdk.io.range.OffsetRange in project beam by apache.

the class ParquetIOTest method testSplitBlockWithLimit.

/**
 * Calls splitBlockWithLimit with a limit of 200, first on an empty block list and then on six
 * mocked blocks that each report a total byte size of 60.
 */
@Test
public void testSplitBlockWithLimit() {
    ParquetIO.ReadFiles.SplitReadFn<GenericRecord> splitFn = new ParquetIO.ReadFiles.SplitReadFn<>(null, null, ParquetIO.GenericRecordPassthroughFn.create(), null);
    ArrayList<BlockMetaData> blocks = new ArrayList<>();
    ArrayList<OffsetRange> splits;
    BlockMetaData mockedBlock = mock(BlockMetaData.class);
    when(mockedBlock.getTotalByteSize()).thenReturn(60L);

    // With no blocks there is nothing to split.
    splits = splitFn.splitBlockWithLimit(0, blocks.size(), blocks, 200);
    assertTrue(splits.isEmpty());

    // The same mock instance stands in for all six blocks.
    int remainingBlocks = 6;
    while (remainingBlocks-- > 0) {
        blocks.add(mockedBlock);
    }

    // Starting from block index 1, the expected split is [1, 5) followed by [5, 6).
    splits = splitFn.splitBlockWithLimit(1, blocks.size(), blocks, 200);
    OffsetRange firstSplit = splits.get(0);
    assertEquals(1L, firstSplit.getFrom());
    assertEquals(5L, firstSplit.getTo());
    OffsetRange secondSplit = splits.get(1);
    assertEquals(5L, secondSplit.getFrom());
    assertEquals(6L, secondSplit.getTo());
    assertEquals(2L, splits.size());
}
Also used : OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ArrayList(java.util.ArrayList) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

OffsetRange (org.apache.beam.sdk.io.range.OffsetRange)63 Test (org.junit.Test)53 Instant (org.joda.time.Instant)8 ArrayList (java.util.ArrayList)5 OffsetRangeTracker (org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker)5 Progress (org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker.Progress)5 ProcessContinuation (org.apache.beam.sdk.transforms.DoFn.ProcessContinuation)4 PartitionMetadata (org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata)3 DoFn (org.apache.beam.sdk.transforms.DoFn)3 BigDecimal (java.math.BigDecimal)2 RestrictionTracker (org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker)2 Offset (com.google.cloud.pubsublite.Offset)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 Scope (io.opencensus.common.Scope)1 AttributeValue (io.opencensus.trace.AttributeValue)1 Tracer (io.opencensus.trace.Tracer)1 Tracing (io.opencensus.trace.Tracing)1 Serializable (java.io.Serializable)1 Map (java.util.Map)1 Optional (java.util.Optional)1