Use of org.apache.beam.sdk.io.range.OffsetRange in the Apache Beam project.
From class GrowableOffsetRangeTrackerTest, method testSmallRangeWithLargeValue.
/**
 * Checks splitting at fraction 0.5 on a growable range that is only two offsets wide (per the
 * estimator) but whose offsets are very large {@code long} values.
 *
 * <p>For each start offset {@code s}: {@code s} is claimed, the estimated range end is set to
 * {@code s + 2}, and the split is expected to yield primary {@code [s, s + 1)} and residual
 * {@code [s + 1, Long.MAX_VALUE)}.
 */
@Test
public void testSmallRangeWithLargeValue() throws Exception {
  // A single estimator instance is shared by both trackers; only its estimate is updated.
  SimpleEstimator estimator = new SimpleEstimator();
  long[] startOffsets = {123456789012345677L, 123456789012345681L};
  for (long startOffset : startOffsets) {
    long splitPoint = startOffset + 1;
    GrowableOffsetRangeTracker rangeTracker =
        new GrowableOffsetRangeTracker(startOffset, estimator);
    assertTrue(rangeTracker.tryClaim(startOffset));
    // The estimate is set only after the claim, immediately before splitting.
    estimator.setEstimateRangeEnd(startOffset + 2);
    SplitResult split = rangeTracker.trySplit(0.5);
    assertEquals(new OffsetRange(startOffset, splitPoint), split.getPrimary());
    assertEquals(new OffsetRange(splitPoint, Long.MAX_VALUE), split.getResidual());
  }
}
Use of org.apache.beam.sdk.io.range.OffsetRange in the Apache Beam project.
From class PCollectionViewsTest, method testRandomRanges.
/**
 * Randomized check of {@code computeOverlappingRanges}: runs 1000 trials, each growing a list to
 * 20 small ranges (start in [0, 10), length in [1, 10]) and validating the computed result after
 * every single insertion.
 */
@Test
public void testRandomRanges() {
  // Fixed, arbitrary seed so the generated ranges are the same on every run.
  Random rng = new Random(123892154890L);
  for (int trial = 0; trial < 1000; trial++) {
    List<OffsetRange> accumulated = new ArrayList<>();
    for (int added = 0; added < 20; added++) {
      // Draw the start first, then the length, to keep the random sequence stable.
      long from = rng.nextInt(10);
      long to = from + rng.nextInt(10) + 1;
      accumulated.add(range(from, to));
      // Validate every prefix of the list, not just the final 20-element list.
      assertNonEmptyRangesAndPositions(accumulated, computeOverlappingRanges(accumulated));
    }
  }
}
Use of org.apache.beam.sdk.io.range.OffsetRange in the Apache Beam project.
From class PCollectionViews, method computeOverlappingRanges.
/**
 * Breaks a collection of possibly overlapping ranges into sub-ranges and associates each emitted
 * sub-range with a count.
 *
 * <p>Each emitted count is the number of pending ranges whose 'to' reaches at least the end of
 * the emitted sub-range, i.e. the number of input ranges that cover it. Empty input ranges
 * (from == to) are ignored. An empty input yields an empty map.
 *
 * @param ranges the input ranges; they are copied into a list, so the argument is not modified
 * @return an immutable sorted map, ordered by {@code OffsetRangeComparator.INSTANCE}, from each
 *     emitted sub-range to its count
 */
@VisibleForTesting
static SortedMap<OffsetRange, Integer> computeOverlappingRanges(Iterable<OffsetRange> ranges) {
ImmutableSortedMap.Builder<OffsetRange, Integer> rval = ImmutableSortedMap.orderedBy(OffsetRangeComparator.INSTANCE);
List<OffsetRange> sortedRanges = Lists.newArrayList(ranges);
if (sortedRanges.isEmpty()) {
return rval.build();
}
Collections.sort(sortedRanges, OffsetRangeComparator.INSTANCE);
// Pending ranges that all share the same 'from', ordered by OffsetRangeComparator
// (presumably smallest 'from' and then smallest 'to'; the comparator is not shown here).
// e.g. [3, 4), [3, 5), [3, 5), [3, 6)
PriorityQueue<OffsetRange> rangesWithSameFrom = new PriorityQueue<>(OffsetRangeComparator.INSTANCE);
Iterator<OffsetRange> iterator = sortedRanges.iterator();
// Scratch list reused on every pass: it is filled from rangesWithSameFrom, sorted with
// OffsetRangeComparator so it can be walked by index in comparator order, and cleared again
// at the end of the pass.
List<OffsetRange> rangesToProcess = new ArrayList<>();
while (iterator.hasNext()) {
OffsetRange current = iterator.next();
// Skip empty ranges
if (current.getFrom() == current.getTo()) {
continue;
}
// If the current range has a different 'from' than the pending ranges then we must produce
// the ranges in [rangesWithSameFrom.from, current.from)
while (!rangesWithSameFrom.isEmpty() && rangesWithSameFrom.peek().getFrom() != current.getFrom()) {
rangesToProcess.addAll(rangesWithSameFrom);
Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
rangesWithSameFrom.clear();
int i = 0;
long lastTo = rangesToProcess.get(i).getFrom();
// Output all the ranges whose 'to' is at or before current.from
// e.g. current.from := 7 for [3, 4), [3, 5), [3, 5), [3, 6) will produce
// [3, 4) := 4
// [4, 5) := 3
// [5, 6) := 1
for (; i < rangesToProcess.size(); ++i) {
if (rangesToProcess.get(i).getTo() > current.getFrom()) {
break;
}
// Output only the first of any subsequent duplicate ranges
if (i == 0 || rangesToProcess.get(i - 1).getTo() != rangesToProcess.get(i).getTo()) {
rval.put(new OffsetRange(lastTo, rangesToProcess.get(i).getTo()), rangesToProcess.size() - i);
lastTo = rangesToProcess.get(i).getTo();
}
}
// If we exited the loop early because some 'to' > current.from, we must add the range
// [lastTo, current.from) if it is non-empty
if (lastTo < current.getFrom() && i != rangesToProcess.size()) {
rval.put(new OffsetRange(lastTo, current.getFrom()), rangesToProcess.size() - i);
}
// The remaining ranges have a 'to' that is greater than current.from and will overlap
// with current so add them back to rangesWithSameFrom with the updated 'from'
for (; i < rangesToProcess.size(); ++i) {
rangesWithSameFrom.add(new OffsetRange(current.getFrom(), rangesToProcess.get(i).getTo()));
}
rangesToProcess.clear();
}
rangesWithSameFrom.add(current);
}
// Process the last chunk of overlapping ranges
while (!rangesWithSameFrom.isEmpty()) {
// Head of the queue; all pending ranges share the same 'from' here, so this is presumably
// the range with the smallest 'to'
OffsetRange current = rangesWithSameFrom.remove();
rangesToProcess.addAll(rangesWithSameFrom);
Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
rangesWithSameFrom.clear();
// Count is every other pending range plus current itself
rval.put(current, rangesToProcess.size() + 1);
// Shorten all the remaining ranges such that they start with current.to
for (OffsetRange rangeWithDifferentFrom : rangesToProcess) {
// Skip any duplicates of current
if (rangeWithDifferentFrom.getTo() > current.getTo()) {
rangesWithSameFrom.add(new OffsetRange(current.getTo(), rangeWithDifferentFrom.getTo()));
}
}
rangesToProcess.clear();
}
return rval.build();
}
Use of org.apache.beam.sdk.io.range.OffsetRange in the Apache Beam project.
From class ReadChangeStreamPartitionRangeTrackerTest, method testTryClaim.
/**
 * Checks claiming behavior of {@code ReadChangeStreamPartitionRangeTracker} over the restriction
 * [100, 200): offsets 100 (claimed twice in a row), 150 and 199 are accepted, while 200 is
 * rejected.
 */
@Test
public void testTryClaim() {
  final OffsetRange restriction = new OffsetRange(100, 200);
  final PartitionMetadata partitionMetadata = mock(PartitionMetadata.class);
  final ReadChangeStreamPartitionRangeTracker rangeTracker =
      new ReadChangeStreamPartitionRangeTracker(partitionMetadata, restriction);

  assertEquals(restriction, rangeTracker.currentRestriction());

  // Claims are attempted in this exact order; 100 appears twice on purpose.
  long[] acceptedOffsets = {100L, 100L, 150L, 199L};
  for (long offset : acceptedOffsets) {
    assertTrue(rangeTracker.tryClaim(offset));
  }
  // The end of the restriction itself must not be claimable.
  assertFalse(rangeTracker.tryClaim(200L));
}
Use of org.apache.beam.sdk.io.range.OffsetRange in the Apache Beam project.
From class ParquetIOTest, method testSplitBlockWithLimit.
/**
 * Exercises {@code SplitReadFn.splitBlockWithLimit} with a limit of 200 and mocked blocks that
 * each report a total byte size of 60.
 *
 * <p>An empty block list must yield no ranges. With six blocks and a start index of 1, the
 * expected result is exactly two ranges: [1, 5) and [5, 6).
 */
@Test
public void testSplitBlockWithLimit() {
  ParquetIO.ReadFiles.SplitReadFn<GenericRecord> splitFn =
      new ParquetIO.ReadFiles.SplitReadFn<>(
          null, null, ParquetIO.GenericRecordPassthroughFn.create(), null);

  // One mocked block instance is reused for every list entry.
  BlockMetaData sixtyByteBlock = mock(BlockMetaData.class);
  when(sixtyByteBlock.getTotalByteSize()).thenReturn(60L);

  ArrayList<BlockMetaData> blocks = new ArrayList<>();

  // No blocks at all: nothing to split.
  ArrayList<OffsetRange> splits = splitFn.splitBlockWithLimit(0, blocks.size(), blocks, 200);
  assertTrue(splits.isEmpty());

  while (blocks.size() < 6) {
    blocks.add(sixtyByteBlock);
  }

  splits = splitFn.splitBlockWithLimit(1, blocks.size(), blocks, 200);

  OffsetRange firstSplit = splits.get(0);
  assertEquals(1L, firstSplit.getFrom());
  assertEquals(5L, firstSplit.getTo());

  OffsetRange secondSplit = splits.get(1);
  assertEquals(5L, secondSplit.getFrom());
  assertEquals(6L, secondSplit.getTo());

  assertEquals(2L, splits.size());
}
Aggregations