use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class PCollectionViews method computeOverlappingRanges.
/**
 * Splits a collection of possibly overlapping ranges into non-overlapping segments and maps each
 * segment to the number of input ranges that cover it.
 *
 * <p>Empty input ranges ({@code from == to}) are ignored, and stretches that no input range
 * covers produce no entry.
 *
 * @param ranges the ranges to analyze; copied into a list and sorted locally
 * @return an immutable map ordered by {@code OffsetRangeComparator.INSTANCE} from each
 *     non-overlapping segment to the count of input ranges covering it
 */
@VisibleForTesting
static SortedMap<OffsetRange, Integer> computeOverlappingRanges(Iterable<OffsetRange> ranges) {
ImmutableSortedMap.Builder<OffsetRange, Integer> rval = ImmutableSortedMap.orderedBy(OffsetRangeComparator.INSTANCE);
List<OffsetRange> sortedRanges = Lists.newArrayList(ranges);
if (sortedRanges.isEmpty()) {
return rval.build();
}
Collections.sort(sortedRanges, OffsetRangeComparator.INSTANCE);
// Holds the ranges that are still "open" during the sweep. After the catch-up step in the loop
// below, every range in the queue shares the same 'from'.
// NOTE(review): OffsetRangeComparator is defined elsewhere; it presumably orders by smallest
// 'from' and then smallest 'to', e.g. [2, 7), [3, 4), [3, 5), [3, 5), [3, 6), [4, 8) — confirm.
PriorityQueue<OffsetRange> rangesWithSameFrom = new PriorityQueue<>(OffsetRangeComparator.INSTANCE);
Iterator<OffsetRange> iterator = sortedRanges.iterator();
// Scratch list: receives a snapshot of rangesWithSameFrom, is sorted with OffsetRangeComparator
// before each processing pass, and is cleared again afterwards.
List<OffsetRange> rangesToProcess = new ArrayList<>();
while (iterator.hasNext()) {
OffsetRange current = iterator.next();
// Skip empty ranges
if (current.getFrom() == current.getTo()) {
continue;
}
// The queued ranges have a different 'from' than current, so output the segments that lie in
// [rangesWithSameFrom.from, current.from). The body drains the queue and only re-adds ranges
// whose 'from' equals current.from, so this loop body runs at most once per 'current'.
while (!rangesWithSameFrom.isEmpty() && rangesWithSameFrom.peek().getFrom() != current.getFrom()) {
rangesToProcess.addAll(rangesWithSameFrom);
Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
rangesWithSameFrom.clear();
int i = 0;
long lastTo = rangesToProcess.get(i).getFrom();
// Output every segment that ends at or before current.from. The count for a segment is the
// number of pending ranges that have not ended yet, i.e. rangesToProcess.size() - i.
// e.g. current.from := 6 with pending [3, 4), [3, 5), [3, 5), [3, 6) will produce
// [3, 4) := 4
// [4, 5) := 3
// [5, 6) := 1
for (; i < rangesToProcess.size(); ++i) {
if (rangesToProcess.get(i).getTo() > current.getFrom()) {
break;
}
// Output only the first of any subsequent duplicate ranges
if (i == 0 || rangesToProcess.get(i - 1).getTo() != rangesToProcess.get(i).getTo()) {
rval.put(new OffsetRange(lastTo, rangesToProcess.get(i).getTo()), rangesToProcess.size() - i);
lastTo = rangesToProcess.get(i).getTo();
}
}
// Some pending ranges extend past current.from, so output the segment [lastTo,
// current.from) if it is non-empty
if (lastTo < current.getFrom() && i != rangesToProcess.size()) {
rval.put(new OffsetRange(lastTo, current.getFrom()), rangesToProcess.size() - i);
}
// The remaining ranges have a 'to' that is greater than current.from and therefore overlap
// with current so add them back to rangesWithSameFrom with the updated 'from'
for (; i < rangesToProcess.size(); ++i) {
rangesWithSameFrom.add(new OffsetRange(current.getFrom(), rangesToProcess.get(i).getTo()));
}
rangesToProcess.clear();
}
rangesWithSameFrom.add(current);
}
// Process the last chunk of overlapping ranges
while (!rangesWithSameFrom.isEmpty()) {
// The head of the queue; since all queued ranges share one 'from', this is expected to be the
// range with the smallest 'to'
OffsetRange current = rangesWithSameFrom.remove();
rangesToProcess.addAll(rangesWithSameFrom);
Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
rangesWithSameFrom.clear();
// current itself plus every other range that is still open covers this segment
rval.put(current, rangesToProcess.size() + 1);
// Shorten all the remaining ranges such that they start with current.to
for (OffsetRange rangeWithDifferentFrom : rangesToProcess) {
// Skip any duplicates of current
if (rangeWithDifferentFrom.getTo() > current.getTo()) {
rangesWithSameFrom.add(new OffsetRange(current.getTo(), rangeWithDifferentFrom.getTo()));
}
}
rangesToProcess.clear();
}
return rval.build();
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class GcsUtil method makeGetBatches.
/**
 * Builds the {@link BatchInterface batches} needed to look up every given path.
 *
 * <p>The paths are split into groups of at most {@code MAX_REQUESTS_PER_BATCH}; each group gets
 * its own batch obtained from {@code batchRequestSupplier}. For every path, the holder returned
 * by {@code enqueueGetFileSize} is appended to {@code results}, in the iteration order of
 * {@code paths}.
 *
 * @param paths the {@link GcsPath GcsPaths} to enqueue.
 * @param results mutable {@link List} that receives one entry per path.
 * @return the {@link BatchInterface batches} to execute, one per group of paths.
 * @throws IOException if enqueueing a request fails.
 */
@VisibleForTesting
List<BatchInterface> makeGetBatches(Collection<GcsPath> paths, List<StorageObjectOrIOException[]> results) throws IOException {
  List<BatchInterface> getBatches = new ArrayList<>();
  List<List<GcsPath>> pathGroups = Lists.partition(Lists.newArrayList(paths), MAX_REQUESTS_PER_BATCH);
  for (List<GcsPath> pathGroup : pathGroups) {
    BatchInterface currentBatch = batchRequestSupplier.get();
    for (GcsPath gcsPath : pathGroup) {
      results.add(enqueueGetFileSize(gcsPath, currentBatch));
    }
    getBatches.add(currentBatch);
  }
  return getBatches;
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class GcsUtil method open.
/**
 * Opens an object in GCS.
 *
 * <p>Returns a SeekableByteChannel that provides access to data in the bucket. The call is
 * recorded on an API request count metric: {@code "ok"} on success, or an error code when the
 * failure was caused by a {@link GoogleJsonResponseException}.
 *
 * @param path the GCS filename to read from
 * @param readOptions Fine-grained options for behaviors of retries, buffering, etc.
 * @return a SeekableByteChannel that can read the object data
 * @throws IOException if the underlying storage client fails to open the object; the original
 *     exception is rethrown unchanged
 */
@VisibleForTesting
SeekableByteChannel open(GcsPath path, GoogleCloudStorageReadOptions readOptions) throws IOException {
  HashMap<String, String> baseLabels = new HashMap<>();
  baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
  baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Storage");
  baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "GcsGet");
  baseLabels.put(MonitoringInfoConstants.Labels.RESOURCE, GcpResourceIdentifiers.cloudStorageBucket(path.getBucket()));
  baseLabels.put(MonitoringInfoConstants.Labels.GCS_PROJECT_ID, googleCloudStorageOptions.getProjectId());
  baseLabels.put(MonitoringInfoConstants.Labels.GCS_BUCKET, path.getBucket());
  ServiceCallMetric serviceCallMetric = new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
  try {
    SeekableByteChannel channel = googleCloudStorage.open(new StorageResourceId(path.getBucket(), path.getObject()), readOptions);
    serviceCallMetric.call("ok");
    return channel;
  } catch (IOException e) {
    if (e.getCause() instanceof GoogleJsonResponseException) {
      GoogleJsonResponseException jsonException = (GoogleJsonResponseException) e.getCause();
      // getDetails() is null when the error response carried no JSON body. Fall back to the HTTP
      // status code so the metric is still recorded and a NullPointerException raised here can
      // never replace the original IOException.
      int errorCode = jsonException.getDetails() != null ? jsonException.getDetails().getCode() : jsonException.getStatusCode();
      serviceCallMetric.call(errorCode);
    }
    throw e;
  }
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class GcsUtil method createBucket.
/**
 * Inserts {@code bucket} into the project {@code projectId}, with both the bucket ACL and the
 * default object ACL set to {@code "projectPrivate"}.
 *
 * <p>The insert is retried using {@code backoff} and {@code sleeper}. "Already exists" and
 * "access denied" failures are never retried; any other {@link IOException} is retried only when
 * {@code RetryDeterminer.SOCKET_ERRORS} says so.
 *
 * @param projectId the project that will own the bucket
 * @param bucket the bucket to create
 * @param backoff back-off policy applied between retries
 * @param sleeper used to wait between retries
 * @throws AccessDeniedException if the service reports that access was denied
 * @throws FileAlreadyExistsException if the service reports that the bucket already exists
 * @throws IOException for any other failure, or if the calling thread is interrupted while
 *     retrying (the interrupt flag is restored first)
 */
@VisibleForTesting
void createBucket(String projectId, Bucket bucket, BackOff backoff, Sleeper sleeper) throws IOException {
  Storage.Buckets.Insert insertRequest = storageClient.buckets().insert(projectId, bucket);
  insertRequest.setPredefinedAcl("projectPrivate");
  insertRequest.setPredefinedDefaultObjectAcl("projectPrivate");

  // Terminal errors are not worth retrying; everything else defers to the socket-error policy.
  RetryDeterminer<IOException> retryPolicy = new RetryDeterminer<IOException>() {
    @Override
    public boolean shouldRetry(IOException failure) {
      return !errorExtractor.itemAlreadyExists(failure)
          && !errorExtractor.accessDenied(failure)
          && RetryDeterminer.SOCKET_ERRORS.shouldRetry(failure);
    }
  };

  try {
    ResilientOperation.retry(insertRequest::execute, backoff, retryPolicy, IOException.class, sleeper);
  } catch (GoogleJsonResponseException e) {
    // Translate well-known service errors into the matching java.nio.file exceptions.
    if (errorExtractor.accessDenied(e)) {
      throw new AccessDeniedException(bucket.getName(), null, e.getMessage());
    } else if (errorExtractor.itemAlreadyExists(e)) {
      throw new FileAlreadyExistsException(bucket.getName(), null, e.getMessage());
    } else {
      throw e;
    }
  } catch (InterruptedException e) {
    // Restore the interrupt flag before surfacing the failure as an IOException.
    Thread.currentThread().interrupt();
    String message = String.format("Error while attempting to create bucket gs://%s for project %s", bucket.getName(), projectId);
    throw new IOException(message, e);
  }
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class BeamIOPushDownRule method isProjectRenameOnlyProgram.
/**
 * Checks whether a program does nothing beyond renaming and/or projecting input fields.
 * RexProgram#isTrivial is not sufficient here, because the number of projects does not need to
 * match the number of inputs. The Calc should NOT be dropped (and this method returns false)
 * when:<br>
 * 1. A projected value is not a plain input field (ex: 'select field1+10').<br>
 * 2. The same field is projected more than once.<br>
 * 3. The IO does not support field reordering and fields are projected in an order different
 * from the schema order.
 *
 * @param program A program to check.
 * @param projectReorderingSupported Whether project push-down supports field reordering.
 * @return True when program performs only projects (w/o any modifications), false otherwise.
 */
@VisibleForTesting
boolean isProjectRenameOnlyProgram(RexProgram program, boolean projectReorderingSupported) {
  int inputFieldCount = program.getInputRowType().getFieldCount();
  Set<Integer> seenIndexes = new HashSet<>();
  int lastIndex = -1;
  for (RexLocalRef projection : program.getProjectList()) {
    int index = projection.getIndex();
    // Projected values must be InputRefs, i.e. refer to one of the input fields.
    if (index >= inputFieldCount) {
      return false;
    }
    // Each field may be projected only once.
    if (!seenIndexes.add(index)) {
      return false;
    }
    // Without reordering support the indexes have to be strictly increasing.
    if (!projectReorderingSupported && index <= lastIndex) {
      return false;
    }
    lastIndex = index;
  }
  return true;
}
Aggregations