Search in sources :

Example 31 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class PCollectionViews method computeOverlappingRanges.

/**
 * Splits a set of possibly overlapping ranges into consecutive sub-ranges and records, for each
 * sub-range, how many of the input ranges cover it.
 *
 * <p>The input is copied and sorted with {@code OffsetRangeComparator.INSTANCE}; ranges whose
 * 'from' equals their 'to' are ignored. The counting below relies on that comparator ordering
 * ranges by smallest 'from' and then smallest 'to' (as the inline comments state; the comparator
 * itself is not visible here).
 *
 * @param ranges the ranges to analyze; an empty iterable yields an empty map.
 * @return a map, ordered by {@code OffsetRangeComparator.INSTANCE}, from each produced sub-range
 *     to the number of ranges overlapping it.
 */
@VisibleForTesting
static SortedMap<OffsetRange, Integer> computeOverlappingRanges(Iterable<OffsetRange> ranges) {
    ImmutableSortedMap.Builder<OffsetRange, Integer> rval = ImmutableSortedMap.orderedBy(OffsetRangeComparator.INSTANCE);
    List<OffsetRange> sortedRanges = Lists.newArrayList(ranges);
    if (sortedRanges.isEmpty()) {
        return rval.build();
    }
    Collections.sort(sortedRanges, OffsetRangeComparator.INSTANCE);
    // Stores ranges in smallest 'from' and then smallest 'to' order
    // e.g. [2, 7), [3, 4), [3, 5), [3, 5), [3, 6), [4, 0)
    // Every range held in this queue at the same time shares one 'from': ranges are either added
    // once the queue is empty / has a matching 'from', or re-added with their 'from' rewritten.
    PriorityQueue<OffsetRange> rangesWithSameFrom = new PriorityQueue<>(OffsetRangeComparator.INSTANCE);
    Iterator<OffsetRange> iterator = sortedRanges.iterator();
    // Scratch list that receives the contents of rangesWithSameFrom while one group is processed;
    // it is cleared again at the end of every pass.
    // NOTE(review): this list was previously described as being kept in reverse sorted order, but
    // it is always sorted with OffsetRangeComparator.INSTANCE below - confirm which is intended.
    List<OffsetRange> rangesToProcess = new ArrayList<>();
    while (iterator.hasNext()) {
        OffsetRange current = iterator.next();
        // Skip empty ranges
        if (current.getFrom() == current.getTo()) {
            continue;
        }
        // If the current range has a different 'from' than the queued ranges then we must produce
        // the sub-ranges in [rangesWithSameFrom.from, current.from) before queueing current.
        while (!rangesWithSameFrom.isEmpty() && rangesWithSameFrom.peek().getFrom() != current.getFrom()) {
            rangesToProcess.addAll(rangesWithSameFrom);
            Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
            rangesWithSameFrom.clear();
            int i = 0;
            // Start of the next sub-range to output; begins at the shared 'from' of the group.
            long lastTo = rangesToProcess.get(i).getFrom();
            // Output all the sub-ranges that end at or before current.from. Each one is mapped to
            // the number of ranges from index i onwards, i.e. those that have not ended yet.
            // e.g. current.from := 7 for [3, 4), [3, 5), [3, 5), [3, 6) will produce
            // [3, 4) := 4
            // [4, 5) := 3
            // [5, 6) := 1
            for (; i < rangesToProcess.size(); ++i) {
                if (rangesToProcess.get(i).getTo() > current.getFrom()) {
                    break;
                }
                // Output only the first of any subsequent duplicate ranges
                if (i == 0 || rangesToProcess.get(i - 1).getTo() != rangesToProcess.get(i).getTo()) {
                    rval.put(new OffsetRange(lastTo, rangesToProcess.get(i).getTo()), rangesToProcess.size() - i);
                    lastTo = rangesToProcess.get(i).getTo();
                }
            }
            // Some ranges extend past current.from (i stopped early): output the sub-range
            // [lastTo, current.from) if it is non-empty
            if (lastTo < current.getFrom() && i != rangesToProcess.size()) {
                rval.put(new OffsetRange(lastTo, current.getFrom()), rangesToProcess.size() - i);
            }
            // The remaining ranges (index i onwards) end after current.from and therefore overlap
            // with current so add them back to rangesWithSameFrom with the updated 'from'
            for (; i < rangesToProcess.size(); ++i) {
                rangesWithSameFrom.add(new OffsetRange(current.getFrom(), rangesToProcess.get(i).getTo()));
            }
            rangesToProcess.clear();
        }
        rangesWithSameFrom.add(current);
    }
    // Process the last chunk of overlapping ranges
    while (!rangesWithSameFrom.isEmpty()) {
        // This range always represents the range with the smallest 'to'
        OffsetRange current = rangesWithSameFrom.remove();
        rangesToProcess.addAll(rangesWithSameFrom);
        Collections.sort(rangesToProcess, OffsetRangeComparator.INSTANCE);
        rangesWithSameFrom.clear();
        // current is covered by itself plus every range still queued alongside it.
        rval.put(current, rangesToProcess.size() + 1);
        // Shorten all the remaining ranges such that they start with current.to
        for (OffsetRange rangeWithDifferentFrom : rangesToProcess) {
            // Skip any duplicates of current (same 'to'); they were already counted above
            if (rangeWithDifferentFrom.getTo() > current.getTo()) {
                rangesWithSameFrom.add(new OffsetRange(current.getTo(), rangeWithDifferentFrom.getTo()));
            }
        }
        rangesToProcess.clear();
    }
    return rval.build();
}
Also used : OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) ArrayList(java.util.ArrayList) ImmutableSortedMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSortedMap) PriorityQueue(java.util.PriorityQueue) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 32 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class GcsUtil method makeGetBatches.

/**
 * Builds the batches needed to look up every path in {@code paths}.
 *
 * <p>The paths are split into groups of at most {@code MAX_REQUESTS_PER_BATCH}. Each group gets
 * its own batch obtained from {@code batchRequestSupplier}, and one request per path is enqueued
 * on it via {@code enqueueGetFileSize}. The value returned by each enqueue call is appended to
 * {@code results} in path order.
 *
 * @param paths {@link GcsPath GcsPaths} to look up.
 * @param results mutable {@link List} that receives one entry per path.
 * @return {@link BatchInterface BatchInterfaces} to execute, one per group of paths.
 * @throws IOException declared by this method; presumably propagated from
 *     {@code enqueueGetFileSize} (its declaration is not visible here).
 */
@VisibleForTesting
List<BatchInterface> makeGetBatches(Collection<GcsPath> paths, List<StorageObjectOrIOException[]> results) throws IOException {
    List<BatchInterface> pendingBatches = new ArrayList<>();
    List<List<GcsPath>> pathGroups = Lists.partition(Lists.newArrayList(paths), MAX_REQUESTS_PER_BATCH);
    for (List<GcsPath> group : pathGroups) {
        // One fresh batch per group keeps each batch within the per-batch request limit.
        BatchInterface groupBatch = batchRequestSupplier.get();
        for (GcsPath gcsPath : group) {
            StorageObjectOrIOException[] holder = enqueueGetFileSize(gcsPath, groupBatch);
            results.add(holder);
        }
        pendingBatches.add(groupBatch);
    }
    return pendingBatches;
}
Also used : ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 33 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class GcsUtil method open.

/**
 * Opens an object in GCS.
 *
 * <p>Returns a SeekableByteChannel that provides access to data in the bucket. The outcome of the
 * call is recorded on a {@code ServiceCallMetric}: {@code "ok"} on success, or the error code of
 * the underlying {@code GoogleJsonResponseException} when the failure was caused by one.
 *
 * @param path the GCS filename to read from
 * @param readOptions Fine-grained options for behaviors of retries, buffering, etc.
 * @return a SeekableByteChannel that can read the object data
 * @throws IOException if the underlying storage client fails to open the object; the original
 *     exception is rethrown unchanged
 */
@VisibleForTesting
SeekableByteChannel open(GcsPath path, GoogleCloudStorageReadOptions readOptions) throws IOException {
    HashMap<String, String> baseLabels = new HashMap<>();
    baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
    baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Storage");
    baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "GcsGet");
    baseLabels.put(MonitoringInfoConstants.Labels.RESOURCE, GcpResourceIdentifiers.cloudStorageBucket(path.getBucket()));
    baseLabels.put(MonitoringInfoConstants.Labels.GCS_PROJECT_ID, googleCloudStorageOptions.getProjectId());
    baseLabels.put(MonitoringInfoConstants.Labels.GCS_BUCKET, path.getBucket());
    ServiceCallMetric serviceCallMetric = new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
    try {
        SeekableByteChannel channel = googleCloudStorage.open(new StorageResourceId(path.getBucket(), path.getObject()), readOptions);
        serviceCallMetric.call("ok");
        return channel;
    } catch (IOException e) {
        if (e.getCause() instanceof GoogleJsonResponseException) {
            GoogleJsonResponseException jsonFailure = (GoogleJsonResponseException) e.getCause();
            // getDetails() may be null (e.g. the error response had no JSON body). Fall back to the
            // HTTP status code so that recording the metric can never throw a NullPointerException
            // that would hide the original IOException from the caller.
            int errorCode = jsonFailure.getDetails() != null ? jsonFailure.getDetails().getCode() : jsonFailure.getStatusCode();
            serviceCallMetric.call(errorCode);
        }
        throw e;
    }
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) HashMap(java.util.HashMap) IOException(java.io.IOException) StorageResourceId(com.google.cloud.hadoop.gcsio.StorageResourceId) ServiceCallMetric(org.apache.beam.runners.core.metrics.ServiceCallMetric) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 34 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class GcsUtil method createBucket.

/**
 * Inserts {@code bucket} into {@code projectId} with the "projectPrivate" predefined ACLs,
 * retrying through {@code ResilientOperation} with the supplied backoff and sleeper.
 *
 * <p>"Already exists" and "access denied" failures are never retried; any other failure is
 * retried only when {@code RetryDeterminer.SOCKET_ERRORS} says so.
 *
 * @param projectId project that will own the bucket.
 * @param bucket the bucket to create.
 * @param backoff backoff policy handed to the retry loop.
 * @param sleeper sleeper handed to the retry loop.
 * @throws AccessDeniedException if the service reports that access was denied.
 * @throws FileAlreadyExistsException if the service reports that the bucket already exists.
 * @throws IOException for any other failure, including interruption while retrying (the thread's
 *     interrupt flag is restored first).
 */
@VisibleForTesting
void createBucket(String projectId, Bucket bucket, BackOff backoff, Sleeper sleeper) throws IOException {
    Storage.Buckets.Insert request = storageClient.buckets().insert(projectId, bucket);
    request.setPredefinedAcl("projectPrivate");
    request.setPredefinedDefaultObjectAcl("projectPrivate");
    RetryDeterminer<IOException> retryPolicy = new RetryDeterminer<IOException>() {

        @Override
        public boolean shouldRetry(IOException failure) {
            // Neither of these outcomes can change on a retry, so give up immediately.
            boolean permanent = errorExtractor.itemAlreadyExists(failure) || errorExtractor.accessDenied(failure);
            return !permanent && RetryDeterminer.SOCKET_ERRORS.shouldRetry(failure);
        }
    };
    try {
        ResilientOperation.retry(request::execute, backoff, retryPolicy, IOException.class, sleeper);
    } catch (GoogleJsonResponseException e) {
        // Translate the two well-known service errors into their java.nio.file equivalents.
        if (errorExtractor.accessDenied(e)) {
            throw new AccessDeniedException(bucket.getName(), null, e.getMessage());
        } else if (errorExtractor.itemAlreadyExists(e)) {
            throw new FileAlreadyExistsException(bucket.getName(), null, e.getMessage());
        } else {
            throw e;
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        String message = String.format("Error while attempting to create bucket gs://%s for project %s", bucket.getName(), projectId);
        throw new IOException(message, e);
    }
}
Also used : GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) AccessDeniedException(java.nio.file.AccessDeniedException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) IOException(java.io.IOException) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 35 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class BeamIOPushDownRule method isProjectRenameOnlyProgram.

/**
 * Checks whether a program does nothing beyond projecting and/or renaming input fields.
 * RexProgram#isTrivial is not sufficient here, because the number of projects does not have to
 * match the number of inputs. The method answers {@code false} (Calc must NOT be dropped) when:
 *
 * <ol>
 *   <li>a projected value is not a plain input field (ex: 'select field1+10'), i.e. its index is
 *       at or beyond the input field count;
 *   <li>the same field is projected more than once;
 *   <li>the IO does not support field reordering and the fields are not projected in strictly
 *       increasing index order.
 * </ol>
 *
 * @param program A program to check.
 * @param projectReorderingSupported Whether project push-down supports field reordering.
 * @return True when program performs only projects (w/o any modifications), false otherwise.
 */
@VisibleForTesting
boolean isProjectRenameOnlyProgram(RexProgram program, boolean projectReorderingSupported) {
    final int inputFieldCount = program.getInputRowType().getFieldCount();
    final Set<Integer> seenIndexes = new HashSet<>();
    int lastIndex = -1;
    for (RexLocalRef projection : program.getProjectList()) {
        final int current = projection.getIndex();
        // Projected values must be InputRefs.
        if (current >= inputFieldCount) {
            return false;
        }
        // Each field may be projected only once.
        if (!seenIndexes.add(current)) {
            return false;
        }
        // Without reordering support the fields must appear in schema order.
        if (!projectReorderingSupported && current <= lastIndex) {
            return false;
        }
        lastIndex = current;
    }
    return true;
}
Also used : RexLocalRef(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexLocalRef) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)81 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)17 ParameterizedType (java.lang.reflect.ParameterizedType)15 Type (java.lang.reflect.Type)15 Parameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter)14 BundleFinalizerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter)14 PipelineOptionsParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter)14 RestrictionParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter)14 RestrictionTrackerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter)14 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)14 StateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter)14 TimerFamilyParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter)14 TimerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter)14 WatermarkEstimatorParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter)14 WatermarkEstimatorStateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter)14 WindowParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter)14 TypeParameter (org.apache.beam.sdk.values.TypeParameter)14 DoFn (org.apache.beam.sdk.transforms.DoFn)10 Map (java.util.Map)7