Search in sources :

Example 56 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class S3FileSystem method atomicCopy.

/**
 * Copies one S3 object to another location with a single {@code copyObject} call.
 *
 * <p>The destination receives the supplied metadata, the storage class from {@code config}, and
 * the configured SSE customer key; the same key is also supplied for reading the source.
 *
 * @param sourcePath bucket and key of the object to copy
 * @param destinationPath bucket and key to copy the object to
 * @param sourceObjectMetadata metadata applied to the new object
 * @return the result returned by the S3 client for the copy request
 * @throws AmazonClientException if the S3 client call fails
 */
@VisibleForTesting
CopyObjectResult atomicCopy(S3ResourceId sourcePath, S3ResourceId destinationPath, ObjectMetadata sourceObjectMetadata) throws AmazonClientException {
    // Source coordinates first, then destination, matching the constructor's argument order.
    final String fromBucket = sourcePath.getBucket();
    final String fromKey = sourcePath.getKey();
    final String toBucket = destinationPath.getBucket();
    final String toKey = destinationPath.getKey();

    final CopyObjectRequest request = new CopyObjectRequest(fromBucket, fromKey, toBucket, toKey);
    request.setNewObjectMetadata(sourceObjectMetadata);
    request.setStorageClass(config.getS3StorageClass());
    // The same customer-provided key is used to read the source and to write the destination.
    request.setSourceSSECustomerKey(config.getSSECustomerKey());
    request.setDestinationSSECustomerKey(config.getSSECustomerKey());

    return amazonS3.get().copyObject(request);
}
Also used : CopyObjectRequest(com.amazonaws.services.s3.model.CopyObjectRequest) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 57 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class S3FileSystem method matchGlobPaths.

/**
 * Gets {@link MatchResult} representing all objects that match wildcard-containing paths.
 *
 * <p>Works in three stages: every glob is expanded, the content encoding of every expanded path
 * is looked up, and finally one {@link MatchResult} is assembled per glob, in the iteration order
 * of {@code globPaths}.
 *
 * @param globPaths the wildcard-containing paths to resolve
 * @return an immutable list with one result per entry of {@code globPaths}
 * @throws IOException declared by the signature; presumably propagated from {@code callTasks}
 */
@VisibleForTesting
List<MatchResult> matchGlobPaths(Collection<S3ResourceId> globPaths) throws IOException {
    // Stage 1: one expansion task per glob.
    List<Callable<ExpandedGlob>> globExpansions = new ArrayList<>(globPaths.size());
    for (final S3ResourceId glob : globPaths) {
        globExpansions.add(() -> expandGlob(glob));
    }

    // Index the expansions by glob and queue an encoding lookup for every expanded path.
    Map<S3ResourceId, ExpandedGlob> expansionByGlob = new HashMap<>();
    List<Callable<PathWithEncoding>> encodingLookups = new ArrayList<>(globPaths.size());
    for (ExpandedGlob expansion : callTasks(globExpansions)) {
        expansionByGlob.put(expansion.getGlobPath(), expansion);
        if (expansion.getExpandedPaths() == null) {
            continue;
        }
        for (final S3ResourceId objectPath : expansion.getExpandedPaths()) {
            encodingLookups.add(() -> getPathContentEncoding(objectPath));
        }
    }

    // Stage 2: run the lookups; each outcome carries either an encoding or an exception.
    Map<S3ResourceId, PathWithEncoding> lookupByPath = new HashMap<>();
    for (PathWithEncoding lookup : callTasks(encodingLookups)) {
        lookupByPath.put(lookup.getPath(), lookup);
    }

    // Stage 3: assemble one result per requested glob.
    List<MatchResult> matches = new ArrayList<>(globPaths.size());
    for (S3ResourceId glob : globPaths) {
        ExpandedGlob expansion = expansionByGlob.get(glob);
        if (expansion.getException() != null) {
            // The glob itself could not be expanded.
            matches.add(MatchResult.create(MatchResult.Status.ERROR, expansion.getException()));
            continue;
        }

        List<MatchResult.Metadata> found = new ArrayList<>();
        IOException failure = null;
        for (S3ResourceId objectPath : expansion.getExpandedPaths()) {
            PathWithEncoding lookup = lookupByPath.get(objectPath);
            failure = lookup.getException();
            if (failure != null) {
                // The first failing path decides the outcome for the whole glob.
                break;
            }
            // TODO(BEAM-11821): Support file checksum in this method.
            found.add(createBeamMetadata(lookup.getPath(), lookup.getContentEncoding(), null));
        }

        if (failure == null) {
            matches.add(MatchResult.create(MatchResult.Status.OK, found));
        } else if (failure instanceof FileNotFoundException) {
            matches.add(MatchResult.create(MatchResult.Status.NOT_FOUND, failure));
        } else {
            matches.add(MatchResult.create(MatchResult.Status.ERROR, failure));
        }
    }
    return ImmutableList.copyOf(matches);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Callable(java.util.concurrent.Callable) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 58 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class S3FileSystem method multipartCopy.

/**
 * Copies an S3 object through the multipart-upload API: initiates an upload at the destination,
 * copies the source part by part, then completes the upload with the collected part ETags.
 *
 * @param sourcePath bucket and key of the object to copy
 * @param destinationPath bucket and key to copy the object to
 * @param sourceObjectMetadata metadata of the source; supplies the content length and is applied
 *     to the destination object
 * @return the result returned by the S3 client when the multipart upload is completed
 * @throws AmazonClientException if any S3 client call fails
 */
@VisibleForTesting
CompleteMultipartUploadResult multipartCopy(S3ResourceId sourcePath, S3ResourceId destinationPath, ObjectMetadata sourceObjectMetadata) throws AmazonClientException {
    // Open the multipart upload on the destination object.
    InitiateMultipartUploadRequest startRequest = new InitiateMultipartUploadRequest(destinationPath.getBucket(), destinationPath.getKey())
            .withStorageClass(config.getS3StorageClass())
            .withObjectMetadata(sourceObjectMetadata);
    startRequest.setSSECustomerKey(config.getSSECustomerKey());
    InitiateMultipartUploadResult started = amazonS3.get().initiateMultipartUpload(startRequest);
    final String uploadId = started.getUploadId();

    List<PartETag> partTags = new ArrayList<>();
    final long totalBytes = sourceObjectMetadata.getContentLength();

    if (totalBytes == 0) {
        // A zero-length source is copied as a single part with no byte range.
        // NOTE(review): presumably a safeguard for callers that invoke multipartCopy directly
        // rather than through S3FileSystem.copy - confirm.
        final CopyPartRequest wholeObject = new CopyPartRequest()
                .withSourceBucketName(sourcePath.getBucket())
                .withSourceKey(sourcePath.getKey())
                .withDestinationBucketName(destinationPath.getBucket())
                .withDestinationKey(destinationPath.getKey())
                .withUploadId(uploadId)
                .withPartNumber(1);
        wholeObject.setSourceSSECustomerKey(config.getSSECustomerKey());
        wholeObject.setDestinationSSECustomerKey(config.getSSECustomerKey());
        partTags.add(amazonS3.get().copyPart(wholeObject).getPartETag());
    } else {
        Integer partSizeBytes = config.getS3UploadBufferSizeBytes();
        long offset = 0;
        // Amazon parts are 1-indexed, not zero-indexed.
        int partNumber = 1;
        while (offset < totalBytes) {
            // The last part is clamped to the final byte of the object.
            final CopyPartRequest range = new CopyPartRequest()
                    .withSourceBucketName(sourcePath.getBucket())
                    .withSourceKey(sourcePath.getKey())
                    .withDestinationBucketName(destinationPath.getBucket())
                    .withDestinationKey(destinationPath.getKey())
                    .withUploadId(uploadId)
                    .withPartNumber(partNumber)
                    .withFirstByte(offset)
                    .withLastByte(Math.min(totalBytes - 1, offset + partSizeBytes - 1));
            range.setSourceSSECustomerKey(config.getSSECustomerKey());
            range.setDestinationSSECustomerKey(config.getSSECustomerKey());
            partTags.add(amazonS3.get().copyPart(range).getPartETag());

            offset += partSizeBytes;
            partNumber++;
        }
    }

    // Stitch the copied parts together into the final destination object.
    CompleteMultipartUploadRequest finishRequest = new CompleteMultipartUploadRequest()
            .withBucketName(destinationPath.getBucket())
            .withKey(destinationPath.getKey())
            .withUploadId(uploadId)
            .withPartETags(partTags);
    return amazonS3.get().completeMultipartUpload(finishRequest);
}
Also used : CopyPartRequest(com.amazonaws.services.s3.model.CopyPartRequest) InitiateMultipartUploadResult(com.amazonaws.services.s3.model.InitiateMultipartUploadResult) InitiateMultipartUploadRequest(com.amazonaws.services.s3.model.InitiateMultipartUploadRequest) ArrayList(java.util.ArrayList) CopyPartResult(com.amazonaws.services.s3.model.CopyPartResult) PartETag(com.amazonaws.services.s3.model.PartETag) CompleteMultipartUploadRequest(com.amazonaws.services.s3.model.CompleteMultipartUploadRequest) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 59 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class FnApiDoFnRunner method constructSplitResult.

/**
 * Builds a {@link HandlesSplits.SplitResult} from a window-level split and/or a downstream
 * element split.
 *
 * <p>Primary and residual roots are collected from up to three sources: the fully processed
 * windows (primary only), the unprocessed windows (residual only, carrying {@code
 * initialWatermark}), and the element split, which is taken either from {@code
 * windowedSplitResult} or from {@code downstreamElementSplit} but never from both.
 *
 * <p>Output watermarks are encoded as normalized protobuf timestamps: seconds are floored and
 * nanos are always in {@code [0, 999_999_999]}, also for instants before the epoch.
 *
 * @param windowedSplitResult window-level split roots; may be null
 * @param downstreamElementSplit element split produced downstream; may be null, and must be null
 *     when {@code windowedSplitResult} carries a residual split root
 * @param fullInputCoder coder used to encode every root element
 * @param initialWatermark watermark attached to the residual for unprocessed windows
 * @param watermarkAndState its key is the watermark attached to the element-split residual
 * @param pTransformId transform id set on every created application
 * @param mainInputId input id set on every created application
 * @param outputIds outputs that receive the output watermark
 * @param resumeDelay requested delay for the element-split residual; must be non-null when
 *     {@code windowedSplitResult} carries a residual split root
 * @return the split result holding all collected primary and residual roots
 * @throws RuntimeException wrapping the {@link IOException} if encoding a root fails
 */
@VisibleForTesting
static <WatermarkEstimatorStateT> HandlesSplits.SplitResult constructSplitResult(WindowedSplitResult windowedSplitResult, HandlesSplits.SplitResult downstreamElementSplit, Coder fullInputCoder, Instant initialWatermark, KV<Instant, WatermarkEstimatorStateT> watermarkAndState, String pTransformId, String mainInputId, Collection<String> outputIds, Duration resumeDelay) {
    // The element split cannot come from both windowedSplitResult and downstreamElementSplit.
    checkArgument((windowedSplitResult == null || windowedSplitResult.getResidualSplitRoot() == null) || downstreamElementSplit == null);
    List<BundleApplication> primaryRoots = new ArrayList<>();
    List<DelayedBundleApplication> residualRoots = new ArrayList<>();
    // Encode window splits.
    if (windowedSplitResult != null && windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot() != null) {
        ByteString.Output primaryInOtherWindowsBytes = ByteString.newOutput();
        try {
            fullInputCoder.encode(windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot(), primaryInOtherWindowsBytes);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        BundleApplication.Builder primaryApplicationInOtherWindows = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryInOtherWindowsBytes.toByteString());
        primaryRoots.add(primaryApplicationInOtherWindows.build());
    }
    if (windowedSplitResult != null && windowedSplitResult.getResidualInUnprocessedWindowsRoot() != null) {
        ByteString.Output bytesOut = ByteString.newOutput();
        try {
            fullInputCoder.encode(windowedSplitResult.getResidualInUnprocessedWindowsRoot(), bytesOut);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        BundleApplication.Builder residualInUnprocessedWindowsRoot = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(bytesOut.toByteString());
        // We don't want to change the output watermarks or set the checkpoint resume time since
        // that applies to the current window.
        Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> outputWatermarkMapForUnprocessedWindows = new HashMap<>();
        if (!initialWatermark.equals(GlobalWindow.TIMESTAMP_MIN_VALUE)) {
            // floorDiv/floorMod keep nanos non-negative for pre-epoch instants, as the protobuf
            // Timestamp contract requires; plain / and % would yield negative nanos there.
            long initialWatermarkMillis = initialWatermark.getMillis();
            org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp outputWatermark = org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp.newBuilder().setSeconds(Math.floorDiv(initialWatermarkMillis, 1000L)).setNanos(((int) Math.floorMod(initialWatermarkMillis, 1000L)) * 1000000).build();
            for (String outputId : outputIds) {
                outputWatermarkMapForUnprocessedWindows.put(outputId, outputWatermark);
            }
        }
        residualInUnprocessedWindowsRoot.putAllOutputWatermarks(outputWatermarkMapForUnprocessedWindows);
        residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualInUnprocessedWindowsRoot).build());
    }
    // Encode the element split, taken from windowedSplitResult or from the downstream element
    // split. It is possible that there is no element split.
    if (windowedSplitResult != null && windowedSplitResult.getResidualSplitRoot() != null) {
        // When there is element split in windowedSplitResult, the resumeDelay should not be null.
        checkNotNull(resumeDelay);
        ByteString.Output primaryBytes = ByteString.newOutput();
        ByteString.Output residualBytes = ByteString.newOutput();
        try {
            fullInputCoder.encode(windowedSplitResult.getPrimarySplitRoot(), primaryBytes);
            fullInputCoder.encode(windowedSplitResult.getResidualSplitRoot(), residualBytes);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        primaryRoots.add(BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryBytes.toByteString()).build());
        BundleApplication.Builder residualApplication = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(residualBytes.toByteString());
        Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> outputWatermarkMap = new HashMap<>();
        if (!watermarkAndState.getKey().equals(GlobalWindow.TIMESTAMP_MIN_VALUE)) {
            // Same normalization as above: floored seconds, nanos in [0, 999_999_999].
            long watermarkMillis = watermarkAndState.getKey().getMillis();
            org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp outputWatermark = org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp.newBuilder().setSeconds(Math.floorDiv(watermarkMillis, 1000L)).setNanos(((int) Math.floorMod(watermarkMillis, 1000L)) * 1000000).build();
            for (String outputId : outputIds) {
                outputWatermarkMap.put(outputId, outputWatermark);
            }
        }
        residualApplication.putAllOutputWatermarks(outputWatermarkMap);
        residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualApplication).setRequestedTimeDelay(Durations.fromMillis(resumeDelay.getMillis())).build());
    } else if (downstreamElementSplit != null) {
        // Pass the downstream split through; exactly one primary and one residual are expected.
        primaryRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getPrimaryRoots()));
        residualRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getResidualRoots()));
    }
    return HandlesSplits.SplitResult.of(primaryRoots, residualRoots);
}
Also used : DelayedBundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication) BundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.BundleApplication) HashMap(java.util.HashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) DelayedBundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 60 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class FnHarness method main.

/**
 * Starts the SDK Fn Harness using configuration obtained from {@code environmentVarGetter}.
 *
 * <p>Runs the JVM initializers, prints a startup banner to standard output, resolves the harness
 * id, pipeline options, service descriptors and runner capabilities, then delegates to the
 * six-argument {@code main} overload.
 *
 * @param environmentVarGetter lookup from a configuration key to its value; a null result for
 *     the status descriptor or the runner capabilities is handled as "not provided"
 * @throws Exception declared by the signature; the visible code does not catch anything
 */
@VisibleForTesting
public static void main(Function<String, String> environmentVarGetter) throws Exception {
    JvmInitializers.runOnStartup();

    // Startup banner.
    System.out.printf("SDK Fn Harness started%n");
    System.out.printf("Harness ID %s%n", environmentVarGetter.apply(HARNESS_ID));
    System.out.printf("Logging location %s%n", environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
    System.out.printf("Control location %s%n", environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
    System.out.printf("Status location %s%n", environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
    System.out.printf("Pipeline options %s%n", environmentVarGetter.apply(PIPELINE_OPTIONS));

    final String harnessId = environmentVarGetter.apply(HARNESS_ID);
    final PipelineOptions pipelineOptions = PipelineOptionsTranslation.fromJson(environmentVarGetter.apply(PIPELINE_OPTIONS));
    final Endpoints.ApiServiceDescriptor loggingDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
    final Endpoints.ApiServiceDescriptor controlDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));

    // The status endpoint is optional; it stays null when no value is provided.
    Endpoints.ApiServiceDescriptor statusDescriptor = null;
    if (environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR) != null) {
        statusDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
    }

    // Runner capabilities arrive as one whitespace-separated string; absent means none.
    final String rawCapabilities = environmentVarGetter.apply(RUNNER_CAPABILITIES);
    final Set<String> runnerCapabilities;
    if (rawCapabilities == null) {
        runnerCapabilities = Collections.emptySet();
    } else {
        runnerCapabilities = ImmutableSet.copyOf(rawCapabilities.split("\\s+"));
    }

    main(harnessId, pipelineOptions, runnerCapabilities, loggingDescriptor, controlDescriptor, statusDescriptor);
}
Also used : Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)81 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)17 ParameterizedType (java.lang.reflect.ParameterizedType)15 Type (java.lang.reflect.Type)15 Parameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter)14 BundleFinalizerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter)14 PipelineOptionsParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter)14 RestrictionParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter)14 RestrictionTrackerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter)14 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)14 StateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter)14 TimerFamilyParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter)14 TimerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter)14 WatermarkEstimatorParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter)14 WatermarkEstimatorStateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter)14 WindowParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter)14 TypeParameter (org.apache.beam.sdk.values.TypeParameter)14 DoFn (org.apache.beam.sdk.transforms.DoFn)10 Map (java.util.Map)7