Search in sources :

Example 36 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class FnApiDoFnRunner method computeSplitForProcessOrTruncate.

/**
 * Computes a split for an element that is being processed (or truncated) over a range of
 * windows, using either a restriction tracker or a downstream split delegate.
 *
 * <p>Exactly one of {@code currentTracker} and {@code splitDelegate} must be non-null; this is
 * enforced with {@code checkArgument}. When {@code currentTracker} is supplied,
 * {@code watermarkAndState} must be non-null as well.
 *
 * <p>Two cases are handled:
 *
 * <ul>
 *   <li>Not on the last window ({@code currentWindowIndex != stopWindowIndex - 1}): the element
 *       progress is scaled with {@code scaleProgress} and the requested fraction is applied to
 *       the scaled remaining work. If the result is at least the element's own remaining work,
 *       the split is placed on a window boundary and no element-level split is attempted;
 *       otherwise the element itself is split with the scaled fraction.
 *   <li>On the last window: the element is split directly with {@code fractionOfRemainder}.
 * </ul>
 *
 * @param fractionOfRemainder the fraction of the remaining work at which to split
 * @param currentWindowIndex index of the window currently being processed
 * @param stopWindowIndex index at which window processing currently stops; the last window to
 *     process is {@code stopWindowIndex - 1}
 * @return the windowed split result, the downstream split result (only set when
 *     {@code splitDelegate} is used) and the new stop index; or {@code null} when on the last
 *     window and neither the tracker nor the delegate produced a split
 */
@VisibleForTesting
static <WatermarkEstimatorStateT> SplitResultsWithStopIndex computeSplitForProcessOrTruncate(WindowedValue currentElement, Object currentRestriction, BoundedWindow currentWindow, List<BoundedWindow> windows, WatermarkEstimatorStateT currentWatermarkEstimatorState, double fractionOfRemainder, RestrictionTracker currentTracker, HandlesSplits splitDelegate, KV<Instant, WatermarkEstimatorStateT> watermarkAndState, int currentWindowIndex, int stopWindowIndex) {
    // We should only have currentTracker or splitDelegate.
    checkArgument((currentTracker != null) ^ (splitDelegate != null));
    // When we have currentTracker, the watermarkAndState should not be null.
    if (currentTracker != null) {
        checkNotNull(watermarkAndState);
    }
    WindowedSplitResult windowedSplitResult = null;
    HandlesSplits.SplitResult downstreamSplitResult = null;
    int newWindowStopIndex = stopWindowIndex;
    // If we are not on the last window, try to compute the split which is on the current window
    // or on a future window.
    if (currentWindowIndex != stopWindowIndex - 1) {
        // Compute the fraction of the remainder relative to the scaled progress.
        Progress elementProgress;
        if (currentTracker != null) {
            if (currentTracker instanceof HasProgress) {
                elementProgress = ((HasProgress) currentTracker).getProgress();
            } else {
                // The tracker does not report progress: assume nothing completed, 1 unit remaining.
                elementProgress = Progress.from(0, 1);
            }
        } else {
            // The delegate reports completed work as a single number; the remainder is taken
            // as its complement to 1.
            double elementCompleted = splitDelegate.getProgress();
            elementProgress = Progress.from(elementCompleted, 1 - elementCompleted);
        }
        Progress scaledProgress = scaleProgress(elementProgress, currentWindowIndex, stopWindowIndex);
        double scaledFractionOfRemainder = scaledProgress.getWorkRemaining() * fractionOfRemainder;
        // The fraction is out of the current window and hence we will split at the closest window
        // boundary.
        if (scaledFractionOfRemainder >= elementProgress.getWorkRemaining()) {
            // Advance by at least one window, but never past the last window (stopWindowIndex - 1).
            newWindowStopIndex = (int) Math.min(stopWindowIndex - 1, currentWindowIndex + Math.max(1, Math.round((elementProgress.getWorkCompleted() + scaledFractionOfRemainder) / (elementProgress.getWorkCompleted() + elementProgress.getWorkRemaining()))));
            // Window-boundary split: no element-level split result is passed along.
            windowedSplitResult = computeWindowSplitResult(currentElement, currentRestriction, currentWindow, windows, currentWatermarkEstimatorState, newWindowStopIndex, newWindowStopIndex, stopWindowIndex, null, watermarkAndState);
        } else {
            // Compute the element split with the scaled fraction.
            SplitResult<?> elementSplit = null;
            if (currentTracker != null) {
                // The tracker receives the fraction relative to the element's own remaining work.
                elementSplit = currentTracker.trySplit(scaledFractionOfRemainder / elementProgress.getWorkRemaining());
            } else {
                downstreamSplitResult = splitDelegate.trySplit(scaledFractionOfRemainder);
            }
            newWindowStopIndex = currentWindowIndex + 1;
            // If neither the tracker nor the delegate could split the element, use the next
            // window index as the split index; otherwise use the current window index.
            int toIndex = (elementSplit == null && downstreamSplitResult == null) ? newWindowStopIndex : currentWindowIndex;
            windowedSplitResult = computeWindowSplitResult(currentElement, currentRestriction, currentWindow, windows, currentWatermarkEstimatorState, toIndex, newWindowStopIndex, stopWindowIndex, elementSplit, watermarkAndState);
        }
    } else {
        // We are on the last window then compute the element split with given fraction.
        SplitResult<?> elementSplitResult = null;
        newWindowStopIndex = stopWindowIndex;
        if (currentTracker != null) {
            elementSplitResult = currentTracker.trySplit(fractionOfRemainder);
        } else {
            downstreamSplitResult = splitDelegate.trySplit(fractionOfRemainder);
        }
        // Nothing could be split on the last window, so there is no split to report.
        if (elementSplitResult == null && downstreamSplitResult == null) {
            return null;
        }
        windowedSplitResult = computeWindowSplitResult(currentElement, currentRestriction, currentWindow, windows, currentWatermarkEstimatorState, currentWindowIndex, stopWindowIndex, stopWindowIndex, elementSplitResult, watermarkAndState);
    }
    return SplitResultsWithStopIndex.of(windowedSplitResult, downstreamSplitResult, newWindowStopIndex);
}
Also used : Progress(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker.Progress) HasProgress(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker.HasProgress) HasProgress(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker.HasProgress) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 37 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class S3FileSystem method matchGlobPaths.

/**
 * Gets {@link MatchResult} representing all objects that match wildcard-containing paths.
 *
 * <p>Exactly one result is returned per entry of {@code globPaths}, in the collection's
 * iteration order:
 *
 * <ul>
 *   <li>{@code ERROR} when expanding the glob itself failed;
 *   <li>{@code NOT_FOUND} when looking up an expanded path failed with a
 *       {@link FileNotFoundException};
 *   <li>{@code ERROR} when looking up an expanded path failed with any other exception;
 *   <li>{@code OK} with the metadata of every expanded path otherwise.
 * </ul>
 *
 * <p>Duplicate globs, and distinct globs that expand to the same object, are tolerated: each
 * distinct glob is expanded once and each distinct object is looked up once.
 *
 * @param globPaths the wildcard-containing paths to match
 * @return an immutable list with one {@link MatchResult} per input glob
 * @throws IOException declared for failures raised while running the expansion/lookup tasks
 */
@VisibleForTesting
List<MatchResult> matchGlobPaths(Collection<S3ResourceId> globPaths) throws IOException {
    Stream<Callable<ExpandedGlob>> expandTasks = globPaths.stream().distinct().map(path -> () -> expandGlob(path));
    // Collectors.toMap without a merge function throws IllegalStateException on a duplicate key,
    // so keep the first entry if the same glob path is ever reported twice.
    Map<S3ResourceId, ExpandedGlob> expandedGlobByGlobPath = callTasks(expandTasks).stream().collect(Collectors.toMap(ExpandedGlob::getGlobPath, expandedGlob -> expandedGlob, (first, duplicate) -> first));
    // Overlapping globs can expand to the same object; look each object up only once.
    Stream<Callable<PathWithEncoding>> contentTypeTasks = expandedGlobByGlobPath.values().stream().map(ExpandedGlob::getExpandedPaths).filter(Objects::nonNull).flatMap(List::stream).distinct().map(path -> () -> getPathContentEncoding(path));
    Map<S3ResourceId, PathWithEncoding> pathWithEncodingByPath = callTasks(contentTypeTasks).stream().collect(Collectors.toMap(PathWithEncoding::getPath, pathWithEncoding -> pathWithEncoding, (first, duplicate) -> first));
    List<MatchResult> results = new ArrayList<>(globPaths.size());
    for (S3ResourceId globPath : globPaths) {
        ExpandedGlob expandedGlob = expandedGlobByGlobPath.get(globPath);
        if (expandedGlob.getException() != null) {
            results.add(MatchResult.create(MatchResult.Status.ERROR, expandedGlob.getException()));
            continue;
        }
        List<MatchResult.Metadata> metadatas = new ArrayList<>();
        IOException exception = null;
        for (S3ResourceId expandedPath : expandedGlob.getExpandedPaths()) {
            PathWithEncoding pathWithEncoding = pathWithEncodingByPath.get(expandedPath);
            if (pathWithEncoding.getException() != null) {
                // The first failing object decides the outcome for the whole glob.
                exception = pathWithEncoding.getException();
                break;
            }
            // TODO(BEAM-11821): Support file checksum in this method.
            metadatas.add(createBeamMetadata(pathWithEncoding.getPath(), pathWithEncoding.getContentEncoding(), null));
        }
        if (exception == null) {
            results.add(MatchResult.create(MatchResult.Status.OK, metadatas));
        } else if (exception instanceof FileNotFoundException) {
            results.add(MatchResult.create(MatchResult.Status.NOT_FOUND, exception));
        } else {
            results.add(MatchResult.create(MatchResult.Status.ERROR, exception));
        }
    }
    return ImmutableList.copyOf(results);
}
Also used : CompletedMultipartUpload(software.amazon.awssdk.services.s3.model.CompletedMultipartUpload) Date(java.util.Date) LoggerFactory(org.slf4j.LoggerFactory) CreateMultipartUploadResponse(software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse) S3Exception(software.amazon.awssdk.services.s3.model.S3Exception) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) Future(java.util.concurrent.Future) Supplier(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier) Strings(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) ArrayListMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ArrayListMultimap) ObjectIdentifier(software.amazon.awssdk.services.s3.model.ObjectIdentifier) CompleteMultipartUploadResponse(software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse) CopyObjectResponse(software.amazon.awssdk.services.s3.model.CopyObjectResponse) Suppliers(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Suppliers) MoreExecutors(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.MoreExecutors) Collection(java.util.Collection) CompleteMultipartUploadRequest(software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest) HeadObjectRequest(software.amazon.awssdk.services.s3.model.HeadObjectRequest) ListObjectsV2Request(software.amazon.awssdk.services.s3.model.ListObjectsV2Request) Collectors(java.util.stream.Collectors) S3Options(org.apache.beam.sdk.io.aws2.options.S3Options) FileNotFoundException(java.io.FileNotFoundException) Executors(java.util.concurrent.Executors) ThreadFactoryBuilder(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ThreadFactoryBuilder) Objects(java.util.Objects) List(java.util.List) 
DeleteObjectsRequest(software.amazon.awssdk.services.s3.model.DeleteObjectsRequest) Stream(java.util.stream.Stream) MoveOptions(org.apache.beam.sdk.io.fs.MoveOptions) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) HeadObjectResponse(software.amazon.awssdk.services.s3.model.HeadObjectResponse) AutoValue(com.google.auto.value.AutoValue) ListeningExecutorService(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ListeningExecutorService) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Pattern(java.util.regex.Pattern) SdkServiceException(software.amazon.awssdk.core.exception.SdkServiceException) Delete(software.amazon.awssdk.services.s3.model.Delete) FileSystem(org.apache.beam.sdk.io.FileSystem) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) MatchResult(org.apache.beam.sdk.io.fs.MatchResult) S3Object(software.amazon.awssdk.services.s3.model.S3Object) CopyObjectRequest(software.amazon.awssdk.services.s3.model.CopyObjectRequest) FileSystemUtils.wildcardToRegexp(org.apache.beam.sdk.io.FileSystemUtils.wildcardToRegexp) Callable(java.util.concurrent.Callable) UploadPartCopyRequest(software.amazon.awssdk.services.s3.model.UploadPartCopyRequest) ListObjectsV2Response(software.amazon.awssdk.services.s3.model.ListObjectsV2Response) ArrayList(java.util.ArrayList) Multimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Nullable(org.checkerframework.checker.nullness.qual.Nullable) ReadableByteChannel(java.nio.channels.ReadableByteChannel) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) CreateOptions(org.apache.beam.sdk.io.fs.CreateOptions) S3Client(software.amazon.awssdk.services.s3.S3Client) IOException(java.io.IOException) 
CopyPartResult(software.amazon.awssdk.services.s3.model.CopyPartResult) CompletedPart(software.amazon.awssdk.services.s3.model.CompletedPart) ExecutionException(java.util.concurrent.ExecutionException) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) MoreFutures(org.apache.beam.sdk.util.MoreFutures) WritableByteChannel(java.nio.channels.WritableByteChannel) CreateMultipartUploadRequest(software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Callable(java.util.concurrent.Callable) Objects(java.util.Objects) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 38 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class S3FileSystem method multipartCopy.

/**
 * Copies the object at {@code sourcePath} to {@code destinationPath} with the S3 multipart
 * upload API: a multipart upload is created on the destination, the source is copied into it
 * part by part, and the upload is then completed.
 *
 * <p>A zero-length source is copied as a single part without a byte range. Otherwise the source
 * is copied in consecutive ranges of {@code config.getS3UploadBufferSizeBytes()} bytes, the last
 * range being clipped to the end of the object.
 *
 * @param sourcePath the object to copy from
 * @param destinationPath the object to create
 * @param sourceObjectHead HEAD response of the source; supplies its length and metadata
 * @return the response of the completing {@code completeMultipartUpload} call
 * @throws SdkServiceException if any of the S3 calls fails
 */
@VisibleForTesting
CompleteMultipartUploadResponse multipartCopy(S3ResourceId sourcePath, S3ResourceId destinationPath, HeadObjectResponse sourceObjectHead) throws SdkServiceException {
    CreateMultipartUploadRequest initiateUploadRequest = CreateMultipartUploadRequest.builder().bucket(destinationPath.getBucket()).key(destinationPath.getKey()).storageClass(config.getS3StorageClass()).metadata(sourceObjectHead.metadata()).serverSideEncryption(config.getSSEAlgorithm()).ssekmsKeyId(config.getSSEKMSKeyId()).sseCustomerKey(config.getSSECustomerKey().getKey()).sseCustomerAlgorithm(config.getSSECustomerKey().getAlgorithm()).build();
    CreateMultipartUploadResponse createMultipartUploadResponse = s3Client.get().createMultipartUpload(initiateUploadRequest);
    final String uploadId = createMultipartUploadResponse.uploadId();
    List<CompletedPart> completedParts = new ArrayList<>();
    final long objectSize = sourceObjectHead.contentLength();
    // extra validation in case a caller calls directly S3FileSystem.multipartCopy
    // without using S3FileSystem.copy in the future
    if (objectSize == 0) {
        UploadPartCopyRequest uploadPartCopyRequest = newUploadPartCopyRequest(sourcePath, destinationPath, uploadId, 1).build();
        completedParts.add(copyPart(uploadPartCopyRequest, 1));
    } else {
        // Unbox once so the loop arithmetic is done on primitives.
        final long partSizeBytes = config.getS3UploadBufferSizeBytes();
        // Amazon parts are 1-indexed, not zero-indexed.
        int partNumber = 1;
        for (long bytePosition = 0; bytePosition < objectSize; bytePosition += partSizeBytes) {
            // Byte ranges are inclusive; clip the final range to the last byte of the object.
            long lastByte = Math.min(objectSize - 1, bytePosition + partSizeBytes - 1);
            UploadPartCopyRequest uploadPartCopyRequest = newUploadPartCopyRequest(sourcePath, destinationPath, uploadId, partNumber).copySourceRange(String.format("bytes=%s-%s", bytePosition, lastByte)).build();
            // Each completed part must carry the same number it was uploaded with.
            completedParts.add(copyPart(uploadPartCopyRequest, partNumber));
            partNumber++;
        }
    }
    CompletedMultipartUpload completedMultipartUpload = CompletedMultipartUpload.builder().parts(completedParts).build();
    CompleteMultipartUploadRequest completeUploadRequest = CompleteMultipartUploadRequest.builder().bucket(destinationPath.getBucket()).key(destinationPath.getKey()).uploadId(uploadId).multipartUpload(completedMultipartUpload).build();
    return s3Client.get().completeMultipartUpload(completeUploadRequest);
}

/**
 * Starts an UploadPartCopy request for one part: the request targets the destination object
 * (the one {@code uploadId} was created for) and reads from the source bucket and key.
 */
private UploadPartCopyRequest.Builder newUploadPartCopyRequest(S3ResourceId sourcePath, S3ResourceId destinationPath, String uploadId, int partNumber) {
    return UploadPartCopyRequest.builder().bucket(destinationPath.getBucket()).key(destinationPath.getKey()).copySource(sourcePath.getBucket() + "/" + sourcePath.getKey()).uploadId(uploadId).partNumber(partNumber).sseCustomerKey(config.getSSECustomerKey().getKey()).sseCustomerAlgorithm(config.getSSECustomerKey().getAlgorithm()).copySourceSSECustomerKey(config.getSSECustomerKey().getKey()).copySourceSSECustomerAlgorithm(config.getSSECustomerKey().getAlgorithm());
}

/** Executes one part copy and returns the completed-part record (part number plus ETag). */
private CompletedPart copyPart(UploadPartCopyRequest uploadPartCopyRequest, int partNumber) {
    CopyPartResult copyPartResult = s3Client.get().uploadPartCopy(uploadPartCopyRequest).copyPartResult();
    return CompletedPart.builder().partNumber(partNumber).eTag(copyPartResult.eTag()).build();
}
Also used : CompletedPart(software.amazon.awssdk.services.s3.model.CompletedPart) ArrayList(java.util.ArrayList) CreateMultipartUploadRequest(software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest) CreateMultipartUploadResponse(software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse) UploadPartCopyRequest(software.amazon.awssdk.services.s3.model.UploadPartCopyRequest) CopyPartResult(software.amazon.awssdk.services.s3.model.CopyPartResult) CompletedMultipartUpload(software.amazon.awssdk.services.s3.model.CompletedMultipartUpload) CompleteMultipartUploadRequest(software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 39 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class CoderProperties method encode.

// ////////////////////////////////////////////////////////////////////////
/**
 * Encodes {@code value} to bytes in the given {@code context}, using a copy of {@code coder}
 * obtained from {@code SerializableUtils.clone} rather than the supplied instance.
 *
 * @param coder the coder to copy and encode with
 * @param context the coding context passed through to {@code Coder.encode}
 * @param value the value to encode
 * @return the bytes the copied coder wrote
 * @throws CoderException declared by {@code Coder.encode}
 * @throws IOException declared by {@code Coder.encode}
 */
@VisibleForTesting
static <T> byte[] encode(Coder<T> coder, Coder.Context context, T value) throws CoderException, IOException {
    @SuppressWarnings("unchecked") Coder<T> clonedCoder = SerializableUtils.clone(coder);
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    // The coder only ever sees the wrapper, never the backing buffer itself.
    UnownedOutputStream unownedSink = new UnownedOutputStream(buffer);
    clonedCoder.encode(value, unownedSink, context);
    return buffer.toByteArray();
}
Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream) UnownedOutputStream(org.apache.beam.sdk.util.UnownedOutputStream) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 40 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class NestedPayloadKafkaTable method transformInput.

/**
 * Builds the output {@link Row} for one Kafka record.
 *
 * <p>The message key, event timestamp and headers are copied only when the table schema
 * declares the corresponding field; headers are additionally skipped when the record has none.
 * Without a payload serializer the raw value bytes become the payload field (even if
 * {@code null}); with one, a non-null value is deserialized first and a {@code null} value
 * leaves the payload field unset.
 *
 * @param record the Kafka record to convert
 * @return the row built from the record
 */
@VisibleForTesting
Row transformInput(KafkaRecord<byte[], byte[]> record) {
    Row.FieldValueBuilder rowBuilder = Row.withSchema(getSchema()).withFieldValues(ImmutableMap.of());
    if (schema.hasField(Schemas.MESSAGE_KEY_FIELD)) {
        rowBuilder.withFieldValue(Schemas.MESSAGE_KEY_FIELD, record.getKV().getKey());
    }
    if (schema.hasField(Schemas.EVENT_TIMESTAMP_FIELD)) {
        rowBuilder.withFieldValue(Schemas.EVENT_TIMESTAMP_FIELD, Instant.ofEpochMilli(record.getTimestamp()));
    }
    if (schema.hasField(Schemas.HEADERS_FIELD)) {
        @Nullable Headers recordHeaders = record.getHeaders();
        if (recordHeaders != null) {
            rowBuilder.withFieldValue(Schemas.HEADERS_FIELD, toHeaderRows(recordHeaders));
        }
    }
    byte[] payload = record.getKV().getValue();
    if (payloadSerializer == null) {
        rowBuilder.withFieldValue(Schemas.PAYLOAD_FIELD, payload);
    } else if (payload != null) {
        rowBuilder.withFieldValue(Schemas.PAYLOAD_FIELD, payloadSerializer.deserialize(payload));
    }
    return rowBuilder.build();
}

/** Groups header values by key and emits one header-entry row per distinct key. */
private static ImmutableList<Row> toHeaderRows(Headers recordHeaders) {
    ImmutableListMultimap.Builder<String, byte[]> valuesByKey = ImmutableListMultimap.builder();
    recordHeaders.forEach(header -> valuesByKey.put(header.key(), header.value()));
    ImmutableList.Builder<Row> headerRows = ImmutableList.builder();
    valuesByKey.build().asMap().forEach((key, values) -> headerRows.add(Row.withSchema(Schemas.HEADERS_ENTRY_SCHEMA).withFieldValue(Schemas.HEADERS_KEY_FIELD, key).withFieldValue(Schemas.HEADERS_VALUES_FIELD, values).build()));
    return headerRows.build();
}
Also used : Headers(org.apache.kafka.common.header.Headers) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ImmutableListMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableListMultimap) Cast.castRow(org.apache.beam.sdk.schemas.transforms.Cast.castRow) Row(org.apache.beam.sdk.values.Row) Nullable(javax.annotation.Nullable) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)81 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)17 ParameterizedType (java.lang.reflect.ParameterizedType)15 Type (java.lang.reflect.Type)15 Parameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter)14 BundleFinalizerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter)14 PipelineOptionsParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter)14 RestrictionParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter)14 RestrictionTrackerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter)14 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)14 StateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter)14 TimerFamilyParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter)14 TimerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter)14 WatermarkEstimatorParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter)14 WatermarkEstimatorStateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter)14 WindowParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter)14 TypeParameter (org.apache.beam.sdk.values.TypeParameter)14 DoFn (org.apache.beam.sdk.transforms.DoFn)10 Map (java.util.Map)7