use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class S3FileSystem method atomicCopy.
@VisibleForTesting
CopyObjectResult atomicCopy(S3ResourceId sourcePath, S3ResourceId destinationPath, ObjectMetadata sourceObjectMetadata) throws AmazonClientException {
CopyObjectRequest copyObjectRequest = new CopyObjectRequest(sourcePath.getBucket(), sourcePath.getKey(), destinationPath.getBucket(), destinationPath.getKey());
copyObjectRequest.setNewObjectMetadata(sourceObjectMetadata);
copyObjectRequest.setStorageClass(config.getS3StorageClass());
copyObjectRequest.setSourceSSECustomerKey(config.getSSECustomerKey());
copyObjectRequest.setDestinationSSECustomerKey(config.getSSECustomerKey());
return amazonS3.get().copyObject(copyObjectRequest);
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class S3FileSystem method matchGlobPaths.
/**
* Gets {@link MatchResult} representing all objects that match wildcard-containing paths.
*/
@VisibleForTesting
List<MatchResult> matchGlobPaths(Collection<S3ResourceId> globPaths) throws IOException {
List<Callable<ExpandedGlob>> expandTasks = new ArrayList<>(globPaths.size());
for (final S3ResourceId path : globPaths) {
expandTasks.add(() -> expandGlob(path));
}
Map<S3ResourceId, ExpandedGlob> expandedGlobByGlobPath = new HashMap<>();
List<Callable<PathWithEncoding>> contentTypeTasks = new ArrayList<>(globPaths.size());
for (ExpandedGlob expandedGlob : callTasks(expandTasks)) {
expandedGlobByGlobPath.put(expandedGlob.getGlobPath(), expandedGlob);
if (expandedGlob.getExpandedPaths() != null) {
for (final S3ResourceId path : expandedGlob.getExpandedPaths()) {
contentTypeTasks.add(() -> getPathContentEncoding(path));
}
}
}
Map<S3ResourceId, PathWithEncoding> exceptionByPath = new HashMap<>();
for (PathWithEncoding pathWithException : callTasks(contentTypeTasks)) {
exceptionByPath.put(pathWithException.getPath(), pathWithException);
}
List<MatchResult> results = new ArrayList<>(globPaths.size());
for (S3ResourceId globPath : globPaths) {
ExpandedGlob expandedGlob = expandedGlobByGlobPath.get(globPath);
if (expandedGlob.getException() != null) {
results.add(MatchResult.create(MatchResult.Status.ERROR, expandedGlob.getException()));
} else {
List<MatchResult.Metadata> metadatas = new ArrayList<>();
IOException exception = null;
for (S3ResourceId expandedPath : expandedGlob.getExpandedPaths()) {
PathWithEncoding pathWithEncoding = exceptionByPath.get(expandedPath);
if (pathWithEncoding.getException() != null) {
exception = pathWithEncoding.getException();
break;
} else {
// TODO(BEAM-11821): Support file checksum in this method.
metadatas.add(createBeamMetadata(pathWithEncoding.getPath(), pathWithEncoding.getContentEncoding(), null));
}
}
if (exception != null) {
if (exception instanceof FileNotFoundException) {
results.add(MatchResult.create(MatchResult.Status.NOT_FOUND, exception));
} else {
results.add(MatchResult.create(MatchResult.Status.ERROR, exception));
}
} else {
results.add(MatchResult.create(MatchResult.Status.OK, metadatas));
}
}
}
return ImmutableList.copyOf(results);
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class S3FileSystem method multipartCopy.
@VisibleForTesting
CompleteMultipartUploadResult multipartCopy(S3ResourceId sourcePath, S3ResourceId destinationPath, ObjectMetadata sourceObjectMetadata) throws AmazonClientException {
InitiateMultipartUploadRequest initiateUploadRequest = new InitiateMultipartUploadRequest(destinationPath.getBucket(), destinationPath.getKey()).withStorageClass(config.getS3StorageClass()).withObjectMetadata(sourceObjectMetadata);
initiateUploadRequest.setSSECustomerKey(config.getSSECustomerKey());
InitiateMultipartUploadResult initiateUploadResult = amazonS3.get().initiateMultipartUpload(initiateUploadRequest);
final String uploadId = initiateUploadResult.getUploadId();
List<PartETag> eTags = new ArrayList<>();
final long objectSize = sourceObjectMetadata.getContentLength();
// without using S3FileSystem.copy in the future
if (objectSize == 0) {
final CopyPartRequest copyPartRequest = new CopyPartRequest().withSourceBucketName(sourcePath.getBucket()).withSourceKey(sourcePath.getKey()).withDestinationBucketName(destinationPath.getBucket()).withDestinationKey(destinationPath.getKey()).withUploadId(uploadId).withPartNumber(1);
copyPartRequest.setSourceSSECustomerKey(config.getSSECustomerKey());
copyPartRequest.setDestinationSSECustomerKey(config.getSSECustomerKey());
CopyPartResult copyPartResult = amazonS3.get().copyPart(copyPartRequest);
eTags.add(copyPartResult.getPartETag());
} else {
long bytePosition = 0;
Integer uploadBufferSizeBytes = config.getS3UploadBufferSizeBytes();
// Amazon parts are 1-indexed, not zero-indexed.
for (int partNumber = 1; bytePosition < objectSize; partNumber++) {
final CopyPartRequest copyPartRequest = new CopyPartRequest().withSourceBucketName(sourcePath.getBucket()).withSourceKey(sourcePath.getKey()).withDestinationBucketName(destinationPath.getBucket()).withDestinationKey(destinationPath.getKey()).withUploadId(uploadId).withPartNumber(partNumber).withFirstByte(bytePosition).withLastByte(Math.min(objectSize - 1, bytePosition + uploadBufferSizeBytes - 1));
copyPartRequest.setSourceSSECustomerKey(config.getSSECustomerKey());
copyPartRequest.setDestinationSSECustomerKey(config.getSSECustomerKey());
CopyPartResult copyPartResult = amazonS3.get().copyPart(copyPartRequest);
eTags.add(copyPartResult.getPartETag());
bytePosition += uploadBufferSizeBytes;
}
}
CompleteMultipartUploadRequest completeUploadRequest = new CompleteMultipartUploadRequest().withBucketName(destinationPath.getBucket()).withKey(destinationPath.getKey()).withUploadId(uploadId).withPartETags(eTags);
return amazonS3.get().completeMultipartUpload(completeUploadRequest);
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class FnApiDoFnRunner method constructSplitResult.
@VisibleForTesting
static <WatermarkEstimatorStateT> HandlesSplits.SplitResult constructSplitResult(WindowedSplitResult windowedSplitResult, HandlesSplits.SplitResult downstreamElementSplit, Coder fullInputCoder, Instant initialWatermark, KV<Instant, WatermarkEstimatorStateT> watermarkAndState, String pTransformId, String mainInputId, Collection<String> outputIds, Duration resumeDelay) {
// The element split cannot from both windowedSplitResult and downstreamElementSplit.
checkArgument((windowedSplitResult == null || windowedSplitResult.getResidualSplitRoot() == null) || downstreamElementSplit == null);
List<BundleApplication> primaryRoots = new ArrayList<>();
List<DelayedBundleApplication> residualRoots = new ArrayList<>();
// Encode window splits.
if (windowedSplitResult != null && windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot() != null) {
ByteString.Output primaryInOtherWindowsBytes = ByteString.newOutput();
try {
fullInputCoder.encode(windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot(), primaryInOtherWindowsBytes);
} catch (IOException e) {
throw new RuntimeException(e);
}
BundleApplication.Builder primaryApplicationInOtherWindows = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryInOtherWindowsBytes.toByteString());
primaryRoots.add(primaryApplicationInOtherWindows.build());
}
if (windowedSplitResult != null && windowedSplitResult.getResidualInUnprocessedWindowsRoot() != null) {
ByteString.Output bytesOut = ByteString.newOutput();
try {
fullInputCoder.encode(windowedSplitResult.getResidualInUnprocessedWindowsRoot(), bytesOut);
} catch (IOException e) {
throw new RuntimeException(e);
}
BundleApplication.Builder residualInUnprocessedWindowsRoot = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(bytesOut.toByteString());
// We don't want to change the output watermarks or set the checkpoint resume time since
// that applies to the current window.
Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> outputWatermarkMapForUnprocessedWindows = new HashMap<>();
if (!initialWatermark.equals(GlobalWindow.TIMESTAMP_MIN_VALUE)) {
org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp outputWatermark = org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp.newBuilder().setSeconds(initialWatermark.getMillis() / 1000).setNanos((int) (initialWatermark.getMillis() % 1000) * 1000000).build();
for (String outputId : outputIds) {
outputWatermarkMapForUnprocessedWindows.put(outputId, outputWatermark);
}
}
residualInUnprocessedWindowsRoot.putAllOutputWatermarks(outputWatermarkMapForUnprocessedWindows);
residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualInUnprocessedWindowsRoot).build());
}
ByteString.Output primaryBytes = ByteString.newOutput();
ByteString.Output residualBytes = ByteString.newOutput();
// that there is no element split.
if (windowedSplitResult != null && windowedSplitResult.getResidualSplitRoot() != null) {
// When there is element split in windowedSplitResult, the resumeDelay should not be null.
checkNotNull(resumeDelay);
try {
fullInputCoder.encode(windowedSplitResult.getPrimarySplitRoot(), primaryBytes);
fullInputCoder.encode(windowedSplitResult.getResidualSplitRoot(), residualBytes);
} catch (IOException e) {
throw new RuntimeException(e);
}
primaryRoots.add(BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryBytes.toByteString()).build());
BundleApplication.Builder residualApplication = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(residualBytes.toByteString());
Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> outputWatermarkMap = new HashMap<>();
if (!watermarkAndState.getKey().equals(GlobalWindow.TIMESTAMP_MIN_VALUE)) {
org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp outputWatermark = org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp.newBuilder().setSeconds(watermarkAndState.getKey().getMillis() / 1000).setNanos((int) (watermarkAndState.getKey().getMillis() % 1000) * 1000000).build();
for (String outputId : outputIds) {
outputWatermarkMap.put(outputId, outputWatermark);
}
}
residualApplication.putAllOutputWatermarks(outputWatermarkMap);
residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualApplication).setRequestedTimeDelay(Durations.fromMillis(resumeDelay.getMillis())).build());
} else if (downstreamElementSplit != null) {
primaryRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getPrimaryRoots()));
residualRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getResidualRoots()));
}
return HandlesSplits.SplitResult.of(primaryRoots, residualRoots);
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.
the class FnHarness method main.
@VisibleForTesting
public static void main(Function<String, String> environmentVarGetter) throws Exception {
JvmInitializers.runOnStartup();
System.out.format("SDK Fn Harness started%n");
System.out.format("Harness ID %s%n", environmentVarGetter.apply(HARNESS_ID));
System.out.format("Logging location %s%n", environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
System.out.format("Control location %s%n", environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
System.out.format("Status location %s%n", environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
System.out.format("Pipeline options %s%n", environmentVarGetter.apply(PIPELINE_OPTIONS));
String id = environmentVarGetter.apply(HARNESS_ID);
PipelineOptions options = PipelineOptionsTranslation.fromJson(environmentVarGetter.apply(PIPELINE_OPTIONS));
Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
Endpoints.ApiServiceDescriptor controlApiServiceDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
Endpoints.ApiServiceDescriptor statusApiServiceDescriptor = environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR) == null ? null : getApiServiceDescriptor(environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
String runnerCapabilitesOrNull = environmentVarGetter.apply(RUNNER_CAPABILITIES);
Set<String> runnerCapabilites = runnerCapabilitesOrNull == null ? Collections.emptySet() : ImmutableSet.copyOf(runnerCapabilitesOrNull.split("\\s+"));
main(id, options, runnerCapabilites, loggingApiServiceDescriptor, controlApiServiceDescriptor, statusApiServiceDescriptor);
}
Aggregations