Use of org.apache.beam.model.pipeline.v1.RunnerApi.ArtifactInformation in project beam by apache.
From the class Environments, method getDeferredArtifacts:
public static List<ArtifactInformation> getDeferredArtifacts(PipelineOptions options) {
  List<String> stagingFiles = options.as(PortablePipelineOptions.class).getFilesToStage();
  if (stagingFiles == null || stagingFiles.isEmpty()) {
    return ImmutableList.of();
  }
  String key = UUID.randomUUID().toString();
  DefaultArtifactResolver.INSTANCE.register(
      (info) -> {
        if (BeamUrns.getUrn(StandardArtifacts.Types.DEFERRED).equals(info.getTypeUrn())) {
          RunnerApi.DeferredArtifactPayload deferredArtifactPayload;
          try {
            deferredArtifactPayload =
                RunnerApi.DeferredArtifactPayload.parseFrom(info.getTypePayload());
          } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException("Error parsing deferred artifact payload.", e);
          }
          if (key.equals(deferredArtifactPayload.getKey())) {
            return Optional.of(getArtifacts(stagingFiles));
          } else {
            return Optional.empty();
          }
        } else {
          return Optional.empty();
        }
      });
  return ImmutableList.of(
      ArtifactInformation.newBuilder()
          .setTypeUrn(BeamUrns.getUrn(StandardArtifacts.Types.DEFERRED))
          .setTypePayload(
              RunnerApi.DeferredArtifactPayload.newBuilder().setKey(key).build().toByteString())
          .build());
}
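A minimal usage sketch (the options setup, staging path, and container image below are illustrative, not from the Beam repository): the deferred placeholder is attached to an environment up front, and the concrete file list is only produced when DefaultArtifactResolver later resolves the pipeline. The indirection lets the hashing and zipping done by getArtifacts be postponed until the pipeline is actually resolved for submission.

// Sketch only: the staging file path and container image are placeholders.
PortablePipelineOptions opts =
    PipelineOptionsFactory.create().as(PortablePipelineOptions.class);
opts.setFilesToStage(ImmutableList.of("/tmp/extra-lib.jar"));

// The environment carries a single DEFERRED artifact keyed by a random UUID;
// the resolver registered above swaps it for the real FILE artifacts at resolution time.
RunnerApi.Environment env =
    Environments.createDockerEnvironment("apache/beam_java8_sdk:2.35.0")
        .toBuilder()
        .addAllDependencies(Environments.getDeferredArtifacts(opts))
        .build();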
Use of org.apache.beam.model.pipeline.v1.RunnerApi.ArtifactInformation in project beam by apache.
From the class Environments, method getArtifacts:
public static List<ArtifactInformation> getArtifacts(List<String> stagingFiles) {
  ImmutableList.Builder<ArtifactInformation> artifactsBuilder = ImmutableList.builder();
  Set<String> deduplicatedStagingFiles = new LinkedHashSet<>(stagingFiles);
  for (String path : deduplicatedStagingFiles) {
    File file;
    String stagedName = null;
    if (path.contains("=")) {
      String[] components = path.split("=", 2);
      file = new File(components[1]);
      stagedName = components[0];
    } else {
      file = new File(path);
    }
    // Spurious items get added to the classpath. Filter by just those that exist.
    if (file.exists()) {
      ArtifactInformation.Builder artifactBuilder = ArtifactInformation.newBuilder();
      artifactBuilder.setTypeUrn(BeamUrns.getUrn(StandardArtifacts.Types.FILE));
      artifactBuilder.setRoleUrn(BeamUrns.getUrn(StandardArtifacts.Roles.STAGING_TO));
      HashCode hashCode;
      if (file.isDirectory()) {
        File zippedFile;
        try {
          zippedFile = zipDirectory(file);
          hashCode = Files.asByteSource(zippedFile).hash(Hashing.sha256());
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
        artifactBuilder.setTypePayload(
            RunnerApi.ArtifactFilePayload.newBuilder()
                .setPath(zippedFile.getPath())
                .setSha256(hashCode.toString())
                .build()
                .toByteString());
      } else {
        try {
          hashCode = Files.asByteSource(file).hash(Hashing.sha256());
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
        artifactBuilder.setTypePayload(
            RunnerApi.ArtifactFilePayload.newBuilder()
                .setPath(file.getPath())
                .setSha256(hashCode.toString())
                .build()
                .toByteString());
      }
      if (stagedName == null) {
        stagedName = createStagingFileName(file, hashCode);
      }
      artifactBuilder.setRolePayload(
          RunnerApi.ArtifactStagingToRolePayload.newBuilder()
              .setStagedName(stagedName)
              .build()
              .toByteString());
      artifactsBuilder.add(artifactBuilder.build());
    }
  }
  return artifactsBuilder.build();
}
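A small sketch of the two accepted path forms (the file paths are hypothetical): a bare path gets a staged name generated by createStagingFileName from the file and its SHA-256 hash, while the "name=path" form pins the staged name explicitly. Paths that do not exist on disk are silently skipped, as the filter above shows.

// Sketch only: the paths are placeholders and must exist locally to be staged.
List<ArtifactInformation> artifacts =
    Environments.getArtifacts(
        ImmutableList.of(
            "extra-lib.jar=/tmp/libs/extra-lib-1.0.jar", // staged as "extra-lib.jar"
            "/tmp/conf/app.properties"));                // staged name derived from name + hash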
Use of org.apache.beam.model.pipeline.v1.RunnerApi.ArtifactInformation in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testPortablePipelineContainsExpectedDependenciesAndCapabilities:
@Test
public void testPortablePipelineContainsExpectedDependenciesAndCapabilities() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Impulse.create())
      .apply(
          MapElements.via(
              new SimpleFunction<byte[], String>() {
                @Override
                public String apply(byte[] input) {
                  return "";
                }
              }))
      .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  runner.replaceV1Transforms(pipeline);
  File file1 = File.createTempFile("file1-", ".txt");
  file1.deleteOnExit();
  File file2 = File.createTempFile("file2-", ".txt");
  file2.deleteOnExit();
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(
      Environments.createDockerEnvironment(DataflowRunner.getContainerImageForJob(options))
          .toBuilder()
          .addAllDependencies(
              Environments.getArtifacts(
                  ImmutableList.of("file1.txt=" + file1, "file2.txt=" + file2)))
          .addAllCapabilities(Environments.getJavaCapabilities())
          .build());
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
  Components componentsProto = result.getPipelineProto().getComponents();
  assertThat(
      Iterables.getOnlyElement(componentsProto.getEnvironmentsMap().values())
          .getCapabilitiesList(),
      containsInAnyOrder(Environments.getJavaCapabilities().toArray(new String[0])));
  assertThat(
      Iterables.getOnlyElement(componentsProto.getEnvironmentsMap().values())
          .getDependenciesList(),
      containsInAnyOrder(
          Environments.getArtifacts(ImmutableList.of("file1.txt=" + file1, "file2.txt=" + file2))
              .toArray(new ArtifactInformation[0])));
}
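To see what the dependency assertion above compares, here is a hedged sketch (the helper method is illustrative, not part of the test) that decodes the type and role payloads of each ArtifactInformation back into the local path, SHA-256, and staged name.

// Sketch only: prints one line per artifact produced by Environments.getArtifacts.
static void describeArtifacts(List<ArtifactInformation> artifacts)
    throws InvalidProtocolBufferException {
  for (ArtifactInformation info : artifacts) {
    RunnerApi.ArtifactFilePayload filePayload =
        RunnerApi.ArtifactFilePayload.parseFrom(info.getTypePayload());
    RunnerApi.ArtifactStagingToRolePayload rolePayload =
        RunnerApi.ArtifactStagingToRolePayload.parseFrom(info.getRolePayload());
    System.out.println(
        rolePayload.getStagedName()
            + " <- "
            + filePayload.getPath()
            + " (sha256: "
            + filePayload.getSha256()
            + ")");
  }
}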