use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class LengthPrefixUnknownCodersTest method test.
/**
 * Registers {@code original} as a coder, runs it through
 * {@code LengthPrefixUnknownCoders.addLengthPrefixedCoder}, and asserts that the coder
 * rehydrated under the returned id equals {@code expected}.
 */
@Test
public void test() throws IOException {
  // Build a component registry containing a docker environment and the coder under test.
  SdkComponents registry = SdkComponents.create();
  registry.registerEnvironment(Environments.createDockerEnvironment("java"));
  String originalId = registry.registerCoder(original);

  // The length-prefixing call receives the mutable builder together with the coder id.
  Components.Builder builder = registry.toComponents().toBuilder();
  String lengthPrefixedId =
      LengthPrefixUnknownCoders.addLengthPrefixedCoder(originalId, builder, replaceWithByteArray);

  // Rehydrate from the resulting components and compare against the expectation.
  RehydratedComponents rehydrated = RehydratedComponents.forComponents(builder.build());
  assertEquals(expected, rehydrated.getCoder(lengthPrefixedId));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class PipelineValidator method validateExecutableStage.
/**
 * Checks that the executable stage carried in the spec payload of {@code transform} is
 * internally consistent.
 *
 * <p>The payload is parsed as an {@code ExecutableStagePayload}. The stage's input must be one
 * of the transform's inputs, the stage must list at least one transform, and every listed
 * transform and output must be present in the payload's own components. Those components are
 * finally handed to {@code validateComponents}.
 *
 * @param id identifier of the stage, used in the error messages
 * @param transform transform whose spec payload holds the stage
 * @param outerComponents components of the enclosing scope; not read by this method
 * @param requirements requirements forwarded to {@code validateComponents}
 * @throws Exception if parsing the payload or any of the checks fails
 */
private static void validateExecutableStage(String id, PTransform transform, Components outerComponents, Set<String> requirements) throws Exception {
  ExecutableStagePayload stagePayload =
      ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
  // Lookups below go through the stage's own components, never the outer ones.
  Components stageComponents = stagePayload.getComponents();

  boolean inputIsKnown = transform.getInputsMap().containsValue(stagePayload.getInput());
  checkArgument(
      inputIsKnown, "ExecutableStage %s uses unknown input %s", id, stagePayload.getInput());

  boolean hasTransforms = !stagePayload.getTransformsList().isEmpty();
  checkArgument(hasTransforms, "ExecutableStage %s contains no transforms", id);

  for (String memberTransformId : stagePayload.getTransformsList()) {
    boolean transformIsKnown = stageComponents.containsTransforms(memberTransformId);
    checkArgument(
        transformIsKnown, "ExecutableStage %s uses unknown transform %s", id, memberTransformId);
  }

  for (String stageOutputId : stagePayload.getOutputsList()) {
    boolean outputIsKnown = stageComponents.containsPcollections(stageOutputId);
    checkArgument(
        outputIsKnown, "ExecutableStage %s uses unknown output %s", id, stageOutputId);
  }

  String stageLabel = "ExecutableStage " + id;
  validateComponents(stageLabel, stageComponents, requirements);
  // TODO: Also validate that side inputs of all transforms within components.getTransforms()
  // are contained within payload.getSideInputsList()
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class PipelineValidator method validate.
/**
 * Validates a pipeline proto.
 *
 * <p>Each root transform id listed by the pipeline must be present in the pipeline's components;
 * the components are then passed to {@code validateComponents} together with the pipeline's
 * requirements.
 *
 * @param p the pipeline to validate
 */
public static void validate(RunnerApi.Pipeline p) {
  Components pipelineComponents = p.getComponents();

  // Every declared root must resolve to a transform in the component set.
  for (String rootId : p.getRootTransformIdsList()) {
    boolean rootIsKnown = pipelineComponents.containsTransforms(rootId);
    checkArgument(rootIsKnown, "Root transform id %s is unknown", rootId);
  }

  ImmutableSet<String> pipelineRequirements = ImmutableSet.copyOf(p.getRequirementsList());
  validateComponents("pipeline", pipelineComponents, pipelineRequirements);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class Environments method getArtifacts.
/**
 * Builds one {@code ArtifactInformation} per distinct, existing entry of {@code stagingFiles}.
 *
 * <p>An entry is either a plain path or {@code stagedName=path}; only the first {@code '='}
 * separates the two parts. Entries whose file does not exist are skipped. A directory is zipped
 * via {@code zipDirectory} and the zip is what gets hashed and referenced in the payload; a
 * regular file is hashed and referenced directly. When the entry carries no explicit staged name,
 * one is generated by {@code createStagingFileName} from the original file (or directory) and
 * the SHA-256 hash.
 *
 * @param stagingFiles paths to stage; duplicates are dropped, first occurrence order is kept
 * @return the artifacts, in the order their entries were first seen
 * @throws RuntimeException wrapping any {@code IOException} raised while zipping or hashing
 */
public static List<ArtifactInformation> getArtifacts(List<String> stagingFiles) {
  ImmutableList.Builder<ArtifactInformation> result = ImmutableList.builder();
  Set<String> uniquePaths = new LinkedHashSet<>(stagingFiles);

  for (String entry : uniquePaths) {
    // Split "name=path" on the first '='; an entry without '=' is just a path.
    int separator = entry.indexOf('=');
    String stagedName = separator < 0 ? null : entry.substring(0, separator);
    File source = new File(separator < 0 ? entry : entry.substring(separator + 1));

    // Spurious items get added to the classpath. Filter by just those that exist.
    if (!source.exists()) {
      continue;
    }

    ArtifactInformation.Builder artifact =
        ArtifactInformation.newBuilder()
            .setTypeUrn(BeamUrns.getUrn(StandardArtifacts.Types.FILE))
            .setRoleUrn(BeamUrns.getUrn(StandardArtifacts.Roles.STAGING_TO));

    // Directories are shipped as a zip; the hash and payload path refer to that zip.
    File payloadFile;
    HashCode digest;
    try {
      payloadFile = source.isDirectory() ? zipDirectory(source) : source;
      digest = Files.asByteSource(payloadFile).hash(Hashing.sha256());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    artifact.setTypePayload(
        RunnerApi.ArtifactFilePayload.newBuilder()
            .setPath(payloadFile.getPath())
            .setSha256(digest.toString())
            .build()
            .toByteString());

    // The generated name is derived from the original file or directory, not from the zip.
    if (stagedName == null) {
      stagedName = createStagingFileName(source, digest);
    }
    artifact.setRolePayload(
        RunnerApi.ArtifactStagingToRolePayload.newBuilder()
            .setStagedName(stagedName)
            .build()
            .toByteString());

    result.add(artifact.build());
  }
  return result.build();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class GreedyPipelineFuser method sanitizeDanglingPTransformInputs.
/**
 * Returns a copy of {@code stage} in which transform inputs that refer to PCollections unknown
 * to the stage have been removed.
 *
 * <p>An input id counts as known when it is the stage's input PCollection, one of the stage's
 * output PCollections, the collection of one of its side inputs, or an output of any transform
 * in the stage. Every other input id is treated as dangling: it is dropped from the inputs of
 * each transform and from the PCollections of the stage's components.
 *
 * @param stage the stage to sanitize
 * @return a new stage with rebuilt transforms and components; all other parts are carried over
 */
private static ExecutableStage sanitizeDanglingPTransformInputs(ExecutableStage stage) {
  // Gather every PCollection id a transform in this stage may legitimately read.
  // NOTE(review): timer PCollections are not added to this set; confirm whether they need to be.
  Set<String> knownIds = new HashSet<>();
  knownIds.add(stage.getInputPCollection().getId());
  for (PCollectionNode stageOutput : stage.getOutputPCollections()) {
    knownIds.add(stageOutput.getId());
  }
  stage.getSideInputs().stream().map(s -> s.collection().getId()).forEach(knownIds::add);
  for (PTransformNode node : stage.getTransforms()) {
    knownIds.addAll(node.getTransform().getOutputsMap().values());
  }

  // Any consumed id that is not in the known set is dangling.
  Set<String> dangling = new HashSet<>();
  for (PTransformNode node : stage.getTransforms()) {
    for (String inputId : node.getTransform().getInputsMap().values()) {
      if (!knownIds.contains(inputId)) {
        dangling.add(inputId);
      }
    }
  }

  // Rebuild only those transforms that actually lose an input; the rest are reused as they are.
  ImmutableList.Builder<PTransformNode> rebuilt = ImmutableList.builder();
  for (PTransformNode node : stage.getTransforms()) {
    PTransform current = node.getTransform();
    Map<String, String> keptInputs =
        current.getInputsMap().entrySet().stream()
            .filter(e -> !dangling.contains(e.getValue()))
            .collect(Collectors.toMap(Entry::getKey, Entry::getValue));
    boolean untouched = keptInputs.equals(current.getInputsMap());
    rebuilt.add(
        untouched
            ? node
            : PipelineNode.pTransform(
                node.getId(),
                current.toBuilder().clearInputs().putAllInputs(keptInputs).build()));
  }
  ImmutableList<PTransformNode> sanitizedNodes = rebuilt.build();

  // The PCollection filter reads the stage's original components, not the builder.
  Map<String, PTransform> transformsById =
      sanitizedNodes.stream()
          .collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
  Map<String, PCollection> keptPCollections =
      stage.getComponents().getPcollectionsMap().entrySet().stream()
          .filter(e -> !dangling.contains(e.getKey()))
          .collect(Collectors.toMap(Entry::getKey, Entry::getValue));

  // Swap the transforms and PCollections in a copy of the stage's components.
  Components.Builder sanitizedComponents = stage.getComponents().toBuilder();
  sanitizedComponents.clearTransforms().putAllTransforms(transformsById);
  sanitizedComponents.clearPcollections().putAllPcollections(keptPCollections);

  return ImmutableExecutableStage.of(
      sanitizedComponents.build(),
      stage.getEnvironment(),
      stage.getInputPCollection(),
      stage.getSideInputs(),
      stage.getUserStates(),
      stage.getTimers(),
      sanitizedNodes,
      stage.getOutputPCollections(),
      stage.getWireCoderSettings());
}
Aggregations