Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache: class WindowUtilsTest, method testGetWindowStrategy.
@Test
public void testGetWindowStrategy() throws IOException {
  SdkComponents components = SdkComponents.create();
  String environmentId =
      components.registerEnvironment(Environments.createDockerEnvironment("java"));
  WindowingStrategy<Object, IntervalWindow> expected =
      WindowingStrategy.of(FixedWindows.of(Duration.standardMinutes(1)))
          .withMode(WindowingStrategy.AccumulationMode.DISCARDING_FIRED_PANES)
          .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)
          .withAllowedLateness(Duration.ZERO)
          .withEnvironmentId(environmentId);
  components.registerWindowingStrategy(expected);
  String collectionId =
      components.registerPCollection(
          PCollection.createPrimitiveOutputInternal(
                  Pipeline.create(), expected, PCollection.IsBounded.BOUNDED, VoidCoder.of())
              .setName("name"));
  WindowingStrategy<?, ?> actual =
      WindowUtils.getWindowStrategy(collectionId, components.toComponents());
  assertEquals(expected, actual);
}
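The pattern above (register components, serialize with toComponents(), then look them up by ID) applies to coders as well as windowing strategies. A minimal, self-contained sketch of that round trip, assuming only the Beam SDK and the runners-core-construction module on the classpath (the class name and the choice of VarIntCoder are illustrative, not from the original test):

  import java.io.IOException;
  import org.apache.beam.model.pipeline.v1.RunnerApi;
  import org.apache.beam.runners.core.construction.Environments;
  import org.apache.beam.runners.core.construction.RehydratedComponents;
  import org.apache.beam.runners.core.construction.SdkComponents;
  import org.apache.beam.sdk.coders.Coder;
  import org.apache.beam.sdk.coders.VarIntCoder;

  public class SdkComponentsRoundTripDemo {
    public static void main(String[] args) throws IOException {
      // Register a default environment and a coder, as the tests above do.
      SdkComponents components = SdkComponents.create();
      components.registerEnvironment(Environments.createDockerEnvironment("java"));
      String coderId = components.registerCoder(VarIntCoder.of());
      // Serialize to the portable proto form, then rehydrate the coder by its ID.
      RunnerApi.Components proto = components.toComponents();
      Coder<?> roundTripped = RehydratedComponents.forComponents(proto).getCoder(coderId);
      System.out.println(roundTripped.equals(VarIntCoder.of())); // prints true
    }
  }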
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache: class WorkerCustomSourcesTest, method translateIOToCloudSource.
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(
    BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline p = Pipeline.create(options);
  p.begin().apply(Read.from(io));
  // Note that we specifically perform this replacement since this is what the DataflowRunner
  // does, and the DataflowRunner class does not expose a way to perform these replacements
  // without running the pipeline.
  p.replaceAll(Collections.singletonList(SplittableParDo.PRIMITIVE_BOUNDED_READ_OVERRIDE));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment("dummy-image-url");
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job workflow =
      translator
          .translate(p, pipelineProto, sdkComponents, runner, new ArrayList<DataflowPackage>())
          .getJob();
  Step step = workflow.getSteps().get(0);
  return stepToCloudSource(step);
}
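For context, one way a caller might exercise this helper. This is a hedged sketch, not taken from the original test class: CountingSource and the placeholder option values are assumptions, and DataflowRunner.fromOptions will validate whatever project, region, and temp location are supplied, so real values are needed in practice.

  // Hypothetical usage of translateIOToCloudSource; the option values are placeholders.
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("some-project");
  options.setRegion("us-central1");
  options.setTempLocation("gs://some-bucket/tmp");
  options.setRunner(DataflowRunner.class);
  com.google.api.services.dataflow.model.Source cloudSource =
      translateIOToCloudSource(CountingSource.upTo(10L), options);
  System.out.println(cloudSource.getSpec()); // inspect the translated cloud Source spec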
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache: class LengthPrefixUnknownCodersTest, method test. (The fields original, expected, and replaceWithByteArray are fixtures defined on the test class and are not shown in this snippet.)
@Test
public void test() throws IOException {
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  String coderId = sdkComponents.registerCoder(original);
  Components.Builder components = sdkComponents.toComponents().toBuilder();
  String updatedCoderId =
      LengthPrefixUnknownCoders.addLengthPrefixedCoder(coderId, components, replaceWithByteArray);
  assertEquals(
      expected, RehydratedComponents.forComponents(components.build()).getCoder(updatedCoderId));
}
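The length-prefixing being tested wraps a coder the runner may not understand so that its encoded elements can still be framed and skipped. A minimal illustration of that framing using the SDK's public LengthPrefixCoder (the demo class and values are illustrative):

  import java.io.ByteArrayOutputStream;
  import java.io.IOException;
  import org.apache.beam.sdk.coders.ByteArrayCoder;
  import org.apache.beam.sdk.coders.Coder;
  import org.apache.beam.sdk.coders.LengthPrefixCoder;

  public class LengthPrefixDemo {
    public static void main(String[] args) throws IOException {
      // LengthPrefixCoder writes a varint length before the inner coder's payload,
      // so a consumer can skip the element without knowing the inner encoding.
      Coder<byte[]> coder = LengthPrefixCoder.of(ByteArrayCoder.of());
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      coder.encode(new byte[] {1, 2, 3}, out);
      System.out.println(out.size()); // varint length byte(s) plus the 3 payload bytes
    }
  }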
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache: class DataflowRunnerTest, method testSdkHarnessConfiguration.
@Test
public void testSdkHarnessConfiguration() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  ExperimentalOptions.addExperiment(options, "use_runner_v2");
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(Arrays.asList(1, 2, 3)));
  String defaultSdkContainerImage = DataflowRunner.getContainerImageForJob(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment(defaultSdkContainerImage);
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow.toBuilder().build());
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();
  DataflowRunner.configureSdkHarnessContainerImages(options, pipelineProto, job);
  List<SdkHarnessContainerImage> sdks =
      job.getEnvironment().getWorkerPools().get(0).getSdkHarnessContainerImages();
  Map<String, String> expectedEnvIdsAndContainerImages =
      pipelineProto.getComponents().getEnvironmentsMap().entrySet().stream()
          .filter(
              x ->
                  BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.DOCKER)
                      .equals(x.getValue().getUrn()))
          .collect(
              Collectors.toMap(
                  x -> x.getKey(),
                  x -> {
                    RunnerApi.DockerPayload payload;
                    try {
                      payload = RunnerApi.DockerPayload.parseFrom(x.getValue().getPayload());
                    } catch (InvalidProtocolBufferException e) {
                      throw new RuntimeException(e);
                    }
                    return payload.getContainerImage();
                  }));
  assertEquals(1, expectedEnvIdsAndContainerImages.size());
  assertEquals(1, sdks.size());
  assertEquals(
      expectedEnvIdsAndContainerImages,
      sdks.stream()
          .collect(
              Collectors.toMap(
                  SdkHarnessContainerImage::getEnvironmentId,
                  SdkHarnessContainerImage::getContainerImage)));
}
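The filter-and-parse logic in the stream above can be factored into a small helper. A sketch assuming the same imports as the test; the method name containerImageOf is hypothetical, not part of the original:

  /** Hypothetical helper: returns the container image of a Docker environment, else null. */
  static String containerImageOf(RunnerApi.Environment env) {
    if (!BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.DOCKER)
        .equals(env.getUrn())) {
      return null; // not a Docker environment
    }
    try {
      return RunnerApi.DockerPayload.parseFrom(env.getPayload()).getContainerImage();
    } catch (InvalidProtocolBufferException e) {
      throw new RuntimeException(e);
    }
  }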
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache: class DataflowPipelineTranslatorTest, method testMultiGraphPipelineSerialization.
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);
  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));
  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());
  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));
  // Check that translation doesn't fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification jobSpecification =
      t.translate(
          p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options),
          Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
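createSdkComponents is a test helper not shown in this listing. Based on the environment-registration pattern in the other snippets here, a plausible reconstruction; the actual helper in the test class may differ:

  // Hedged sketch of the createSdkComponents helper used above (an assumption,
  // modeled on how the other examples register a default Docker environment).
  static SdkComponents createSdkComponents(DataflowPipelineOptions options) {
    SdkComponents sdkComponents = SdkComponents.create();
    String containerImageURL = DataflowRunner.getContainerImageForJob(options);
    RunnerApi.Environment defaultEnvironmentForDataflow =
        Environments.createDockerEnvironment(containerImageURL);
    sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
    return sdkComponents;
  }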