use of org.apache.beam.sdk.options.PortablePipelineOptions in project beam by apache.
the class EnvironmentsTest method createEnvironmentProcessFromEnvironmentConfig.
/**
 * Checks that a PROCESS default environment is derived from the JSON environment config, first
 * with every field supplied and then with only the command supplied.
 */
@Test
public void createEnvironmentProcessFromEnvironmentConfig() throws IOException {
  PortablePipelineOptions options = PipelineOptionsFactory.as(PortablePipelineOptions.class);

  // Case 1: config carrying os, arch, command and two environment variables.
  options.setDefaultEnvironmentType(Environments.ENVIRONMENT_PROCESS);
  options.setDefaultEnvironmentConfig(
      "{\"os\": \"linux\", \"arch\": \"amd64\", \"command\": \"run.sh\", \"env\":{\"k1\": \"v1\", \"k2\": \"v2\"} }");
  ProcessPayload fullPayload =
      ProcessPayload.newBuilder()
          .setOs("linux")
          .setArch("amd64")
          .setCommand("run.sh")
          .putEnv("k1", "v1")
          .putEnv("k2", "v2")
          .build();
  Environment expectedFull =
      Environment.newBuilder()
          .setUrn(BeamUrns.getUrn(StandardEnvironments.Environments.PROCESS))
          .setPayload(fullPayload.toByteString())
          .addAllCapabilities(Environments.getJavaCapabilities())
          .build();
  assertThat(Environments.createOrGetDefaultEnvironment(options), is(expectedFull));

  // Case 2: config carrying only the command; the payload is expected to hold nothing else.
  options.setDefaultEnvironmentType(Environments.ENVIRONMENT_PROCESS);
  options.setDefaultEnvironmentConfig("{\"command\": \"run.sh\"}");
  ProcessPayload commandOnlyPayload = ProcessPayload.newBuilder().setCommand("run.sh").build();
  Environment expectedCommandOnly =
      Environment.newBuilder()
          .setUrn(BeamUrns.getUrn(StandardEnvironments.Environments.PROCESS))
          .setPayload(commandOnlyPayload.toByteString())
          .addAllCapabilities(Environments.getJavaCapabilities())
          .build();
  assertThat(Environments.createOrGetDefaultEnvironment(options), is(expectedCommandOnly));
}
use of org.apache.beam.sdk.options.PortablePipelineOptions in project beam by apache.
the class TestUniversalRunner method run.
/**
 * Runs the pipeline through a {@code PortableRunner} and asserts that it finishes in the
 * {@code DONE} state.
 *
 * <p>When a local job service port file is configured, the job endpoint is set to
 * {@code localhost:<port>} using the trimmed UTF-8 contents of that file.
 *
 * @param pipeline the pipeline to execute
 * @return the result returned by the portable runner
 */
@Override
public PipelineResult run(Pipeline pipeline) {
  Options testOptions = options.as(Options.class);
  String portFilePath = testOptions.getLocalJobServicePortFile();
  if (portFilePath != null) {
    byte[] portFileBytes;
    try {
      portFileBytes = Files.readAllBytes(Paths.get(portFilePath));
    } catch (IOException e) {
      throw new RuntimeException(
          String.format("Error reading local job service port file %s", portFilePath), e);
    }
    String port = new String(portFileBytes, Charsets.UTF_8).trim();
    testOptions.setJobEndpoint("localhost:" + port);
  }

  PortablePipelineOptions portableOptions = options.as(PortablePipelineOptions.class);
  portableOptions.setRunner(PortableRunner.class);

  PipelineResult result = PortableRunner.fromOptions(portableOptions).run(pipeline);
  PipelineResult.State finalState = result.waitUntilFinish();
  assertThat("Pipeline did not succeed.", finalState, Matchers.is(PipelineResult.State.DONE));
  return result;
}
use of org.apache.beam.sdk.options.PortablePipelineOptions in project beam by apache.
the class PortablePipelineJarCreator method run.
/**
 * <em>Does not actually run the pipeline.</em> Instead bundles the input pipeline along with all
 * dependencies, artifacts, etc. required to run the pipeline into a jar that can be executed
 * later.
 *
 * <p>The output jar is closed even if bundling fails part-way, and the jar that resources are
 * copied from is closed as soon as copying is done, so no file handles are leaked on error.
 *
 * @param pipeline the pipeline to bundle into the jar
 * @param jobInfo supplies the job name and the pipeline options; the options must carry a
 *     non-null output executable path
 * @return a new {@code JarCreatorPipelineResult}
 * @throws Exception if the output jar cannot be created or any part of the bundle cannot be
 *     written
 */
@Override
public PortablePipelineResult run(Pipeline pipeline, JobInfo jobInfo) throws Exception {
  PortablePipelineOptions pipelineOptions =
      PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions())
          .as(PortablePipelineOptions.class);
  final String jobName = jobInfo.jobName();
  File outputFile = new File(checkArgumentNotNull(pipelineOptions.getOutputExecutablePath()));
  LOG.info("Creating jar {} for job {}", outputFile.getAbsolutePath(), jobName);
  outputStream =
      new JarOutputStream(new FileOutputStream(outputFile), createManifest(mainClass, jobName));
  outputChannel = Channels.newChannel(outputStream);
  try {
    PortablePipelineJarUtils.writeDefaultJobName(outputStream, jobName);
    // Close the source jar once its resources are copied. Closing a JarFile that is already
    // closed is harmless, so this is safe even if the helper closes it itself.
    try (JarFile inputJar =
        new JarFile(mainClass.getProtectionDomain().getCodeSource().getLocation().getPath())) {
      copyResourcesFromJar(inputJar);
    }
    writeAsJson(
        PipelineOptionsTranslation.toProto(pipelineOptions),
        PortablePipelineJarUtils.getPipelineOptionsUri(jobName));
    Pipeline pipelineWithClasspathArtifacts = writeArtifacts(pipeline, jobName);
    writeAsJson(pipelineWithClasspathArtifacts, PortablePipelineJarUtils.getPipelineUri(jobName));
  } finally {
    // Closing the channel also closes the underlying stream. Doing it in finally releases the
    // output file handle on the failure path as well as on success.
    outputChannel.close();
  }
  LOG.info("Jar {} created successfully.", outputFile.getAbsolutePath());
  return new JarCreatorPipelineResult();
}
use of org.apache.beam.sdk.options.PortablePipelineOptions in project flink by apache.
the class BeamPythonFunctionRunner method open.
// ------------------------------------------------------------------------
/**
 * Opens this runner: resets the per-bundle state, opens the environment manager, and creates the
 * stage bundle factory used to execute Python functions.
 *
 * <p>When a memory manager is available and {@code config} requests managed memory, the job
 * bundle factory and environment are taken from a shared resource obtained from the memory
 * manager. Otherwise a dedicated job bundle factory and environment are created for this runner.
 *
 * @param config the Python configuration; consulted for whether managed memory is used
 * @throws Exception if a precondition fails or any of the underlying resources cannot be created
 */
@Override
public void open(PythonConfig config) throws Exception {
  this.bundleStarted = false;
  this.resultBuffer = new LinkedBlockingQueue<>();
  this.reusableResultTuple = new Tuple2<>();
  // The creation of stageBundleFactory depends on the initialized environment manager.
  environmentManager.open();
  PortablePipelineOptions portableOptions =
      PipelineOptionsFactory.as(PortablePipelineOptions.class);
  if (jobOptions.containsKey(PythonOptions.STATE_CACHE_SIZE.key())) {
    // Forward the configured state cache size as a "<name>=<value>" experiment.
    portableOptions
        .as(ExperimentalOptions.class)
        .setExperiments(
            Collections.singletonList(
                ExperimentalOptions.STATE_CACHE_SIZE
                    + "="
                    + jobOptions.get(PythonOptions.STATE_CACHE_SIZE.key())));
  }
  Struct pipelineOptions = PipelineOptionsTranslation.toProto(portableOptions);
  if (memoryManager != null && config.isUsingManagedMemory()) {
    // The fraction is passed as the argument for the %s placeholder. Concatenating it onto the
    // template would leave "%s" unexpanded and tack the value onto the end of the message.
    Preconditions.checkArgument(
        managedMemoryFraction > 0 && managedMemoryFraction <= 1.0,
        "The configured managed memory fraction for Python worker process must be within (0, 1], was: %s. "
            + "It may be because the consumer type \"Python\" was missing or set to 0 for the config option \"taskmanager.memory.managed.consumer-weights\".",
        managedMemoryFraction);
    final LongFunctionWithException<PythonSharedResources, Exception> initializer =
        (size) ->
            new PythonSharedResources(
                createJobBundleFactory(pipelineOptions), createPythonExecutionEnvironment(size));
    sharedResources =
        memoryManager.getSharedMemoryResourceForManagedMemory(
            MANAGED_MEMORY_RESOURCE_ID, initializer, managedMemoryFraction);
    LOG.info("Obtained shared Python process of size {} bytes", sharedResources.getSize());
    sharedResources.getResourceHandle().addPythonEnvironmentManager(environmentManager);
    // NOTE(review): this local is not assigned to the jobBundleFactory name used in the else
    // branch; presumably the shared factory is owned by sharedResources. Confirm.
    JobBundleFactory jobBundleFactory = sharedResources.getResourceHandle().getJobBundleFactory();
    RunnerApi.Environment environment = sharedResources.getResourceHandle().getEnvironment();
    stageBundleFactory = createStageBundleFactory(jobBundleFactory, environment);
  } else {
    // there is no way to access the MemoryManager for the batch job of old planner,
    // fallback to the way that spawning a Python process for each Python operator
    jobBundleFactory = createJobBundleFactory(pipelineOptions);
    stageBundleFactory =
        createStageBundleFactory(jobBundleFactory, createPythonExecutionEnvironment(-1));
  }
  progressHandler = getProgressHandler(flinkMetricContainer);
}
use of org.apache.beam.sdk.options.PortablePipelineOptions in project beam by apache.
the class SdkComponents method create.
/**
 * Creates an {@code SdkComponents} seeded with the default components instance and registers
 * the default environment obtained from {@code options}.
 *
 * @param options pipeline options, viewed as {@code PortablePipelineOptions} to obtain the
 *     default environment
 * @return the newly created components with that environment registered
 */
public static SdkComponents create(PipelineOptions options) {
  SdkComponents components =
      new SdkComponents(RunnerApi.Components.getDefaultInstance(), null, "");
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  return components;
}
Aggregations