Use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache: class DataflowPipelineTranslatorTest, method testNetworkConfig.
@Test
public void testNetworkConfig() throws IOException {
  final String testNetwork = "test-network";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(testNetwork);
  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testNetwork, job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
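For reference, outside a test the same option is normally supplied on the command line rather than via a setter; a minimal sketch, assuming the standard Beam option parser (the project id below is a placeholder, not taken from the test):

// A minimal sketch, assuming standard Beam/Dataflow options.
DataflowPipelineOptions options =
    PipelineOptionsFactory.fromArgs("--network=test-network")
        .as(DataflowPipelineOptions.class);
options.setProject("my-project"); // placeholder project id
Pipeline pipeline = Pipeline.create(options);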
Use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache: class DataflowPipelineTranslatorTest, method testToIterableTranslationWithIsmSideInput.
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation of getting a
  // PCollectionView<Iterable<T>> does not change in bad ways during refactoring.
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.<Integer>asIterable());
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);
  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());
  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));
  Step collectionToSingletonStep = steps.get(2);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
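For readers unfamiliar with the transform under test, here is a hedged sketch of how a PCollectionView<Iterable<T>> produced by View.asIterable is typically consumed as a side input; the DoFn and step names are assumptions for illustration, not part of the Beam test suite:

// A minimal sketch, assuming the standard Beam side-input API.
PCollectionView<Iterable<Integer>> view =
    pipeline.apply("Side", Create.of(1, 2, 3)).apply(View.<Integer>asIterable());
pipeline
    .apply("Main", Create.of(0))
    .apply(
        "SumSide",
        ParDo.of(
                new DoFn<Integer, Integer>() {
                  @ProcessElement
                  public void processElement(ProcessContext c) {
                    int sum = 0;
                    for (int i : c.sideInput(view)) { // read the materialized view
                      sum += i;
                    }
                    c.output(sum);
                  }
                })
            .withSideInputs(view));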
Use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache: class PackageUtil, method createPackageAttributes.
/**
 * Compute and cache the attributes of a classpath element that we will need in order to stage it.
 *
 * @param source the file or directory to be staged.
 * @param stagingPath the base location for staged classpath elements.
 * @param overridePackageName if non-null, use the given value as the package name
 *     instead of generating one automatically.
 * @return a {@link PackageAttributes} containing metadata about the object to be staged.
 */
static PackageAttributes createPackageAttributes(
    File source, String stagingPath, @Nullable String overridePackageName) {
  boolean directory = source.isDirectory();
  // Compute size and hash in one pass over file or directory.
  Hasher hasher = Hashing.md5().newHasher();
  OutputStream hashStream = Funnels.asOutputStream(hasher);
  try (CountingOutputStream countingOutputStream = new CountingOutputStream(hashStream)) {
    if (!directory) {
      // Files are staged as-is.
      Files.asByteSource(source).copyTo(countingOutputStream);
    } else {
      // Directories are recursively zipped.
      ZipFiles.zipDirectory(source, countingOutputStream);
    }
    countingOutputStream.flush();
    long size = countingOutputStream.getCount();
    String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
    // Create the DataflowPackage with staging name and location.
    String uniqueName = getUniqueContentName(source, hash);
    String resourcePath =
        FileSystems.matchNewResource(stagingPath, true)
            .resolve(uniqueName, StandardResolveOptions.RESOLVE_FILE)
            .toString();
    DataflowPackage target = new DataflowPackage();
    target.setName(overridePackageName != null ? overridePackageName : uniqueName);
    target.setLocation(resourcePath);
    return new PackageAttributes(size, hash, directory, target, source.getPath());
  } catch (IOException e) {
    throw new RuntimeException("Package setup failure for " + source, e);
  }
}
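The single-pass size-and-hash idiom in the try-with-resources block above is reusable on its own: bytes are written once, and the CountingOutputStream tallies them while Funnels forwards them to the Hasher. A minimal standalone sketch, assuming only Guava on the classpath (the helper name is hypothetical):

// A minimal sketch of the same one-pass pattern; not part of PackageUtil.
static void printSizeAndHash(File file) throws IOException {
  Hasher hasher = Hashing.md5().newHasher();
  // CountingOutputStream counts bytes while Funnels feeds them to the hasher.
  try (CountingOutputStream out =
      new CountingOutputStream(Funnels.asOutputStream(hasher))) {
    Files.asByteSource(file).copyTo(out);
    out.flush();
    System.out.println(file + ": " + out.getCount() + " bytes, md5 " + hasher.hash());
  }
}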
Use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache: class DataflowPipelineTranslatorTest, method testNamesOverridden.
/**
 * Test that in translation the name for a collection (in this case just a Create output) is
 * overridden to be what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
  // The Create step.
  Step step = job.getSteps().get(0);
  // This is the name "set by the user" that the Dataflow translator must override.
  String userSpecifiedName =
      Structs.getString(
          Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);
  // This is the calculated name that must actually be used.
  String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0";
  assertThat(userSpecifiedName, equalTo(calculatedName));
}
Use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache: class DataflowPipelineTranslatorTest, method testSubnetworkConfigMissing.
@Test
public void testSubnetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
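The complementary positive case would follow the same shape as testNetworkConfig above. A hedged sketch, assuming DataflowPipelineOptions.setSubnetwork and placeholder region/subnetwork names:

// A minimal sketch; the region and subnetwork values are placeholders.
DataflowPipelineOptions options = buildPipelineOptions();
options.setSubnetwork("regions/us-central1/subnetworks/my-subnet");
Pipeline p = buildPipeline(options);
p.traverseTopologically(new RecordingPipelineVisitor());
Job job =
    DataflowPipelineTranslator.fromOptions(options)
        .translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
        .getJob();
// The translator should propagate the value to the worker pool, readable via
// job.getEnvironment().getWorkerPools().get(0).getSubnetwork().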