Search in sources:

Example 1 with WriteFiles

Use of org.apache.beam.sdk.io.WriteFiles in the project beam by apache.

From the class FlinkTransformOverridesTest, method testRunnerDeterminedSharding.

@Test
public void testRunnerDeterminedSharding() {
    // Build a test pipeline against the Flink runner with an explicit parallelism of 5.
    FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
    options.setRunner(TestFlinkRunner.class);
    options.setFlinkMaster("[auto]");
    options.setParallelism(5);
    TestPipeline pipeline = TestPipeline.fromOptions(options);

    StreamingShardedWriteFactory<Object, Void, Object> factory =
        new StreamingShardedWriteFactory<>(pipeline.getOptions());
    WriteFiles<Object, Void, Object> original =
        WriteFiles.to(new TestSink(tmpFolder.toString()));

    @SuppressWarnings("unchecked")
    PCollection<Object> input = (PCollection) pipeline.apply(Create.empty(VoidCoder.of()));
    AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
        application =
            AppliedPTransform.of(
                "writefiles",
                PValues.expandInput(input),
                Collections.emptyMap(),
                original,
                ResourceHints.create(),
                pipeline);

    WriteFiles<Object, Void, Object> replacement =
        (WriteFiles<Object, Void, Object>)
            factory.getReplacementTransform(application).getTransform();

    // The factory must hand back a new transform whose shard count was picked by the
    // runner: twice the configured parallelism, i.e. 2 * 5 = 10.
    assertThat(replacement, not(equalTo((Object) original)));
    assertThat(replacement.getNumShardsProvider().get(), is(10));
}
Also used : StreamingShardedWriteFactory(org.apache.beam.runners.flink.FlinkStreamingPipelineTranslator.StreamingShardedWriteFactory) WriteFilesResult(org.apache.beam.sdk.io.WriteFilesResult) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) PCollection(org.apache.beam.sdk.values.PCollection) WriteFiles(org.apache.beam.sdk.io.WriteFiles) Test(org.junit.Test)

Example 2 with WriteFiles

Use of org.apache.beam.sdk.io.WriteFiles in the project beam by apache.

From the class DataflowRunnerTest, method testStreamingWriteOverride.

/**
 * Asserts that the Dataflow {@code StreamingShardedWriteFactory} replaces a
 * runner-determined-sharding {@code WriteFiles} with one whose shard count equals
 * {@code expectedNumShards}, and that the replacement's outputs map back to the
 * original's outputs.
 *
 * @param options pipeline options used to build the test pipeline and the factory
 * @param expectedNumShards the shard count the factory is expected to choose
 */
private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
    TestPipeline p = TestPipeline.fromOptions(options);
    StreamingShardedWriteFactory<Object, Void, Object> factory = new StreamingShardedWriteFactory<>(p.getOptions());
    WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));
    // Raw cast is safe here: the empty PCollection carries no elements, so its element
    // type is irrelevant. Suppress at the narrowest scope (matches Example 1's style).
    @SuppressWarnings("unchecked") PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
    AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>> originalApplication = AppliedPTransform.of("writefiles", PValues.expandInput(objs), Collections.emptyMap(), original, ResourceHints.create(), p);
    WriteFiles<Object, Void, Object> replacement = (WriteFiles<Object, Void, Object>) factory.getReplacementTransform(originalApplication).getTransform();
    // The factory must produce a distinct transform with the runner-chosen shard count.
    assertThat(replacement, not(equalTo((Object) original)));
    assertThat(replacement.getNumShardsProvider().get(), equalTo(expectedNumShards));
    // Apply both transforms and verify the output-mapping: the replacement's
    // per-destination filenames output must map back to the original's.
    WriteFilesResult<Void> originalResult = objs.apply(original);
    WriteFilesResult<Void> replacementResult = objs.apply(replacement);
    Map<PCollection<?>, ReplacementOutput> res = factory.mapOutputs(PValues.expandOutput(originalResult), replacementResult);
    assertEquals(1, res.size());
    assertEquals(originalResult.getPerDestinationOutputFilenames(), res.get(replacementResult.getPerDestinationOutputFilenames()).getOriginal().getValue());
}
Also used : StreamingShardedWriteFactory(org.apache.beam.runners.dataflow.DataflowRunner.StreamingShardedWriteFactory) WriteFilesResult(org.apache.beam.sdk.io.WriteFilesResult) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) PCollection(org.apache.beam.sdk.values.PCollection) ReplacementOutput(org.apache.beam.sdk.runners.PTransformOverrideFactory.ReplacementOutput) StorageObject(com.google.api.services.storage.model.StorageObject) WriteFiles(org.apache.beam.sdk.io.WriteFiles)

Example 3 with WriteFiles

Use of org.apache.beam.sdk.io.WriteFiles in the project beam by apache.

From the class TransformTreeTest, method testCompositeCapture.

@Test
public void testCompositeCapture() throws Exception {
    p.enableAbandonedNodeEnforcement(false);
    File inputFile = tmpFolder.newFile();
    File outputFile = tmpFolder.newFile();

    // A composite transform we expect the traversal to enter and leave.
    final PTransform<PCollection<String>, PCollection<Iterable<String>>> sample =
        Sample.fixedSizeGlobally(10);
    p.apply("ReadMyFile", TextIO.read().from(inputFile.getPath()))
        .apply(sample)
        .apply(Flatten.iterables())
        .apply("WriteMyFile", TextIO.write().to(outputFile.getPath()));

    // Record which transforms the visitor entered, and which composites it left.
    final EnumSet<TransformsSeen> seen = EnumSet.noneOf(TransformsSeen.class);
    final EnumSet<TransformsSeen> exited = EnumSet.noneOf(TransformsSeen.class);

    p.traverseTopologically(new Pipeline.PipelineVisitor.Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
            if (node.isRootNode()) {
                return CompositeBehavior.ENTER_TRANSFORM;
            }
            PTransform<?, ?> transform = node.getTransform();
            if (transform.getClass().equals(sample.getClass())) {
                // Each composite should be entered exactly once.
                assertTrue(seen.add(TransformsSeen.SAMPLE));
                assertNotNull(node.getEnclosingNode());
                assertTrue(node.isCompositeNode());
            } else if (transform instanceof WriteFiles) {
                assertTrue(seen.add(TransformsSeen.WRITE));
                assertNotNull(node.getEnclosingNode());
                assertTrue(node.isCompositeNode());
            }
            // Impulse is a primitive and must never show up as a composite.
            assertThat(transform, not(instanceOf(Impulse.class)));
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void leaveCompositeTransform(TransformHierarchy.Node node) {
            if (node.isRootNode()) {
                return;
            }
            if (sample.getClass().equals(node.getTransform().getClass())) {
                assertTrue(exited.add(TransformsSeen.SAMPLE));
            }
        }

        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
            PTransform<?, ?> transform = node.getTransform();
            // Composites should not be visited here.
            assertThat(transform, not(instanceOf(Combine.Globally.class)));
            assertThat(transform, not(instanceOf(WriteFiles.class)));
            assertThat(transform, not(instanceOf(TextIO.Read.class)));
            // There are multiple impulses in the graph so we don't validate that we haven't
            // seen one before.
            seen.add(TransformsSeen.IMPULSE);
        }
    });

    // Every tracked transform was entered; only SAMPLE was observed on leave.
    assertEquals(seen, EnumSet.allOf(TransformsSeen.class));
    assertEquals(exited, EnumSet.of(TransformsSeen.SAMPLE));
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) WriteFiles(org.apache.beam.sdk.io.WriteFiles) File(java.io.File) Impulse(org.apache.beam.sdk.transforms.Impulse) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Aggregations

WriteFiles (org.apache.beam.sdk.io.WriteFiles)3 PCollection (org.apache.beam.sdk.values.PCollection)3 WriteFilesResult (org.apache.beam.sdk.io.WriteFilesResult)2 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)2 Test (org.junit.Test)2 StorageObject (com.google.api.services.storage.model.StorageObject)1 File (java.io.File)1 StreamingShardedWriteFactory (org.apache.beam.runners.dataflow.DataflowRunner.StreamingShardedWriteFactory)1 StreamingShardedWriteFactory (org.apache.beam.runners.flink.FlinkStreamingPipelineTranslator.StreamingShardedWriteFactory)1 ReplacementOutput (org.apache.beam.sdk.runners.PTransformOverrideFactory.ReplacementOutput)1 Impulse (org.apache.beam.sdk.transforms.Impulse)1 PTransform (org.apache.beam.sdk.transforms.PTransform)1