Example usage of org.apache.beam.sdk.Pipeline from the Apache Beam project.
From the class DataflowGroupByKeyTest, method testGroupByKeyServiceUnbounded:
@Test
public void testGroupByKeyServiceUnbounded() {
  Pipeline pipeline = createTestServiceRunner();

  // Build a primitive UNBOUNDED PCollection in the default global windowing
  // strategy, with no trigger configured.
  PCollection<KV<String, Integer>> unboundedInput =
      pipeline.apply(
          new PTransform<PBegin, PCollection<KV<String, Integer>>>() {
            @Override
            public PCollection<KV<String, Integer>> expand(PBegin input) {
              return PCollection.<KV<String, Integer>>createPrimitiveOutputInternal(
                      input.getPipeline(),
                      WindowingStrategy.globalDefault(),
                      PCollection.IsBounded.UNBOUNDED)
                  .setTypeDescriptor(new TypeDescriptor<KV<String, Integer>>() {});
            }
          });

  // Applying GroupByKey to such a collection must be rejected at construction time.
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(
      "GroupByKey cannot be applied to non-bounded PCollection in the GlobalWindow without "
          + "a trigger. Use a Window.into or Window.triggering transform prior to GroupByKey.");
  unboundedInput.apply("GroupByKey", GroupByKey.<String, Integer>create());
}
Example usage of org.apache.beam.sdk.Pipeline from the Apache Beam project.
From the class WordCount, method main:
public static void main(String[] args) {
  // Parse command-line arguments into validated pipeline options.
  WordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
  Pipeline pipeline = Pipeline.create(options);

  // Concepts #2 and #3: the pipeline applies the composite CountWords
  // transform, then formats each count via the static FormatAsTextFn().
  pipeline
      .apply("ReadLines", TextIO.read().from(options.getInputFile()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.write().to(options.getOutput()));

  // Block until the pipeline finishes.
  pipeline.run().waitUntilFinish();
}
Example usage of org.apache.beam.sdk.Pipeline from the Apache Beam project.
From the class TestDataflowRunnerTest, method testStreamingOnCreateMatcher:
@Test
public void testStreamingOnCreateMatcher() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> created = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(created).containsInAnyOrder(1, 2, 3);

  // Stub a job that the service reports as successfully completed.
  final DataflowPipelineJob job = Mockito.mock(DataflowPipelineJob.class);
  when(job.getState()).thenReturn(State.DONE);
  when(job.getProjectId()).thenReturn("test-project");
  when(job.getJobId()).thenReturn("test-job");

  // The stub runner hands back the stub job for any pipeline.
  DataflowRunner stubRunner = Mockito.mock(DataflowRunner.class);
  when(stubRunner.run(any(Pipeline.class))).thenReturn(job);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  // Install the on-create matcher under test.
  options.as(TestPipelineOptions.class).setOnCreateMatcher(new TestSuccessMatcher(job, 0));

  when(job.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true));
  testRunner.run(pipeline, stubRunner);
}
Example usage of org.apache.beam.sdk.Pipeline from the Apache Beam project.
From the class TestDataflowRunnerTest, method testRunBatchJobThatFails:
/**
 * Tests that when a batch job terminates in a failure state even if all assertions
 * passed, it throws an error to that effect.
 */
@Test
public void testRunBatchJobThatFails() throws Exception {
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
  PAssert.that(pc).containsInAnyOrder(1, 2, 3);

  // Stub a job that the service reports as FAILED.
  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.FAILED);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  // Metrics say the PAsserts succeeded, but the job itself failed.
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, false));

  // The ExpectedException rule both verifies the RuntimeException when it is
  // thrown and fails the test when it is not, so no trailing fail(...) call is
  // needed after run(). (The previous fail("AssertionError expected") was
  // unreachable on the passing path, and its comment referred to a try-catch
  // block that did not exist.)
  expectedException.expect(RuntimeException.class);
  runner.run(p, mockRunner);
}
Example usage of org.apache.beam.sdk.Pipeline from the Apache Beam project.
From the class TestDataflowRunnerTest, method testStreamingOnSuccessMatcherWhenPipelineSucceeds:
/**
 * Tests that when a streaming pipeline terminates and doesn't fail due to {@link PAssert} that
 * the {@link TestPipelineOptions#setOnSuccessMatcher(SerializableMatcher) on success matcher} is
 * invoked.
 */
@Test
public void testStreamingOnSuccessMatcherWhenPipelineSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> created = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(created).containsInAnyOrder(1, 2, 3);

  // Stub a job that the service reports as successfully completed.
  final DataflowPipelineJob job = Mockito.mock(DataflowPipelineJob.class);
  when(job.getState()).thenReturn(State.DONE);
  when(job.getProjectId()).thenReturn("test-project");
  when(job.getJobId()).thenReturn("test-job");

  // The stub runner hands back the stub job for any pipeline.
  DataflowRunner stubRunner = Mockito.mock(DataflowRunner.class);
  when(stubRunner.run(any(Pipeline.class))).thenReturn(job);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  // Install the on-success matcher under test.
  options.as(TestPipelineOptions.class).setOnSuccessMatcher(new TestSuccessMatcher(job, 1));

  when(job.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true));
  testRunner.run(pipeline, stubRunner);
}
Aggregations