Use of org.apache.beam.sdk.Pipeline in project beam by apache.
From the class DirectRunnerTest, the method testUnencodableOutputElement:
@Test
public void testUnencodableOutputElement() throws Exception {
  Pipeline p = getPipeline();
  PCollection<Long> pcollection =
      p.apply(Create.of((Void) null))
          .apply(
              ParDo.of(
                  new DoFn<Void, Long>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(null);
                    }
                  }))
          .setCoder(VarLongCoder.of());
  pcollection.apply(
      ParDo.of(
          new DoFn<Long, Long>() {
            @ProcessElement
            public void unreachable(ProcessContext c) {
              fail("Pipeline should fail to encode a null Long in VarLongCoder");
            }
          }));
  thrown.expectCause(isA(CoderException.class));
  thrown.expectMessage("cannot encode a null Long");
  p.run();
}
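The expected failure originates in the coder, not in the DoFn: VarLongCoder refuses to encode null. A minimal sketch of that behavior in isolation, assuming the SDK's CoderUtils helper (org.apache.beam.sdk.util.CoderUtils):

import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.util.CoderUtils;

public class NullEncodeSketch {
  public static void main(String[] args) throws Exception {
    try {
      // Encoding null with VarLongCoder should throw, mirroring the test's expectation.
      CoderUtils.encodeToByteArray(VarLongCoder.of(), null);
    } catch (CoderException e) {
      System.out.println("As expected: " + e.getMessage());
    }
  }
}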
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
From the class DirectRunnerTest, the method testMutatingOutputThenOutputDoFnError:
/**
 * Tests that a {@link DoFn} that mutates an output with a good equals() fails in the
 * {@link DirectRunner}.
 */
@Test
public void testMutatingOutputThenOutputDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();
  pipeline
      .apply(Create.of(42))
      .apply(
          ParDo.of(
              new DoFn<Integer, List<Integer>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
                  c.output(outputList);
                  outputList.set(0, 37);
                  c.output(outputList);
                }
              }));
  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("output");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
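The usual fix for this failure is to emit a defensive copy rather than mutating a list that has already been output. A minimal sketch of that approach (the class name CopyingDoFn is illustrative, not from the Beam codebase):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.transforms.DoFn;

// Sketch: emit defensive copies so previously output elements are never mutated.
class CopyingDoFn extends DoFn<Integer, List<Integer>> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    List<Integer> outputList = new ArrayList<>(Arrays.asList(1, 2, 3, 4));
    c.output(new ArrayList<>(outputList)); // emit a snapshot of the current state
    outputList.set(0, 37);                 // mutate only the local working copy
    c.output(new ArrayList<>(outputList)); // emit a second, independent snapshot
  }
}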
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
From the class DirectRunnerTest, the method testMutatingOutputWithEnforcementDisabledSucceeds:
/**
 * Tests that a {@link DoFn} that mutates an output succeeds in the {@link DirectRunner}
 * when immutability enforcement is disabled.
 */
@Test
public void testMutatingOutputWithEnforcementDisabledSucceeds() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class);
  options.as(DirectOptions.class).setEnforceImmutability(false);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(42))
      .apply(
          ParDo.of(
              new DoFn<Integer, List<Integer>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
                  c.output(outputList);
                  outputList.set(0, 37);
                  c.output(outputList);
                }
              }));
  pipeline.run();
}
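The same toggle can also be supplied as a command-line style argument instead of being set programmatically. A minimal sketch, assuming the direct runner's DirectOptions are registered with the options factory so that --enforceImmutability is parseable:

import org.apache.beam.runners.direct.DirectOptions;
import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class DisabledEnforcementSketch {
  public static void main(String[] args) {
    // Same configuration as the test above, built from argument parsing.
    DirectOptions options =
        PipelineOptionsFactory.fromArgs("--enforceImmutability=false")
            .as(DirectOptions.class);
    options.setRunner(DirectRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    // ... build and run the pipeline as usual ...
  }
}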
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
From the class DirectRunnerTest, the method transformDisplayDataExceptionShouldFail:
@Test
public void transformDisplayDataExceptionShouldFail() {
  DoFn<Integer, Integer> brokenDoFn =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {}

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          throw new RuntimeException("oh noes!");
        }
      };
  Pipeline p = getPipeline();
  p.apply(Create.of(1, 2, 3)).apply(ParDo.of(brokenDoFn));
  thrown.expectMessage(brokenDoFn.getClass().getName());
  thrown.expectCause(ThrowableMessageMatcher.hasMessage(is("oh noes!")));
  p.run();
}
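For contrast, a well-behaved populateDisplayData override registers items on the builder rather than throwing. A minimal sketch using the standard DisplayData.item API (WellBehavedDoFn is an illustrative name):

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.display.DisplayData;

// Sketch: register display metadata instead of throwing from populateDisplayData.
class WellBehavedDoFn extends DoFn<Integer, Integer> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    c.output(c.element());
  }

  @Override
  public void populateDisplayData(DisplayData.Builder builder) {
    super.populateDisplayData(builder);
    builder.add(DisplayData.item("description", "passes elements through unchanged"));
  }
}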
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
From the class ResumeFromCheckpointStreamingTest, the method run:
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private SparkPipelineResult run(Optional<Instant> stopWatermarkOption, int expectedAssertions) {
  KafkaIO.Read<String, Instant> read =
      KafkaIO.<String, Instant>read()
          .withBootstrapServers(EMBEDDED_KAFKA_CLUSTER.getBrokerList())
          .withTopics(Collections.singletonList(TOPIC))
          .withKeyDeserializer(StringDeserializer.class)
          .withValueDeserializer(InstantDeserializer.class)
          .updateConsumerProperties(
              ImmutableMap.<String, Object>of("auto.offset.reset", "earliest"))
          .withTimestampFn(
              new SerializableFunction<KV<String, Instant>, Instant>() {
                @Override
                public Instant apply(KV<String, Instant> kv) {
                  return kv.getValue();
                }
              })
          .withWatermarkFn(
              new SerializableFunction<KV<String, Instant>, Instant>() {
                @Override
                public Instant apply(KV<String, Instant> kv) {
                  // At EOF, move the watermark to infinity.
                  String key = kv.getKey();
                  Instant instant = kv.getValue();
                  return key.equals("EOF") ? BoundedWindow.TIMESTAMP_MAX_VALUE : instant;
                }
              });
  TestSparkPipelineOptions options =
      PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
  options.setSparkMaster("local[*]");
  options.setCheckpointDurationMillis(options.getBatchIntervalMillis());
  options.setExpectedAssertions(expectedAssertions);
  options.setRunner(TestSparkRunner.class);
  options.setEnableSparkMetricSinks(false);
  options.setForceStreaming(true);
  options.setCheckpointDir(temporaryFolder.getRoot().getPath());
  // The timeout is per execution, so it can be injected by the caller.
  if (stopWatermarkOption.isPresent()) {
    options.setStopPipelineWatermark(stopWatermarkOption.get().getMillis());
  }
  Pipeline p = Pipeline.create(options);
  PCollection<String> expectedCol =
      p.apply(Create.of(ImmutableList.of("side1", "side2")).withCoder(StringUtf8Coder.of()));
  PCollectionView<List<String>> view = expectedCol.apply(View.<String>asList());
  PCollection<KV<String, Instant>> kafkaStream = p.apply(read.withoutMetadata());
  PCollection<Iterable<String>> grouped =
      kafkaStream
          .apply(Keys.<String>create())
          .apply("EOFShallNotPassFn", ParDo.of(new EOFShallNotPassFn(view)).withSideInputs(view))
          .apply(
              Window.<String>into(FixedWindows.of(Duration.millis(500)))
                  .triggering(AfterWatermark.pastEndOfWindow())
                  .accumulatingFiredPanes()
                  .withAllowedLateness(Duration.ZERO))
          .apply(WithKeys.<Integer, String>of(1))
          .apply(GroupByKey.<Integer, String>create())
          .apply(Values.<Iterable<String>>create());
  grouped.apply(new PAssertWithoutFlatten<>("k1", "k2", "k3", "k4", "k5"));
  return (SparkPipelineResult) p.run();
}
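Since SerializableFunction is a single-method interface, the two anonymous classes above can be written more compactly as lambdas on Java 8+. A minimal sketch (WatermarkFnSketch is an illustrative holder class; "EOF" is the sentinel key used by the test):

import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
import org.joda.time.Instant;

public class WatermarkFnSketch {
  // Element timestamps come straight from the record value.
  static final SerializableFunction<KV<String, Instant>, Instant> TIMESTAMP_FN =
      kv -> kv.getValue();

  // At the EOF sentinel, advance the watermark to infinity; otherwise track the value.
  static final SerializableFunction<KV<String, Instant>, Instant> WATERMARK_FN =
      kv -> "EOF".equals(kv.getKey()) ? BoundedWindow.TIMESTAMP_MAX_VALUE : kv.getValue();
}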