use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class WithFailuresTest method testDirectException.
/**
 * Runs an integer-division mapping over the elements {@code 0} and {@code 1} with an exception
 * handler attached, then checks that the successful result lands in the main output while the
 * failing element is reported, keyed by its input, in the failure collections.
 */
@Test
@Category(NeedsRunner.class)
public void testDirectException() {
  List<PCollection<KV<Integer, EncodableThrowable>>> failures = new ArrayList<>();

  // 1 / 0 throws, 1 / 1 succeeds: exactly one element should reach each side.
  PCollection<Integer> quotients =
      pipeline
          .apply(Create.of(0, 1))
          .apply(
              MapElements.into(TypeDescriptors.integers())
                  .via((Integer divisor) -> 1 / divisor)
                  .exceptionsVia(new ThrowableHandler<Integer>() {}))
          .failuresTo(failures);
  PAssert.that(quotients).containsInAnyOrder(1);

  // Merge every registered failure collection so a single assertion covers them all.
  PCollection<KV<Integer, EncodableThrowable>> mergedFailures =
      PCollectionList.of(failures).apply(Flatten.pCollections());
  PAssert.thatSingleton(mergedFailures)
      .satisfies(
          failure -> {
            assertEquals(Integer.valueOf(0), failure.getKey());
            Throwable cause = failure.getValue().throwable();
            // Compared by class name rather than by class identity.
            assertEquals("java.lang.ArithmeticException", cause.getClass().getName());
            assertEquals("/ by zero", cause.getMessage());
            return null;
          });

  pipeline.run();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class PTransformTest method testPopulateDisplayDataDefaultBehavior.
/**
 * Checks that a transform which only implements {@code expand} reports no display data items
 * when passed to {@code DisplayData.from}.
 */
@Test
public void testPopulateDisplayDataDefaultBehavior() {
  PTransform<PCollection<String>, PCollection<String>> unappliedTransform =
      new PTransform<PCollection<String>, PCollection<String>>() {
        @Override
        public PCollection<String> expand(PCollection<String> input) {
          // This test never applies the transform, so reaching this point is an error.
          throw new IllegalArgumentException("Should never be applied");
        }
      };

  assertThat(DisplayData.from(unappliedTransform).items(), empty());
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class ParDoSchemaTest method testRowBagState.
/**
 * Exercises a {@code BagState} of {@code Row} values inside a stateful {@code DoFn}: every
 * incoming row is buffered, and once at least four rows are buffered they are sorted by their
 * first string field and emitted together as one row holding an array of rows.
 */
@Test
@Category({NeedsRunner.class, UsesStatefulParDo.class})
public void testRowBagState() {
  final String stateId = "foo";

  Schema type =
      Stream.of(Schema.Field.of("f_string", FieldType.STRING)).collect(Schema.toSchema());
  Schema outputType = Schema.of(Field.of("values", FieldType.array(FieldType.row(type))));

  DoFn<KV<String, Row>, Row> fn =
      new DoFn<KV<String, Row>, Row>() {
        @StateId(stateId)
        private final StateSpec<BagState<Row>> bufferState = StateSpecs.rowBag(type);

        @ProcessElement
        public void processElement(
            @Element KV<String, Row> element,
            @StateId(stateId) BagState<Row> state,
            OutputReceiver<Row> out) {
          state.add(element.getValue());
          Iterable<Row> buffered = state.read();
          if (Iterables.size(buffered) < 4) {
            // Not enough rows buffered yet; emit nothing for this element.
            return;
          }
          // Sort so the emitted array does not depend on the order elements arrived in.
          List<Row> ordered = Lists.newArrayList(buffered);
          ordered.sort(Comparator.comparing(r -> r.getString(0)));
          out.output(Row.withSchema(outputType).addArray(ordered).build());
        }
      };

  // The same four rows serve as the input values and as the expected array contents.
  Row rowA = Row.withSchema(type).addValue("a").build();
  Row rowB = Row.withSchema(type).addValue("b").build();
  Row rowC = Row.withSchema(type).addValue("c").build();
  Row rowD = Row.withSchema(type).addValue("d").build();

  PCollection<Row> output =
      pipeline
          .apply(
              Create.of(
                  KV.of("hello", rowA),
                  KV.of("hello", rowB),
                  KV.of("hello", rowC),
                  KV.of("hello", rowD)))
          .apply(ParDo.of(fn))
          .setRowSchema(outputType);

  Row expected =
      Row.withSchema(outputType).addArray(Lists.newArrayList(rowA, rowB, rowC, rowD)).build();
  PAssert.that(output).containsInAnyOrder(expected);

  pipeline.run();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class ReshuffleTest method testReshufflePreservesTimestamps.
/**
 * Verifies that a {@link Reshuffle} leaves element timestamps untouched: each element's timestamp
 * is reified into its value before the reshuffle and again afterwards, and the two reified
 * timestamps must be equal.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshufflePreservesTimestamps() {
  // Timestamps include the minimum window timestamp and the global window's maximum timestamp.
  PCollection<String> timestampedWords =
      pipeline.apply(
          Create.timestamped(
                  TimestampedValue.of("foo", BoundedWindow.TIMESTAMP_MIN_VALUE),
                  TimestampedValue.of("foo", new Instant(0)),
                  TimestampedValue.of("bar", new Instant(33)),
                  TimestampedValue.of("bar", GlobalWindow.INSTANCE.maxTimestamp()))
              .withCoder(StringUtf8Coder.of()));

  // Key each word by itself, then record its timestamp inside the value.
  PCollection<KV<String, String>> keyedWords =
      timestampedWords.apply(
          WithKeys.<String, String>of(word -> word).withKeyType(TypeDescriptors.strings()));
  PCollection<KV<String, TimestampedValue<String>>> beforeReshuffle =
      keyedWords.apply("ReifyOriginalTimestamps", Reify.timestampsInValue());

  // Outer TimestampedValue: timestamp observed after the reshuffle.
  // Inner TimestampedValue: timestamp captured before the reshuffle.
  PCollection<TimestampedValue<TimestampedValue<String>>> output =
      beforeReshuffle
          .apply(Reshuffle.of())
          .apply("ReifyReshuffledTimestamps", Reify.timestampsInValue())
          .apply(Values.create());

  PAssert.that(output)
      .satisfies(
          reshuffled -> {
            for (TimestampedValue<TimestampedValue<String>> pair : reshuffled) {
              Instant before = pair.getValue().getTimestamp();
              Instant after = pair.getTimestamp();
              assertThat("Reshuffle must preserve element timestamps", after, equalTo(before));
            }
            return null;
          });

  pipeline.run();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class ReifyTimestampsTest method inValuesSucceeds.
/**
 * Checks that {@code ReifyTimestamps.inValues()} keeps each key and wraps each value in a
 * {@code TimestampedValue} carrying the timestamp that was assigned to the element.
 */
@Test
@Category(ValidatesRunner.class)
public void inValuesSucceeds() {
  PCollection<KV<String, Integer>> keyedInts =
      pipeline.apply(
          Create.of(KV.of("foo", 0), KV.of("foo", 1), KV.of("bar", 2), KV.of("baz", 3)));

  // Give every element a timestamp built from its own integer value.
  PCollection<KV<String, Integer>> timestamped =
      keyedInts.apply(WithTimestamps.of(kv -> new Instant(kv.getValue().longValue())));

  PCollection<KV<String, TimestampedValue<Integer>>> reified =
      timestamped.apply(ReifyTimestamps.inValues());

  PAssert.that(reified)
      .containsInAnyOrder(
          KV.of("foo", TimestampedValue.of(0, new Instant(0))),
          KV.of("foo", TimestampedValue.of(1, new Instant(1))),
          KV.of("bar", TimestampedValue.of(2, new Instant(2))),
          KV.of("baz", TimestampedValue.of(3, new Instant(3))));

  pipeline.run();
}
Aggregations