Search in sources:

Example 66 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class WithFailuresTest method testDirectException.

/**
 * Maps {@code 1 / i} over the inputs 0 and 1 and checks that the division by zero is reported
 * on the failure output, keyed by the offending input, while the other element is emitted.
 */
@Test
@Category(NeedsRunner.class)
public void testDirectException() {
    // Failure collections are registered in this list through failuresTo(...).
    List<PCollection<KV<Integer, EncodableThrowable>>> failureSinks = new ArrayList<>();

    PCollection<Integer> numbers = pipeline.apply(Create.of(0, 1));
    PCollection<Integer> quotients =
        numbers
            .apply(
                MapElements.into(TypeDescriptors.integers())
                    .via((Integer divisor) -> 1 / divisor)
                    .exceptionsVia(new ThrowableHandler<Integer>() {
                    }))
            .failuresTo(failureSinks);

    // Only the input 1 divides cleanly; 1 / 1 == 1.
    PAssert.that(quotients).containsInAnyOrder(1);

    PCollection<KV<Integer, EncodableThrowable>> failures =
        PCollectionList.of(failureSinks).apply(Flatten.pCollections());
    PAssert.thatSingleton(failures).satisfies(failure -> {
        Throwable cause = failure.getValue().throwable();
        assertEquals(Integer.valueOf(0), failure.getKey());
        assertEquals("java.lang.ArithmeticException", cause.getClass().getName());
        assertEquals("/ by zero", cause.getMessage());
        return null;
    });

    pipeline.run();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) EncodableThrowable(org.apache.beam.sdk.values.EncodableThrowable) ArrayList(java.util.ArrayList) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 67 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class PTransformTest method testPopulateDisplayDataDefaultBehavior.

/**
 * Checks that a {@link PTransform} which does not register any display data yields an empty
 * set of display data items.
 */
@Test
public void testPopulateDisplayDataDefaultBehavior() {
    PTransform<PCollection<String>, PCollection<String>> neverApplied =
        new PTransform<PCollection<String>, PCollection<String>>() {

            @Override
            public PCollection<String> expand(PCollection<String> begin) {
                // Only display data is inspected; expansion is not expected to happen.
                throw new IllegalArgumentException("Should never be applied");
            }
        };

    assertThat(DisplayData.from(neverApplied).items(), empty());
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Test(org.junit.Test)

Example 68 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class ParDoSchemaTest method testRowBagState.

/**
 * Feeds four single-string rows under one key through a stateful {@link DoFn} that buffers them
 * in a row-typed {@link BagState}, and expects one output row holding all four, sorted by
 * their string field.
 */
@Test
@Category({ NeedsRunner.class, UsesStatefulParDo.class })
public void testRowBagState() {
    final String stateId = "foo";
    Schema elementSchema = Stream.of(Schema.Field.of("f_string", FieldType.STRING)).collect(Schema.toSchema());
    Schema resultSchema = Schema.of(Field.of("values", FieldType.array(FieldType.row(elementSchema))));

    DoFn<KV<String, Row>, Row> bufferingFn = new DoFn<KV<String, Row>, Row>() {

        @StateId(stateId)
        private final StateSpec<BagState<Row>> bufferState = StateSpecs.rowBag(elementSchema);

        @ProcessElement
        public void processElement(@Element KV<String, Row> element, @StateId(stateId) BagState<Row> state, OutputReceiver<Row> o) {
            state.add(element.getValue());
            Iterable<Row> buffered = state.read();
            // Nothing is emitted until at least four rows have been buffered.
            if (Iterables.size(buffered) < 4) {
                return;
            }
            List<Row> ordered = Lists.newArrayList(buffered);
            // Sort by the first (string) field so the emitted array has a stable order.
            ordered.sort(Comparator.comparing((Row row) -> row.getString(0)));
            o.output(Row.withSchema(resultSchema).addArray(ordered).build());
        }
    };

    Row rowA = Row.withSchema(elementSchema).addValue("a").build();
    Row rowB = Row.withSchema(elementSchema).addValue("b").build();
    Row rowC = Row.withSchema(elementSchema).addValue("c").build();
    Row rowD = Row.withSchema(elementSchema).addValue("d").build();

    PCollection<Row> output =
        pipeline
            .apply(Create.of(KV.of("hello", rowA), KV.of("hello", rowB), KV.of("hello", rowC), KV.of("hello", rowD)))
            .apply(ParDo.of(bufferingFn))
            .setRowSchema(resultSchema);

    Row expected = Row.withSchema(resultSchema).addArray(Lists.newArrayList(rowA, rowB, rowC, rowD)).build();
    PAssert.that(output).containsInAnyOrder(expected);

    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) Arrays(java.util.Arrays) UsesStatefulParDo(org.apache.beam.sdk.testing.UsesStatefulParDo) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) SetState(org.apache.beam.sdk.state.SetState) RunWith(org.junit.runner.RunWith) TupleTagList(org.apache.beam.sdk.values.TupleTagList) UsesMapState(org.apache.beam.sdk.testing.UsesMapState) UsesSetState(org.apache.beam.sdk.testing.UsesSetState) AutoValueSchema(org.apache.beam.sdk.schemas.AutoValueSchema) MapState(org.apache.beam.sdk.state.MapState) TupleTag(org.apache.beam.sdk.values.TupleTag) Map(java.util.Map) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) ValidatesRunner(org.apache.beam.sdk.testing.ValidatesRunner) Row(org.apache.beam.sdk.values.Row) ExpectedException(org.junit.rules.ExpectedException) Field(org.apache.beam.sdk.schemas.Schema.Field) PAssert(org.apache.beam.sdk.testing.PAssert) Assert.assertTrue(org.junit.Assert.assertTrue) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) NeedsRunner(org.apache.beam.sdk.testing.NeedsRunner) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) Schema(org.apache.beam.sdk.schemas.Schema) Serializable(java.io.Serializable) List(java.util.List) BagState(org.apache.beam.sdk.state.BagState) StateSpecs(org.apache.beam.sdk.state.StateSpecs) Stream(java.util.stream.Stream) Rule(org.junit.Rule) CombiningState(org.apache.beam.sdk.state.CombiningState) 
AutoValue(com.google.auto.value.AutoValue) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) UsesSchema(org.apache.beam.sdk.testing.UsesSchema) DefaultSchema(org.apache.beam.sdk.schemas.annotations.DefaultSchema) Comparator(java.util.Comparator) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) AutoValueSchema(org.apache.beam.sdk.schemas.AutoValueSchema) Schema(org.apache.beam.sdk.schemas.Schema) UsesSchema(org.apache.beam.sdk.testing.UsesSchema) DefaultSchema(org.apache.beam.sdk.schemas.annotations.DefaultSchema) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) Row(org.apache.beam.sdk.values.Row) BagState(org.apache.beam.sdk.state.BagState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 69 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class ReshuffleTest method testReshufflePreservesTimestamps.

/**
 * Verifies that every element carries the same timestamp before and after a {@link Reshuffle}
 * applied with the default {@link WindowingStrategy}.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshufflePreservesTimestamps() {
    PCollection<String> words =
        pipeline.apply(
            Create.timestamped(
                    TimestampedValue.of("foo", BoundedWindow.TIMESTAMP_MIN_VALUE),
                    TimestampedValue.of("foo", new Instant(0)),
                    TimestampedValue.of("bar", new Instant(33)),
                    TimestampedValue.of("bar", GlobalWindow.INSTANCE.maxTimestamp()))
                .withCoder(StringUtf8Coder.of()));

    // Key each word by itself, then record its pre-reshuffle timestamp inside the value.
    PCollection<KV<String, TimestampedValue<String>>> input =
        words
            .apply(WithKeys.<String, String>of(word -> word).withKeyType(TypeDescriptors.strings()))
            .apply("ReifyOriginalTimestamps", Reify.timestampsInValue());

    PCollection<KV<String, TimestampedValue<String>>> reshuffled = input.apply(Reshuffle.of());

    // Outer TimestampedValue: timestamp observed after the reshuffle.
    // Inner TimestampedValue: timestamp recorded before the reshuffle.
    PCollection<TimestampedValue<TimestampedValue<String>>> output =
        reshuffled
            .apply("ReifyReshuffledTimestamps", Reify.timestampsInValue())
            .apply(Values.create());

    PAssert.that(output).satisfies(elements -> {
        elements.forEach(
            reified ->
                assertThat(
                    "Reshuffle must preserve element timestamps",
                    reified.getTimestamp(),
                    equalTo(reified.getValue().getTimestamp())));
        return null;
    });

    pipeline.run();
}
Also used : TypeDescriptors.integers(org.apache.beam.sdk.values.TypeDescriptors.integers) KV(org.apache.beam.sdk.values.KV) IsIterableContainingInAnyOrder.containsInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Sessions(org.apache.beam.sdk.transforms.windowing.Sessions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Is.is(org.hamcrest.core.Is.is) Window(org.apache.beam.sdk.transforms.windowing.Window) ValidatesRunner(org.apache.beam.sdk.testing.ValidatesRunner) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) KvCoder(org.apache.beam.sdk.coders.KvCoder) KvMatcher.isKv(org.apache.beam.sdk.TestUtils.KvMatcher.isKv) PAssert(org.apache.beam.sdk.testing.PAssert) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) Serializable(java.io.Serializable) UsesTestStream(org.apache.beam.sdk.testing.UsesTestStream) AssignShardFn(org.apache.beam.sdk.transforms.Reshuffle.AssignShardFn) List(java.util.List) Rule(org.junit.Rule) Matchers.equalTo(org.hamcrest.Matchers.equalTo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) TestStream(org.apache.beam.sdk.testing.TestStream) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) 
Assert.assertEquals(org.junit.Assert.assertEquals) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 70 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class ReifyTimestampsTest method inValuesSucceeds.

/**
 * Assigns each element a timestamp equal to its integer value, applies
 * {@code ReifyTimestamps.inValues()}, and checks that every value comes back paired with that
 * timestamp.
 */
@Test
@Category(ValidatesRunner.class)
public void inValuesSucceeds() {
    PCollection<KV<String, Integer>> raw =
        pipeline.apply(Create.of(KV.of("foo", 0), KV.of("foo", 1), KV.of("bar", 2), KV.of("baz", 3)));

    // The element's integer value doubles as its timestamp in milliseconds.
    PCollection<KV<String, Integer>> timestamped =
        raw.apply(WithTimestamps.of(entry -> new Instant(entry.getValue().longValue())));

    PCollection<KV<String, TimestampedValue<Integer>>> reified =
        timestamped.apply(ReifyTimestamps.inValues());

    PAssert.that(reified)
        .containsInAnyOrder(
            KV.of("foo", TimestampedValue.of(0, new Instant(0))),
            KV.of("foo", TimestampedValue.of(1, new Instant(1))),
            KV.of("bar", TimestampedValue.of(2, new Instant(2))),
            KV.of("baz", TimestampedValue.of(3, new Instant(3))));

    pipeline.run();
}
Also used : KV(org.apache.beam.sdk.values.KV) PAssert(org.apache.beam.sdk.testing.PAssert) RunWith(org.junit.runner.RunWith) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) Serializable(java.io.Serializable) Rule(org.junit.Rule) Matchers.equalTo(org.hamcrest.Matchers.equalTo) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Instant(org.joda.time.Instant) ValidatesRunner(org.apache.beam.sdk.testing.ValidatesRunner) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

PCollection (org.apache.beam.sdk.values.PCollection)199 Test (org.junit.Test)133 KV (org.apache.beam.sdk.values.KV)62 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)61 Map (java.util.Map)59 List (java.util.List)58 Rule (org.junit.Rule)57 RunWith (org.junit.runner.RunWith)54 PAssert (org.apache.beam.sdk.testing.PAssert)52 Instant (org.joda.time.Instant)46 Duration (org.joda.time.Duration)45 JUnit4 (org.junit.runners.JUnit4)45 ParDo (org.apache.beam.sdk.transforms.ParDo)44 TupleTag (org.apache.beam.sdk.values.TupleTag)42 Pipeline (org.apache.beam.sdk.Pipeline)41 Create (org.apache.beam.sdk.transforms.Create)41 ArrayList (java.util.ArrayList)40 Serializable (java.io.Serializable)39 PTransform (org.apache.beam.sdk.transforms.PTransform)37 Row (org.apache.beam.sdk.values.Row)37