
Example 36 with PCollection

Use of org.apache.beam.sdk.values.PCollection in the apache/beam project.

The class BeamCalcRelTest, method testNoFieldAccess:

@Test
public void testNoFieldAccess() throws IllegalAccessException {
    String sql = "SELECT 1 FROM ORDER_DETAILS_BOUNDED";
    PCollection<Row> rows = compilePipeline(sql, pipeline);

    // Walk the pipeline graph to find the ParDo that produces `rows` and its input PCollection.
    final NodeGetter nodeGetter = new NodeGetter(rows);
    pipeline.traverseTopologically(nodeGetter);
    ParDo.MultiOutput<Row, Row> pardo = (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
    PCollection<Row> input = (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());

    // The query references no columns, so the Calc DoFn should declare an empty field-access descriptor.
    DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
    FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
    Assert.assertFalse(fieldAccess.getAllFields());
    Assert.assertTrue(fieldAccess.getFieldsAccessed().isEmpty());
    Assert.assertTrue(fieldAccess.getNestedFieldsAccessed().isEmpty());

    pipeline.run().waitUntilFinish();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)
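NodeGetter is a private helper of the test class and is not shown in the snippet. A minimal sketch of such a visitor, assuming Beam's Pipeline.PipelineVisitor.Defaults and TransformHierarchy.Node APIs (the actual implementation in BeamCalcRelTest may differ):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.runners.TransformHierarchy;
import org.apache.beam.sdk.values.PValue;

// Records the TransformHierarchy.Node that produced a given PValue during traversal.
class NodeGetter extends Pipeline.PipelineVisitor.Defaults {
    private final PValue target;
    TransformHierarchy.Node producer;

    NodeGetter(PValue target) {
        this.target = target;
    }

    @Override
    public void visitValue(PValue value, TransformHierarchy.Node producer) {
        // Remember the node that produced the PCollection we are interested in.
        if (value == target) {
            this.producer = producer;
        }
    }
}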

Example 37 with PCollection

Use of org.apache.beam.sdk.values.PCollection in the apache/beam project.

The class BeamZetaSqlCalcRelTest, method testSingleFieldAccess:

@Test
public void testSingleFieldAccess() throws IllegalAccessException {
    String sql = "SELECT Key FROM KeyValue";
    PCollection<Row> rows = compile(sql);

    // Walk the pipeline graph to find the ParDo that produces `rows` and its input PCollection.
    final NodeGetter nodeGetter = new NodeGetter(rows);
    pipeline.traverseTopologically(nodeGetter);
    ParDo.MultiOutput<Row, Row> pardo = (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
    PCollection<Row> input = (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());

    // Only the Key column is projected, so the descriptor should reference exactly that field.
    DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
    FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
    Assert.assertTrue(fieldAccess.referencesSingleField());
    Assert.assertEquals("Key", Iterables.getOnlyElement(fieldAccess.fieldNamesAccessed()));

    pipeline.run().waitUntilFinish();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)
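The same FieldAccessDescriptor checks can be exercised outside a SQL pipeline. A minimal, standalone sketch, assuming a hand-written two-column schema in place of the test's KeyValue table (the real table's schema may differ):

import org.apache.beam.sdk.schemas.FieldAccessDescriptor;
import org.apache.beam.sdk.schemas.Schema;

public class FieldAccessSketch {
    public static void main(String[] args) {
        // Illustrative schema standing in for the KeyValue table.
        Schema keyValue = Schema.builder().addInt64Field("Key").addStringField("Value").build();

        // A descriptor naming a single field reports exactly that field.
        FieldAccessDescriptor single = FieldAccessDescriptor.withFieldNames("Key").resolve(keyValue);
        System.out.println(single.referencesSingleField());  // true
        System.out.println(single.fieldNamesAccessed());     // [Key]

        // A wildcard descriptor accesses all fields instead.
        FieldAccessDescriptor all = FieldAccessDescriptor.withAllFields();
        System.out.println(all.getAllFields());               // true
    }
}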

Example 38 with PCollection

Use of org.apache.beam.sdk.values.PCollection in the apache/beam project.

The class PubsubMessageToRowTest, method testSendsFlatRowInvalidToDLQ:

@Test
public void testSendsFlatRowInvalidToDLQ() {
    Schema messageSchema =
        Schema.builder().addDateTimeField("event_timestamp").addInt32Field("id").addStringField("name").build();
    // Two of the four payloads below are invalid JSON; with useDlq(true) they go to the DLQ output.
    PCollectionTuple outputs = pipeline
        .apply("create", Create.timestamped(
            message(1, map("attr1", "val1"), "{ \"invalid1\" : \"sdfsd\" }"),
            message(2, map("attr2", "val2"), "{ \"invalid2"),
            message(3, map("attr", "val"), "{ \"id\" : 3, \"name\" : \"foo\" }"),
            message(4, map("bttr", "vbl"), "{ \"name\" : \"baz\", \"id\" : 5 }")))
        .apply("convert", PubsubMessageToRow.builder()
            .messageSchema(messageSchema)
            .useDlq(true)
            .useFlatSchema(true)
            .serializerProvider(JSON_SERIALIZER_PROVIDER)
            .build());
    PCollection<Row> rows = outputs.get(MAIN_TAG);
    PCollection<PubsubMessage> dlqMessages = outputs.get(DLQ_TAG);

    // The two unparseable messages land on the dead-letter output with attributes and payload intact.
    PAssert.that(dlqMessages).satisfies(messages -> {
        assertEquals(2, size(messages));
        assertEquals(ImmutableSet.of(map("attr1", "val1"), map("attr2", "val2")),
            convertToSet(messages, PubsubMessage::getAttributeMap));
        assertEquals(ImmutableSet.of("{ \"invalid1\" : \"sdfsd\" }", "{ \"invalid2"),
            convertToSet(messages, m -> new String(m.getPayload(), UTF_8)));
        return null;
    });

    // The two valid messages are converted to flat rows on the main output.
    PAssert.that(rows).containsInAnyOrder(
        Row.withSchema(messageSchema).addValues(ts(3), /* map("attr", "val"), */ 3, "foo").build(),
        Row.withSchema(messageSchema).addValues(ts(4), /* map("bttr", "vbl"), */ 5, "baz").build());
    pipeline.run();
}
Also used : ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MAIN_TAG(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.MAIN_TAG) Function(java.util.function.Function) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) PayloadSerializers(org.apache.beam.sdk.schemas.io.payloads.PayloadSerializers) Iterables.size(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables.size) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) DLQ_TAG(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.DLQ_TAG) StreamSupport(java.util.stream.StreamSupport) Row(org.apache.beam.sdk.values.Row) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Collectors.toSet(java.util.stream.Collectors.toSet) PAssert(org.apache.beam.sdk.testing.PAssert) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DateTime(org.joda.time.DateTime) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) SerializerProvider(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.SerializerProvider) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Serializable(java.io.Serializable) List(java.util.List) Rule(org.junit.Rule) ATTRIBUTE_ARRAY_ENTRY_SCHEMA(org.apache.beam.sdk.io.gcp.pubsub.PubsubSchemaIOProvider.ATTRIBUTE_ARRAY_ENTRY_SCHEMA) Instant(org.joda.time.Instant) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Assert(org.junit.Assert) Assert.assertEquals(org.junit.Assert.assertEquals)
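The snippet relies on private helpers (map, ts, message, convertToSet) and a JSON_SERIALIZER_PROVIDER constant defined elsewhere in PubsubMessageToRowTest. The following is a hedged reconstruction of the small helpers, inferred from the imports listed above (including the static imports of UTF_8 and toSet); the bodies are assumptions and the real ones may differ:

// Hypothetical reconstructions of the test helpers (assumptions, not part of the original test).
private static Map<String, String> map(String key, String value) {
    return ImmutableMap.of(key, value);
}

private static Instant ts(long seconds) {
    // Joda Instant; a DATETIME row field accepts it directly.
    return new Instant(seconds * 1000L);
}

private static TimestampedValue<PubsubMessage> message(int id, Map<String, String> attributes, String payload) {
    return TimestampedValue.of(new PubsubMessage(payload.getBytes(UTF_8), attributes), ts(id));
}

private static <V> Set<V> convertToSet(Iterable<PubsubMessage> messages, Function<PubsubMessage, V> mapper) {
    return StreamSupport.stream(messages.spliterator(), false).map(mapper).collect(toSet());
}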

Example 39 with PCollection

Use of org.apache.beam.sdk.values.PCollection in the apache/beam project.

The class PubsubReadIT, method testReadPublicData:

@Test
public void testReadPublicData() throws Exception {
    // The pipeline will never terminate on its own.
    pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);

    PCollection<String> messages =
        pipeline.apply(PubsubIO.readStrings().fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));

    // Signal success as soon as any message arrives.
    messages.apply("waitForAnyMessage", signal.signalSuccessWhen(messages.getCoder(), anyMessages -> true));

    // signalStart() publishes a start signal once the pipeline is running; start.get() blocks on it.
    Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
    pipeline.apply(signal.signalStart());
    PipelineResult job = pipeline.run();
    start.get();
    signal.waitForSuccess(Duration.standardMinutes(5));

    // A runner may not support cancel.
    try {
        job.cancel();
    } catch (UnsupportedOperationException exc) {
        // noop
    }
}
Also used : TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) PipelineResult(org.apache.beam.sdk.PipelineResult) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Set(java.util.Set) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Supplier(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier) Rule(org.junit.Rule) Strings(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings) TestPipeline(org.apache.beam.sdk.testing.TestPipeline)
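The signal and pipeline fields used above are JUnit rules declared elsewhere in PubsubReadIT. A plausible sketch of those declarations, assuming Beam's TestPubsubSignal (org.apache.beam.sdk.io.gcp.pubsub) and TestPipeline test utilities; the actual field setup in the test class may differ:

// Assumed rule declarations inside the test class; TestPubsubSignal provisions temporary
// Pub/Sub resources used to signal "start" and "success" from inside the running pipeline.
@Rule public transient TestPubsubSignal signal = TestPubsubSignal.create();
@Rule public transient TestPipeline pipeline = TestPipeline.create();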

Example 40 with PCollection

Use of org.apache.beam.sdk.values.PCollection in the apache/beam project.

The class StreamingSourceMetricsTest, method testUnboundedSourceMetrics:

@Test
@Category(StreamingTest.class)
public void testUnboundedSourceMetrics() {
    final long minElements = 1000;
    // Use a GenerateSequence as the unbounded source, but push the watermark to infinity at
    // minElements so the test pipeline can shut down cleanly. Shutdown occurs shortly
    // afterwards, but at least minElements will have been reported in the metrics.
    PCollection<Long> pc = pipeline.apply(
        GenerateSequence.from(1)
            .withRate(minElements / 10, Duration.millis(500L))
            .withTimestampFn(t -> t < minElements ? Instant.now() : BoundedWindow.TIMESTAMP_MAX_VALUE));
    assertThat(pc.isBounded(), is(PCollection.IsBounded.UNBOUNDED));

    PipelineResult pipelineResult = pipeline.run();

    // Query the elements-read counter and check that it reached at least minElements.
    MetricQueryResults metrics = pipelineResult.metrics().queryMetrics(
        MetricsFilter.builder()
            .addNameFilter(MetricNameFilter.named(ELEMENTS_READ.getNamespace(), ELEMENTS_READ.getName()))
            .build());
    assertThat(metrics.getCounters(), hasItem(metricsResult(
        ELEMENTS_READ.getNamespace(),
        ELEMENTS_READ.getName(),
        "GenerateSequence/Read(UnboundedCountingSource)",
        greaterThanOrEqualTo(minElements),
        false)));
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) MetricNameFilter(org.apache.beam.sdk.metrics.MetricNameFilter) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) SourceMetrics(org.apache.beam.sdk.metrics.SourceMetrics) MetricResultsMatchers.metricsResult(org.apache.beam.sdk.metrics.MetricResultsMatchers.metricsResult) StreamingTest(org.apache.beam.runners.spark.StreamingTest) PipelineResult(org.apache.beam.sdk.PipelineResult) Duration(org.joda.time.Duration) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) Serializable(java.io.Serializable) MetricsFilter(org.apache.beam.sdk.metrics.MetricsFilter) Source(org.apache.beam.sdk.io.Source) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Rule(org.junit.Rule) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) Matchers.is(org.hamcrest.Matchers.is) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat)
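ELEMENTS_READ is a constant defined elsewhere in StreamingSourceMetricsTest. Given the SourceMetrics and MetricName imports above, it plausibly refers to Beam's built-in elements-read source counter, along these lines (an assumption, not shown in the snippet):

// Assumed definition: the MetricName of Beam's built-in "elements read" source counter.
private static final MetricName ELEMENTS_READ = SourceMetrics.elementsRead().getName();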

Aggregations

Classes most often used together with PCollection in these examples (by usage count):

PCollection (org.apache.beam.sdk.values.PCollection) 198
Test (org.junit.Test) 133
TestPipeline (org.apache.beam.sdk.testing.TestPipeline) 61
KV (org.apache.beam.sdk.values.KV) 61
Map (java.util.Map) 59
List (java.util.List) 58
Rule (org.junit.Rule) 57
RunWith (org.junit.runner.RunWith) 54
PAssert (org.apache.beam.sdk.testing.PAssert) 52
Instant (org.joda.time.Instant) 46
Duration (org.joda.time.Duration) 45
JUnit4 (org.junit.runners.JUnit4) 45
ParDo (org.apache.beam.sdk.transforms.ParDo) 44
TupleTag (org.apache.beam.sdk.values.TupleTag) 42
Pipeline (org.apache.beam.sdk.Pipeline) 41
Create (org.apache.beam.sdk.transforms.Create) 41
ArrayList (java.util.ArrayList) 40
Serializable (java.io.Serializable) 39
PTransform (org.apache.beam.sdk.transforms.PTransform) 37
Row (org.apache.beam.sdk.values.Row) 37