Search in sources :

Example 1 with SerializerProvider

Use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.SerializerProvider in the Apache Beam project.

From the class PubsubMessageToRowTest, method testSendsFlatRowInvalidToDLQ.

/**
 * Runs four Pub/Sub messages through {@code PubsubMessageToRow} configured with a flat schema and
 * the dead-letter queue enabled. The test expects the two messages with unusable JSON payloads on
 * the DLQ output (attributes and payload bytes intact) and the two well-formed messages as rows
 * on the main output.
 */
@Test
public void testSendsFlatRowInvalidToDLQ() {
    Schema messageSchema =
        Schema.builder()
            .addDateTimeField("event_timestamp")
            .addInt32Field("id")
            .addStringField("name")
            .build();

    // The two payloads that are expected to end up on the dead-letter output.
    String unknownFieldPayload = "{ \"invalid1\" : \"sdfsd\" }";
    String truncatedPayload = "{ \"invalid2";

    PCollectionTuple outputs =
        pipeline
            .apply(
                "create",
                Create.timestamped(
                    message(1, map("attr1", "val1"), unknownFieldPayload),
                    message(2, map("attr2", "val2"), truncatedPayload),
                    message(3, map("attr", "val"), "{ \"id\" : 3, \"name\" : \"foo\" }"),
                    message(4, map("bttr", "vbl"), "{ \"name\" : \"baz\", \"id\" : 5 }")))
            .apply(
                "convert",
                PubsubMessageToRow.builder()
                    .messageSchema(messageSchema)
                    .useDlq(true)
                    .useFlatSchema(true)
                    .serializerProvider(JSON_SERIALIZER_PROVIDER)
                    .build());

    PCollection<Row> rows = outputs.get(MAIN_TAG);
    PCollection<PubsubMessage> dlqMessages = outputs.get(DLQ_TAG);

    // Rejected messages must arrive on the DLQ with their attributes and payload unchanged.
    PAssert.that(dlqMessages)
        .satisfies(
            rejected -> {
                assertEquals(2, size(rejected));
                assertEquals(
                    ImmutableSet.of(map("attr1", "val1"), map("attr2", "val2")),
                    convertToSet(rejected, PubsubMessage::getAttributeMap));
                assertEquals(
                    ImmutableSet.of(unknownFieldPayload, truncatedPayload),
                    convertToSet(rejected, msg -> new String(msg.getPayload(), UTF_8)));
                return null;
            });

    // The schema declared above has no attributes field, so the expected rows carry only the
    // timestamp followed by the "id" and "name" values.
    Row expectedFoo = Row.withSchema(messageSchema).addValues(ts(3), 3, "foo").build();
    Row expectedBaz = Row.withSchema(messageSchema).addValues(ts(4), 5, "baz").build();
    PAssert.that(rows).containsInAnyOrder(expectedFoo, expectedBaz);

    pipeline.run();
}
Also used : ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MAIN_TAG(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.MAIN_TAG) Function(java.util.function.Function) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) PayloadSerializers(org.apache.beam.sdk.schemas.io.payloads.PayloadSerializers) Iterables.size(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables.size) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) DLQ_TAG(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.DLQ_TAG) StreamSupport(java.util.stream.StreamSupport) Row(org.apache.beam.sdk.values.Row) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Collectors.toSet(java.util.stream.Collectors.toSet) PAssert(org.apache.beam.sdk.testing.PAssert) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DateTime(org.joda.time.DateTime) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) SerializerProvider(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.SerializerProvider) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Serializable(java.io.Serializable) List(java.util.List) Rule(org.junit.Rule) ATTRIBUTE_ARRAY_ENTRY_SCHEMA(org.apache.beam.sdk.io.gcp.pubsub.PubsubSchemaIOProvider.ATTRIBUTE_ARRAY_ENTRY_SCHEMA) Instant(org.joda.time.Instant) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Assert(org.junit.Assert) Assert.assertEquals(org.junit.Assert.assertEquals) Schema(org.apache.beam.sdk.schemas.Schema) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 2 with SerializerProvider

Use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.SerializerProvider in the Apache Beam project.

From the class PubsubMessageToRowTest, method testSendsInvalidToDLQ.

/**
 * Runs four Pub/Sub messages through {@code PubsubMessageToRow} configured with a nested schema
 * (timestamp, attributes map, payload row) and the dead-letter queue enabled. The test expects
 * the two messages with unusable JSON payloads on the DLQ output (attributes and payload bytes
 * intact) and the two well-formed messages as rows on the main output.
 */
@Test
public void testSendsInvalidToDLQ() {
    Schema payloadSchema = Schema.builder().addInt32Field("id").addStringField("name").build();
    Schema messageSchema =
        Schema.builder()
            .addDateTimeField("event_timestamp")
            .addMapField("attributes", FieldType.STRING, FieldType.STRING)
            .addRowField("payload", payloadSchema)
            .build();

    // The two payloads that are expected to end up on the dead-letter output.
    String unknownFieldPayload = "{ \"invalid1\" : \"sdfsd\" }";
    String truncatedPayload = "{ \"invalid2";

    PCollectionTuple outputs =
        pipeline
            .apply(
                "create",
                Create.timestamped(
                    message(1, map("attr1", "val1"), unknownFieldPayload),
                    message(2, map("attr2", "val2"), truncatedPayload),
                    message(3, map("attr", "val"), "{ \"id\" : 3, \"name\" : \"foo\" }"),
                    message(4, map("bttr", "vbl"), "{ \"name\" : \"baz\", \"id\" : 5 }")))
            .apply(
                "convert",
                PubsubMessageToRow.builder()
                    .messageSchema(messageSchema)
                    .useDlq(true)
                    .useFlatSchema(false)
                    .serializerProvider(JSON_SERIALIZER_PROVIDER)
                    .build());

    PCollection<Row> rows = outputs.get(MAIN_TAG);
    PCollection<PubsubMessage> dlqMessages = outputs.get(DLQ_TAG);

    // Rejected messages must arrive on the DLQ with their attributes and payload unchanged.
    PAssert.that(dlqMessages)
        .satisfies(
            rejected -> {
                assertEquals(2, size(rejected));
                assertEquals(
                    ImmutableSet.of(map("attr1", "val1"), map("attr2", "val2")),
                    convertToSet(rejected, PubsubMessage::getAttributeMap));
                assertEquals(
                    ImmutableSet.of(unknownFieldPayload, truncatedPayload),
                    convertToSet(rejected, msg -> new String(msg.getPayload(), UTF_8)));
                return null;
            });

    // Each expected row holds the timestamp, the attribute map, and the payload as a nested row.
    Row expectedFoo =
        Row.withSchema(messageSchema)
            .addValues(ts(3), map("attr", "val"), row(payloadSchema, 3, "foo"))
            .build();
    Row expectedBaz =
        Row.withSchema(messageSchema)
            .addValues(ts(4), map("bttr", "vbl"), row(payloadSchema, 5, "baz"))
            .build();
    PAssert.that(rows).containsInAnyOrder(expectedFoo, expectedBaz);

    pipeline.run();
}
Also used : ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MAIN_TAG(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.MAIN_TAG) Function(java.util.function.Function) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) PayloadSerializers(org.apache.beam.sdk.schemas.io.payloads.PayloadSerializers) Iterables.size(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables.size) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) DLQ_TAG(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.DLQ_TAG) StreamSupport(java.util.stream.StreamSupport) Row(org.apache.beam.sdk.values.Row) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Collectors.toSet(java.util.stream.Collectors.toSet) PAssert(org.apache.beam.sdk.testing.PAssert) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DateTime(org.joda.time.DateTime) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) SerializerProvider(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.SerializerProvider) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Serializable(java.io.Serializable) List(java.util.List) Rule(org.junit.Rule) ATTRIBUTE_ARRAY_ENTRY_SCHEMA(org.apache.beam.sdk.io.gcp.pubsub.PubsubSchemaIOProvider.ATTRIBUTE_ARRAY_ENTRY_SCHEMA) Instant(org.joda.time.Instant) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Assert(org.junit.Assert) Assert.assertEquals(org.junit.Assert.assertEquals) Schema(org.apache.beam.sdk.schemas.Schema) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

Serializable (java.io.Serializable)2 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 Function (java.util.function.Function)2 Collectors.toSet (java.util.stream.Collectors.toSet)2 StreamSupport (java.util.stream.StreamSupport)2 DLQ_TAG (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.DLQ_TAG)2 MAIN_TAG (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.MAIN_TAG)2 SerializerProvider (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.SerializerProvider)2 ATTRIBUTE_ARRAY_ENTRY_SCHEMA (org.apache.beam.sdk.io.gcp.pubsub.PubsubSchemaIOProvider.ATTRIBUTE_ARRAY_ENTRY_SCHEMA)2 Schema (org.apache.beam.sdk.schemas.Schema)2 FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)2 PayloadSerializers (org.apache.beam.sdk.schemas.io.payloads.PayloadSerializers)2 PAssert (org.apache.beam.sdk.testing.PAssert)2 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)2 Create (org.apache.beam.sdk.transforms.Create)2 PCollection (org.apache.beam.sdk.values.PCollection)2 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)2