Search in sources:

Example 1 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class SqlBoundedSideInputJoin method expand.

/**
 * Joins the bid events with the configured side input through a SQL query.
 *
 * <p>Events are filtered with {@code NexmarkQueryUtil.IS_BID} and converted to rows by
 * {@link SelectEvent}; the side input is given a two-field schema ({@code id} INT64,
 * {@code extra} STRING) and converted to rows. Both row collections are handed to
 * {@link SqlTransform} under the tags {@code "bid"} and {@code "side"}, and the query result is
 * converted back to {@link Bid}.
 *
 * @param events the input events
 * @return the query result converted to {@link Bid} elements
 * @throws IllegalStateException if no side input has been set
 */
@Override
public PCollection<Bid> expand(PCollection<Event> events) {
    PCollection<Event> bidEvents = events.apply(Filter.by(NexmarkQueryUtil.IS_BID));
    PCollection<Row> bidRows = bidEvents.apply(getName() + ".SelectEvent", new SelectEvent(Event.Type.BID));

    checkState(getSideInput() != null, "Configuration error: side input is null");

    // Anonymous subclasses so the tags retain their element type information.
    TupleTag<Row> sideTag = new TupleTag<Row>("side") {
    };
    TupleTag<Row> bidTag = new TupleTag<Row>("bid") {
    };

    // Row layout used for the KV<Long, String> side input: key -> "id", value -> "extra".
    Schema.Field idField = Schema.Field.of("id", Schema.FieldType.INT64);
    Schema.Field extraField = Schema.Field.of("extra", Schema.FieldType.STRING);
    Schema sideSchema = Schema.of(idField, extraField);

    PCollection<Row> sideRows =
        getSideInput()
            .setSchema(
                sideSchema,
                TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.strings()),
                entry -> Row.withSchema(sideSchema).addValues(entry.getKey(), entry.getValue()).build(),
                r -> KV.of(r.getInt64("id"), r.getString("extra")))
            .apply("SideToRows", Convert.toRows());

    PCollectionTuple joinInputs = PCollectionTuple.of(bidTag, bidRows).and(sideTag, sideRows);
    // The query template takes the configured side input row count as its format argument.
    String sql = String.format(query, configuration.sideInputRowCount);
    PCollection<Row> joined = joinInputs.apply(SqlTransform.query(sql).withQueryPlannerClass(plannerClass));
    return joined.apply("ResultToBid", Convert.fromRows(Bid.class));
}
Also used : NexmarkConfiguration(org.apache.beam.sdk.nexmark.NexmarkConfiguration) KV(org.apache.beam.sdk.values.KV) QueryPlanner(org.apache.beam.sdk.extensions.sql.impl.QueryPlanner) ZetaSQLQueryPlanner(org.apache.beam.sdk.extensions.sql.zetasql.ZetaSQLQueryPlanner) Bid(org.apache.beam.sdk.nexmark.model.Bid) SelectEvent(org.apache.beam.sdk.nexmark.model.sql.SelectEvent) PCollection(org.apache.beam.sdk.values.PCollection) SqlTransform(org.apache.beam.sdk.extensions.sql.SqlTransform) Schema(org.apache.beam.sdk.schemas.Schema) Convert(org.apache.beam.sdk.schemas.transforms.Convert) Filter(org.apache.beam.sdk.transforms.Filter) CalciteQueryPlanner(org.apache.beam.sdk.extensions.sql.impl.CalciteQueryPlanner) Event(org.apache.beam.sdk.nexmark.model.Event) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) TupleTag(org.apache.beam.sdk.values.TupleTag) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) NexmarkQueryTransform(org.apache.beam.sdk.nexmark.queries.NexmarkQueryTransform) NexmarkQueryUtil(org.apache.beam.sdk.nexmark.queries.NexmarkQueryUtil) Row(org.apache.beam.sdk.values.Row) Schema(org.apache.beam.sdk.schemas.Schema) TupleTag(org.apache.beam.sdk.values.TupleTag) Row(org.apache.beam.sdk.values.Row) Bid(org.apache.beam.sdk.nexmark.model.Bid) SelectEvent(org.apache.beam.sdk.nexmark.model.sql.SelectEvent)

Example 2 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class BoundedSideInputJoinTest method queryMatchesModel.

/**
 * Runs {@code query} over a generated event source and checks its output against the assertion
 * supplied by {@code model}.
 *
 * <p>{@code config.sideInputUrl} is pointed at a randomly suffixed
 * {@code BoundedSideInputJoin-*} resource under the pipeline's temp location before the side
 * input is prepared; {@code NexmarkUtils.cleanUpSideInput} is always invoked afterwards.
 *
 * @param name prefix for the name of the read step
 * @param config configuration used for the side input, the event source and the query
 * @param query the query transform under test
 * @param model the model providing the assertion applied to the results
 * @param streamingMode {@code true} to read from the streaming source, {@code false} for batch
 */
private <T extends KnownSize> void queryMatchesModel(String name, NexmarkConfiguration config, NexmarkQueryTransform<T> query, NexmarkQueryModel<T> model, boolean streamingMode) throws Exception {
    String sideInputPath = String.format("%s/BoundedSideInputJoin-%s", p.getOptions().getTempLocation(), new Random().nextInt());
    ResourceId sideInputResource = FileSystems.matchNewResource(sideInputPath, false);
    config.sideInputUrl = sideInputResource.toString();
    try {
        query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

        PCollection<Event> events;
        if (streamingMode) {
            events = p.apply(name + ".Read", NexmarkUtils.streamEventsSource(config));
        } else {
            events = p.apply(name + ".Read", NexmarkUtils.batchEventsSource(config));
        }

        PCollection<TimestampedValue<T>> results = (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));
        PAssert.that(results).satisfies(model.assertionFor());

        p.run().waitUntilFinish();
    } finally {
        NexmarkUtils.cleanUpSideInput(config);
    }
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) Random(java.util.Random) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) Event(org.apache.beam.sdk.nexmark.model.Event) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV)

Example 3 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class BoundedSideInputJoinTest method inputOutputSameEvents.

/**
 * A smoke test that the count of input bids and outputs are the same, to help diagnose flakiness
 * in more complex tests.
 *
 * <p>Uses a {@code DIRECT} side input with 10 rows in 3 shards and 5000 events from a single
 * generator. The bid count and the query output count are flattened together and asserted to be
 * two equal, positive values.
 */
@Test
@Category(NeedsRunner.class)
public void inputOutputSameEvents() throws Exception {
    NexmarkConfiguration config = NexmarkConfiguration.DEFAULT.copy();
    config.sideInputType = NexmarkUtils.SideInputType.DIRECT;
    config.numEventGenerators = 1;
    config.numEvents = 5000;
    config.sideInputRowCount = 10;
    config.sideInputNumShards = 3;
    try {
        // Prepared inside the try so that cleanup also runs if preparation itself fails.
        PCollection<KV<Long, String>> sideInput = NexmarkUtils.prepareSideInput(p, config);
        PCollection<Event> input = p.apply(NexmarkUtils.batchEventsSource(config));
        PCollection<Bid> justBids = input.apply(NexmarkQueryUtil.JUST_BIDS);
        PCollection<Long> bidCount = justBids.apply("Count Bids", Count.globally());
        NexmarkQueryTransform<Bid> query = new BoundedSideInputJoin(config);
        query.setSideInput(sideInput);
        // Diamond instead of a raw NexmarkQuery so the element type is checked by the compiler.
        PCollection<TimestampedValue<Bid>> output = (PCollection<TimestampedValue<Bid>>) input.apply(new NexmarkQuery<>(config, query));
        PCollection<Long> outputCount = output.apply("Count outputs", Count.globally());
        PAssert.that(PCollectionList.of(bidCount).and(outputCount).apply(Flatten.pCollections())).satisfies(counts -> {
            assertThat(Iterables.size(counts), equalTo(2));
            assertThat(Iterables.get(counts, 0), greaterThan(0L));
            assertThat(Iterables.get(counts, 0), equalTo(Iterables.get(counts, 1)));
            return null;
        });
        // Wait for completion before the side input is cleaned up in the finally block.
        p.run().waitUntilFinish();
    } finally {
        NexmarkUtils.cleanUpSideInput(config);
    }
}
Also used : KV(org.apache.beam.sdk.values.KV) PCollection(org.apache.beam.sdk.values.PCollection) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) NexmarkConfiguration(org.apache.beam.sdk.nexmark.NexmarkConfiguration) Event(org.apache.beam.sdk.nexmark.model.Event) Bid(org.apache.beam.sdk.nexmark.model.Bid) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 4 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class SessionSideInputJoinTest method queryMatchesModel.

/**
 * Runs {@code query} over a generated event source and checks its output against the assertion
 * supplied by {@code model}.
 *
 * <p>{@code config.sideInputUrl} is pointed at a randomly suffixed
 * {@code SessionSideInputJoin-*} resource under the pipeline's temp location before the side
 * input is prepared; {@code NexmarkUtils.cleanUpSideInput} is always invoked afterwards.
 *
 * @param name prefix for the name of the read step
 * @param config configuration used for the side input, the event source and the query
 * @param query the query transform under test
 * @param model the model providing the assertion applied to the results
 * @param streamingMode {@code true} to read from the streaming source, {@code false} for batch
 */
private <T extends KnownSize> void queryMatchesModel(String name, NexmarkConfiguration config, NexmarkQueryTransform<T> query, NexmarkQueryModel<T> model, boolean streamingMode) throws Exception {
    String sideInputPath = String.format("%s/SessionSideInputJoin-%s", p.getOptions().getTempLocation(), new Random().nextInt());
    ResourceId sideInputResource = FileSystems.matchNewResource(sideInputPath, false);
    config.sideInputUrl = sideInputResource.toString();
    try {
        query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

        PCollection<Event> events;
        if (streamingMode) {
            events = p.apply(name + ".Read", NexmarkUtils.streamEventsSource(config));
        } else {
            events = p.apply(name + ".Read", NexmarkUtils.batchEventsSource(config));
        }

        PCollection<TimestampedValue<T>> results = (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));
        PAssert.that(results).satisfies(model.assertionFor());

        p.run().waitUntilFinish();
    } finally {
        NexmarkUtils.cleanUpSideInput(config);
    }
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) Random(java.util.Random) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) Event(org.apache.beam.sdk.nexmark.model.Event) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV)

Example 5 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class SqlQuery1Test method testConvertsPriceToEur.

/**
 * Feeds two USD bid events through {@code SqlQuery1} and asserts that the output contains
 * exactly {@code BID1_EUR} and {@code BID2_EUR}, in any order.
 */
@Test
public void testConvertsPriceToEur() throws Exception {
    SchemaRegistry schemas = SchemaRegistry.createDefault();

    // Test stream of Event elements, using the schema and row conversions from the default registry.
    TestStream<Event> usdBids =
        TestStream.create(
                schemas.getSchema(Event.class),
                TypeDescriptor.of(Event.class),
                schemas.getToRowFunction(Event.class),
                schemas.getFromRowFunction(Event.class))
            .addElements(new Event(BID1_USD))
            .addElements(new Event(BID2_USD))
            .advanceWatermarkToInfinity();

    PCollection<Event> bidEvents = testPipeline.apply(usdBids);
    PAssert.that(bidEvents.apply(new SqlQuery1())).containsInAnyOrder(BID1_EUR, BID2_EUR);
    testPipeline.run();
}
Also used : Event(org.apache.beam.sdk.nexmark.model.Event) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) Test(org.junit.Test)

Aggregations

Event (org.apache.beam.sdk.nexmark.model.Event)17 KV (org.apache.beam.sdk.values.KV)8 PCollection (org.apache.beam.sdk.values.PCollection)8 TimestampedValue (org.apache.beam.sdk.values.TimestampedValue)6 Random (java.util.Random)5 DoFn (org.apache.beam.sdk.transforms.DoFn)4 Test (org.junit.Test)4 PipelineResult (org.apache.beam.sdk.PipelineResult)3 Counter (org.apache.beam.sdk.metrics.Counter)3 NexmarkConfiguration (org.apache.beam.sdk.nexmark.NexmarkConfiguration)3 Bid (org.apache.beam.sdk.nexmark.model.Bid)3 Pipeline (org.apache.beam.sdk.Pipeline)2 ResourceId (org.apache.beam.sdk.io.fs.ResourceId)2 NameCityStateId (org.apache.beam.sdk.nexmark.model.NameCityStateId)2 SelectEvent (org.apache.beam.sdk.nexmark.model.sql.SelectEvent)2 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)2 Row (org.apache.beam.sdk.values.Row)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2 Nullable (org.checkerframework.checker.nullness.qual.Nullable)2 Category (org.junit.experimental.categories.Category)2