use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class SqlBoundedSideInputJoin method expand.
/**
 * Joins the bid events found in {@code events} against the configured side input via SQL.
 *
 * <p>Bid events are filtered out and converted to rows. The side input is given an
 * {@code (id, extra)} schema and converted to rows as well. Both row collections are passed to
 * the SQL query under the tags {@code "bid"} and {@code "side"}, and the resulting rows are
 * converted back to {@link Bid}.
 *
 * @param events the input events
 * @return the bids produced by the SQL query
 */
@Override
public PCollection<Bid> expand(PCollection<Event> events) {
  PCollection<Row> bidRows =
      events
          .apply(Filter.by(NexmarkQueryUtil.IS_BID))
          .apply(getName() + ".SelectEvent", new SelectEvent(Event.Type.BID));

  // The join cannot be built without a side input; fail with a configuration error.
  checkState(getSideInput() != null, "Configuration error: side input is null");

  // Anonymous subclasses, exactly as in the tags used for the tuple below.
  TupleTag<Row> sideTag = new TupleTag<Row>("side") {};
  TupleTag<Row> bidTag = new TupleTag<Row>("bid") {};

  // Row layout of one side input entry: the key as "id", the value as "extra".
  Schema sideSchema =
      Schema.of(
          Schema.Field.of("id", Schema.FieldType.INT64),
          Schema.Field.of("extra", Schema.FieldType.STRING));

  PCollection<Row> sideRows =
      getSideInput()
          .setSchema(
              sideSchema,
              TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.strings()),
              entry ->
                  Row.withSchema(sideSchema)
                      .addValues(entry.getKey(), entry.getValue())
                      .build(),
              r -> KV.of(r.getInt64("id"), r.getString("extra")))
          .apply("SideToRows", Convert.toRows());

  PCollectionTuple joinInputs = PCollectionTuple.of(bidTag, bidRows).and(sideTag, sideRows);

  // The query text is a format string parameterized by the side input row count.
  String sql = String.format(query, configuration.sideInputRowCount);

  PCollection<Row> joinedRows =
      joinInputs.apply(SqlTransform.query(sql).withQueryPlannerClass(plannerClass));
  return joinedRows.apply("ResultToBid", Convert.fromRows(Bid.class));
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class BoundedSideInputJoinTest method queryMatchesModel.
/**
 * Runs {@code query} over generated events and checks its results with the assertion supplied by
 * {@code model}.
 *
 * <p>The side input URL is a randomly suffixed path under the pipeline's temp location and is
 * written to {@code config.sideInputUrl}. {@code NexmarkUtils.cleanUpSideInput} is invoked
 * whether or not the pipeline succeeds.
 *
 * @param name prefix for the name of the read step
 * @param config Nexmark configuration; its {@code sideInputUrl} is overwritten
 * @param query the query transform under test
 * @param model supplies the assertion applied to the query results
 * @param streamingMode {@code true} to read from the streaming events source, {@code false} to
 *     read from the batch events source
 * @throws Exception if preparing the side input or running the pipeline fails
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {
  String tempLocation = p.getOptions().getTempLocation();
  int randomSuffix = new Random().nextInt();
  String sideInputPath =
      String.format("%s/BoundedSideInputJoin-%s", tempLocation, randomSuffix);
  config.sideInputUrl = FileSystems.matchNewResource(sideInputPath, false).toString();

  try {
    query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

    String readStepName = name + ".Read";
    PCollection<Event> events;
    if (streamingMode) {
      events = p.apply(readStepName, NexmarkUtils.streamEventsSource(config));
    } else {
      events = p.apply(readStepName, NexmarkUtils.batchEventsSource(config));
    }

    PCollection<TimestampedValue<T>> queryOutput =
        (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));
    PAssert.that(queryOutput).satisfies(model.assertionFor());

    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class BoundedSideInputJoinTest method inputOutputSameEvents.
/**
 * A smoke test that the count of input bids and outputs are the same, to help diagnose flakiness
 * in more complex tests.
 *
 * <p>Both counts are flattened into a single collection, which must hold exactly two equal,
 * strictly positive values.
 *
 * @throws Exception if preparing the side input or running the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void inputOutputSameEvents() throws Exception {
  NexmarkConfiguration config = NexmarkConfiguration.DEFAULT.copy();
  config.sideInputType = NexmarkUtils.SideInputType.DIRECT;
  config.numEventGenerators = 1;
  config.numEvents = 5000;
  config.sideInputRowCount = 10;
  config.sideInputNumShards = 3;

  try {
    // Prepared inside the try so the cleanup in the finally block also runs when preparation
    // itself fails.
    PCollection<KV<Long, String>> sideInput = NexmarkUtils.prepareSideInput(p, config);

    PCollection<Event> input = p.apply(NexmarkUtils.batchEventsSource(config));
    PCollection<Bid> justBids = input.apply(NexmarkQueryUtil.JUST_BIDS);
    PCollection<Long> bidCount = justBids.apply("Count Bids", Count.globally());

    NexmarkQueryTransform<Bid> query = new BoundedSideInputJoin(config);
    query.setSideInput(sideInput);

    // Diamond instead of the raw NexmarkQuery type, so the element type is inferred from query.
    PCollection<TimestampedValue<Bid>> output =
        (PCollection<TimestampedValue<Bid>>) input.apply(new NexmarkQuery<>(config, query));
    PCollection<Long> outputCount = output.apply("Count outputs", Count.globally());

    PAssert.that(PCollectionList.of(bidCount).and(outputCount).apply(Flatten.pCollections()))
        .satisfies(
            counts -> {
              assertThat(Iterables.size(counts), equalTo(2));
              assertThat(Iterables.get(counts, 0), greaterThan(0L));
              assertThat(Iterables.get(counts, 0), equalTo(Iterables.get(counts, 1)));
              return null;
            });

    // Wait for the pipeline to finish before the finally block cleans up the side input.
    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class SessionSideInputJoinTest method queryMatchesModel.
/**
 * Runs {@code query} over generated events and checks its results with the assertion supplied by
 * {@code model}.
 *
 * <p>The side input URL is a randomly suffixed path under the pipeline's temp location and is
 * written to {@code config.sideInputUrl}. {@code NexmarkUtils.cleanUpSideInput} is invoked
 * whether or not the pipeline succeeds.
 *
 * @param name prefix for the name of the read step
 * @param config Nexmark configuration; its {@code sideInputUrl} is overwritten
 * @param query the query transform under test
 * @param model supplies the assertion applied to the query results
 * @param streamingMode {@code true} to read from the streaming events source, {@code false} to
 *     read from the batch events source
 * @throws Exception if preparing the side input or running the pipeline fails
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {
  String tempLocation = p.getOptions().getTempLocation();
  int randomSuffix = new Random().nextInt();
  String sideInputPath =
      String.format("%s/SessionSideInputJoin-%s", tempLocation, randomSuffix);
  config.sideInputUrl = FileSystems.matchNewResource(sideInputPath, false).toString();

  try {
    query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

    String readStepName = name + ".Read";
    PCollection<Event> events;
    if (streamingMode) {
      events = p.apply(readStepName, NexmarkUtils.streamEventsSource(config));
    } else {
      events = p.apply(readStepName, NexmarkUtils.batchEventsSource(config));
    }

    PCollection<TimestampedValue<T>> queryOutput =
        (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));
    PAssert.that(queryOutput).satisfies(model.assertionFor());

    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class SqlQuery1Test method testConvertsPriceToEur.
/**
 * Feeds two events wrapping {@code BID1_USD} and {@code BID2_USD} through {@link SqlQuery1} and
 * expects the output to contain exactly {@code BID1_EUR} and {@code BID2_EUR}, in any order.
 *
 * @throws Exception if the schema lookup for {@code Event} or the pipeline run fails
 */
@Test
public void testConvertsPriceToEur() throws Exception {
  SchemaRegistry schemas = SchemaRegistry.createDefault();

  // A test stream of Event elements, using the schema and row conversions from the registry.
  TestStream<Event> bidEvents =
      TestStream.create(
              schemas.getSchema(Event.class),
              TypeDescriptor.of(Event.class),
              schemas.getToRowFunction(Event.class),
              schemas.getFromRowFunction(Event.class))
          .addElements(new Event(BID1_USD))
          .addElements(new Event(BID2_USD))
          .advanceWatermarkToInfinity();

  PCollection<Event> bids = testPipeline.apply(bidEvents);
  PAssert.that(bids.apply(new SqlQuery1())).containsInAnyOrder(BID1_EUR, BID2_EUR);

  testPipeline.run();
}
Aggregations