use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class SessionSideInputJoinTest method inputOutputSameEvents.
/**
 * Smoke test: the number of bids read from the source must equal the number of elements emitted
 * by the session side-input join. Intended as a simple baseline when diagnosing flakiness in the
 * more complex tests.
 */
@Test
@Category(NeedsRunner.class)
public void inputOutputSameEvents() throws Exception {
  NexmarkConfiguration config = NexmarkConfiguration.DEFAULT.copy();
  config.sideInputType = NexmarkUtils.SideInputType.DIRECT;
  config.numEventGenerators = 1;
  config.numEvents = 5000;
  config.sideInputRowCount = 10;
  config.sideInputNumShards = 3;

  PCollection<KV<Long, String>> sideInput = NexmarkUtils.prepareSideInput(p, config);
  try {
    // Count the bids present in the generated input.
    PCollection<Event> events = p.apply(NexmarkUtils.batchEventsSource(config));
    PCollection<Bid> bidsOnly = events.apply(NexmarkQueryUtil.JUST_BIDS);
    PCollection<Long> bidCount = bidsOnly.apply("Count Bids", Count.globally());

    // Run the query under test over the same input.
    NexmarkQueryTransform<Bid> query = new SessionSideInputJoin(config);
    query.setSideInput(sideInput);
    PCollection<TimestampedValue<Bid>> joined =
        (PCollection<TimestampedValue<Bid>>) events.apply(new NexmarkQuery(config, query));

    // Re-window the query output into the global window so it can be counted globally.
    PCollection<TimestampedValue<Bid>> globallyWindowed =
        joined.apply(Window.into(new GlobalWindows()));
    PCollection<Long> outputCount = globallyWindowed.apply("Count outputs", Count.globally());

    // Both counts land in one collection; they must be equal and non-zero.
    PCollection<Long> bothCounts =
        PCollectionList.of(bidCount).and(outputCount).apply(Flatten.pCollections());
    PAssert.that(bothCounts)
        .satisfies(
            observed -> {
              assertThat(Iterables.size(observed), equalTo(2));
              assertThat(Iterables.get(observed, 0), greaterThan(0L));
              assertThat(Iterables.get(observed, 0), equalTo(Iterables.get(observed, 1)));
              return null;
            });

    p.run();
  } finally {
    // Always release whatever prepareSideInput set up, even if the pipeline fails.
    NexmarkUtils.cleanUpSideInput(config);
  }
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class SqlQuery3 method expand.
/**
 * Windows the incoming events into fixed windows, splits them into auction and person row
 * streams, and runs the SQL {@code QUERY} over the pair, converting the resulting rows to
 * {@link NameCityStateId}.
 */
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
  PCollection<Event> windowed =
      allEvents.apply(
          Window.into(FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec))));

  // Auction events -> rows.
  String auctionName = Auction.class.getSimpleName();
  String auctionFilterStep = getName() + ".Filter." + auctionName;
  PCollection<Event> auctionEvents =
      windowed.apply(auctionFilterStep, Filter.by(event -> event.newAuction != null));
  String auctionToRecordsStep = getName() + ".ToRecords." + auctionName;
  PCollection<Row> auctions =
      auctionEvents.apply(auctionToRecordsStep, new SelectEvent(Event.Type.AUCTION));

  // Person events -> rows.
  String personName = Person.class.getSimpleName();
  String personFilterStep = getName() + ".Filter." + personName;
  PCollection<Event> personEvents =
      windowed.apply(personFilterStep, Filter.by(event -> event.newPerson != null));
  String personToRecordsStep = getName() + ".ToRecords." + personName;
  PCollection<Row> people =
      personEvents.apply(personToRecordsStep, new SelectEvent(Event.Type.PERSON));

  // The tag ids are the table names the SQL query refers to.
  TupleTag<Row> auctionTag = new TupleTag<>("Auction");
  TupleTag<Row> personTag = new TupleTag<>("Person");
  PCollectionTuple inputStreams = PCollectionTuple.of(auctionTag, auctions).and(personTag, people);

  PCollection<Row> queryResult =
      inputStreams.apply(SqlTransform.query(QUERY).withQueryPlannerClass(plannerClass));
  return queryResult.apply(Convert.fromRows(NameCityStateId.class));
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class Generator method nextEvent.
/**
 * Produce the next event of the simulation.
 *
 * <p>The returned {@link NextEvent} carries two timestamps: a wallclock time saying when the
 * event should be emitted, and an event time saying when the event claims to have happened in
 * the simulation. It also carries the event itself and a watermark.
 */
public NextEvent nextEvent() {
  // Lazily pin the wallclock origin on the first call.
  if (wallclockBaseTime < 0) {
    wallclockBaseTime = System.currentTimeMillis();
  }

  // Event time at which this event is generated. Monotonic.
  long generationTime =
      config.timestampAndInterEventDelayUsForEvent(config.nextEventNumber(eventsCountSoFar))
          .getKey();

  // Event time the event reports for itself. May have local jitter, depending on
  // outOfOrderGroupSize.
  long reportedTime =
      config
          .timestampAndInterEventDelayUsForEvent(config.nextAdjustedEventNumber(eventsCountSoFar))
          .getKey();

  // Lower bound on this and every future reported event time, which accounts for the jitter.
  long watermark =
      config
          .timestampAndInterEventDelayUsForEvent(
              config.nextEventNumberForWatermark(eventsCountSoFar))
          .getKey();

  // Wallclock time at which the event should be emitted.
  long offsetFromBase = generationTime - getCurrentConfig().baseTime;
  long emitAtWallclock = wallclockBaseTime + offsetFromBase;

  // The random generator is seeded with the next event id.
  Random rng = new Random(getNextEventId());
  long eventId = getNextEventId();

  // The id's remainder selects the event kind: person, then auction, then bid.
  long slot = eventId % GeneratorConfig.PROPORTION_DENOMINATOR;
  boolean isPerson = slot < GeneratorConfig.PERSON_PROPORTION;
  Event generated;
  if (isPerson) {
    generated = new Event(nextPerson(eventId, rng, new DateTime(reportedTime), config));
  } else {
    boolean isAuction =
        slot < GeneratorConfig.PERSON_PROPORTION + GeneratorConfig.AUCTION_PROPORTION;
    if (isAuction) {
      generated = new Event(nextAuction(eventsCountSoFar, eventId, rng, reportedTime, config));
    } else {
      generated = new Event(nextBid(eventId, rng, reportedTime, config));
    }
  }

  eventsCountSoFar++;
  return new NextEvent(emitAtWallclock, reportedTime, generated, watermark);
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class WinningBidsSimulator method run.
/**
 * Perform one simulation step. Each call does at most one of: emit a winning bid, signal
 * completion, or consume a single input event and update auction/bid state.
 */
@Override
protected void run() {
  boolean pastMinTimestamp = lastTimestamp.compareTo(BoundedWindow.TIMESTAMP_MIN_VALUE) > 0;
  if (pastMinTimestamp) {
    // The auction some pending bid was waiting for may have shown up by now.
    flushBidsWithoutAuctions();
    TimestampedValue<AuctionBid> winner = nextWinningBid(lastTimestamp);
    if (winner != null) {
      addResult(winner);
      return;
    }
  }

  TimestampedValue<Event> next = nextInput();
  if (next == null) {
    // Input is exhausted: drain auctions that are still open, one per call.
    TimestampedValue<AuctionBid> winner = nextWinningBid(BoundedWindow.TIMESTAMP_MAX_VALUE);
    if (winner != null) {
      addResult(winner);
    } else {
      // Nothing left to emit.
      allDone();
    }
    return;
  }

  Event event = next.getValue();
  if (event.newPerson != null) {
    // Person events are ignored; they do not advance lastTimestamp either.
    return;
  }

  lastTimestamp = next.getTimestamp();

  if (event.newAuction != null) {
    // Track the newly opened auction.
    openAuctions.put(event.newAuction.id, event.newAuction);
    return;
  }

  if (captureBestBid(event.bid, true)) {
    return;
  }

  // The bid could not be captured yet; park it until a later flush.
  NexmarkUtils.info("bid not yet accounted for: %s", event.bid);
  bidsWithoutAuctions.add(event.bid);
  // Winning bids are looked for again on the next call.
}
use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
the class NexmarkLauncher method createSource.
// ================================================================================
// Construct overall pipeline
// ================================================================================
/**
 * Return the source of events for this run, selected by {@code configuration.sourceType}.
 *
 * <p>The result is {@code null} when there is nothing for the main pipeline to consume:
 *
 * <ul>
 *   <li>{@code DIRECT} with {@code configuration.generateEventFilePathPrefix} set: the
 *       synthetic events are handed to {@code sinkEventsToFile} instead of being returned.
 *   <li>{@code KAFKA}/{@code PUBSUB} with {@code pubSubMode == PUBLISH_ONLY}: events are only
 *       published, never read back.
 * </ul>
 *
 * <p>For {@code KAFKA}/{@code PUBSUB} this method first sets up the publishing side (if any) and
 * then the consuming side, both driven by {@code configuration.pubSubMode}.
 *
 * @param p pipeline the source (and any in-job publisher) is attached to
 * @param now instant whose millis are passed to {@code setupPubSubResources} for Pubsub, and
 *     which is passed to {@code sourceEventsFromKafka} in {@code COMBINED} mode
 * @return the event source, or {@code null} in the cases listed above
 * @throws IOException declared by this method; which helper raises it is not visible here
 */
private PCollection<Event> createSource(Pipeline p, final Instant now) throws IOException {
PCollection<Event> source = null;
switch(configuration.sourceType) {
case DIRECT:
source = sourceEventsFromSynthetic(p);
if (configuration.generateEventFilePathPrefix != null) {
// Events go to a file rather than to the query, so no source is returned.
PCollection<Event> events = source;
source = null;
sinkEventsToFile(events);
}
break;
case AVRO:
source = sourceEventsFromAvro(p);
break;
// KAFKA deliberately falls through: both transports share the publish/subscribe logic below.
case KAFKA:
case PUBSUB:
if (configuration.sourceType == SourceType.PUBSUB) {
setupPubSubResources(now.getMillis());
}
// Setup the sink for the publisher.
switch(configuration.pubSubMode) {
case SUBSCRIBE_ONLY:
// Nothing to publish.
break;
case PUBLISH_ONLY:
{
// Send synthesized events to Kafka or Pubsub in this job.
PCollection<Event> events = sourceEventsFromSynthetic(p).apply(queryName + ".Snoop", NexmarkUtils.snoop(queryName));
if (configuration.sourceType == NexmarkUtils.SourceType.KAFKA) {
sinkEventsToKafka(events);
} else {
// pubsub
sinkEventsToPubsub(events);
}
}
break;
case COMBINED:
// Send synthesized events to Kafka or Pubsub in separate publisher job.
// We won't start the main pipeline until the publisher has sent the pre-load events.
// We'll shutdown the publisher job when we notice the main job has finished.
invokeBuilderForPublishOnlyPipeline(publishOnlyOptions -> {
// The publisher runs in its own pipeline (sp), separate from the main pipeline p.
Pipeline sp = Pipeline.create(publishOnlyOptions);
NexmarkUtils.setupPipeline(configuration.coderStrategy, sp);
publisherMonitor = new Monitor<>(queryName, "publisher");
PCollection<Event> events = sourceEventsFromSynthetic(sp).apply(queryName + ".Monitor", publisherMonitor.getTransform());
if (configuration.sourceType == NexmarkUtils.SourceType.KAFKA) {
sinkEventsToKafka(events);
} else {
// pubsub
sinkEventsToPubsub(events);
}
publisherResult = sp.run();
NexmarkUtils.console("Publisher job is started.");
});
break;
}
// Setup the source for the consumer.
switch(configuration.pubSubMode) {
case PUBLISH_ONLY:
// Nothing to consume. Leave source null.
break;
case SUBSCRIBE_ONLY:
case COMBINED:
{
// Read events from Kafka or Pubsub.
if (configuration.sourceType == NexmarkUtils.SourceType.KAFKA) {
// We need to have the same indexes for Publisher (sink) and Subscriber (source)
// pipelines in COMBINED mode (when we run them in sequence). It means that
// Subscriber should start reading from the same index as Publisher started to write
// pre-load events, even if we run Subscriber right after Publisher has finished.
// Otherwise, when pubSubMode=SUBSCRIBE_ONLY, Instant.EPOCH is passed instead of now.
// NOTE(review): presumably sourceEventsFromKafka treats EPOCH as "no start time" and
// ignores it -- confirm against that method.
source = sourceEventsFromKafka(p, configuration.pubSubMode == COMBINED ? now : Instant.EPOCH);
} else {
source = sourceEventsFromPubsub(p);
}
}
break;
}
break;
}
return source;
}
Aggregations