Example 11 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class SessionSideInputJoinTest method inputOutputSameEvents.

/**
 * A smoke test that the counts of input bids and outputs are the same, to help diagnose flakiness
 * in more complex tests.
 */
@Test
@Category(NeedsRunner.class)
public void inputOutputSameEvents() throws Exception {
    NexmarkConfiguration config = NexmarkConfiguration.DEFAULT.copy();
    config.sideInputType = NexmarkUtils.SideInputType.DIRECT;
    config.numEventGenerators = 1;
    config.numEvents = 5000;
    config.sideInputRowCount = 10;
    config.sideInputNumShards = 3;
    PCollection<KV<Long, String>> sideInput = NexmarkUtils.prepareSideInput(p, config);
    try {
        PCollection<Event> input = p.apply(NexmarkUtils.batchEventsSource(config));
        PCollection<Bid> justBids = input.apply(NexmarkQueryUtil.JUST_BIDS);
        PCollection<Long> bidCount = justBids.apply("Count Bids", Count.globally());
        NexmarkQueryTransform<Bid> query = new SessionSideInputJoin(config);
        query.setSideInput(sideInput);
        PCollection<TimestampedValue<Bid>> output = (PCollection<TimestampedValue<Bid>>) input.apply(new NexmarkQuery(config, query));
        PCollection<Long> outputCount = output.apply(Window.into(new GlobalWindows())).apply("Count outputs", Count.globally());
        PAssert.that(PCollectionList.of(bidCount).and(outputCount).apply(Flatten.pCollections())).satisfies(counts -> {
            assertThat(Iterables.size(counts), equalTo(2));
            assertThat(Iterables.get(counts, 0), greaterThan(0L));
            assertThat(Iterables.get(counts, 0), equalTo(Iterables.get(counts, 1)));
            return null;
        });
        p.run();
    } finally {
        NexmarkUtils.cleanUpSideInput(config);
    }
}
Also used: GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows), KV(org.apache.beam.sdk.values.KV), PCollection(org.apache.beam.sdk.values.PCollection), TimestampedValue(org.apache.beam.sdk.values.TimestampedValue), NexmarkConfiguration(org.apache.beam.sdk.nexmark.NexmarkConfiguration), Event(org.apache.beam.sdk.nexmark.model.Event), Bid(org.apache.beam.sdk.nexmark.model.Bid), Category(org.junit.experimental.categories.Category), Test(org.junit.Test)
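
The count-comparison idiom above (flatten two singleton counts into one PCollection and assert inside PAssert.satisfies) works outside NEXMark as well. Below is a minimal, self-contained sketch of the same pattern, assuming only the Beam Java SDK and the direct runner on the classpath; the inputs are arbitrary Create.of(...) collections rather than NEXMark events, and the class name is hypothetical.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

public class CountComparisonSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create();

        // Two arbitrary inputs that should end up with the same element count.
        PCollection<Long> leftCount =
            p.apply("Left", Create.of("a", "b", "c")).apply("Count left", Count.globally());
        PCollection<Long> rightCount =
            p.apply("Right", Create.of("x", "y", "z")).apply("Count right", Count.globally());

        // Flatten both singleton counts into one PCollection, mirroring the
        // bidCount/outputCount comparison in the test above.
        PCollection<Long> both =
            PCollectionList.of(leftCount).and(rightCount).apply(Flatten.pCollections());

        PAssert.that(both).satisfies(counts -> {
            Long first = null;
            int seen = 0;
            for (Long c : counts) {
                if (first == null) {
                    first = c;
                } else if (!first.equals(c)) {
                    throw new AssertionError("counts differ: " + first + " vs " + c);
                }
                seen++;
            }
            if (seen != 2) {
                throw new AssertionError("expected exactly two counts, saw " + seen);
            }
            return null;
        });

        p.run().waitUntilFinish();
    }
}

Flattening both counts first means a single assertion sees both values, so a mismatch fails the pipeline run rather than passing a one-sided check.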

Example 12 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class SqlQuery3 method expand.

@Override
public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
    PCollection<Event> windowed = allEvents.apply(Window.into(FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec))));
    String auctionName = Auction.class.getSimpleName();
    PCollection<Row> auctions = windowed.apply(getName() + ".Filter." + auctionName, Filter.by(e1 -> e1.newAuction != null)).apply(getName() + ".ToRecords." + auctionName, new SelectEvent(Event.Type.AUCTION));
    String personName = Person.class.getSimpleName();
    PCollection<Row> people = windowed.apply(getName() + ".Filter." + personName, Filter.by(e -> e.newPerson != null)).apply(getName() + ".ToRecords." + personName, new SelectEvent(Event.Type.PERSON));
    PCollectionTuple inputStreams = PCollectionTuple.of(new TupleTag<>("Auction"), auctions).and(new TupleTag<>("Person"), people);
    return inputStreams.apply(SqlTransform.query(QUERY).withQueryPlannerClass(plannerClass)).apply(Convert.fromRows(NameCityStateId.class));
}
Also used: NameCityStateId(org.apache.beam.sdk.nexmark.model.NameCityStateId), SelectEvent(org.apache.beam.sdk.nexmark.model.sql.SelectEvent), Event(org.apache.beam.sdk.nexmark.model.Event), PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple), TupleTag(org.apache.beam.sdk.values.TupleTag), Row(org.apache.beam.sdk.values.Row)
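
SqlQuery3 feeds two tagged Row streams into a single SqlTransform; the tuple tag names become the table names referenced in the SQL text. Below is a minimal sketch of that wiring, with hypothetical schemas and query text standing in for the NEXMark ones. It assumes the beam-sdks-java-extensions-sql module is on the classpath.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;

public class TupleSqlJoinSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create();

        // Hypothetical schemas standing in for the NEXMark Auction/Person rows.
        Schema auctionSchema = Schema.builder().addInt64Field("id").addInt64Field("seller").build();
        Schema personSchema = Schema.builder().addInt64Field("id").addStringField("name").build();

        PCollection<Row> auctions = p.apply("Auctions",
            Create.of(Row.withSchema(auctionSchema).addValues(1L, 10L).build())
                .withRowSchema(auctionSchema));
        PCollection<Row> people = p.apply("People",
            Create.of(Row.withSchema(personSchema).addValues(10L, "alice").build())
                .withRowSchema(personSchema));

        // The tag names ("Auction", "Person") are the table names the query refers to,
        // exactly as in SqlQuery3 above.
        PCollectionTuple inputs =
            PCollectionTuple.of(new TupleTag<>("Auction"), auctions)
                .and(new TupleTag<>("Person"), people);

        PCollection<Row> joined = inputs.apply(
            SqlTransform.query(
                "SELECT P.name, A.id FROM Auction A JOIN Person P ON A.seller = P.id"));

        p.run().waitUntilFinish();
    }
}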

Example 13 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class Generator method nextEvent.

/**
 * Return the next event. The outer timestamp is in wallclock time and corresponds to when the
 * event should fire. The inner timestamp is in event-time and represents the time the event is
 * purported to have taken place in the simulation.
 */
public NextEvent nextEvent() {
    if (wallclockBaseTime < 0) {
        wallclockBaseTime = System.currentTimeMillis();
    }
    // When, in event time, we should generate the event. Monotonic.
    long eventTimestamp = config.timestampAndInterEventDelayUsForEvent(config.nextEventNumber(eventsCountSoFar)).getKey();
    // When, in event time, the event should say it was generated. Depending on outOfOrderGroupSize
    // may have local jitter.
    long adjustedEventTimestamp = config.timestampAndInterEventDelayUsForEvent(config.nextAdjustedEventNumber(eventsCountSoFar)).getKey();
    // The minimum of this and all future adjusted event timestamps. Accounts for jitter in
    // the event timestamp.
    long watermark = config.timestampAndInterEventDelayUsForEvent(config.nextEventNumberForWatermark(eventsCountSoFar)).getKey();
    // When, in wallclock time, we should emit the event.
    long wallclockTimestamp = wallclockBaseTime + (eventTimestamp - getCurrentConfig().baseTime);
    // Seed the random number generator with the next 'event id'.
    Random random = new Random(getNextEventId());
    long newEventId = getNextEventId();
    long rem = newEventId % GeneratorConfig.PROPORTION_DENOMINATOR;
    Event event;
    if (rem < GeneratorConfig.PERSON_PROPORTION) {
        event = new Event(nextPerson(newEventId, random, new DateTime(adjustedEventTimestamp), config));
    } else if (rem < GeneratorConfig.PERSON_PROPORTION + GeneratorConfig.AUCTION_PROPORTION) {
        event = new Event(nextAuction(eventsCountSoFar, newEventId, random, adjustedEventTimestamp, config));
    } else {
        event = new Event(nextBid(newEventId, random, adjustedEventTimestamp, config));
    }
    eventsCountSoFar++;
    return new NextEvent(wallclockTimestamp, adjustedEventTimestamp, event, watermark);
}
Also used: Random(java.util.Random), Event(org.apache.beam.sdk.nexmark.model.Event), DateTime(org.joda.time.DateTime)
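
The event-type choice above is driven purely by the event id modulo a fixed denominator, which keeps generation deterministic and replayable. The standalone sketch below isolates that selection logic; the proportion constants are illustrative stand-ins, not GeneratorConfig's actual values.

import java.util.Random;

public class EventTypeSketch {
    // Illustrative proportions (not GeneratorConfig's real constants):
    // out of every 50 events, 1 person, 3 auctions, and the rest bids.
    static final long PERSON_PROPORTION = 1;
    static final long AUCTION_PROPORTION = 3;
    static final long PROPORTION_DENOMINATOR = 50;

    enum Kind { PERSON, AUCTION, BID }

    static Kind kindFor(long eventId) {
        long rem = eventId % PROPORTION_DENOMINATOR;
        if (rem < PERSON_PROPORTION) {
            return Kind.PERSON;
        } else if (rem < PERSON_PROPORTION + AUCTION_PROPORTION) {
            return Kind.AUCTION;
        } else {
            return Kind.BID;
        }
    }

    public static void main(String[] args) {
        // Seeding a Random with the event id, as nextEvent() does, makes the
        // generated payload deterministic and replayable for that event number.
        long eventId = 12345L;
        Random random = new Random(eventId);
        System.out.println(eventId + " -> " + kindFor(eventId) + ", first draw: " + random.nextInt(100));
    }
}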

Example 14 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class WinningBidsSimulator method run.

@Override
protected void run() {
    if (lastTimestamp.compareTo(BoundedWindow.TIMESTAMP_MIN_VALUE) > 0) {
        // We may have finally seen the auction a bid was intended for.
        flushBidsWithoutAuctions();
        TimestampedValue<AuctionBid> result = nextWinningBid(lastTimestamp);
        if (result != null) {
            addResult(result);
            return;
        }
    }
    TimestampedValue<Event> timestampedEvent = nextInput();
    if (timestampedEvent == null) {
        // No more events. Flush any still open auctions.
        TimestampedValue<AuctionBid> result = nextWinningBid(BoundedWindow.TIMESTAMP_MAX_VALUE);
        if (result == null) {
            // We are done.
            allDone();
            return;
        }
        addResult(result);
        return;
    }
    Event event = timestampedEvent.getValue();
    if (event.newPerson != null) {
        // Ignore new person events.
        return;
    }
    lastTimestamp = timestampedEvent.getTimestamp();
    if (event.newAuction != null) {
        // Add this new open auction to our state.
        openAuctions.put(event.newAuction.id, event.newAuction);
    } else {
        if (!captureBestBid(event.bid, true)) {
            // We don't know what to do with this bid yet.
            NexmarkUtils.info("bid not yet accounted for: %s", event.bid);
            bidsWithoutAuctions.add(event.bid);
        }
    }
// Keep looking for winning bids.
}
Also used: AuctionBid(org.apache.beam.sdk.nexmark.model.AuctionBid), Event(org.apache.beam.sdk.nexmark.model.Event)
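
The simulator keeps two pieces of state: open auctions keyed by id, and bids whose auction has not been seen yet. The standalone sketch below shows that "park bids until their auction arrives" bookkeeping with hypothetical Auction/Bid stand-ins; it omits the winning-bid selection and watermark handling of the real simulator.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PendingBidsSketch {
    // Hypothetical stand-ins for the NEXMark model classes.
    static class Auction { final long id; Auction(long id) { this.id = id; } }
    static class Bid { final long auctionId; Bid(long auctionId) { this.auctionId = auctionId; } }

    // Open auctions keyed by id, plus bids whose auction has not arrived yet.
    private final Map<Long, Auction> openAuctions = new HashMap<>();
    private final List<Bid> bidsWithoutAuctions = new ArrayList<>();

    void onAuction(Auction auction) {
        openAuctions.put(auction.id, auction);
        // Some previously parked bids may now be accounted for.
        bidsWithoutAuctions.removeIf(this::tryCaptureBid);
    }

    void onBid(Bid bid) {
        if (!tryCaptureBid(bid)) {
            // We don't know what to do with this bid yet; park it.
            bidsWithoutAuctions.add(bid);
        }
    }

    private boolean tryCaptureBid(Bid bid) {
        // The real simulator also compares against the current best bid here.
        return openAuctions.containsKey(bid.auctionId);
    }
}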

Example 15 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

the class NexmarkLauncher method createSource.

// ================================================================================
// Construct overall pipeline
// ================================================================================
/**
 * Return source of events for this run, or null if we are simply publishing events to Pubsub.
 */
private PCollection<Event> createSource(Pipeline p, final Instant now) throws IOException {
    PCollection<Event> source = null;
    switch(configuration.sourceType) {
        case DIRECT:
            source = sourceEventsFromSynthetic(p);
            if (configuration.generateEventFilePathPrefix != null) {
                PCollection<Event> events = source;
                source = null;
                sinkEventsToFile(events);
            }
            break;
        case AVRO:
            source = sourceEventsFromAvro(p);
            break;
        case KAFKA:
        case PUBSUB:
            if (configuration.sourceType == SourceType.PUBSUB) {
                setupPubSubResources(now.getMillis());
            }
            // Setup the sink for the publisher.
            switch(configuration.pubSubMode) {
                case SUBSCRIBE_ONLY:
                    // Nothing to publish.
                    break;
                case PUBLISH_ONLY:
                    {
                        // Send synthesized events to Kafka or Pubsub in this job.
                        PCollection<Event> events = sourceEventsFromSynthetic(p).apply(queryName + ".Snoop", NexmarkUtils.snoop(queryName));
                        if (configuration.sourceType == NexmarkUtils.SourceType.KAFKA) {
                            sinkEventsToKafka(events);
                        } else {
                            // pubsub
                            sinkEventsToPubsub(events);
                        }
                    }
                    break;
                case COMBINED:
                    // Send synthesized events to Kafka or Pubsub in separate publisher job.
                    // We won't start the main pipeline until the publisher has sent the pre-load events.
                    // We'll shutdown the publisher job when we notice the main job has finished.
                    invokeBuilderForPublishOnlyPipeline(publishOnlyOptions -> {
                        Pipeline sp = Pipeline.create(publishOnlyOptions);
                        NexmarkUtils.setupPipeline(configuration.coderStrategy, sp);
                        publisherMonitor = new Monitor<>(queryName, "publisher");
                        PCollection<Event> events = sourceEventsFromSynthetic(sp).apply(queryName + ".Monitor", publisherMonitor.getTransform());
                        if (configuration.sourceType == NexmarkUtils.SourceType.KAFKA) {
                            sinkEventsToKafka(events);
                        } else {
                            // pubsub
                            sinkEventsToPubsub(events);
                        }
                        publisherResult = sp.run();
                        NexmarkUtils.console("Publisher job is started.");
                    });
                    break;
            }
            // Setup the source for the consumer.
            switch(configuration.pubSubMode) {
                case PUBLISH_ONLY:
                    // Nothing to consume. Leave source null.
                    break;
                case SUBSCRIBE_ONLY:
                case COMBINED:
                    {
                        // Read events from Kafka or Pubsub.
                        if (configuration.sourceType == NexmarkUtils.SourceType.KAFKA) {
                            // In COMBINED mode (when the Publisher and Subscriber pipelines run in
                            // sequence) both must use the same start timestamp, so the Subscriber
                            // starts reading from the same index at which the Publisher started
                            // writing the pre-load events, even if the Subscriber runs right after
                            // the Publisher has finished. Otherwise, when pubSubMode=SUBSCRIBE_ONLY,
                            // 'now' is not needed; Instant.EPOCH is passed instead and is ignored.
                            source = sourceEventsFromKafka(p, configuration.pubSubMode == COMBINED ? now : Instant.EPOCH);
                        } else {
                            source = sourceEventsFromPubsub(p);
                        }
                    }
                    break;
            }
            break;
    }
    return source;
}
Also used: PCollection(org.apache.beam.sdk.values.PCollection), Event(org.apache.beam.sdk.nexmark.model.Event), Pipeline(org.apache.beam.sdk.Pipeline)
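
createSource is essentially a config-driven factory: the source type (and, for Kafka/Pubsub, the pub/sub mode) decides which reader is attached to the pipeline, and a publish-only run returns null because there is nothing to consume in that job. The sketch below reduces that shape to a hypothetical enum and placeholder Create.of sources instead of the real synthetic/Avro/Kafka/Pubsub readers.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class SourceSelectionSketch {
    // Hypothetical enum standing in for NexmarkUtils.SourceType.
    enum SourceType { DIRECT, AVRO, KAFKA, PUBSUB }

    /** Returns the source for this run, or null when the run only publishes. */
    static PCollection<String> createSource(Pipeline p, SourceType sourceType, boolean publishOnly) {
        switch (sourceType) {
            case DIRECT:
                return p.apply("Synthetic", Create.of("event-1", "event-2"));
            case AVRO:
                return p.apply("FromAvro", Create.of("avro-event"));
            case KAFKA:
            case PUBSUB:
                if (publishOnly) {
                    // Publish-only runs only feed the sink; nothing to consume here.
                    return null;
                }
                return p.apply("FromStream", Create.of("streamed-event"));
            default:
                throw new IllegalArgumentException("Unknown source type: " + sourceType);
        }
    }

    public static void main(String[] args) {
        Pipeline p = Pipeline.create();
        PCollection<String> source = createSource(p, SourceType.DIRECT, false);
        if (source != null) {
            p.run().waitUntilFinish();
        }
    }
}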

Aggregations

Event (org.apache.beam.sdk.nexmark.model.Event) 17
KV (org.apache.beam.sdk.values.KV) 8
PCollection (org.apache.beam.sdk.values.PCollection) 8
TimestampedValue (org.apache.beam.sdk.values.TimestampedValue) 6
Random (java.util.Random) 5
DoFn (org.apache.beam.sdk.transforms.DoFn) 4
Test (org.junit.Test) 4
PipelineResult (org.apache.beam.sdk.PipelineResult) 3
Counter (org.apache.beam.sdk.metrics.Counter) 3
NexmarkConfiguration (org.apache.beam.sdk.nexmark.NexmarkConfiguration) 3
Bid (org.apache.beam.sdk.nexmark.model.Bid) 3
Pipeline (org.apache.beam.sdk.Pipeline) 2
ResourceId (org.apache.beam.sdk.io.fs.ResourceId) 2
NameCityStateId (org.apache.beam.sdk.nexmark.model.NameCityStateId) 2
SelectEvent (org.apache.beam.sdk.nexmark.model.sql.SelectEvent) 2
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 2
Row (org.apache.beam.sdk.values.Row) 2
TupleTag (org.apache.beam.sdk.values.TupleTag) 2
Nullable (org.checkerframework.checker.nullness.qual.Nullable) 2
Category (org.junit.experimental.categories.Category) 2