Search in sources :

Example 6 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

In the class QueryTest, method queryMatchesModel.

/**
 * Runs {@code query} over generated events and asserts that its output satisfies the assertion
 * supplied by {@code model}.
 *
 * @param name prefix of the read step's name; the step is called {@code name + ".Read"}
 * @param query the query transform under test
 * @param model the model whose {@code assertionFor()} is applied to the query output
 * @param streamingMode {@code true} to read from the streaming events source, {@code false} to
 *     read from the batch events source
 */
private <T extends KnownSize> void queryMatchesModel(String name, NexmarkQueryTransform<T> query, NexmarkQueryModel<T> model, boolean streamingMode) {
    NexmarkUtils.setupPipeline(NexmarkUtils.CoderStrategy.HAND, p);

    // Read the events from whichever source matches the requested mode.
    String readStepName = name + ".Read";
    PCollection<Event> input = p.apply(readStepName, streamingMode ? NexmarkUtils.streamEventsSource(CONFIG) : NexmarkUtils.batchEventsSource(CONFIG));

    // Wrap the transform in a NexmarkQuery and check its output against the model.
    PCollection<TimestampedValue<T>> actual = (PCollection<TimestampedValue<T>>) input.apply(new NexmarkQuery<>(CONFIG, query));
    PAssert.that(actual).satisfies(model.assertionFor());

    // Execute the pipeline and block until it completes.
    p.run().waitUntilFinish();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Event(org.apache.beam.sdk.nexmark.model.Event) PipelineResult(org.apache.beam.sdk.PipelineResult)

Example 7 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

In the class NexmarkLauncher, method run.

/**
 * Run {@code configuration} and return its performance if possible.
 *
 * <p>Returns {@code null} without running a pipeline when the configuration is disabled
 * ({@code numEvents < 0}), when no query is available for it, when {@code PORTABILITY_BATCH} is
 * requested in streaming mode, or when only the model result rate was requested.
 *
 * <p>The per-run state ({@code pubsubHelper}, {@code configuration}, {@code queryName}) is reset
 * in the {@code finally} block whether or not the run succeeds.
 *
 * @return the result of {@code monitor(query)}, or {@code null} if the run was skipped
 * @throws IOException declared for the helpers this method calls
 * @throws RuntimeException if {@code --manageResources} is used without {@code --monitorJobs},
 *     or if a model is required but none exists for the query
 */
@Nullable
public NexmarkPerf run() throws IOException {
    if (options.getManageResources() && !options.getMonitorJobs()) {
        throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
    }
    //
    // Setup per-run state.
    //
    checkState(queryName == null);
    if (configuration.sourceType.equals(SourceType.PUBSUB)) {
        pubsubHelper = PubsubHelper.create(options);
    }
    try {
        NexmarkUtils.console("Running %s", configuration.toShortString());
        if (configuration.numEvents < 0) {
            NexmarkUtils.console("skipping since configuration is disabled");
            return null;
        }
        NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
        if (query == null) {
            NexmarkUtils.console("skipping since configuration is not implemented");
            return null;
        }
        if (configuration.query == PORTABILITY_BATCH && options.isStreaming()) {
            NexmarkUtils.console("skipping PORTABILITY_BATCH since it does not support streaming mode");
            return null;
        }
        queryName = query.getName();
        // Append queryName to temp location.
        // NOTE(review): a null temp location is not equal to "" and would be concatenated as the
        // text "null" here -- confirm whether the temp location can be unset at this point.
        if (!"".equals(options.getTempLocation())) {
            options.setTempLocation(options.getTempLocation() + "/" + queryName);
        }
        NexmarkQueryModel model = getNexmarkQueryModel();
        if (options.getJustModelResultRate()) {
            if (model == null) {
                throw new RuntimeException(String.format("No model for %s", queryName));
            }
            modelResultRates(model);
            return null;
        }
        final Instant now = Instant.now();
        Pipeline p = Pipeline.create(options);
        NexmarkUtils.setupPipeline(configuration.coderStrategy, p);
        // Generate events.
        PCollection<Event> source = createSource(p, now);
        if (query.getTransform().needsSideInput()) {
            query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
        }
        // The source is null-checked below, so it may be absent. Only attach the event logger
        // when there is a source to attach it to; otherwise this would dereference null.
        if (options.getLogEvents() && source != null) {
            source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
        }
        // Without a source there is nothing more to add to the pipeline.
        if (source != null) {
            // Optionally sink the events themselves in Avro format.
            // (Query results are ignored).
            if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
                sinkEventsToAvro(source);
            }
            // LOG_TO_SHARDED_FILES is backed by Query10: set its parallelism and the output
            // path where to write log files (null when no output path is configured).
            if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
                String path = null;
                if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
                    path = logsDir(now.getMillis());
                }
                ((Query10) query.getTransform()).setOutputPath(path);
                ((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
            }
            // Apply query.
            PCollection<TimestampedValue<KnownSize>> results = (PCollection<TimestampedValue<KnownSize>>) source.apply(query);
            if (options.getAssertCorrectness()) {
                if (model == null) {
                    throw new RuntimeException(String.format("No model for %s", queryName));
                }
                // We know all our streams have a finite number of elements.
                results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
                // If we have a finite number of events then assert our pipeline's
                // results match those of a model using the same sequence of events.
                PAssert.that(results).satisfies(model.assertionFor());
            }
            // Output results.
            sink(results, now.getMillis());
        }
        mainResult = p.run();
        // Wait at most configuration.streamTimeout seconds before moving on to monitoring.
        mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
        return monitor(query);
    } finally {
        // Always release the Pubsub helper and clear per-run state.
        if (pubsubHelper != null) {
            pubsubHelper.cleanup();
            pubsubHelper = null;
        }
        configuration = null;
        queryName = null;
    }
}
Also used : Query10(org.apache.beam.sdk.nexmark.queries.Query10) KnownSize(org.apache.beam.sdk.nexmark.model.KnownSize) Instant(org.joda.time.Instant) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) NexmarkQueryModel(org.apache.beam.sdk.nexmark.queries.NexmarkQueryModel) Event(org.apache.beam.sdk.nexmark.model.Event) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 8 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

In the class Query13, method expand.

/**
 * Shards the events by key, groups them, expands the groups back into single events and then
 * passes each event through a serialization step that encodes and decodes a fraction of them.
 */
@Override
public PCollection<Event> expand(PCollection<Event> events) {
    final Coder<Event> coder = events.getCoder();

    // Re-emits every event of a grouped shard as an individual element.
    DoFn<KV<Integer, Iterable<Event>>, Event> expandIterableFn = new DoFn<KV<Integer, Iterable<Event>>, Event>() {

        @ProcessElement
        public void processElement(@Element KV<Integer, Iterable<Event>> element, OutputReceiver<Event> r) {
            Iterable<Event> shard = element.getValue();
            for (Event grouped : shard) {
                r.output(grouped);
            }
        }
    };

    // Sends an event through encodeDecode when a random draw is at most pardoCPUFactor;
    // otherwise the event is passed on untouched.
    DoFn<Event, Event> serializeFn = new DoFn<Event, Event>() {

        private final Counter bytesMetric = Metrics.counter(name, "serde-bytes");

        private final Random random = new Random();

        // Falls back to 1.0 whenever the configured factor is not within [0.0, 1.0].
        private double pardoCPUFactor = (configuration.pardoCPUFactor >= 0.0 && configuration.pardoCPUFactor <= 1.0) ? configuration.pardoCPUFactor : 1.0;

        @ProcessElement
        public void processElement(ProcessContext c) throws CoderException, IOException {
            Event outgoing = random.nextDouble() <= pardoCPUFactor
                    ? encodeDecode(coder, c.element(), bytesMetric)
                    : c.element();
            c.output(outgoing);
        }
    };

    return events
            .apply("Pair with random key", ParDo.of(new AssignShardFn<>(configuration.numKeyBuckets)))
            .apply(GroupByKey.create())
            .apply("ExpandIterable", ParDo.of(expandIterableFn))
            .apply(name + ".Serialize", ParDo.of(serializeFn));
}
Also used : KV(org.apache.beam.sdk.values.KV) DoFn(org.apache.beam.sdk.transforms.DoFn) Counter(org.apache.beam.sdk.metrics.Counter) Random(java.util.Random) Event(org.apache.beam.sdk.nexmark.model.Event)

Example 9 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

In the class Query3, method expand.

/**
 * Keys category-10 auctions by seller id and OR/ID/CA persons by person id, flattens both
 * keyed collections, joins them with {@code joinDoFn} and projects each joined pair onto a
 * {@link NameCityStateId}.
 */
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> events) {
    // Wraps an auction in an otherwise empty Event, keyed by the auction's seller.
    DoFn<Auction, KV<Long, Event>> keyAuctionBySeller = new DoFn<Auction, KV<Long, Event>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            Auction auction = c.element();
            Event wrapper = new Event();
            wrapper.newAuction = auction;
            c.output(KV.of(auction.seller, wrapper));
        }
    };

    // Wraps a person in an otherwise empty Event, keyed by the person's id.
    DoFn<Person, KV<Long, Event>> keyPersonById = new DoFn<Person, KV<Long, Event>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            Person person = c.element();
            Event wrapper = new Event();
            wrapper.newPerson = person;
            c.output(KV.of(person.id, wrapper));
        }
    };

    // Turns a joined (auction, person) pair into the query's result record.
    DoFn<KV<Auction, Person>, NameCityStateId> projectFn = new DoFn<KV<Auction, Person>, NameCityStateId>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            KV<Auction, Person> joined = c.element();
            Auction auction = joined.getKey();
            Person person = joined.getValue();
            c.output(new NameCityStateId(person.name, person.city, person.state, auction.id));
        }
    };

    PCollection<KV<Long, Event>> auctionsBySellerId = events
            .apply(NexmarkQueryUtil.JUST_NEW_AUCTIONS)
            .apply(name + ".InCategory", Filter.by(auction -> auction.category == 10))
            .apply("EventByAuctionSeller", ParDo.of(keyAuctionBySeller));

    PCollection<KV<Long, Event>> personsById = events
            .apply(NexmarkQueryUtil.JUST_NEW_PERSONS)
            .apply(name + ".InState", Filter.by(person -> "OR".equals(person.state) || "ID".equals(person.state) || "CA".equals(person.state)))
            .apply("EventByPersonId", ParDo.of(keyPersonById));

    // Join auctions and people.
    return PCollectionList.of(auctionsBySellerId)
            .and(personsById)
            .apply(Flatten.pCollections())
            .apply(name + ".Join", ParDo.of(joinDoFn))
            .apply(name + ".Project", ParDo.of(projectFn));
}
Also used : DoFn(org.apache.beam.sdk.transforms.DoFn) NameCityStateId(org.apache.beam.sdk.nexmark.model.NameCityStateId) Auction(org.apache.beam.sdk.nexmark.model.Auction) Event(org.apache.beam.sdk.nexmark.model.Event) KV(org.apache.beam.sdk.values.KV) Person(org.apache.beam.sdk.nexmark.model.Person)

Example 10 with Event

use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.

In the class UnboundedEventSourceTest, method resumeFromCheckpoint.

/**
 * Checks that aggressively checkpointing a reader and resuming from each checkpoint yields
 * exactly the same event stream as reading directly.
 *
 * <p>Events are consumed in randomly sized batches (fixed seed); after every batch the reader is
 * replaced by a new one created from the previous reader's checkpoint mark.
 */
@Ignore("TODO(BEAM-5070) Test is flaky. Fix before reenabling.")
@Test
public void resumeFromCheckpoint() throws IOException {
    Random random = new Random(297);
    int remaining = 47293;
    GeneratorConfig config = makeConfig(remaining);
    Generator modelGenerator = new Generator(config);
    EventIdChecker checker = new EventIdChecker();
    PipelineOptions options = TestPipeline.testingPipelineOptions();
    UnboundedEventSource source = new UnboundedEventSource(config, 1, 0, false);
    UnboundedReader<Event> reader = source.createReader(options, null);

    while (remaining > 0) {
        // Never ask for more events than are still outstanding.
        int batchSize = Math.min(459 + random.nextInt(455), remaining);
        System.out.printf("reading %d...%n", batchSize);
        checker.add(batchSize, reader, modelGenerator);
        remaining -= batchSize;

        // Continue with a fresh reader restored from the current reader's checkpoint.
        System.out.printf("splitting with %d remaining...%n", remaining);
        reader = source.createReader(options, (GeneratorCheckpoint) reader.getCheckpointMark());
    }

    // Every event has been consumed, so the final reader must have nothing left.
    assertFalse(reader.advance());
}
Also used : Random(java.util.Random) GeneratorCheckpoint(org.apache.beam.sdk.nexmark.sources.generator.GeneratorCheckpoint) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) CheckpointMark(org.apache.beam.sdk.io.UnboundedSource.CheckpointMark) Event(org.apache.beam.sdk.nexmark.model.Event) GeneratorConfig(org.apache.beam.sdk.nexmark.sources.generator.GeneratorConfig) GeneratorCheckpoint(org.apache.beam.sdk.nexmark.sources.generator.GeneratorCheckpoint) Generator(org.apache.beam.sdk.nexmark.sources.generator.Generator) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

Event (org.apache.beam.sdk.nexmark.model.Event)17 KV (org.apache.beam.sdk.values.KV)8 PCollection (org.apache.beam.sdk.values.PCollection)8 TimestampedValue (org.apache.beam.sdk.values.TimestampedValue)6 Random (java.util.Random)5 DoFn (org.apache.beam.sdk.transforms.DoFn)4 Test (org.junit.Test)4 PipelineResult (org.apache.beam.sdk.PipelineResult)3 Counter (org.apache.beam.sdk.metrics.Counter)3 NexmarkConfiguration (org.apache.beam.sdk.nexmark.NexmarkConfiguration)3 Bid (org.apache.beam.sdk.nexmark.model.Bid)3 Pipeline (org.apache.beam.sdk.Pipeline)2 ResourceId (org.apache.beam.sdk.io.fs.ResourceId)2 NameCityStateId (org.apache.beam.sdk.nexmark.model.NameCityStateId)2 SelectEvent (org.apache.beam.sdk.nexmark.model.sql.SelectEvent)2 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)2 Row (org.apache.beam.sdk.values.Row)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2 Nullable (org.checkerframework.checker.nullness.qual.Nullable)2 Category (org.junit.experimental.categories.Category)2