Search in sources :

Example 1 with KnownSize

Use of org.apache.beam.sdk.nexmark.model.KnownSize in the Apache Beam project.

The method run of the class NexmarkLauncher.

/**
 * Run {@code configuration} and return its performance if possible.
 *
 * <p>The run is skipped, and {@code null} is returned, when the configuration is disabled
 * (negative {@code numEvents}), when no query is available for it, when
 * {@code PORTABILITY_BATCH} is requested in streaming mode, or when only the model result
 * rate was requested via {@code getJustModelResultRate()}.
 *
 * <p>Whatever the outcome, the per-run state ({@code pubsubHelper}, {@code configuration}
 * and {@code queryName}) is cleared before this method returns.
 *
 * @return the value produced by {@code monitor(query)}, or {@code null} if the run was skipped
 * @throws IOException if an I/O error occurs while setting up or running the benchmark
 * @throws RuntimeException if {@code --manageResources} is used without {@code --monitorJobs},
 *     or if a model is required but none exists for the query
 */
@Nullable
public NexmarkPerf run() throws IOException {
    if (options.getManageResources() && !options.getMonitorJobs()) {
        throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
    }
    //
    // Setup per-run state.
    //
    // A non-null queryName would mean a previous run did not reach its cleanup.
    checkState(queryName == null);
    if (configuration.sourceType.equals(SourceType.PUBSUB)) {
        pubsubHelper = PubsubHelper.create(options);
    }
    try {
        NexmarkUtils.console("Running %s", configuration.toShortString());
        if (configuration.numEvents < 0) {
            NexmarkUtils.console("skipping since configuration is disabled");
            return null;
        }
        NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
        if (query == null) {
            NexmarkUtils.console("skipping since configuration is not implemented");
            return null;
        }
        if (configuration.query == PORTABILITY_BATCH && options.isStreaming()) {
            NexmarkUtils.console("skipping PORTABILITY_BATCH since it does not support streaming mode");
            return null;
        }
        queryName = query.getName();
        // Append queryName to temp location. An unset (null) location is left alone as well
        // as an empty one; concatenating it would produce the literal path "null/<queryName>".
        String tempLocation = options.getTempLocation();
        if (tempLocation != null && !tempLocation.isEmpty()) {
            options.setTempLocation(tempLocation + "/" + queryName);
        }
        NexmarkQueryModel model = getNexmarkQueryModel();
        if (options.getJustModelResultRate()) {
            if (model == null) {
                throw new RuntimeException(String.format("No model for %s", queryName));
            }
            modelResultRates(model);
            return null;
        }
        final Instant now = Instant.now();
        Pipeline p = Pipeline.create(options);
        NexmarkUtils.setupPipeline(configuration.coderStrategy, p);
        // Generate events.
        PCollection<Event> source = createSource(p, now);
        if (query.getTransform().needsSideInput()) {
            query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
        }
        // The source may be null; in that case there's nothing more to add to the pipeline.
        if (source != null) {
            // Event logging lives inside the null guard: applying a transform to a null
            // source would throw a NullPointerException.
            if (options.getLogEvents()) {
                source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
            }
            // Optionally write the raw events to Avro, independently of the query results.
            if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
                sinkEventsToAvro(source);
            }
            // The sharded-files query needs its worker count set, and the output path
            // where it should write log files (null when no output path is configured).
            if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
                String path = null;
                if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
                    path = logsDir(now.getMillis());
                }
                ((Query10) query.getTransform()).setOutputPath(path);
                ((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
            }
            // Apply query.
            PCollection<TimestampedValue<KnownSize>> results = (PCollection<TimestampedValue<KnownSize>>) source.apply(query);
            if (options.getAssertCorrectness()) {
                if (model == null) {
                    throw new RuntimeException(String.format("No model for %s", queryName));
                }
                // We know all our streams have a finite number of elements.
                results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
                // If we have a finite number of events then assert our pipeline's
                // results match those of a model using the same sequence of events.
                PAssert.that(results).satisfies(model.assertionFor());
            }
            // Output results.
            sink(results, now.getMillis());
        }
        mainResult = p.run();
        // Block for at most configuration.streamTimeout seconds before monitoring.
        mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
        return monitor(query);
    } finally {
        // Reset per-run state on every exit path, including early returns and exceptions.
        if (pubsubHelper != null) {
            pubsubHelper.cleanup();
            pubsubHelper = null;
        }
        configuration = null;
        queryName = null;
    }
}
Also used : Query10(org.apache.beam.sdk.nexmark.queries.Query10) KnownSize(org.apache.beam.sdk.nexmark.model.KnownSize) Instant(org.joda.time.Instant) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) NexmarkQueryModel(org.apache.beam.sdk.nexmark.queries.NexmarkQueryModel) Event(org.apache.beam.sdk.nexmark.model.Event) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Aggregations

Pipeline (org.apache.beam.sdk.Pipeline)1 Event (org.apache.beam.sdk.nexmark.model.Event)1 KnownSize (org.apache.beam.sdk.nexmark.model.KnownSize)1 NexmarkQueryModel (org.apache.beam.sdk.nexmark.queries.NexmarkQueryModel)1 Query10 (org.apache.beam.sdk.nexmark.queries.Query10)1 PCollection (org.apache.beam.sdk.values.PCollection)1 TimestampedValue (org.apache.beam.sdk.values.TimestampedValue)1 Nullable (org.checkerframework.checker.nullness.qual.Nullable)1 Instant (org.joda.time.Instant)1