Use of org.apache.beam.sdk.nexmark.queries.NexmarkQueryModel in project beam by apache.
The class NexmarkLauncher, method run.
/**
 * Run {@code configuration} and return its performance if possible.
 *
 * <p>The run is skipped (returning {@code null}) when the configuration is disabled
 * ({@code numEvents < 0}), when no query is implemented for it, or when it is
 * {@code PORTABILITY_BATCH} in streaming mode. When {@code --justModelResultRate} is set only
 * the model's result rates are computed and {@code null} is returned.
 *
 * <p>Per-run state ({@code pubsubHelper}, {@code configuration}, {@code queryName}) is always
 * reset before returning, whether the run succeeds, is skipped, or fails.
 *
 * @return the result of {@code monitor(query)} for a run that was actually executed, or
 *     {@code null} if the run was skipped or only the model was evaluated
 * @throws IOException propagated from the helpers this method calls
 * @throws RuntimeException if {@code --manageResources} is used without {@code --monitorJobs},
 *     or if a model is required ({@code --justModelResultRate} or {@code --assertCorrectness})
 *     but none is available for the query
 */
@Nullable
public NexmarkPerf run() throws IOException {
  if (options.getManageResources() && !options.getMonitorJobs()) {
    throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
  }

  //
  // Setup per-run state.
  //
  checkState(queryName == null);
  if (configuration.sourceType.equals(SourceType.PUBSUB)) {
    pubsubHelper = PubsubHelper.create(options);
  }

  try {
    NexmarkUtils.console("Running %s", configuration.toShortString());

    if (configuration.numEvents < 0) {
      NexmarkUtils.console("skipping since configuration is disabled");
      return null;
    }

    NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
    if (query == null) {
      NexmarkUtils.console("skipping since configuration is not implemented");
      return null;
    }

    if (configuration.query == PORTABILITY_BATCH && options.isStreaming()) {
      NexmarkUtils.console("skipping PORTABILITY_BATCH since it does not support streaming mode");
      return null;
    }

    queryName = query.getName();

    // Append queryName to temp location. An unset (null) temp location is left alone:
    // concatenating it would produce the bogus path "null/<queryName>".
    String tempLocation = options.getTempLocation();
    if (tempLocation != null && !tempLocation.isEmpty()) {
      options.setTempLocation(tempLocation + "/" + queryName);
    }

    NexmarkQueryModel model = getNexmarkQueryModel();

    if (options.getJustModelResultRate()) {
      if (model == null) {
        throw new RuntimeException(String.format("No model for %s", queryName));
      }
      modelResultRates(model);
      return null;
    }

    final Instant now = Instant.now();
    Pipeline p = Pipeline.create(options);
    NexmarkUtils.setupPipeline(configuration.coderStrategy, p);

    // Generate events.
    PCollection<Event> source = createSource(p, now);

    if (query.getTransform().needsSideInput()) {
      query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
    }

    // createSource may hand back null; in that case there's nothing more to add to the
    // pipeline, so every step that consumes the events lives inside this guard.
    if (source != null) {
      if (options.getLogEvents()) {
        source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
      }

      // When the sink type is AVRO, also write the raw events out via sinkEventsToAvro.
      if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
        sinkEventsToAvro(source);
      }

      // LOG_TO_SHARDED_FILES is served by Query10: give it the worker limit and the output
      // path where to write log files (null when no output path is configured).
      if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
        String path = null;
        if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
          path = logsDir(now.getMillis());
        }
        ((Query10) query.getTransform()).setOutputPath(path);
        ((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
      }

      // Apply query. The query is typed with a wildcard, so the element type of its output
      // has to be narrowed with an unchecked cast.
      @SuppressWarnings("unchecked")
      PCollection<TimestampedValue<KnownSize>> results =
          (PCollection<TimestampedValue<KnownSize>>) source.apply(query);

      if (options.getAssertCorrectness()) {
        if (model == null) {
          throw new RuntimeException(String.format("No model for %s", queryName));
        }
        // We know all our streams have a finite number of elements.
        results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
        // If we have a finite number of events then assert our pipeline's
        // results match those of a model using the same sequence of events.
        PAssert.that(results).satisfies(model.assertionFor());
      }

      // Output results.
      sink(results, now.getMillis());
    }

    mainResult = p.run();
    // Block for at most the configured stream timeout (in seconds) before monitoring.
    mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
    return monitor(query);
  } finally {
    // Tear down per-run state so the launcher is clean for the next run.
    if (pubsubHelper != null) {
      pubsubHelper.cleanup();
      pubsubHelper = null;
    }
    configuration = null;
    queryName = null;
  }
}
Aggregations