Use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
From the class QueryTest, method queryMatchesModel.
/**
* Test {@code query} matches {@code model}.
*/
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode) {
NexmarkUtils.setupPipeline(NexmarkUtils.CoderStrategy.HAND, p);
PCollection<Event> events =
    p.apply(
        name + ".Read",
        streamingMode
            ? NexmarkUtils.streamEventsSource(CONFIG)
            : NexmarkUtils.batchEventsSource(CONFIG));
PCollection<TimestampedValue<T>> results =
    (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(CONFIG, query));
PAssert.that(results).satisfies(model.assertionFor());
PipelineResult result = p.run();
result.waitUntilFinish();
}
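In QueryTest this helper is called once per query and mode. A minimal sketch of such a call site, along the lines of the actual tests and assuming the CONFIG field visible above (exact test names vary):
@Test
@Category(NeedsRunner.class)
public void query0MatchesModelBatch() {
  // Run Query0 over a bounded (batch) event source and assert against its model.
  queryMatchesModel("Query0TestBatch", new Query0(), new Query0Model(CONFIG), false);
}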
Use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
From the class NexmarkLauncher, method run.
/**
* Run {@code configuration} and return its performance if possible.
*/
@Nullable
public NexmarkPerf run() throws IOException {
if (options.getManageResources() && !options.getMonitorJobs()) {
throw new RuntimeException("If using --manageResources then must also use --monitorJobs.");
}
//
// Setup per-run state.
//
checkState(queryName == null);
if (configuration.sourceType.equals(SourceType.PUBSUB)) {
pubsubHelper = PubsubHelper.create(options);
}
try {
NexmarkUtils.console("Running %s", configuration.toShortString());
if (configuration.numEvents < 0) {
NexmarkUtils.console("skipping since configuration is disabled");
return null;
}
NexmarkQuery<? extends KnownSize> query = getNexmarkQuery();
if (query == null) {
NexmarkUtils.console("skipping since configuration is not implemented");
return null;
}
if (configuration.query == NexmarkQueryName.PORTABILITY_BATCH && options.isStreaming()) {
NexmarkUtils.console("skipping PORTABILITY_BATCH since it does not support streaming mode");
return null;
}
queryName = query.getName();
// Append queryName to temp location
if (!"".equals(options.getTempLocation())) {
options.setTempLocation(options.getTempLocation() + "/" + queryName);
}
NexmarkQueryModel model = getNexmarkQueryModel();
if (options.getJustModelResultRate()) {
if (model == null) {
throw new RuntimeException(String.format("No model for %s", queryName));
}
modelResultRates(model);
return null;
}
final Instant now = Instant.now();
Pipeline p = Pipeline.create(options);
NexmarkUtils.setupPipeline(configuration.coderStrategy, p);
// Generate events.
PCollection<Event> source = createSource(p, now);
if (query.getTransform().needsSideInput()) {
query.getTransform().setSideInput(NexmarkUtils.prepareSideInput(p, configuration));
}
if (options.getLogEvents()) {
source = source.apply(queryName + ".Events.Log", NexmarkUtils.log(queryName + ".Events"));
}
// Source will be null if the source type is PUBSUB and the mode is PUBLISH_ONLY;
// in that case there's nothing more to add to the pipeline.
if (source != null) {
// Optionally sink events in Avro format (query results are ignored).
if (configuration.sinkType == NexmarkUtils.SinkType.AVRO) {
sinkEventsToAvro(source);
}
// Query 10 logs all events to files; it could generate a lot of logs,
// so set parallelism. Also set the output path where to write log files.
if (configuration.query == NexmarkQueryName.LOG_TO_SHARDED_FILES) {
String path = null;
if (options.getOutputPath() != null && !options.getOutputPath().isEmpty()) {
path = logsDir(now.getMillis());
}
((Query10) query.getTransform()).setOutputPath(path);
((Query10) query.getTransform()).setMaxNumWorkers(maxNumWorkers());
}
// Apply query.
PCollection<TimestampedValue<KnownSize>> results =
    (PCollection<TimestampedValue<KnownSize>>) source.apply(query);
if (options.getAssertCorrectness()) {
if (model == null) {
throw new RuntimeException(String.format("No model for %s", queryName));
}
// We know all our streams have a finite number of elements.
results.setIsBoundedInternal(PCollection.IsBounded.BOUNDED);
// If we have a finite number of events then assert our pipeline's
// results match those of a model using the same sequence of events.
PAssert.that(results).satisfies(model.assertionFor());
}
// Output results.
sink(results, now.getMillis());
}
mainResult = p.run();
mainResult.waitUntilFinish(Duration.standardSeconds(configuration.streamTimeout));
return monitor(query);
} finally {
if (pubsubHelper != null) {
pubsubHelper.cleanup();
pubsubHelper = null;
}
configuration = null;
queryName = null;
}
}
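For reference, a minimal sketch of driving a single run of this launcher. The wiring below is illustrative (the real entry point is Nexmark's Main class, which iterates over a whole suite of configurations), and the choice of query is hypothetical; it assumes the two-argument NexmarkLauncher constructor implied by the fields used in run():
public static void main(String[] args) throws IOException {
  NexmarkOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(NexmarkOptions.class);
  // Illustrative: run just one query against a copy of the default configuration.
  NexmarkConfiguration configuration = NexmarkConfiguration.DEFAULT.copy();
  configuration.query = NexmarkQueryName.HIGHEST_BID; // hypothetical choice of query
  NexmarkPerf perf = new NexmarkLauncher<>(options, configuration).run();
  if (perf != null) {
    NexmarkUtils.console("perf: %s", perf);
  }
}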
Use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
From the class Query13, method expand.
@Override
public PCollection<Event> expand(PCollection<Event> events) {
final Coder<Event> coder = events.getCoder();
return events
    .apply("Pair with random key", ParDo.of(new AssignShardFn<>(configuration.numKeyBuckets)))
    .apply(GroupByKey.create())
    .apply("ExpandIterable", ParDo.of(new DoFn<KV<Integer, Iterable<Event>>, Event>() {
      @ProcessElement
      public void processElement(
          @Element KV<Integer, Iterable<Event>> element, OutputReceiver<Event> r) {
        // Flatten each grouped bundle back into individual events.
        for (Event value : element.getValue()) {
          r.output(value);
        }
      }
    }))
    .apply(name + ".Serialize", ParDo.of(new DoFn<Event, Event>() {
      private final Counter bytesMetric = Metrics.counter(name, "serde-bytes");
      private final Random random = new Random();
      // An out-of-range factor falls back to 1.0, i.e. every element is serialized.
      private double pardoCPUFactor =
          (configuration.pardoCPUFactor >= 0.0 && configuration.pardoCPUFactor <= 1.0)
              ? configuration.pardoCPUFactor
              : 1.0;

      @ProcessElement
      public void processElement(ProcessContext c) throws CoderException, IOException {
        Event event;
        if (random.nextDouble() <= pardoCPUFactor) {
          // Burn CPU by round-tripping the event through its coder.
          event = encodeDecode(coder, c.element(), bytesMetric);
        } else {
          event = c.element();
        }
        c.output(event);
      }
    }));
}
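The encodeDecode helper called above is defined elsewhere in Query13. A plausible shape for it, assuming it simply round-trips the event through the coder and counts the encoded bytes:
private static Event encodeDecode(Coder<Event> coder, Event event, Counter bytesMetric)
    throws IOException {
  // Encode to a byte array, record its size, then decode it back.
  ByteArrayOutputStream outStream = new ByteArrayOutputStream();
  coder.encode(event, outStream);
  byte[] encoded = outStream.toByteArray();
  bytesMetric.inc((long) encoded.length);
  return coder.decode(new ByteArrayInputStream(encoded));
}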
Use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
From the class Query3, method expand.
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> events) {
PCollection<KV<Long, Event>> auctionsBySellerId =
    events
        .apply(NexmarkQueryUtil.JUST_NEW_AUCTIONS)
        .apply(name + ".InCategory", Filter.by(auction -> auction.category == 10))
        .apply("EventByAuctionSeller", ParDo.of(new DoFn<Auction, KV<Long, Event>>() {
          @ProcessElement
          public void processElement(ProcessContext c) {
            // Wrap the auction back into an Event, keyed by seller id.
            Event e = new Event();
            e.newAuction = c.element();
            c.output(KV.of(c.element().seller, e));
          }
        }));
PCollection<KV<Long, Event>> personsById =
    events
        .apply(NexmarkQueryUtil.JUST_NEW_PERSONS)
        .apply(name + ".InState", Filter.by(person ->
            "OR".equals(person.state) || "ID".equals(person.state) || "CA".equals(person.state)))
        .apply("EventByPersonId", ParDo.of(new DoFn<Person, KV<Long, Event>>() {
          @ProcessElement
          public void processElement(ProcessContext c) {
            // Wrap the person back into an Event, keyed by person id.
            Event e = new Event();
            e.newPerson = c.element();
            c.output(KV.of(c.element().id, e));
          }
        }));
// Join auctions and people, then project out the fields of interest.
return PCollectionList.of(auctionsBySellerId)
    .and(personsById)
    .apply(Flatten.pCollections())
    .apply(name + ".Join", ParDo.of(joinDoFn))
    .apply(name + ".Project", ParDo.of(new DoFn<KV<Auction, Person>, NameCityStateId>() {
      @ProcessElement
      public void processElement(ProcessContext c) {
        Auction auction = c.element().getKey();
        Person person = c.element().getValue();
        c.output(new NameCityStateId(person.name, person.city, person.state, auction.id));
      }
    }));
}
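NexmarkQueryUtil.JUST_NEW_AUCTIONS and JUST_NEW_PERSONS are shared transforms defined outside this excerpt; conceptually each one filters the unified Event stream down to a single branch and unwraps it. A rough sketch of the auction side, using only the Event fields visible above (the real transform is a named PTransform, not inline code):
// Illustrative only: select events carrying a new auction and unwrap them.
PCollection<Auction> newAuctions =
    events
        .apply("IsNewAuction", Filter.by(event -> event.newAuction != null))
        .apply(
            "AsAuction",
            MapElements.into(TypeDescriptor.of(Auction.class)).via(event -> event.newAuction));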
Use of org.apache.beam.sdk.nexmark.model.Event in project beam by apache.
From the class UnboundedEventSourceTest, method resumeFromCheckpoint.
/**
* Check that aggressively checkpointing and resuming a reader gives exactly the same event
* stream as reading directly.
*/
@Ignore("TODO(BEAM-5070) Test is flaky. Fix before reenabling.")
@Test
public void resumeFromCheckpoint() throws IOException {
Random random = new Random(297);
int n = 47293;
GeneratorConfig config = makeConfig(n);
Generator modelGenerator = new Generator(config);
EventIdChecker checker = new EventIdChecker();
PipelineOptions options = TestPipeline.testingPipelineOptions();
UnboundedEventSource source = new UnboundedEventSource(config, 1, 0, false);
UnboundedReader<Event> reader = source.createReader(options, null);
while (n > 0) {
int m = Math.min(459 + random.nextInt(455), n);
System.out.printf("reading %d...%n", m);
checker.add(m, reader, modelGenerator);
n -= m;
System.out.printf("splitting with %d remaining...%n", n);
CheckpointMark checkpointMark = reader.getCheckpointMark();
reader = source.createReader(options, (GeneratorCheckpoint) checkpointMark);
}
assertFalse(reader.advance());
}
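makeConfig and EventIdChecker are test-local helpers not shown in this excerpt. A guess at makeConfig's shape, assuming a GeneratorConfig constructor that caps the event count (the exact argument list is an assumption):
private GeneratorConfig makeConfig(long n) {
  // Assumed: default configuration, base time of "now", first event id/number 0, n events total.
  return new GeneratorConfig(NexmarkConfiguration.DEFAULT, System.currentTimeMillis(), 0, n, 0);
}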