Use of org.apache.beam.sdk.nexmark.model.Done in project beam by apache.
The expand method of the class Query10.
/**
 * Shards the incoming events, writes each fired pane of each shard to a file, and finally writes
 * one index per window listing the files that were produced.
 *
 * <p>Stages, in the order they are applied:
 *
 * <ol>
 *   <li>{@code ShardEvents}: keys every event by a shard name derived from its hash code and
 *       counts events with and without the {@code "LATE"} annotation.
 *   <li>{@code WindowEvents} and {@code GroupByKey}: fixed windows of
 *       {@code configuration.windowSizeSec} seconds, one day of allowed lateness, discarding
 *       fired panes, grouped per shard.
 *   <li>{@code CheckForLateEvents}: counts panes by timing and logs panes whose contents do not
 *       match their timing; every pane is passed through unchanged.
 *   <li>{@code UploadEvents}: encodes the events of each pane into the file chosen by
 *       {@code outputFileFor} (skipped when that file has no name) and emits the file record
 *       under a {@code null} key.
 *   <li>{@code WindowLogFiles} and {@code GroupByKey2}: re-windows the file records with the
 *       default trigger and groups them together.
 *   <li>{@code Index}: for panes that are not flagged as unexpected, writes the file records to
 *       the path given by {@code indexPathFor} and emits a {@link Done}.
 * </ol>
 *
 * @param events the events to log
 * @return one {@link Done} per successfully indexed pane
 */
@Override
public PCollection<Done> expand(PCollection<Event> events) {
  final int numLogShards = maxNumWorkers * NUM_SHARDS_PER_WORKER;
  return events
      .apply(
          name + ".ShardEvents",
          ParDo.of(
              new DoFn<Event, KV<String, Event>>() {
                private final Counter lateCounter = Metrics.counter(name, "actuallyLateEvent");
                private final Counter onTimeCounter = Metrics.counter(name, "onTimeCounter");

                @ProcessElement
                public void processElement(ProcessContext c) {
                  if (c.element().hasAnnotation("LATE")) {
                    lateCounter.inc();
                    LOG.debug("Observed late: {}", c.element());
                  } else {
                    onTimeCounter.inc();
                  }
                  // The remainder is negative for negative hash codes; Math.abs maps it into
                  // the range [0, numLogShards).
                  int shardNum = (int) Math.abs((long) c.element().hashCode() % numLogShards);
                  String shard = String.format("shard-%05d-of-%05d", shardNum, numLogShards);
                  c.output(KV.of(shard, c.element()));
                }
              }))
      .apply(
          name + ".WindowEvents",
          Window.<KV<String, Event>>into(
                  FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(
                  AfterEach.inOrder(
                      // Until the watermark passes the end of the window: fire whenever
                      // maxLogEvents elements have accumulated.
                      Repeatedly.forever(
                              AfterPane.elementCountAtLeast(configuration.maxLogEvents))
                          .orFinally(AfterWatermark.pastEndOfWindow()),
                      // Afterwards: fire on maxLogEvents elements or once
                      // LATE_BATCHING_PERIOD has elapsed since the first element in the pane,
                      // whichever comes first.
                      Repeatedly.forever(
                          AfterFirst.of(
                              AfterPane.elementCountAtLeast(configuration.maxLogEvents),
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(LATE_BATCHING_PERIOD)))))
              .discardingFiredPanes()
              .withAllowedLateness(Duration.standardDays(1)))
      .apply(name + ".GroupByKey", GroupByKey.create())
      .apply(
          name + ".CheckForLateEvents",
          ParDo.of(
              new DoFn<KV<String, Iterable<Event>>, KV<String, Iterable<Event>>>() {
                private final Counter earlyCounter = Metrics.counter(name, "earlyShard");
                private final Counter onTimeCounter = Metrics.counter(name, "onTimeShard");
                private final Counter lateCounter = Metrics.counter(name, "lateShard");
                private final Counter unexpectedLatePaneCounter =
                    Metrics.counter(name, "ERROR_unexpectedLatePane");
                private final Counter unexpectedOnTimeElementCounter =
                    Metrics.counter(name, "ERROR_unexpectedOnTimeElement");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window) {
                  int numLate = 0;
                  int numOnTime = 0;
                  for (Event event : c.element().getValue()) {
                    if (event.hasAnnotation("LATE")) {
                      numLate++;
                    } else {
                      numOnTime++;
                    }
                  }
                  String shard = c.element().getKey();
                  // Parameterized logging: the message is only rendered when DEBUG is enabled.
                  LOG.debug(
                      "{} with timestamp {} has {} actually late and {} on-time "
                          + "elements in pane {} for window {}",
                      shard,
                      c.timestamp(),
                      numLate,
                      numOnTime,
                      c.pane(),
                      window.maxTimestamp());
                  PaneInfo.Timing timing = c.pane().getTiming();
                  if (timing == PaneInfo.Timing.LATE) {
                    if (numLate == 0) {
                      LOG.error("ERROR! No late events in late pane for {}", shard);
                      unexpectedLatePaneCounter.inc();
                    }
                    if (numOnTime > 0) {
                      LOG.error(
                          "ERROR! Have {} on-time events in late pane for {}", numOnTime, shard);
                      unexpectedOnTimeElementCounter.inc();
                    }
                    lateCounter.inc();
                  } else if (timing == PaneInfo.Timing.EARLY) {
                    int numEvents = numOnTime + numLate;
                    if (numEvents < configuration.maxLogEvents) {
                      LOG.error(
                          "ERROR! Only have {} events in early pane for {}", numEvents, shard);
                    }
                    earlyCounter.inc();
                  } else {
                    onTimeCounter.inc();
                  }
                  // The pane is forwarded unchanged regardless of the checks above.
                  c.output(c.element());
                }
              }))
      .apply(
          name + ".UploadEvents",
          ParDo.of(
              new DoFn<KV<String, Iterable<Event>>, KV<Void, OutputFile>>() {
                private final Counter savedFileCounter = Metrics.counter(name, "savedFile");
                private final Counter writtenRecordsCounter =
                    Metrics.counter(name, "writtenRecords");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window)
                    throws IOException {
                  String shard = c.element().getKey();
                  GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                  OutputFile outputFile = outputFileFor(window, shard, c.pane());
                  // Parameterized logging: the message is only rendered when DEBUG is enabled.
                  LOG.debug(
                      "Writing {} with record timestamp {}, window timestamp {}, pane {}",
                      shard,
                      c.timestamp(),
                      window.maxTimestamp(),
                      c.pane());
                  if (outputFile.filename != null) {
                    LOG.info("Beginning write to '{}'", outputFile.filename);
                    int n = 0;
                    try (OutputStream output =
                        Channels.newOutputStream(
                            openWritableGcsFile(options, outputFile.filename))) {
                      for (Event event : c.element().getValue()) {
                        Event.CODER.encode(event, output, Coder.Context.OUTER);
                        writtenRecordsCounter.inc();
                        // Progress message every 10000 records.
                        if (++n % 10000 == 0) {
                          LOG.info("So far written {} records to '{}'", n, outputFile.filename);
                        }
                      }
                    }
                    LOG.info("Written all {} records to '{}'", n, outputFile.filename);
                  }
                  // The file record is counted and emitted even when nothing was written.
                  savedFileCounter.inc();
                  c.output(KV.of(null, outputFile));
                }
              }))
      .apply(
          name + ".WindowLogFiles",
          Window.<KV<Void, OutputFile>>into(
                  FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec)))
              .triggering(DefaultTrigger.of())
              .withAllowedLateness(Duration.standardDays(1))
              .discardingFiredPanes())
      .apply(name + ".GroupByKey2", GroupByKey.create())
      .apply(
          name + ".Index",
          ParDo.of(
              new DoFn<KV<Void, Iterable<OutputFile>>, Done>() {
                private final Counter unexpectedLateCounter =
                    Metrics.counter(name, "ERROR_unexpectedLate");
                private final Counter unexpectedEarlyCounter =
                    Metrics.counter(name, "ERROR_unexpectedEarly");
                private final Counter unexpectedIndexCounter =
                    Metrics.counter(name, "ERROR_unexpectedIndex");
                private final Counter finalizedCounter = Metrics.counter(name, "indexed");

                @ProcessElement
                public void processElement(ProcessContext c, BoundedWindow window)
                    throws IOException {
                  Timing timing = c.pane().getTiming();
                  // Unexpected panes are counted and logged; nothing is written or emitted
                  // for them.
                  if (timing == Timing.LATE) {
                    unexpectedLateCounter.inc();
                    LOG.error("ERROR! Unexpected LATE pane: {}", c.pane());
                  } else if (timing == Timing.EARLY) {
                    unexpectedEarlyCounter.inc();
                    LOG.error("ERROR! Unexpected EARLY pane: {}", c.pane());
                  } else if (timing == Timing.ON_TIME && c.pane().getIndex() != 0) {
                    unexpectedIndexCounter.inc();
                    LOG.error("ERROR! Unexpected ON_TIME pane index: {}", c.pane());
                  } else {
                    GcsOptions options = c.getPipelineOptions().as(GcsOptions.class);
                    LOG.debug(
                        "Index with record timestamp {}, window timestamp {}, pane {}",
                        c.timestamp(),
                        window.maxTimestamp(),
                        c.pane());
                    @Nullable String filename = indexPathFor(window);
                    if (filename != null) {
                      LOG.info("Beginning write to '{}'", filename);
                      int n = 0;
                      try (OutputStream output =
                          Channels.newOutputStream(openWritableGcsFile(options, filename))) {
                        for (OutputFile outputFile : c.element().getValue()) {
                          output.write(outputFile.toString().getBytes(StandardCharsets.UTF_8));
                          n++;
                        }
                      }
                      LOG.info("Written all {} lines to '{}'", n, filename);
                    }
                    c.output(new Done("written for timestamp " + window.maxTimestamp()));
                    finalizedCounter.inc();
                  }
                }
              }));
}
Aggregations