use of org.apache.beam.sdk.PipelineResult in project beam by apache.
the class AutoComplete method runAutocompletePipeline.
/**
 * Builds and runs the autocomplete example pipeline.
 *
 * <p>Lines are read from {@code options.getInputFile()}, passed through {@code ExtractHashtags},
 * windowed, and reduced to the top 10 completions. Depending on the options, the result is
 * written to Datastore, written to BigQuery, and/or folded into a single checksum that is
 * asserted against {@code options.getExpectedChecksum()}.
 *
 * @param options pipeline options selecting the input, the windowing mode and the enabled sinks
 * @throws IOException declared by the signature; the raising call is not visible in this method
 */
public static void runAutocompletePipeline(Options options) throws IOException {
  options.setBigQuerySchema(FormatForBigquery.getSchema());
  ExampleUtils exampleUtils = new ExampleUtils(options);

  // The same pipeline can run in batch mode (one global window) or in
  // streaming mode (sliding windows).
  WindowFn<Object, ?> windowFn;
  if (!options.isStreaming()) {
    windowFn = new GlobalWindows();
  } else {
    checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
    Duration windowSize = Duration.standardMinutes(30);
    Duration slidePeriod = Duration.standardSeconds(5);
    windowFn = SlidingWindows.of(windowSize).every(slidePeriod);
  }

  // Assemble the core of the pipeline: read, extract, window, rank.
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, List<CompletionCandidate>>> completions =
      pipeline
          .apply(TextIO.read().from(options.getInputFile()))
          .apply(ParDo.of(new ExtractHashtags()))
          .apply(Window.into(windowFn))
          .apply(ComputeTopCompletions.top(10, options.getRecursive()));

  // Optional sink: Datastore.
  if (options.getOutputToDatastore()) {
    completions
        .apply(
            "FormatForDatastore",
            ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
        .apply(
            DatastoreIO.v1()
                .write()
                .withProjectId(
                    MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
  }

  // Optional sink: BigQuery.
  if (options.getOutputToBigQuery()) {
    exampleUtils.setupBigQueryTable();

    TableReference outputTable = new TableReference();
    outputTable.setProjectId(options.getProject());
    outputTable.setDatasetId(options.getBigQueryDataset());
    outputTable.setTableId(options.getBigQueryTable());

    // Streaming runs append to the table; batch runs replace its contents.
    BigQueryIO.Write.WriteDisposition writeDisposition;
    if (options.isStreaming()) {
      writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
    } else {
      writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
    }

    completions
        .apply(ParDo.of(new FormatForBigquery()))
        .apply(
            BigQueryIO.writeTableRows()
                .to(outputTable)
                .withSchema(FormatForBigquery.getSchema())
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(writeDisposition));
  }

  // Optional check: sum a per-element hash over the whole output and compare it
  // with the expected value.
  if (options.getOutputToChecksum()) {
    DoFn<KV<String, List<CompletionCandidate>>, Long> hashFn =
        new DoFn<KV<String, List<CompletionCandidate>>, Long>() {
          @ProcessElement
          public void process(ProcessContext c) {
            KV<String, List<CompletionCandidate>> entry = c.element();
            // Element hash = hash of the key plus the sum of the candidates' hashes.
            long candidateHashes = 0L;
            for (CompletionCandidate candidate : entry.getValue()) {
              candidateHashes += candidate.hashCode();
            }
            c.output(entry.getKey().hashCode() + candidateHashes);
          }
        };
    PCollection<Long> checksum = completions.apply(ParDo.of(hashFn)).apply(Sum.longsGlobally());
    PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
  }

  // Run the pipeline.
  PipelineResult result = pipeline.run();
  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
use of org.apache.beam.sdk.PipelineResult in project beam by apache.
the class TrafficMaxLaneFlow method runTrafficMaxLaneFlow.
/**
 * Builds and runs the traffic max-lane-flow example pipeline.
 *
 * <p>Input is read with {@code ReadFileAndExtractTimestamps}, converted by
 * {@code ExtractFlowInfoFn}, placed into sliding windows whose size and period (in minutes) come
 * from the options, reduced by {@code MaxLaneFlow}, and written as table rows to the BigQuery
 * table named by the options.
 *
 * @param options pipeline options providing the input file, window settings and output table
 * @throws IOException declared by the signature; the raising call is not visible in this method
 */
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
  // ExampleUtils sets up the resources this example requires.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);

  // Destination table for the results.
  TableReference outputTable = new TableReference();
  outputTable.setProjectId(options.getProject());
  outputTable.setDatasetId(options.getBigQueryDataset());
  outputTable.setTableId(options.getBigQueryTable());

  // Sliding windows: both the window size and the slide period are given in minutes.
  Duration windowSize = Duration.standardMinutes(options.getWindowDuration());
  Duration slidePeriod = Duration.standardMinutes(options.getWindowSlideEvery());
  SlidingWindows slidingWindows = SlidingWindows.of(windowSize).every(slidePeriod);

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      .apply(ParDo.of(new ExtractFlowInfoFn()))
      .apply(Window.into(slidingWindows))
      .apply(new MaxLaneFlow())
      .apply(BigQueryIO.writeTableRows().to(outputTable).withSchema(FormatMaxesFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();
  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
use of org.apache.beam.sdk.PipelineResult in project beam by apache.
the class StreamingWordExtract method main.
/**
 * Sets up and starts the streaming word-extraction pipeline.
 *
 * <p>The options are parsed from {@code args} and forced into streaming mode. Lines read from
 * {@code options.getInputFile()} pass through {@code ExtractWords}, {@code Uppercase} and
 * {@code StringToRowConverter}, and the resulting rows are written to the BigQuery table
 * {@code <project>:<dataset>.<table>} taken from the options.
 *
 * @param args command-line arguments parsed into {@code StreamingWordExtractOptions}
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
  StreamingWordExtractOptions options =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(StreamingWordExtractOptions.class);
  options.setStreaming(true);
  options.setBigQuerySchema(StringToRowConverter.getSchema());

  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);

  // Table spec has the form "<project>:<dataset>.<table>".
  String tableSpec =
      options.getProject() + ":" + options.getBigQueryDataset() + "." + options.getBigQueryTable();

  pipeline
      .apply("ReadLines", TextIO.read().from(options.getInputFile()))
      .apply(ParDo.of(new ExtractWords()))
      .apply(ParDo.of(new Uppercase()))
      .apply(ParDo.of(new StringToRowConverter()))
      .apply(
          BigQueryIO.writeTableRows().to(tableSpec).withSchema(StringToRowConverter.getSchema()));

  PipelineResult result = pipeline.run();
  // ExampleUtils will try to cancel the pipeline before the program exits.
  exampleUtils.waitToFinish(result);
}
use of org.apache.beam.sdk.PipelineResult in project beam by apache.
the class DirectRunnerTest method tearsDownFnsBeforeFinishing.
/**
 * Verifies that a {@code DoFn}'s {@code @Teardown} method has finished by the time
 * {@code waitUntilFinish()} returns.
 *
 * <p>The teardown sleeps for one second and then records {@code System.nanoTime()} in
 * {@code TEARDOWN_CALL}; the test compares that timestamp with one taken right after the
 * pipeline finished.
 */
@Test
public void tearsDownFnsBeforeFinishing() {
  // Sentinel: stays at -1 if teardown never runs.
  TEARDOWN_CALL.set(-1);
  final Pipeline pipeline = getPipeline();
  pipeline
      .apply(Create.of("a"))
      .apply(
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void onElement(final ProcessContext ctx) {
                  // no-op
                }

                @Teardown
                public void teardown() {
                  // Delay the callback so that a run which returns without waiting for
                  // teardown is caught by the timestamp comparison below.
                  try {
                    Thread.sleep(1000);
                  } catch (final InterruptedException e) {
                    // Restore the interrupt flag before failing, so the interruption
                    // stays visible to the calling thread.
                    Thread.currentThread().interrupt();
                    throw new AssertionError(e);
                  }
                  TEARDOWN_CALL.set(System.nanoTime());
                }
              }));
  final PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();
  final long doneTs = System.nanoTime();
  final long tearDownTs = TEARDOWN_CALL.get();
  // NOTE(review): this assumes nanoTime() is positive on the test JVM; the JDK only
  // guarantees an arbitrary origin, so the -1 sentinel check is not strictly portable.
  assertThat(tearDownTs, greaterThan(0L));
  // nanoTime() values must be compared through their difference to be overflow-safe.
  assertThat(doneTs - tearDownTs, greaterThan(0L));
}
use of org.apache.beam.sdk.PipelineResult in project beam by apache.
the class DirectRunnerTest method cancelShouldStopPipeline.
/**
 * Verifies that cancelling a running pipeline lets {@code waitUntilFinish()} return.
 *
 * <p>The pipeline generates a sequence starting at 0 at a rate of one element per second and is
 * run with {@code blockOnRun} disabled. One task runs the pipeline and hands the
 * {@link PipelineResult} to a second task through a queue; the second task cancels it.
 *
 * @throws Exception if either task fails or the waiting thread is interrupted
 */
@Test
public void cancelShouldStopPipeline() throws Exception {
  PipelineOptions opts = TestPipeline.testingPipelineOptions();
  opts.as(DirectOptions.class).setBlockOnRun(false);
  opts.setRunner(DirectRunner.class);
  final Pipeline p = Pipeline.create(opts);
  p.apply(GenerateSequence.from(0).withRate(1L, Duration.standardSeconds(1)));

  // Hands the result from the running task to the cancelling task.
  final BlockingQueue<PipelineResult> resultExchange = new ArrayBlockingQueue<>(1);
  Runnable cancelRunnable =
      () -> {
        try {
          resultExchange.take().cancel();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IllegalStateException(e);
        } catch (IOException e) {
          throw new IllegalStateException(e);
        }
      };
  Callable<PipelineResult> runPipelineRunnable =
      () -> {
        PipelineResult res = p.run();
        try {
          resultExchange.put(res);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IllegalStateException(e);
        }
        return res;
      };

  ExecutorService executor = Executors.newCachedThreadPool();
  try {
    Future<?> cancelResult = executor.submit(cancelRunnable);
    Future<PipelineResult> result = executor.submit(runPipelineRunnable);
    cancelResult.get();
    // If cancel doesn't work, this will hang forever
    result.get().waitUntilFinish();
  } finally {
    // Always release the pool's worker threads, including when a task failed.
    executor.shutdownNow();
  }
}
Aggregations