use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera.
the class FlattenStreamingTest method testRun.
/**
 * Builds two queue-backed streams, places each into one-second fixed windows,
 * flattens them into a single collection and asserts that the union matches
 * {@code EXPECTED_UNION} in any order. The pipeline is run through
 * {@link SparkPipelineRunner}; the result is closed before failures are checked.
 */
@Test
public void testRun() throws Exception {
    final SparkStreamingPipelineOptions streamingOptions =
        SparkStreamingPipelineOptionsFactory.create();
    streamingOptions.setAppName(getClass().getSimpleName());
    streamingOptions.setRunner(SparkPipelineRunner.class);
    // run for one interval
    streamingOptions.setTimeout(TEST_TIMEOUT_MSEC);

    final Pipeline pipeline = Pipeline.create(streamingOptions);

    // First input stream, windowed.
    final PCollection<String> firstWindowed = pipeline
        .apply(CreateStream.fromQueue(WORDS_QUEUE_1))
        .setCoder(StringUtf8Coder.of())
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))));

    // Second input stream, windowed the same way.
    final PCollection<String> secondWindowed = pipeline
        .apply(CreateStream.fromQueue(WORDS_QUEUE_2))
        .setCoder(StringUtf8Coder.of())
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))));

    final PCollection<String> flattened = PCollectionList.of(firstWindowed)
        .and(secondWindowed)
        .apply(Flatten.<String>pCollections());

    DataflowAssert
        .thatIterable(flattened.apply(View.<String>asIterable()))
        .containsInAnyOrder(EXPECTED_UNION);

    final EvaluationResult result = SparkPipelineRunner.create(streamingOptions).run(pipeline);
    result.close();
    DataflowAssertStreaming.assertNoFailures(result);
}
use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera.
the class SimpleStreamingWordCountTest method testRun.
/**
 * Feeds a queue-backed stream of words through one-second fixed windows and
 * {@code SimpleWordCountTest.CountWords}, then asserts that the output matches
 * {@code EXPECTED_COUNT_SET} in any order. The result is closed before
 * failures are checked.
 */
@Test
public void testRun() throws Exception {
    final SparkStreamingPipelineOptions streamingOptions =
        SparkStreamingPipelineOptionsFactory.create();
    streamingOptions.setAppName(getClass().getSimpleName());
    streamingOptions.setRunner(SparkPipelineRunner.class);
    // run for one interval
    streamingOptions.setTimeout(TEST_TIMEOUT_MSEC);

    final Pipeline pipeline = Pipeline.create(streamingOptions);

    final PCollection<String> counted = pipeline
        .apply(CreateStream.fromQueue(WORDS_QUEUE))
        .setCoder(StringUtf8Coder.of())
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))))
        .apply(new SimpleWordCountTest.CountWords());

    DataflowAssert
        .thatIterable(counted.apply(View.<String>asIterable()))
        .containsInAnyOrder(EXPECTED_COUNT_SET);

    final EvaluationResult result = SparkPipelineRunner.create(streamingOptions).run(pipeline);
    result.close();
    DataflowAssertStreaming.assertNoFailures(result);
}
use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera.
the class SideEffectsTest method test.
/**
 * Runs a pipeline whose only {@code DoFn} throws {@code UserException} for every
 * element and checks how that failure reaches the caller of {@code pipeline.run()}.
 *
 * <p>The test expects {@code run()} to throw a {@link RuntimeException} with a
 * non-null cause. Unless the Spark version starts with {@code 1.3.} or
 * {@code 1.4.}, the cause must also be an instance of {@code UserException}.
 *
 * @throws Exception declared for the test body; not expected in a passing run
 */
@Test
public void test() throws Exception {
    SparkPipelineOptions options = SparkPipelineOptionsFactory.create();
    options.setRunner(SparkPipelineRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

    // A single-element input whose processing always fails with a user-defined exception.
    pipeline.apply(Create.of("a")).apply(ParDo.of(new DoFn<String, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
            throw new UserException();
        }
    }));

    try {
        pipeline.run();
        // Reaching this line means the failing DoFn did not abort the run.
        fail("Run should have thrown an exception");
    } catch (RuntimeException e) {
        assertNotNull(e.getCause());
        // TODO: remove the version check (and the setup and teardown methods) when we no
        // longer support Spark 1.3 or 1.4
        String version = SparkContextFactory
            .getSparkContext(options.getSparkMaster(), options.getAppName())
            .version();
        if (!version.startsWith("1.3.") && !version.startsWith("1.4.")) {
            assertTrue(e.getCause() instanceof UserException);
        }
    }
}
use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera.
the class SimpleWordCountTest method testRun.
/**
 * Counts the words in {@code WORDS} with {@code CountWords} and asserts that
 * the output matches {@code EXPECTED_COUNT_SET} in any order. The pipeline is
 * executed with a default {@link SparkPipelineRunner} and the result is closed.
 */
@Test
public void testRun() throws Exception {
    final SparkPipelineOptions sparkOptions = SparkPipelineOptionsFactory.create();
    sparkOptions.setRunner(SparkPipelineRunner.class);

    final Pipeline pipeline = Pipeline.create(sparkOptions);

    final PCollection<String> counted = pipeline
        .apply(Create.of(WORDS))
        .setCoder(StringUtf8Coder.of())
        .apply(new CountWords());

    DataflowAssert.that(counted).containsInAnyOrder(EXPECTED_COUNT_SET);

    // Run and immediately release the evaluation result.
    SparkPipelineRunner.create().run(pipeline).close();
}
use of com.google.cloud.dataflow.sdk.Pipeline in project spark-dataflow by cloudera.
the class TfIdfTest method testTfIdf.
/**
 * Applies {@code TfIdf.ComputeTfIdf} to three small URI-keyed documents and
 * asserts that the distinct words in the result are exactly
 * a, b, c, d, m and n (in any order). The pipeline is executed with a default
 * {@link SparkPipelineRunner} and the result is closed.
 */
@Test
public void testTfIdf() throws Exception {
    final Pipeline tfIdfPipeline = Pipeline.create(PipelineOptionsFactory.create());
    tfIdfPipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

    final PCollection<KV<String, KV<URI, Double>>> scores = tfIdfPipeline
        .apply(Create.of(
            KV.of(new URI("x"), "a b c d"),
            KV.of(new URI("y"), "a b c"),
            KV.of(new URI("z"), "a m n")))
        .apply(new TfIdf.ComputeTfIdf());

    // Reduce the scored output to its distinct set of words.
    final PCollection<String> allKeys = scores.apply(Keys.<String>create());
    final PCollection<String> distinctWords = allKeys.apply(RemoveDuplicates.<String>create());

    DataflowAssert.that(distinctWords)
        .containsInAnyOrder(Arrays.asList("a", "m", "n", "b", "c", "d"));

    final EvaluationResult result = SparkPipelineRunner.create().run(tfIdfPipeline);
    result.close();
}
Aggregations