Usage example of com.cloudera.dataflow.spark.EvaluationResult in the spark-dataflow project by Cloudera:
the testRun method of the FlattenStreamingTest class.
@Test
public void testRun() throws Exception {
SparkStreamingPipelineOptions options = SparkStreamingPipelineOptionsFactory.create();
options.setAppName(this.getClass().getSimpleName());
options.setRunner(SparkPipelineRunner.class);
// run for one interval
options.setTimeout(TEST_TIMEOUT_MSEC);
Pipeline p = Pipeline.create(options);
PCollection<String> w1 = p.apply(CreateStream.fromQueue(WORDS_QUEUE_1)).setCoder(StringUtf8Coder.of());
PCollection<String> windowedW1 = w1.apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))));
PCollection<String> w2 = p.apply(CreateStream.fromQueue(WORDS_QUEUE_2)).setCoder(StringUtf8Coder.of());
PCollection<String> windowedW2 = w2.apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))));
PCollectionList<String> list = PCollectionList.of(windowedW1).and(windowedW2);
PCollection<String> union = list.apply(Flatten.<String>pCollections());
DataflowAssert.thatIterable(union.apply(View.<String>asIterable())).containsInAnyOrder(EXPECTED_UNION);
EvaluationResult res = SparkPipelineRunner.create(options).run(p);
res.close();
DataflowAssertStreaming.assertNoFailures(res);
}
Usage example of com.cloudera.dataflow.spark.EvaluationResult in the spark-dataflow project by Cloudera:
the testRun method of the SimpleStreamingWordCountTest class.
@Test
public void testRun() throws Exception {
SparkStreamingPipelineOptions options = SparkStreamingPipelineOptionsFactory.create();
options.setAppName(this.getClass().getSimpleName());
options.setRunner(SparkPipelineRunner.class);
// run for one interval
options.setTimeout(TEST_TIMEOUT_MSEC);
Pipeline p = Pipeline.create(options);
PCollection<String> inputWords = p.apply(CreateStream.fromQueue(WORDS_QUEUE)).setCoder(StringUtf8Coder.of());
PCollection<String> windowedWords = inputWords.apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))));
PCollection<String> output = windowedWords.apply(new SimpleWordCountTest.CountWords());
DataflowAssert.thatIterable(output.apply(View.<String>asIterable())).containsInAnyOrder(EXPECTED_COUNT_SET);
EvaluationResult res = SparkPipelineRunner.create(options).run(p);
res.close();
DataflowAssertStreaming.assertNoFailures(res);
}
Usage example of com.cloudera.dataflow.spark.EvaluationResult in the spark-dataflow project by Cloudera:
the testRun method of the KafkaStreamingTest class.
@Test
public void testRun() throws Exception {
// test read from Kafka
SparkStreamingPipelineOptions options = SparkStreamingPipelineOptionsFactory.create();
options.setAppName(this.getClass().getSimpleName());
options.setRunner(SparkPipelineRunner.class);
// run for one interval
options.setTimeout(TEST_TIMEOUT_MSEC);
Pipeline p = Pipeline.create(options);
Map<String, String> kafkaParams = ImmutableMap.of("metadata.broker.list", EMBEDDED_KAFKA_CLUSTER.getBrokerList(), "auto.offset.reset", "smallest");
PCollection<KV<String, String>> kafkaInput = p.apply(KafkaIO.Read.from(StringDecoder.class, StringDecoder.class, String.class, String.class, Collections.singleton(TOPIC), kafkaParams));
PCollection<KV<String, String>> windowedWords = kafkaInput.apply(Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(1))));
PCollection<String> formattedKV = windowedWords.apply(ParDo.of(new FormatKVFn()));
DataflowAssert.thatIterable(formattedKV.apply(View.<String>asIterable())).containsInAnyOrder(EXPECTED);
EvaluationResult res = SparkPipelineRunner.create(options).run(p);
res.close();
DataflowAssertStreaming.assertNoFailures(res);
}
Aggregations