Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project spark-dataflow by cloudera.
From the class StreamingTransformTranslator, method kafka().
private static <K, V> TransformEvaluator<KafkaIO.Read.Unbound<K, V>> kafka() {
  return new TransformEvaluator<KafkaIO.Read.Unbound<K, V>>() {
    @Override
    public void evaluate(KafkaIO.Read.Unbound<K, V> transform, EvaluationContext context) {
      StreamingEvaluationContext sec = (StreamingEvaluationContext) context;
      JavaStreamingContext jssc = sec.getStreamingContext();
      Class<K> keyClazz = transform.getKeyClass();
      Class<V> valueClazz = transform.getValueClass();
      Class<? extends Decoder<K>> keyDecoderClazz = transform.getKeyDecoderClass();
      Class<? extends Decoder<V>> valueDecoderClazz = transform.getValueDecoderClass();
      Map<String, String> kafkaParams = transform.getKafkaParams();
      Set<String> topics = transform.getTopics();
      // Create a direct (receiver-less) Kafka stream of raw key/value pairs.
      JavaPairInputDStream<K, V> inputPairStream = KafkaUtils.createDirectStream(
          jssc, keyClazz, valueClazz, keyDecoderClazz, valueDecoderClazz, kafkaParams, topics);
      // Turn each Kafka record into a KV, then wrap it in a WindowedValue.
      JavaDStream<WindowedValue<KV<K, V>>> inputStream = inputPairStream
          .map(new Function<Tuple2<K, V>, KV<K, V>>() {
            @Override
            public KV<K, V> call(Tuple2<K, V> t2) throws Exception {
              return KV.of(t2._1(), t2._2());
            }
          })
          .map(WindowingHelpers.<KV<K, V>>windowFunction());
      sec.setStream(transform, inputStream);
    }
  };
}
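The decoder-based createDirectStream overload used above comes from the Spark 1.x spark-streaming-kafka artifact. A minimal standalone sketch of the same call, specialized to String keys and values; the broker address and topic name are assumptions for illustration:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import kafka.serializer.StringDecoder;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

public final class DirectKafkaSketch {
  // Same style of direct (receiver-less) stream the evaluator builds, for Strings.
  static JavaPairInputDStream<String, String> directStream(JavaStreamingContext jssc) {
    Map<String, String> kafkaParams = new HashMap<>();
    // The direct API takes broker addresses from kafkaParams rather than ZooKeeper.
    kafkaParams.put("metadata.broker.list", "localhost:9092"); // hypothetical broker
    Set<String> topics = Collections.singleton("events"); // hypothetical topic
    return KafkaUtils.createDirectStream(jssc, String.class, String.class,
        StringDecoder.class, StringDecoder.class, kafkaParams, topics);
  }
}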
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project hbase by apache.
From the class JavaHBaseStreamingBulkPutExample, method main().
public static void main(String[] args) {
  if (args.length < 3) {
    System.out.println("JavaHBaseStreamingBulkPutExample {host} {port} {tableName}");
    return;
  }
  String host = args[0];
  String port = args[1];
  String tableName = args[2];
  SparkConf sparkConf = new SparkConf().setAppName(
      "JavaHBaseStreamingBulkPutExample " + host + ":" + port + ":" + tableName);
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);
  try {
    JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000));
    JavaReceiverInputDStream<String> javaDstream =
        jssc.socketTextStream(host, Integer.parseInt(port));
    Configuration conf = HBaseConfiguration.create();
    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
    // Apply PutFunction to each line from the socket and bulk-put the results into HBase.
    hbaseContext.streamBulkPut(javaDstream, TableName.valueOf(tableName), new PutFunction());
    // Nothing runs until the streaming context is started.
    jssc.start();
    jssc.awaitTerminationOrTimeout(60000);
  } finally {
    jsc.stop();
  }
}
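The example passes a PutFunction that is not shown in this snippet. A minimal sketch of such a Function<String, Put>, assuming comma-separated input lines of the form rowKey,columnFamily,qualifier,value (the field layout is an assumption):

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.function.Function;

public static class PutFunction implements Function<String, Put> {
  private static final long serialVersionUID = 1L;

  @Override
  public Put call(String v) throws Exception {
    // Assumed line format: "rowKey,columnFamily,qualifier,value".
    String[] part = v.split(",");
    Put put = new Put(Bytes.toBytes(part[0]));
    put.addColumn(Bytes.toBytes(part[1]), Bytes.toBytes(part[2]), Bytes.toBytes(part[3]));
    return put;
  }
}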
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project learning-spark by databricks.
From the class StreamingLogInput, method main().
public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
  // Create a StreamingContext with a 1 second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
  // Filter our DStream for lines with "error"
  JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
    @Override
    public Boolean call(String line) {
      return line.contains("error");
    }
  });
  // Print out the lines with errors, which causes this DStream to be evaluated
  errorLines.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds, then exit. To run forever, call without a timeout.
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
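On Java 8 and later, the anonymous Function can be written as a lambda, since org.apache.spark.api.java.function.Function is a functional interface. A sketch of the same filter:

import org.apache.spark.streaming.api.java.JavaDStream;

final class ErrorFilter {
  // Equivalent to the anonymous-class filter above, as a lambda.
  static JavaDStream<String> errorLines(JavaDStream<String> lines) {
    return lines.filter(line -> line.contains("error"));
  }
}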
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project deeplearning4j by deeplearning4j.
From the class JavaQueueStream, method main().
public static void main(String[] args) throws Exception {
  // An application name is required; Spark refuses to start without one.
  SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("JavaQueueStream");
  // Create the context
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
  // Create the queue through which RDDs can be pushed to
  // a QueueInputDStream
  Queue<JavaRDD<Integer>> rddQueue = new LinkedList<>();
  // Create and push some RDDs into the queue
  List<Integer> list = Lists.newArrayList();
  for (int i = 0; i < 1000; i++) {
    list.add(i);
  }
  for (int i = 0; i < 30; i++) {
    rddQueue.add(ssc.sparkContext().parallelize(list));
  }
  // Create the QueueInputDStream and use it to do some processing
  JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
  JavaPairDStream<Integer, Integer> mappedStream =
      inputStream.mapToPair(new PairFunction<Integer, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(Integer i) {
          return new Tuple2<>(i % 10, 1);
        }
      });
  JavaPairDStream<Integer, Integer> reducedStream =
      mappedStream.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
      });
  reducedStream.print();
  ssc.start();
  ssc.awaitTermination();
}
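The single-argument queueStream used here defaults to consuming one RDD per batch interval. JavaStreamingContext also exposes an overload with an explicit oneAtATime flag; a short sketch, with the flag semantics noted in the comments:

import java.util.Queue;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

final class QueueStreamModes {
  // oneAtATime = true: each batch interval dequeues a single RDD.
  // oneAtATime = false: all RDDs queued so far are merged into one batch.
  static JavaDStream<Integer> oneRddPerBatch(JavaStreamingContext ssc,
      Queue<JavaRDD<Integer>> rddQueue) {
    return ssc.queueStream(rddQueue, true);
  }
}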
Use of org.apache.spark.streaming.api.java.JavaStreamingContext in project beam by apache.
From the class StreamingTransformTranslator, method createFromQueue().
private static <T> TransformEvaluator<CreateStream<T>> createFromQueue() {
  return new TransformEvaluator<CreateStream<T>>() {
    @Override
    public void evaluate(CreateStream<T> transform, EvaluationContext context) {
      Coder<T> coder = context.getOutput(transform).getCoder();
      JavaStreamingContext jssc = context.getStreamingContext();
      Queue<Iterable<TimestampedValue<T>>> values = transform.getBatches();
      WindowedValue.FullWindowedValueCoder<T> windowCoder =
          WindowedValue.FullWindowedValueCoder.of(coder, GlobalWindow.Coder.INSTANCE);
      // create the DStream from queue.
      Queue<JavaRDD<WindowedValue<T>>> rddQueue = new LinkedBlockingQueue<>();
      for (Iterable<TimestampedValue<T>> tv : values) {
        Iterable<WindowedValue<T>> windowedValues = Iterables.transform(tv,
            new com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>>() {
              @Override
              public WindowedValue<T> apply(@Nonnull TimestampedValue<T> timestampedValue) {
                return WindowedValue.of(timestampedValue.getValue(),
                    timestampedValue.getTimestamp(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
              }
            });
        JavaRDD<WindowedValue<T>> rdd = jssc.sparkContext()
            .parallelize(CoderHelpers.toByteArrays(windowedValues, windowCoder))
            .map(CoderHelpers.fromByteFunction(windowCoder));
        rddQueue.offer(rdd);
      }
      JavaInputDStream<WindowedValue<T>> inputDStream = jssc.queueStream(rddQueue, true);
      UnboundedDataset<T> unboundedDataset = new UnboundedDataset<T>(
          inputDStream, Collections.singletonList(inputDStream.inputDStream().id()));
      // add pre-baked Watermarks for the pre-baked batches.
      Queue<GlobalWatermarkHolder.SparkWatermarks> times = transform.getTimes();
      GlobalWatermarkHolder.addAll(
          ImmutableMap.of(unboundedDataset.getStreamSources().get(0), times));
      context.putDataset(transform, unboundedDataset);
    }

    @Override
    public String toNativeString() {
      return "streamingContext.queueStream(...)";
    }
  };
}
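The toByteArrays/fromByteFunction round-trip exists because elements are shipped to Spark as byte arrays and decoded with the Beam coder on the executor side. In isolation, the encode/decode cycle a coder performs looks like this; a sketch using Beam's StringUtf8Coder:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.beam.sdk.coders.StringUtf8Coder;

final class CoderRoundTrip {
  // Encode a value to bytes and decode it back, as the evaluator does per element.
  static String roundTrip(String value) throws IOException {
    StringUtf8Coder coder = StringUtf8Coder.of();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    coder.encode(value, out);
    return coder.decode(new ByteArrayInputStream(out.toByteArray()));
  }
}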