Use of org.apache.spark.api.java.function.VoidFunction in project microservices by pwillhan.
The class GeoLocationJob, method main:
public static void main(String[] args) throws Exception {
SparkConf conf = new SparkConf().setAppName("geolocationJob").setMaster("local[1]");
JavaStreamingContext context = new JavaStreamingContext(conf, new Duration(2000));
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "192.168.99.100:9092");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", "geolocationJob");
kafkaParams.put("auto.offset.reset", "latest");
kafkaParams.put("enable.auto.commit", false);
Collection<String> topics = Arrays.asList("geolocationJob");
final JavaInputDStream<ConsumerRecord<String, String>> dstream = KafkaUtils.createDirectStream(context, LocationStrategies.PreferConsistent(), ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
dstream.map(new Function<ConsumerRecord<String, String>, GeoLocation>() { // map to GeoLocation
private static final long serialVersionUID = -5289370913799710097L;
@Override
public GeoLocation call(ConsumerRecord<String, String> record) throws Exception {
return new Gson().fromJson(record.value(), GeoLocation.class);
}
}).filter(new Function<GeoLocation, Boolean>() { // filter out invalid geolocations
private static final long serialVersionUID = 6980980875802694946L;
@Override
public Boolean call(GeoLocation geolocation) throws Exception {
System.out.println("Spark Job received => " + geolocation);
return geolocation.getLatitude() >= -90 && geolocation.getLatitude() < 90 && geolocation.getLongitude() >= -180 && geolocation.getLongitude() < 180;
}
}).foreachRDD(new VoidFunction<JavaRDD<GeoLocation>>() { // iterate over RDD
private static final long serialVersionUID = -4161320579495422870L;
@Override
public void call(JavaRDD<GeoLocation> rdd) throws Exception {
rdd.foreach(new VoidFunction<GeoLocation>() { // send valid geolocations to another topic
private static final long serialVersionUID = -3282778715126743482L;
@Override
public void call(GeoLocation geolocation) throws Exception {
ProducerRecord<String, String> record = new ProducerRecord<>("geolocations", geolocation.toString());
getProducer().send(record);
}
});
}
});
context.start();
context.awaitTermination();
}
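The job above sends each valid geolocation through a getProducer() helper that is not shown in this snippet. A minimal sketch of such a helper, written as static members of GeoLocationJob and assuming a lazily created, shared KafkaProducer with String serializers pointed at the same broker as the consumer (the project's actual implementation may differ):
// Hypothetical helper, not part of the original source: lazily creates one shared producer per JVM.
// Requires: org.apache.kafka.clients.producer.KafkaProducer, java.util.Properties
private static KafkaProducer<String, String> producer;

private static synchronized KafkaProducer<String, String> getProducer() {
    if (producer == null) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.99.100:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producer = new KafkaProducer<>(props);
    }
    return producer;
}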
Use of org.apache.spark.api.java.function.VoidFunction in project rocketmq-externals by apache.
The class RocketMqUtilsTest, method testGetOffsets:
@Test
public void testGetOffsets() throws MQBrokerException, MQClientException, InterruptedException, UnsupportedEncodingException {
Map<String, String> optionParams = new HashMap<>();
optionParams.put(RocketMQConfig.NAME_SERVER_ADDR, NAME_SERVER);
SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster("local[*]");
JavaStreamingContext sc = new JavaStreamingContext(sparkConf, new Duration(1000));
List<String> topics = new ArrayList<>();
topics.add(TOPIC_DEFAULT);
LocationStrategy locationStrategy = LocationStrategy.PreferConsistent();
JavaInputDStream<MessageExt> dStream = RocketMqUtils.createJavaMQPullStream(sc, UUID.randomUUID().toString(), topics, ConsumerStrategy.earliest(), false, false, false, locationStrategy, optionParams);
// hold a reference to the current offset ranges, so it can be used downstream
final AtomicReference<Map<TopicQueueId, OffsetRange[]>> offsetRanges = new AtomicReference<>();
final Set<MessageExt> result = Collections.synchronizedSet(new HashSet<MessageExt>());
dStream.transform(new Function<JavaRDD<MessageExt>, JavaRDD<MessageExt>>() {
@Override
public JavaRDD<MessageExt> call(JavaRDD<MessageExt> v1) throws Exception {
Map<TopicQueueId, OffsetRange[]> offsets = ((HasOffsetRanges) v1.rdd()).offsetRanges();
offsetRanges.set(offsets);
return v1;
}
}).foreachRDD(new VoidFunction<JavaRDD<MessageExt>>() {
@Override
public void call(JavaRDD<MessageExt> messageExtJavaRDD) throws Exception {
result.addAll(messageExtJavaRDD.collect());
}
});
sc.start();
long startTime = System.currentTimeMillis();
boolean matches = false;
while (!matches && System.currentTimeMillis() - startTime < 10000) {
matches = MESSAGE_NUM == result.size();
Thread.sleep(50);
}
sc.stop();
}
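Because Spark's Java function interfaces (Function, VoidFunction, and the rest of org.apache.spark.api.java.function) each declare a single call method and extend Serializable, the anonymous classes above can be written as lambdas on Java 8+. A behaviour-preserving sketch of the same transform/foreachRDD chain:
// Lambda form of the chain above; offsetRanges and result are the same variables as in the test.
dStream.transform(rdd -> {
    // capture the offset ranges of this batch before any further processing
    offsetRanges.set(((HasOffsetRanges) rdd.rdd()).offsetRanges());
    return rdd;
}).foreachRDD(rdd -> result.addAll(rdd.collect()));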
Use of org.apache.spark.api.java.function.VoidFunction in project auratrainingproject by liuqinghua666.
The class JavaKafkaShopCityAnalytics, method main:
public static void main(String[] args) throws Exception {
SparkConf conf = new SparkConf().setAppName("JavaKafkaShopCityAnalytics");
if (args.length == 0) {
conf.setMaster("local[1]");
} else {
dataPath = args[0];
}
JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));
// Read the shop-to-city mapping from MySQL or a text file and share it as a broadcast variable
Map<String, String> shopCityMap = getShopCityMap(dataPath);
JavaSparkContext jsc = ssc.sparkContext();
Broadcast<Map<String, String>> broadcastCountryMap = jsc.broadcast(shopCityMap);
// Kafka configurations
String[] topics = KafkaRedisConfig.KAFKA_USER_PAY_TOPIC.split("\\,");
System.out.println("Topics: " + Arrays.toString(topics));
String brokers = KafkaRedisConfig.KAFKA_ADDR;
Map<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("metadata.broker.list", brokers);
kafkaParams.put("serializer.class", "kafka.serializer.StringEncoder");
final String clickHashKey = "app::shop::paycount";
// Create a direct stream
JavaPairInputDStream<String, String> kafkaStream = KafkaUtils.createDirectStream(ssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, new HashSet<String>(Arrays.asList(topics)));
JavaDStream<String[]> events = kafkaStream.map(new Function<Tuple2<String, String>, String[]>() {
@Override
public String[] call(Tuple2<String, String> line) throws Exception {
System.out.println("line:" + line._1() + "=>" + line._2().split(",")[0]);
String[] data = new String[] { line._1(), line._2().split(",")[0] };
return data;
}
});
// Compute user click times
JavaPairDStream<String, Long> shopClicks = events.mapToPair(new PairFunction<String[], String, Long>() {
@Override
public Tuple2<String, Long> call(String[] x) {
return new Tuple2<>(x[1], 1L);
}
}).reduceByKey(new Function2<Long, Long, Long>() {
@Override
public Long call(Long i1, Long i2) {
return i1 + i2;
}
});
shopClicks.foreachRDD(new VoidFunction<JavaPairRDD<String, Long>>() {
@Override
public void call(JavaPairRDD<String, Long> rdd) throws Exception {
rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Long>>>() {
@Override
public void call(Iterator<Tuple2<String, Long>> partitionOfRecords) throws Exception {
Jedis jedis = JavaRedisClient.get().getResource();
while (partitionOfRecords.hasNext()) {
try {
Tuple2<String, Long> pair = partitionOfRecords.next();
String shopidKey = "jiaoyi" + pair._1();
// Look up the cityName for this shop id in the broadcast variable Map
String cityName = broadcastCountryMap.getValue().get(pair._1());
String cityKey = "交易" + cityName;
// String cityKey = "交易"+getCityOfShop(pair._1 (),dataPath);
// transaction count
long clickCount = pair._2();
// Write the shop's transaction increment to Redis
jedis.incrBy(shopidKey, clickCount);
System.out.println("Update shop " + shopidKey + " inc " + clickCount);
// Write the city's transaction increment to Redis
jedis.incrBy(cityKey, clickCount);
System.out.println("Update city " + cityKey + " inc " + clickCount);
} catch (Exception e) {
System.out.println("error:" + e);
}
}
jedis.close();
}
});
}
});
ssc.start();
ssc.awaitTermination();
}
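getShopCityMap(dataPath) is referenced but not shown above; the comment in the source says it loads the shop-to-city mapping from MySQL or a text file. A hypothetical sketch assuming a plain text file of shopId,cityName lines at dataPath (the file layout and field order are assumptions, not the project's actual format):
// Hypothetical sketch, not the project's implementation.
// Requires: java.nio.file.Files, java.nio.file.Paths, java.util.HashMap, java.util.Map
public static Map<String, String> getShopCityMap(String dataPath) throws Exception {
    Map<String, String> shopCity = new HashMap<>();
    for (String line : Files.readAllLines(Paths.get(dataPath))) {
        String[] fields = line.split(",");
        if (fields.length >= 2) {
            // assumed layout: first column is the shop id, second is the city name
            shopCity.put(fields[0].trim(), fields[1].trim());
        }
    }
    return shopCity;
}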
Use of org.apache.spark.api.java.function.VoidFunction in project beijingThirdPeriod by weidongcao.
The class SparkOperateBcp, method bcpWriteIntoSolr:
public static void bcpWriteIntoSolr(JavaRDD<String[]> javaRDD, TaskBean task) {
logger.info("开始将 {} 的BCP数据索引到Solr", task.getContentType());
/*
 * Write the data into Solr
 */
javaRDD.foreachPartition((VoidFunction<Iterator<String[]>>) iterator -> {
List<SolrInputDocument> list = new ArrayList<>();
while (iterator.hasNext()) {
String[] str = iterator.next();
SolrInputDocument doc = new SolrInputDocument();
String rowkey = str[0];
doc.addField("ID", rowkey.split("_")[1]);
doc.addField(BigDataConstants.SOLR_CONTENT_ID.toUpperCase(), rowkey);
doc.addField(BigDataConstants.SOLR_DOC_TYPE_KEY, FieldConstants.DOC_TYPE_MAP.get(task.getContentType()));
doc.addField("capture_time", rowkey.split("_")[0]);
doc.addField("import_time".toUpperCase(), DateFormatUtils.DATE_TIME_FORMAT.format(new Date()));
String[] values = ArrayUtils.subarray(str, 1, str.length);
for (int i = 0; i < values.length; i++) {
String value = values[i];
String key = task.getColumns()[i].toUpperCase();
if ((null != value) && (!"".equals(value))) {
if (!"FILE_URL".equalsIgnoreCase(key) && !"FILE_SIZE".equalsIgnoreCase(key)) {
doc.addField(key, value);
}
}
}
list.add(doc);
}
SolrUtil.submitToSolr(client, list, 0, new Date());
});
logger.info("####### {}的BCP数据索引Solr完成 #######", task.getContentType());
}
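For very large partitions, the per-partition list can be flushed in fixed-size batches rather than accumulating every document before a single submit. A minimal variation of the loop above, assuming SolrUtil.submitToSolr can safely be invoked more than once per partition (the batch size of 1000 is arbitrary):
javaRDD.foreachPartition((VoidFunction<Iterator<String[]>>) iterator -> {
    List<SolrInputDocument> batch = new ArrayList<>();
    while (iterator.hasNext()) {
        String[] str = iterator.next();
        SolrInputDocument doc = new SolrInputDocument();
        String rowkey = str[0];
        doc.addField("ID", rowkey.split("_")[1]);
        // ... remaining fields built exactly as in the original loop ...
        batch.add(doc);
        if (batch.size() >= 1000) {
            // flush a full batch instead of holding the whole partition in memory
            SolrUtil.submitToSolr(client, batch, 0, new Date());
            batch.clear();
        }
    }
    if (!batch.isEmpty()) {
        SolrUtil.submitToSolr(client, batch, 0, new Date());
    }
});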