Example 6 with VoidFunction

Use of org.apache.spark.api.java.function.VoidFunction in project microservices by pwillhan.

The main method of the class GeoLocationJob:

public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("geolocationJob").setMaster("local[1]");
    JavaStreamingContext context = new JavaStreamingContext(conf, new Duration(2000));
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "192.168.99.100:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("group.id", "geolocationJob");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
    Collection<String> topics = Arrays.asList("geolocationJob");
    final JavaInputDStream<ConsumerRecord<String, String>> dstream = KafkaUtils.createDirectStream(context, LocationStrategies.PreferConsistent(), ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
    dstream.map(new Function<ConsumerRecord<String, String>, GeoLocation>() { // map each record to a GeoLocation

        private static final long serialVersionUID = -5289370913799710097L;

        @Override
        public GeoLocation call(ConsumerRecord<String, String> record) throws Exception {
            return new Gson().fromJson(record.value(), GeoLocation.class);
        }
    }).filter(new Function<GeoLocation, Boolean>() { // filter out invalid geolocations

        private static final long serialVersionUID = 6980980875802694946L;

        @Override
        public Boolean call(GeoLocation geolocation) throws Exception {
            System.out.println("Spark Job received => " + geolocation);
            return geolocation.getLatitude() >= -90 && geolocation.getLatitude() < 90 && geolocation.getLongitude() >= -180 && geolocation.getLongitude() < 180;
        }
    }).foreachRDD(new VoidFunction<JavaRDD<GeoLocation>>() { // iterate over each RDD in the stream

        private static final long serialVersionUID = -4161320579495422870L;

        @Override
        public void call(JavaRDD<GeoLocation> rdd) throws Exception {
            rdd.foreach(new VoidFunction<GeoLocation>() { // send valid geolocations to another topic

                private static final long serialVersionUID = -3282778715126743482L;

                @Override
                public void call(GeoLocation geolocation) throws Exception {
                    ProducerRecord<String, String> record = new ProducerRecord<>("geolocations", geolocation.toString());
                    getProducer().send(record);
                }
            });
        }
    });
    context.start();
    context.awaitTermination();
}
Also used : HashMap(java.util.HashMap) Gson(com.google.gson.Gson) Duration(org.apache.spark.streaming.Duration) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Function(org.apache.spark.api.java.function.Function) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) SparkConf(org.apache.spark.SparkConf)
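
The call to getProducer() in the innermost VoidFunction refers to a helper that is not shown in this example. A minimal sketch of such a helper, assuming a lazily created, shared KafkaProducer that reuses the broker address from the consumer configuration above (the class name ProducerHolder is hypothetical):

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.serialization.StringSerializer;

public final class ProducerHolder {

    // one shared producer per JVM so every foreach call reuses the same connection
    private static Producer<String, String> producer;

    static synchronized Producer<String, String> getProducer() {
        if (producer == null) {
            Properties props = new Properties();
            // assumed to match the broker used by the consumer above
            props.put("bootstrap.servers", "192.168.99.100:9092");
            props.put("key.serializer", StringSerializer.class.getName());
            props.put("value.serializer", StringSerializer.class.getName());
            producer = new KafkaProducer<>(props);
        }
        return producer;
    }

    private ProducerHolder() {
    }
}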

Example 7 with VoidFunction

Use of org.apache.spark.api.java.function.VoidFunction in project rocketmq-externals by apache.

The testGetOffsets method of the class RocketMqUtilsTest:

@Test
public void testGetOffsets() throws MQBrokerException, MQClientException, InterruptedException, UnsupportedEncodingException {
    Map<String, String> optionParams = new HashMap<>();
    optionParams.put(RocketMQConfig.NAME_SERVER_ADDR, NAME_SERVER);
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster("local[*]");
    JavaStreamingContext sc = new JavaStreamingContext(sparkConf, new Duration(1000));
    List<String> topics = new ArrayList<>();
    topics.add(TOPIC_DEFAULT);
    LocationStrategy locationStrategy = LocationStrategy.PreferConsistent();
    JavaInputDStream<MessageExt> dStream = RocketMqUtils.createJavaMQPullStream(sc, UUID.randomUUID().toString(), topics, ConsumerStrategy.earliest(), false, false, false, locationStrategy, optionParams);
    // hold a reference to the current offset ranges, so it can be used downstream
    final AtomicReference<Map<TopicQueueId, OffsetRange[]>> offsetRanges = new AtomicReference<>();
    final Set<MessageExt> result = Collections.synchronizedSet(new HashSet<MessageExt>());
    dStream.transform(new Function<JavaRDD<MessageExt>, JavaRDD<MessageExt>>() {

        @Override
        public JavaRDD<MessageExt> call(JavaRDD<MessageExt> v1) throws Exception {
            Map<TopicQueueId, OffsetRange[]> offsets = ((HasOffsetRanges) v1.rdd()).offsetRanges();
            offsetRanges.set(offsets);
            return v1;
        }
    }).foreachRDD(new VoidFunction<JavaRDD<MessageExt>>() {

        @Override
        public void call(JavaRDD<MessageExt> messageExtJavaRDD) throws Exception {
            result.addAll(messageExtJavaRDD.collect());
        }
    });
    sc.start();
    long startTime = System.currentTimeMillis();
    boolean matches = false;
    while (!matches && System.currentTimeMillis() - startTime < 10000) {
        matches = MESSAGE_NUM == result.size();
        Thread.sleep(50);
    }
    sc.stop();
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HasOffsetRanges(org.apache.rocketmq.spark.HasOffsetRanges) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Function(org.apache.spark.api.java.function.Function) Duration(org.apache.spark.streaming.Duration) AtomicReference(java.util.concurrent.atomic.AtomicReference) MQClientException(org.apache.rocketmq.client.exception.MQClientException) MQBrokerException(org.apache.rocketmq.client.exception.MQBrokerException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) JavaRDD(org.apache.spark.api.java.JavaRDD) OffsetRange(org.apache.rocketmq.spark.OffsetRange) MessageExt(org.apache.rocketmq.common.message.MessageExt) LocationStrategy(org.apache.rocketmq.spark.LocationStrategy) SparkConf(org.apache.spark.SparkConf) Map(java.util.Map) TopicQueueId(org.apache.rocketmq.spark.TopicQueueId) Test(org.junit.Test)
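
Because Function and VoidFunction are single-method interfaces, the transform/foreachRDD chain in this test can also be written with Java 8 lambdas. A minimal equivalent sketch, keeping the dStream, offsetRanges, and result variables declared above:

// same behaviour as the anonymous classes above, expressed as lambdas
dStream.transform(rdd -> {
    // remember the offset ranges of this batch, then pass the RDD through unchanged
    offsetRanges.set(((HasOffsetRanges) rdd.rdd()).offsetRanges());
    return rdd;
}).foreachRDD(rdd ->
    // collect each batch on the driver and record every received message
    result.addAll(rdd.collect())
);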

Example 8 with VoidFunction

Use of org.apache.spark.api.java.function.VoidFunction in project auratrainingproject by liuqinghua666.

The main method of the class JavaKafkaShopCityAnalytics:

public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("JavaKafkaShopCityAnalytics");
    if (args.length == 0) {
        conf.setMaster("local[1]");
    } else {
        dataPath = args[0];
    }
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));
    // Read the shop-to-city mapping from MySQL or a text file and share it as a broadcast variable
    Map<String, String> shopCityMap = getShopCityMap(dataPath);
    JavaSparkContext jsc = ssc.sparkContext();
    Broadcast<Map<String, String>> broadcastCountryMap = jsc.broadcast(shopCityMap);
    // Kafka configurations
    String[] topics = KafkaRedisConfig.KAFKA_USER_PAY_TOPIC.split("\\,");
    System.out.println("Topics: " + Arrays.toString(topics));
    String brokers = KafkaRedisConfig.KAFKA_ADDR;
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", brokers);
    kafkaParams.put("serializer.class", "kafka.serializer.StringEncoder");
    final String clickHashKey = "app::shop::paycount";
    // Create a direct stream
    JavaPairInputDStream<String, String> kafkaStream = KafkaUtils.createDirectStream(ssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, new HashSet<String>(Arrays.asList(topics)));
    JavaDStream<String[]> events = kafkaStream.map(new Function<Tuple2<String, String>, String[]>() {

        @Override
        public String[] call(Tuple2<String, String> line) throws Exception {
            System.out.println("line:" + line._1() + "=>" + line._2().split(",")[0]);
            String[] data = new String[] { line._1(), line._2().split(",")[0] };
            return data;
        }
    });
    // Compute user click times
    JavaPairDStream<String, Long> shopClicks = events.mapToPair(new PairFunction<String[], String, Long>() {

        @Override
        public Tuple2<String, Long> call(String[] x) {
            return new Tuple2<>(x[1], 1L);
        }
    }).reduceByKey(new Function2<Long, Long, Long>() {

        @Override
        public Long call(Long i1, Long i2) {
            return i1 + i2;
        }
    });
    shopClicks.foreachRDD(new VoidFunction<JavaPairRDD<String, Long>>() {

        @Override
        public void call(JavaPairRDD<String, Long> rdd) throws Exception {
            rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Long>>>() {

                @Override
                public void call(Iterator<Tuple2<String, Long>> partitionOfRecords) throws Exception {
                    Jedis jedis = JavaRedisClient.get().getResource();
                    while (partitionOfRecords.hasNext()) {
                        try {
                            Tuple2<String, Long> pair = partitionOfRecords.next();
                            String shopidKey = "jiaoyi" + pair._1();
                            // look up the cityName for this shop id in the broadcast map
                            String cityName = broadcastCountryMap.getValue().get(pair._1());
                            String cityKey = "交易" + cityName;
                            // String cityKey = "交易"+getCityOfShop(pair._1 (),dataPath);
                            // transaction count
                            long clickCount = pair._2();
                            // write the shop's transaction-count increment to Redis
                            jedis.incrBy(shopidKey, clickCount);
                            System.out.println("Update shop " + shopidKey + " inc " + clickCount);
                            // write the city's transaction-count increment to Redis
                            jedis.incrBy(cityKey, clickCount);
                            System.out.println("Update city " + cityKey + " inc " + clickCount);
                        } catch (Exception e) {
                            System.out.println("error:" + e);
                        }
                    }
                    jedis.close();
                }
            });
        }
    });
    ssc.start();
    ssc.awaitTermination();
}
Also used : JavaDStream(org.apache.spark.streaming.api.java.JavaDStream) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) Jedis(redis.clients.jedis.Jedis) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) PairFunction(org.apache.spark.api.java.function.PairFunction) IOException(java.io.IOException) Tuple2(scala.Tuple2) VoidFunction(org.apache.spark.api.java.function.VoidFunction) SparkConf(org.apache.spark.SparkConf)
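
getShopCityMap(dataPath) is a helper from the project whose implementation is not shown here. A minimal sketch of what it might look like, assuming the shop-to-city mapping is kept in a plain text file of shopId,cityName lines (the file name shop_city.csv and the format are assumptions, not the project's actual layout):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

public final class ShopCityMapLoader {

    // hypothetical stand-in for getShopCityMap(dataPath): reads "shopId,cityName"
    // lines and returns them as a shopId -> cityName map for broadcasting
    public static Map<String, String> load(String dataPath) throws IOException {
        Map<String, String> shopCityMap = new HashMap<>();
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(dataPath, "shop_city.csv"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(",");
                if (fields.length >= 2) {
                    shopCityMap.put(fields[0].trim(), fields[1].trim());
                }
            }
        }
        return shopCityMap;
    }

    private ShopCityMapLoader() {
    }
}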

Example 9 with VoidFunction

Use of org.apache.spark.api.java.function.VoidFunction in project beijingThirdPeriod by weidongcao.

The bcpWriteIntoSolr method of the class SparkOperateBcp:

public static void bcpWriteIntoSolr(JavaRDD<String[]> javaRDD, TaskBean task) {
    logger.info("开始将 {} 的BCP数据索引到Solr", task.getContentType());
    /*
     * Write the data into Solr.
     */
    javaRDD.foreachPartition((VoidFunction<Iterator<String[]>>) iterator -> {
        List<SolrInputDocument> list = new ArrayList<>();
        while (iterator.hasNext()) {
            String[] str = iterator.next();
            SolrInputDocument doc = new SolrInputDocument();
            String rowkey = str[0];
            doc.addField("ID", rowkey.split("_")[1]);
            doc.addField(BigDataConstants.SOLR_CONTENT_ID.toUpperCase(), rowkey);
            doc.addField(BigDataConstants.SOLR_DOC_TYPE_KEY, FieldConstants.DOC_TYPE_MAP.get(task.getContentType()));
            doc.addField("capture_time", rowkey.split("_")[0]);
            doc.addField("import_time".toUpperCase(), DateFormatUtils.DATE_TIME_FORMAT.format(new Date()));
            String[] values = ArrayUtils.subarray(str, 1, str.length);
            for (int i = 0; i < values.length; i++) {
                String value = values[i];
                String key = task.getColumns()[i].toUpperCase();
                if ((null != value) && (!"".equals(value))) {
                    if (!"FILE_URL".equalsIgnoreCase(key) && !"FILE_SIZE".equalsIgnoreCase(key)) {
                        doc.addField(key, value);
                    }
                }
            }
            list.add(doc);
        }
        SolrUtil.submitToSolr(client, list, 0, new Date());
    });
    logger.info("####### {}的BCP数据索引Solr完成 #######", task.getContentType());
}
Also used : PairFlatMapFunction(org.apache.spark.api.java.function.PairFlatMapFunction) Date(java.util.Date) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LoggerFactory(org.slf4j.LoggerFactory) ArrayUtils(org.apache.commons.lang3.ArrayUtils) VoidFunction(org.apache.spark.api.java.function.VoidFunction) DateFormatUtils(com.rainsoft.utils.DateFormatUtils) ArrayList(java.util.ArrayList) TaskBean(com.rainsoft.domain.TaskBean) ClassPathXmlApplicationContext(org.springframework.context.support.ClassPathXmlApplicationContext) BigDataConstants(com.rainsoft.BigDataConstants) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) SolrUtil(com.rainsoft.utils.SolrUtil) SparkConf(org.apache.spark.SparkConf) RowkeyColumnSecondarySort(com.rainsoft.hbase.RowkeyColumnSecondarySort) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SolrClient(org.apache.solr.client.solrj.SolrClient) Serializable(java.io.Serializable) HBaseUtils(com.rainsoft.utils.HBaseUtils) List(java.util.List) AbstractApplicationContext(org.springframework.context.support.AbstractApplicationContext) FieldConstants(com.rainsoft.FieldConstants) Function(org.apache.spark.api.java.function.Function) SolrInputDocument(org.apache.solr.common.SolrInputDocument)
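
SolrUtil.submitToSolr and the shared client used above are project helpers whose code is not part of this example. With stock SolrJ, a roughly equivalent batch submit could look like the sketch below; the collection URL is a placeholder, and a client is created and closed per call only to keep the sketch self-contained, whereas the example reuses one long-lived client:

import java.util.List;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrInputDocument;

public final class SolrBatchSubmit {

    // hypothetical stand-in for SolrUtil.submitToSolr(client, list, 0, new Date()):
    // adds the batch of documents and commits them
    public static void submit(List<SolrInputDocument> docs) throws Exception {
        if (docs.isEmpty()) {
            return;
        }
        // placeholder URL; the real base URL and collection come from project configuration
        try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/bcp").build()) {
            client.add(docs);
            client.commit();
        }
    }

    private SolrBatchSubmit() {
    }
}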

Aggregations

VoidFunction (org.apache.spark.api.java.function.VoidFunction) 9
SparkConf (org.apache.spark.SparkConf) 8
JavaRDD (org.apache.spark.api.java.JavaRDD) 8
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 6
Function (org.apache.spark.api.java.function.Function) 6
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD) 5
FieldConstants (com.rainsoft.FieldConstants) 4
ArrayList (java.util.ArrayList) 4
Tuple2 (scala.Tuple2) 4
BigDataConstants (com.rainsoft.BigDataConstants) 3
RowkeyColumnSecondarySort (com.rainsoft.hbase.RowkeyColumnSecondarySort) 3
List (java.util.List) 3
StringUtils (org.apache.commons.lang3.StringUtils) 3
SolrClient (org.apache.solr.client.solrj.SolrClient) 3
SolrInputDocument (org.apache.solr.common.SolrInputDocument) 3
JavaStreamingContext (org.apache.spark.streaming.api.java.JavaStreamingContext) 3
ConfigurationManager (com.rainsoft.conf.ConfigurationManager) 2
TaskBean (com.rainsoft.domain.TaskBean) 2
DateFormatUtils (com.rainsoft.utils.DateFormatUtils) 2
HBaseUtils (com.rainsoft.utils.HBaseUtils) 2