use of org.apache.spark.api.java.function.VoidFunction in project beijingThirdPeriod by weidongcao.
the class BaseOracleDataExport method export2Solr.
/**
 * Imports data from the Oracle content table into Solr.
 *
 * @param javaRDD the JavaRDD<Row> to index
 * @param task    the task name
 */
private static void export2Solr(JavaRDD<Row> javaRDD, String task) {
    // array of column names for this task's Oracle content table
    String[] columns = FieldConstants.ORACLE_TABLE_COLUMN_MAP.get(NamingRuleUtils.getOracleContentTableName(task));
    javaRDD.foreachPartition((VoidFunction<Iterator<Row>>) iterator -> {
        List<SolrInputDocument> docList = new ArrayList<>();
        Optional<Object> importTime = Optional.empty();
        while (iterator.hasNext()) {
            Row row = iterator.next();
            SolrInputDocument doc = new SolrInputDocument();
            String id = UUID.randomUUID().toString().replace("-", "");
            doc.addField("ID", id);
            doc.addField(BigDataConstants.SOLR_DOC_TYPE_KEY, FieldConstants.DOC_TYPE_MAP.get(task));
            for (int i = 0; i < row.length() && i < columns.length; i++) {
                if ("import_time".equalsIgnoreCase(columns[i])) {
                    importTime = Optional.of(row.getString(i));
                }
                SolrUtil.addSolrFieldValue(doc, columns[i].toUpperCase(), row.getString(i));
            }
            docList.add(doc);
            // submit in batches of writeSize while iterating the partition
            SolrUtil.submitToSolr(client, docList, writeSize, importTime);
        }
        // flush whatever is left at the end of the partition
        SolrUtil.submitToSolr(client, docList, 1, importTime);
    });
    logger.info("####### Solr indexing of {} data finished #######", NamingRuleUtils.getOracleContentTableName(task));
}
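The partition loop above delegates batching to the project's SolrUtil helper, whose implementation is not shown on this page. Below is a rough, self-contained sketch of the same foreachPartition batching pattern using plain SolrJ; the ZooKeeper address "zk1:2181", the collection name "docs", and the SolrPartitionWriter class name are placeholders, and a SolrJ 7+ CloudSolrClient.Builder is assumed.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.UUID;

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.SolrInputDocument;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Row;

public class SolrPartitionWriter {

    /** Indexes every Row of the RDD into Solr, flushing in fixed-size batches. */
    public static void writePartitions(JavaRDD<Row> rdd, String[] columns, int batchSize) {
        rdd.foreachPartition((VoidFunction<Iterator<Row>>) rows -> {
            // One client per partition; "zk1:2181" and "docs" are placeholder values.
            try (CloudSolrClient client = new CloudSolrClient.Builder(
                    Collections.singletonList("zk1:2181"), Optional.empty()).build()) {
                client.setDefaultCollection("docs");
                List<SolrInputDocument> batch = new ArrayList<>(batchSize);
                while (rows.hasNext()) {
                    Row row = rows.next();
                    SolrInputDocument doc = new SolrInputDocument();
                    doc.addField("ID", UUID.randomUUID().toString().replace("-", ""));
                    for (int i = 0; i < row.length() && i < columns.length; i++) {
                        String value = row.getString(i);
                        if (value != null && !value.isEmpty()) {
                            doc.addField(columns[i].toUpperCase(), value);
                        }
                    }
                    batch.add(doc);
                    if (batch.size() >= batchSize) {
                        client.add(batch);   // send a full batch
                        batch.clear();
                    }
                }
                if (!batch.isEmpty()) {
                    client.add(batch);       // flush the remainder
                }
                client.commit();
            }
        });
    }
}

Creating the client inside the lambda avoids serializing it with the closure; the project instead reuses a static client field shared by the class.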
use of org.apache.spark.api.java.function.VoidFunction in project beijingThirdPeriod by weidongcao.
the class BcpImportHBaseSolrService method bcpWriteIntoSolr.
public static void bcpWriteIntoSolr(JavaRDD<String[]> javaRDD, TaskBean task) {
    logger.info("Start indexing the BCP data of {} into Solr", task.getContentType());
    /*
     * Write the data into Solr
     */
    javaRDD.foreachPartition((VoidFunction<Iterator<String[]>>) iterator -> {
        List<SolrInputDocument> list = new ArrayList<>();
        while (iterator.hasNext()) {
            String[] str = iterator.next();
            SolrInputDocument doc = new SolrInputDocument();
            // rowkey format: <captureTime>_<id>
            String rowkey = str[0];
            doc.addField("ID", rowkey.split("_")[1]);
            doc.addField(BigDataConstants.SOLR_CONTENT_ID.toUpperCase(), rowkey);
            doc.addField(BigDataConstants.SOLR_DOC_TYPE_KEY, FieldConstants.DOC_TYPE_MAP.get(task.getContentType()));
            doc.addField(BigDataConstants.CAPTURE_TIME, rowkey.split("_")[0]);
            Date curDate = new Date();
            doc.addField("import_time".toUpperCase(), DateFormatUtils.DATE_TIME_FORMAT.format(curDate));
            doc.addField("import_time".toLowerCase(), curDate.getTime());
            for (int i = 1; i < str.length; i++) {
                String value = str[i];
                if (task.getColumns().length <= i - 1) {
                    break;
                }
                String key = task.getColumns()[i - 1].toUpperCase();
                if ((null != value) && (!"".equals(value))) {
                    doc.addField(key, value);
                }
            }
            list.add(doc);
        }
        if (list.size() > 0) {
            SolrUtil.setCloudSolrClientDefaultCollection(client, new Date());
            client.add(list, 1000);
            logger.info("----> documents successfully written to Solr: {}", list.size());
        } else {
            logger.info("{}: this Spark partition has no data", task.getContentType());
        }
    });
    logger.info("####### Solr indexing of the BCP data for {} finished #######", task.getContentType());
}
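The rowkey handling above assumes every rowkey contains an underscore separating the capture time from the ID; a record without one would fail with an ArrayIndexOutOfBoundsException on the executor. A hedged sketch of building a single document defensively follows; the class name, the literal field names, and the date pattern are illustrative stand-ins for the project's constants.

import java.util.Date;

import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.solr.common.SolrInputDocument;

public final class BcpDocBuilder {

    private static final FastDateFormat DATE_TIME =
            FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");

    /**
     * Builds one Solr document from a BCP record whose first element is a
     * rowkey of the form "<captureTime>_<id>"; the remaining elements are
     * matched positionally against the column names.
     */
    public static SolrInputDocument build(String[] record, String[] columns, String docType) {
        SolrInputDocument doc = new SolrInputDocument();
        String rowkey = record[0];
        int sep = rowkey.indexOf('_');
        if (sep < 0) {
            throw new IllegalArgumentException("rowkey without '_': " + rowkey);
        }
        doc.addField("ID", rowkey.substring(sep + 1));
        doc.addField("CAPTURE_TIME", rowkey.substring(0, sep));
        doc.addField("docType", docType);
        Date now = new Date();
        doc.addField("IMPORT_TIME", DATE_TIME.format(now)); // human-readable form
        doc.addField("import_time", now.getTime());         // epoch millis for range queries
        for (int i = 1; i < record.length && i - 1 < columns.length; i++) {
            String value = record[i];
            if (value != null && !value.isEmpty()) {
                doc.addField(columns[i - 1].toUpperCase(), value);
            }
        }
        return doc;
    }
}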
use of org.apache.spark.api.java.function.VoidFunction in project beijingThirdPeriod by weidongcao.
the class TestSpark method main.
public static void main(String[] args) {
    String[] arr = FieldConstants.BCP_FILE_COLUMN_MAP.get("bcp_ftp");

    List<Integer[]> list = new ArrayList<>();
    Random rand = new Random();
    for (int i = 0; i < 9; i++) {
        Integer[] ints = new Integer[31];
        for (int j = 0; j < 31; j++) {
            ints[j] = rand.nextInt();
        }
        list.add(ints);
    }

    SparkSession spark = SparkSession.builder().appName(TestSpark.class.getSimpleName()).master("local").getOrCreate();
    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    JavaRDD<Integer[]> dataRDD = jsc.parallelize(list);

    JavaPairRDD<String, Integer> pairRDD = dataRDD.flatMapToPair((PairFlatMapFunction<Integer[], String, Integer>) ints -> {
        List<Tuple2<String, Integer>> list1 = new ArrayList<>();
        for (int i = 0; i < ints.length; i++) {
            String key = arr[i];
            Integer value = ints[i];
            list1.add(new Tuple2<>(key, value));
        }
        return list1.iterator();
    });

    pairRDD.foreach((VoidFunction<Tuple2<String, Integer>>) tuple -> System.out.println(tuple.toString()));
    jsc.close();
}
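The test stops at printing the (columnName, value) pairs. A natural follow-up, not part of the original test, is to aggregate them; here is a minimal self-contained sketch that sums the values per key with reduceByKey (class name and sample data are illustrative).

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class PairAggregationSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("PairAggregationSketch").master("local").getOrCreate();
        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        // The same shape of data the test produces: (columnName, value) pairs.
        JavaPairRDD<String, Integer> pairRDD = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>("colA", 3), new Tuple2<>("colB", 5), new Tuple2<>("colA", 7)));

        // Sum the values per column name and print the result on the driver.
        pairRDD.reduceByKey(Integer::sum)
               .collect()
               .forEach(t -> System.out.println(t._1() + " -> " + t._2()));

        jsc.close();
    }
}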
use of org.apache.spark.api.java.function.VoidFunction in project beijingThirdPeriod by weidongcao.
the class TestHbase method main.
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("aaa").setMaster("local");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // build 10 comma-separated strings of 4 random three-digit numbers
    Random random = new Random();
    List<String> list = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        int[] aaa = new int[4];
        for (int j = 0; j < 4; j++) {
            aaa[j] = random.nextInt(900) + 100;
        }
        // use the char-separator overload, which accepts an int[] directly
        list.add(StringUtils.join(aaa, ','));
    }

    JavaRDD<String> originalRDD = sc.parallelize(list);
    originalRDD.foreach((VoidFunction<String>) s -> System.out.println(s));

    /*JavaPairRDD<RowkeyColumnSecondarySort, String> hfileRDD = originalRDD.flatMapToPair(
            new PairFlatMapFunction<String, RowkeyColumnSecondarySort, String>() {
                @Override
                public Iterable<Tuple2<RowkeyColumnSecondarySort, String>> call(String s) throws Exception {
                    String[] aa = s.split(",");
                }
            }
    )*/
    sc.close();
}
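The commented-out flatMapToPair above was left unfinished and still uses the Spark 1.x Iterable return type. Below is a hedged sketch of what a completed Spark 2.x version might look like, with a plain String key standing in for the project's RowkeyColumnSecondarySort, whose constructor is not shown on this page.

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;

import scala.Tuple2;

public class FlatMapToPairSketch {

    /**
     * Expands each CSV line "v0,v1,..." into (rowkey#columnIndex, value) pairs.
     * The first field is treated as the rowkey; a String key stands in for the
     * project's RowkeyColumnSecondarySort type.
     */
    public static JavaPairRDD<String, String> toPairs(JavaRDD<String> lines) {
        return lines.flatMapToPair((PairFlatMapFunction<String, String, String>) line -> {
            String[] fields = line.split(",");
            List<Tuple2<String, String>> pairs = new ArrayList<>();
            for (int i = 1; i < fields.length; i++) {
                pairs.add(new Tuple2<>(fields[0] + "#" + i, fields[i]));
            }
            return pairs.iterator();   // Spark 2.x expects an Iterator, not an Iterable
        });
    }
}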
use of org.apache.spark.api.java.function.VoidFunction in project net.jgp.labs.spark by jgperrin.
the class StreamingIngestionFileSystemTextFileToDataframeApp method start.
private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    // Turn each micro-batch RDD of lines into a one-column Dataframe and show it
    msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
        private static final long serialVersionUID = -590010339928376829L;

        @Override
        public void call(JavaRDD<String> rdd) {
            // Create JavaRDD<Row>
            JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
                private static final long serialVersionUID = 5167089361335095997L;

                @Override
                public Row call(String msg) {
                    return RowFactory.create(msg);
                }
            });

            // Create the schema: a single nullable string column named "Message"
            StructType schema = DataTypes.createStructType(new StructField[] {
                    DataTypes.createStructField("Message", DataTypes.StringType, true) });

            // Get the Spark 2.0 session and build the Dataframe
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
            Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
            msgDataFrame.show();
        }
    });

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
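JavaSparkSessionSingleton is a helper from the same repository that is not shown on this page; it follows the lazily-initialized singleton pattern from the Spark Streaming programming guide. A minimal sketch of that pattern:

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

/** Lazily instantiated singleton SparkSession, as in the Spark Streaming guide. */
class JavaSparkSessionSingleton {
    private static transient SparkSession instance = null;

    static SparkSession getInstance(SparkConf sparkConf) {
        if (instance == null) {
            instance = SparkSession.builder().config(sparkConf).getOrCreate();
        }
        return instance;
    }
}

Reusing one SparkSession per JVM keeps foreachRDD from creating a new session for every micro-batch.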