Use of org.apache.spark.api.java.JavaRDD in the Apache rocketmq-externals project: class RocketMqUtilsTest, method testConsumer.
@Test
public void testConsumer() throws MQBrokerException, MQClientException, InterruptedException, UnsupportedEncodingException {
    // Start up a local Spark streaming context with a 1-second batch interval.
    Map<String, String> optionParams = new HashMap<>();
    optionParams.put(RocketMQConfig.NAME_SERVER_ADDR, NAME_SERVER);
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster("local[*]");
    JavaStreamingContext sc = new JavaStreamingContext(sparkConf, new Duration(1000));
    List<String> topics = new ArrayList<>();
    topics.add(TOPIC_DEFAULT);
    LocationStrategy locationStrategy = LocationStrategy.PreferConsistent();
    // Random consumer-group id so repeated runs start from a clean offset state.
    JavaInputDStream<MessageExt> stream = RocketMqUtils.createJavaMQPullStream(sc, UUID.randomUUID().toString(), topics, ConsumerStrategy.earliest(), false, false, false, locationStrategy, optionParams);
    // Collect every received message; synchronized because foreachRDD runs on
    // Spark's driver-side job threads while the test thread polls the set.
    final Set<MessageExt> result = Collections.synchronizedSet(new HashSet<MessageExt>());
    stream.foreachRDD(new VoidFunction<JavaRDD<MessageExt>>() {

        @Override
        public void call(JavaRDD<MessageExt> messageExtJavaRDD) throws Exception {
            result.addAll(messageExtJavaRDD.collect());
        }
    });
    sc.start();
    // Poll for up to 20 seconds until the expected number of messages arrives.
    long startTime = System.currentTimeMillis();
    boolean matches = false;
    while (!matches && System.currentTimeMillis() - startTime < 20000) {
        matches = MESSAGE_NUM == result.size();
        Thread.sleep(50);
    }
    sc.stop();
    // BUG FIX: the original computed "matches" but never asserted it, so the
    // test passed even when no messages were consumed. Fail explicitly here.
    if (!matches) {
        throw new AssertionError("Expected " + MESSAGE_NUM + " messages within 20s, but received " + result.size());
    }
}
Use of org.apache.spark.api.java.JavaRDD in the net.jgp.labs.spark project by jgperrin: class StreamingIngestionFileSystemTextFileToDataframeApp, method start.
/**
 * Streams text files from an input directory into a Spark DataFrame.
 * Creates a local StreamingContext with two working threads and a
 * 5-second batch interval, converts each micro-batch of lines into a
 * single-column ("Message") DataFrame, and prints it.
 */
private void start() {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();
    // For each micro-batch: wrap every line in a Row, then build a DataFrame.
    msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {

        private static final long serialVersionUID = -590010339928376829L;

        @Override
        public void call(JavaRDD<String> rdd) {
            JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {

                private static final long serialVersionUID = 5167089361335095997L;

                @Override
                public Row call(String msg) {
                    return RowFactory.create(msg);
                }
            });
            // Single nullable string column named "Message".
            StructType schema = DataTypes.createStructType(new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });
            // Reuse one SparkSession per JVM (Spark 2.0 singleton pattern).
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
            Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
            msgDataFrame.show();
        }
    });
    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        // BUG FIX: the original swallowed the interrupt with printStackTrace().
        // Restore the interrupt status so callers can observe the interruption.
        Thread.currentThread().interrupt();
    }
}
Use of org.apache.spark.api.java.JavaRDD in the net.jgp.labs.spark project by jgperrin: class PiApp, method start.
/**
 * Estimates pi by Monte Carlo sampling on a remote Spark cluster:
 * draws NUM_SAMPLES random points in the square [-1, 1] x [-1, 1] and
 * counts the fraction that land inside the unit circle (pi / 4 of them,
 * in expectation). Logs the estimate and the elapsed processing time.
 */
private void start() {
    SparkSession spark = SparkSession.builder().appName("JavaSparkPi").master("spark://10.0.100.81:7077").config("spark.executor.memory", "1g").config("spark.executor.cores", "1").config("spark.cores.max", "2").config("spark.driver.host", "10.0.100.182").config("spark.executor.extraClassPath", "/home/jgp/net.jgp.labs.spark/target/labs-spark-2.2.0-jar-with-dependencies.jar").getOrCreate();
    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    int n = NUM_SAMPLES;
    List<Integer> l = new ArrayList<>(n);
    for (int i = 0; i < n; i++) {
        l.add(i);
    }
    // NOTE(review): using NUM_SAMPLES as the partition count puts one element
    // in each partition, which is very inefficient for large sample counts —
    // a small fixed slice count (e.g. 2 * total cores) is the usual choice.
    // Left unchanged here to preserve the original behavior.
    JavaRDD<Integer> dataSet = jsc.parallelize(l, NUM_SAMPLES);
    long t0 = System.currentTimeMillis();
    // Each sample contributes 1 if the random point falls inside the circle.
    long count = dataSet.map(integer -> {
        double x = Math.random() * 2 - 1;
        double y = Math.random() * 2 - 1;
        return (x * x + y * y <= 1) ? 1 : 0;
    }).reduce(Integer::sum);
    long t1 = System.currentTimeMillis();
    log.info("Pi is roughly ..... {}", 4.0 * count / n);
    log.info("Processing time ... {} ms", t1 - t0);
    spark.stop();
}
Use of org.apache.spark.api.java.JavaRDD in the Apache incubator-systemml project: class MLContextUtil, method convertInputType.
/**
 * Convert an input value to its internal SystemML data representation.
 * Dispatches on the runtime type of {@code parameterValue}:
 * JavaRDD/RDD of strings (IJV or CSV, per metadata, or sniffed from the
 * first line when no metadata is given), MatrixBlock, FrameBlock,
 * Dataset&lt;Row&gt;, Matrix, Frame, double[][], URL, and the scalar types
 * Integer, Double, String, and Boolean.
 *
 * @param parameterName
 *            The name of the input parameter
 * @param parameterValue
 *            The value of the input parameter
 * @param metadata
 *            matrix/frame metadata (may be null)
 * @return input in SystemML data representation, or null if the value's
 *         type is not recognized (or an RDD/Dataset arrives with metadata
 *         that is neither matrix nor frame metadata)
 * @throws MLContextException
 *             if the parameter name or value is null
 */
public static Data convertInputType(String parameterName, Object parameterValue, Metadata metadata) {
    String name = parameterName;
    Object value = parameterValue;
    // Simplified from the redundant "(x) ? true : false" ternaries.
    boolean hasMetadata = (metadata != null);
    boolean hasMatrixMetadata = hasMetadata && (metadata instanceof MatrixMetadata);
    boolean hasFrameMetadata = hasMetadata && (metadata instanceof FrameMetadata);
    if (name == null) {
        throw new MLContextException("Input parameter name is null");
    } else if (value == null) {
        throw new MLContextException("Input parameter value is null for: " + parameterName);
    } else if (value instanceof JavaRDD<?>) {
        @SuppressWarnings("unchecked")
        JavaRDD<String> javaRDD = (JavaRDD<String>) value;
        if (hasMatrixMetadata) {
            MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
            if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
                return MLContextConversionUtil.javaRDDStringIJVToMatrixObject(javaRDD, matrixMetadata);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD, matrixMetadata);
            }
        } else if (hasFrameMetadata) {
            FrameMetadata frameMetadata = (FrameMetadata) metadata;
            if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
                return MLContextConversionUtil.javaRDDStringIJVToFrameObject(javaRDD, frameMetadata);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD, frameMetadata);
            }
        } else if (!hasMetadata) {
            // No metadata: sniff the first line — all-numeric CSV means matrix,
            // otherwise treat it as a frame.
            String firstLine = javaRDD.first();
            boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
            if (isAllNumbers) {
                return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD);
            }
        }
    } else if (value instanceof RDD<?>) {
        // Same dispatch as the JavaRDD branch, for Scala RDDs.
        @SuppressWarnings("unchecked")
        RDD<String> rdd = (RDD<String>) value;
        if (hasMatrixMetadata) {
            MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
            if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
                return MLContextConversionUtil.rddStringIJVToMatrixObject(rdd, matrixMetadata);
            } else {
                return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd, matrixMetadata);
            }
        } else if (hasFrameMetadata) {
            FrameMetadata frameMetadata = (FrameMetadata) metadata;
            if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
                return MLContextConversionUtil.rddStringIJVToFrameObject(rdd, frameMetadata);
            } else {
                return MLContextConversionUtil.rddStringCSVToFrameObject(rdd, frameMetadata);
            }
        } else if (!hasMetadata) {
            String firstLine = rdd.first();
            boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
            if (isAllNumbers) {
                return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd);
            } else {
                return MLContextConversionUtil.rddStringCSVToFrameObject(rdd);
            }
        }
    } else if (value instanceof MatrixBlock) {
        MatrixBlock matrixBlock = (MatrixBlock) value;
        return MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, (MatrixMetadata) metadata);
    } else if (value instanceof FrameBlock) {
        FrameBlock frameBlock = (FrameBlock) value;
        return MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, (FrameMetadata) metadata);
    } else if (value instanceof Dataset<?>) {
        @SuppressWarnings("unchecked")
        Dataset<Row> dataFrame = (Dataset<Row>) value;
        // Normalize MLlib vector columns to the Spark ML vector type.
        dataFrame = MLUtils.convertVectorColumnsToML(dataFrame);
        if (hasMatrixMetadata) {
            return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame, (MatrixMetadata) metadata);
        } else if (hasFrameMetadata) {
            return MLContextConversionUtil.dataFrameToFrameObject(dataFrame, (FrameMetadata) metadata);
        } else if (!hasMetadata) {
            boolean looksLikeMatrix = doesDataFrameLookLikeMatrix(dataFrame);
            if (looksLikeMatrix) {
                return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame);
            } else {
                return MLContextConversionUtil.dataFrameToFrameObject(dataFrame);
            }
        }
    } else if (value instanceof Matrix) {
        Matrix matrix = (Matrix) value;
        // Convert directly from binary blocks when no MatrixObject exists yet;
        // fall back to the matrix's own metadata if none was supplied.
        if ((matrix.hasBinaryBlocks()) && (!matrix.hasMatrixObject())) {
            if (metadata == null) {
                metadata = matrix.getMatrixMetadata();
            }
            JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = matrix.toBinaryBlocks();
            return MLContextConversionUtil.binaryBlocksToMatrixObject(binaryBlocks, (MatrixMetadata) metadata);
        } else {
            return matrix.toMatrixObject();
        }
    } else if (value instanceof Frame) {
        Frame frame = (Frame) value;
        if ((frame.hasBinaryBlocks()) && (!frame.hasFrameObject())) {
            if (metadata == null) {
                metadata = frame.getFrameMetadata();
            }
            JavaPairRDD<Long, FrameBlock> binaryBlocks = frame.toBinaryBlocks();
            return MLContextConversionUtil.binaryBlocksToFrameObject(binaryBlocks, (FrameMetadata) metadata);
        } else {
            return frame.toFrameObject();
        }
    } else if (value instanceof double[][]) {
        double[][] doubleMatrix = (double[][]) value;
        return MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix, (MatrixMetadata) metadata);
    } else if (value instanceof URL) {
        URL url = (URL) value;
        return MLContextConversionUtil.urlToMatrixObject(url, (MatrixMetadata) metadata);
    } else if (value instanceof Integer) {
        return new IntObject((Integer) value);
    } else if (value instanceof Double) {
        return new DoubleObject((Double) value);
    } else if (value instanceof String) {
        return new StringObject((String) value);
    } else if (value instanceof Boolean) {
        return new BooleanObject((Boolean) value);
    }
    // Unrecognized type (or RDD/Dataset with non-matrix/frame metadata).
    return null;
}
Use of org.apache.spark.api.java.JavaRDD in the Apache incubator-systemml project: class MLContextUtil, method displayInputs.
/**
 * Obtain a display of script inputs: one numbered line per input with its
 * type, its role in the symbol table (Matrix/Frame) when known, its key,
 * and an abbreviated string rendering of its value.
 *
 * @param name
 *            the title to display for the inputs
 * @param map
 *            the map of inputs
 * @param symbolTable
 *            the symbol table
 * @return the script inputs represented as a String
 */
public static String displayInputs(String name, Map<String, Object> map, LocalVariableMap symbolTable) {
    StringBuilder sb = new StringBuilder();
    sb.append(name);
    sb.append(":\n");
    Set<String> keys = map.keySet();
    if (keys.isEmpty()) {
        sb.append("None\n");
    } else {
        int count = 0;
        for (String key : keys) {
            Object object = map.get(key);
            // IMPROVED: Class<?> instead of a raw Class with @SuppressWarnings.
            Class<?> clazz = object.getClass();
            String type = clazz.getSimpleName();
            // Generic Spark RDD wrappers get a friendlier short name.
            if (object instanceof JavaRDD<?>) {
                type = "JavaRDD";
            } else if (object instanceof RDD<?>) {
                type = "RDD";
            }
            sb.append(" [");
            sb.append(++count);
            sb.append("]");
            sb.append(" (");
            sb.append(type);
            if (doesSymbolTableContainMatrixObject(symbolTable, key)) {
                sb.append(" as Matrix");
            } else if (doesSymbolTableContainFrameObject(symbolTable, key)) {
                sb.append(" as Frame");
            }
            sb.append(") ");
            sb.append(key);
            sb.append(": ");
            String str = null;
            if (object instanceof MatrixBlock) {
                // Summarize MatrixBlocks instead of stringifying their contents.
                MatrixBlock mb = (MatrixBlock) object;
                str = "MatrixBlock [sparse? = " + mb.isInSparseFormat() + ", nonzeros = " + mb.getNonZeros() + ", size: " + mb.getNumRows() + " X " + mb.getNumColumns() + "]";
            } else {
                // TODO: Deal with OOM for other objects such as Frame, etc.
                str = object.toString();
            }
            // Cap the rendered value at 100 characters.
            str = StringUtils.abbreviate(str, 100);
            sb.append(str);
            sb.append("\n");
        }
    }
    return sb.toString();
}
Aggregations