Search in sources:

Example 31 with JavaRDD

use of org.apache.spark.api.java.JavaRDD in project rocketmq-externals by apache.

the class RocketMqUtilsTest method testConsumer.

@Test
public void testConsumer() throws MQBrokerException, MQClientException, InterruptedException, UnsupportedEncodingException {
    // start up spark
    Map<String, String> optionParams = new HashMap<>();
    optionParams.put(RocketMQConfig.NAME_SERVER_ADDR, NAME_SERVER);
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver").setMaster("local[*]");
    JavaStreamingContext sc = new JavaStreamingContext(sparkConf, new Duration(1000));
    List<String> topics = new ArrayList<>();
    topics.add(TOPIC_DEFAULT);
    LocationStrategy locationStrategy = LocationStrategy.PreferConsistent();
    // Pull from the earliest offset with a fresh consumer group so every
    // published message is observed by this test run.
    JavaInputDStream<MessageExt> stream = RocketMqUtils.createJavaMQPullStream(sc, UUID.randomUUID().toString(), topics, ConsumerStrategy.earliest(), false, false, false, locationStrategy, optionParams);
    // Synchronized set: foreachRDD callbacks run on Spark worker threads
    // while the polling loop below reads the size from the test thread.
    final Set<MessageExt> result = Collections.synchronizedSet(new HashSet<MessageExt>());
    stream.foreachRDD(new VoidFunction<JavaRDD<MessageExt>>() {

        @Override
        public void call(JavaRDD<MessageExt> messageExtJavaRDD) throws Exception {
            result.addAll(messageExtJavaRDD.collect());
        }
    });
    sc.start();
    // Poll for up to 20 seconds for all expected messages to arrive.
    long startTime = System.currentTimeMillis();
    boolean matches = false;
    while (!matches && System.currentTimeMillis() - startTime < 20000) {
        matches = MESSAGE_NUM == result.size();
        Thread.sleep(50);
    }
    sc.stop();
    // The original version computed `matches` but never asserted it, so the
    // test silently passed even when no messages were consumed. Fail
    // explicitly once the streaming context has been shut down.
    if (!matches) {
        throw new AssertionError("Expected " + MESSAGE_NUM + " messages but received " + result.size());
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Duration(org.apache.spark.streaming.Duration) MQClientException(org.apache.rocketmq.client.exception.MQClientException) MQBrokerException(org.apache.rocketmq.client.exception.MQBrokerException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) MessageExt(org.apache.rocketmq.common.message.MessageExt) LocationStrategy(org.apache.rocketmq.spark.LocationStrategy) SparkConf(org.apache.spark.SparkConf) Test(org.junit.Test)

Example 32 with JavaRDD

use of org.apache.spark.api.java.JavaRDD in project net.jgp.labs.spark by jgperrin.

the class StreamingIngestionFileSystemTextFileToDataframeApp method start.

private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of 5 seconds (the old comment claimed 1 second, contradicting
    // Durations.seconds(5) below).
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();
    // Turn each micro-batch of text lines into a one-column DataFrame.
    // Anonymous classes (rather than lambdas) are kept deliberately: they
    // carry explicit serialVersionUIDs for Spark closure serialization.
    msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {

        private static final long serialVersionUID = -590010339928376829L;

        @Override
        public void call(JavaRDD<String> rdd) {
            // Wrap each raw line in a Row so it can carry a schema.
            JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {

                private static final long serialVersionUID = 5167089361335095997L;

                @Override
                public Row call(String msg) {
                    Row row = RowFactory.create(msg);
                    return row;
                }
            });
            // Single nullable string column named "Message".
            StructType schema = DataTypes.createStructType(new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });
            // Get Spark 2.0 session (one shared session per executor config).
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
            Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
            msgDataFrame.show();
        }
    });
    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers up the stack can still
        // observe the interruption; previously it was swallowed with only
        // a printStackTrace() and a TODO.
        Thread.currentThread().interrupt();
    }
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) StructType(org.apache.spark.sql.types.StructType) Dataset(org.apache.spark.sql.Dataset) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Function(org.apache.spark.api.java.function.Function) StructField(org.apache.spark.sql.types.StructField) Row(org.apache.spark.sql.Row) SparkConf(org.apache.spark.SparkConf)

Example 33 with JavaRDD

use of org.apache.spark.api.java.JavaRDD in project net.jgp.labs.spark by jgperrin.

the class PiApp method start.

private void start() {
    // Session against a remote standalone master; executor resources and
    // the fat-jar path are hard-wired for this particular lab machine.
    SparkSession spark = SparkSession.builder().appName("JavaSparkPi").master("spark://10.0.100.81:7077").config("spark.executor.memory", "1g").config("spark.executor.cores", "1").config("spark.cores.max", "2").config("spark.driver.host", "10.0.100.182").config("spark.executor.extraClassPath", "/home/jgp/net.jgp.labs.spark/target/labs-spark-2.2.0-jar-with-dependencies.jar").getOrCreate();
    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    // Number of Monte Carlo darts to throw (the old `1 * NUM_SAMPLES` was a
    // redundant multiplication).
    int n = NUM_SAMPLES;
    List<Integer> l = new ArrayList<>(n);
    for (int i = 0; i < n; i++) {
        l.add(i);
    }
    JavaRDD<Integer> dataSet = jsc.parallelize(l, NUM_SAMPLES);
    long t0 = System.currentTimeMillis();
    // Throw a dart uniformly into the [-1, 1] x [-1, 1] square and count the
    // ones landing inside the unit circle; pi ~= 4 * inside / total.
    long count = dataSet.map(integer -> {
        double x = Math.random() * 2 - 1;
        double y = Math.random() * 2 - 1;
        return (x * x + y * y <= 1) ? 1 : 0;
    }).reduce(Integer::sum);
    long t1 = System.currentTimeMillis();
    log.info("Pi is roughly ..... {}", 4.0 * count / n);
    log.info("Processing time ... {} ms", t1 - t0);
    spark.stop();
}
Also used : List(java.util.List) Logger(org.slf4j.Logger) SubStringCounterDataSource(net.jgp.labs.spark.x.datasource.SubStringCounterDataSource) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LoggerFactory(org.slf4j.LoggerFactory) JavaRDD(org.apache.spark.api.java.JavaRDD) ArrayList(java.util.ArrayList) SparkSession(org.apache.spark.sql.SparkSession) SparkSession(org.apache.spark.sql.SparkSession) ArrayList(java.util.ArrayList) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 34 with JavaRDD

use of org.apache.spark.api.java.JavaRDD in project incubator-systemml by apache.

the class MLContextUtil method convertInputType.

/**
 * Convert input types to internal SystemML representations.
 *
 * <p>Dispatches on the runtime type of {@code parameterValue}
 * (JavaRDD/RDD of strings, MatrixBlock, FrameBlock, Dataset&lt;Row&gt;,
 * Matrix, Frame, double[][], URL, or a scalar) and delegates to the
 * matching {@code MLContextConversionUtil} routine.</p>
 *
 * @param parameterName
 *            The name of the input parameter
 * @param parameterValue
 *            The value of the input parameter
 * @param metadata
 *            matrix/frame metadata
 * @return input in SystemML data representation, or {@code null} if the
 *         value's type is not recognized (or an RDD/Dataset arrives with
 *         metadata of an unexpected kind)
 */
public static Data convertInputType(String parameterName, Object parameterValue, Metadata metadata) {
    String name = parameterName;
    Object value = parameterValue;
    // `cond ? true : false` is redundant; assign the boolean expressions
    // directly.
    boolean hasMetadata = (metadata != null);
    boolean hasMatrixMetadata = hasMetadata && (metadata instanceof MatrixMetadata);
    boolean hasFrameMetadata = hasMetadata && (metadata instanceof FrameMetadata);
    if (name == null) {
        throw new MLContextException("Input parameter name is null");
    } else if (value == null) {
        throw new MLContextException("Input parameter value is null for: " + parameterName);
    } else if (value instanceof JavaRDD<?>) {
        @SuppressWarnings("unchecked") JavaRDD<String> javaRDD = (JavaRDD<String>) value;
        if (hasMatrixMetadata) {
            MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
            if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
                return MLContextConversionUtil.javaRDDStringIJVToMatrixObject(javaRDD, matrixMetadata);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD, matrixMetadata);
            }
        } else if (hasFrameMetadata) {
            FrameMetadata frameMetadata = (FrameMetadata) metadata;
            if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
                return MLContextConversionUtil.javaRDDStringIJVToFrameObject(javaRDD, frameMetadata);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD, frameMetadata);
            }
        } else if (!hasMetadata) {
            // No metadata: sniff the first line — all-numeric CSV is treated
            // as a matrix, anything else as a frame.
            String firstLine = javaRDD.first();
            boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
            if (isAllNumbers) {
                return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD);
            }
        }
    } else if (value instanceof RDD<?>) {
        @SuppressWarnings("unchecked") RDD<String> rdd = (RDD<String>) value;
        if (hasMatrixMetadata) {
            MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
            if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
                return MLContextConversionUtil.rddStringIJVToMatrixObject(rdd, matrixMetadata);
            } else {
                return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd, matrixMetadata);
            }
        } else if (hasFrameMetadata) {
            FrameMetadata frameMetadata = (FrameMetadata) metadata;
            if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
                return MLContextConversionUtil.rddStringIJVToFrameObject(rdd, frameMetadata);
            } else {
                return MLContextConversionUtil.rddStringCSVToFrameObject(rdd, frameMetadata);
            }
        } else if (!hasMetadata) {
            // Same first-line sniffing as the JavaRDD branch above.
            String firstLine = rdd.first();
            boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
            if (isAllNumbers) {
                return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd);
            } else {
                return MLContextConversionUtil.rddStringCSVToFrameObject(rdd);
            }
        }
    } else if (value instanceof MatrixBlock) {
        MatrixBlock matrixBlock = (MatrixBlock) value;
        return MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, (MatrixMetadata) metadata);
    } else if (value instanceof FrameBlock) {
        FrameBlock frameBlock = (FrameBlock) value;
        return MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, (FrameMetadata) metadata);
    } else if (value instanceof Dataset<?>) {
        @SuppressWarnings("unchecked") Dataset<Row> dataFrame = (Dataset<Row>) value;
        // Normalize legacy MLlib vector columns to the ML package first.
        dataFrame = MLUtils.convertVectorColumnsToML(dataFrame);
        if (hasMatrixMetadata) {
            return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame, (MatrixMetadata) metadata);
        } else if (hasFrameMetadata) {
            return MLContextConversionUtil.dataFrameToFrameObject(dataFrame, (FrameMetadata) metadata);
        } else if (!hasMetadata) {
            boolean looksLikeMatrix = doesDataFrameLookLikeMatrix(dataFrame);
            if (looksLikeMatrix) {
                return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame);
            } else {
                return MLContextConversionUtil.dataFrameToFrameObject(dataFrame);
            }
        }
    } else if (value instanceof Matrix) {
        Matrix matrix = (Matrix) value;
        if ((matrix.hasBinaryBlocks()) && (!matrix.hasMatrixObject())) {
            if (metadata == null) {
                metadata = matrix.getMatrixMetadata();
            }
            JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = matrix.toBinaryBlocks();
            return MLContextConversionUtil.binaryBlocksToMatrixObject(binaryBlocks, (MatrixMetadata) metadata);
        } else {
            return matrix.toMatrixObject();
        }
    } else if (value instanceof Frame) {
        Frame frame = (Frame) value;
        if ((frame.hasBinaryBlocks()) && (!frame.hasFrameObject())) {
            if (metadata == null) {
                metadata = frame.getFrameMetadata();
            }
            JavaPairRDD<Long, FrameBlock> binaryBlocks = frame.toBinaryBlocks();
            return MLContextConversionUtil.binaryBlocksToFrameObject(binaryBlocks, (FrameMetadata) metadata);
        } else {
            return frame.toFrameObject();
        }
    } else if (value instanceof double[][]) {
        double[][] doubleMatrix = (double[][]) value;
        return MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix, (MatrixMetadata) metadata);
    } else if (value instanceof URL) {
        URL url = (URL) value;
        return MLContextConversionUtil.urlToMatrixObject(url, (MatrixMetadata) metadata);
    } else if (value instanceof Integer) {
        return new IntObject((Integer) value);
    } else if (value instanceof Double) {
        return new DoubleObject((Double) value);
    } else if (value instanceof String) {
        return new StringObject((String) value);
    } else if (value instanceof Boolean) {
        return new BooleanObject((Boolean) value);
    }
    // Unrecognized input type (or RDD/Dataset with unexpected metadata kind).
    return null;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) URL(java.net.URL) RDD(org.apache.spark.rdd.RDD) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) Dataset(org.apache.spark.sql.Dataset) JavaRDD(org.apache.spark.api.java.JavaRDD) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) Row(org.apache.spark.sql.Row) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject)

Example 35 with JavaRDD

use of org.apache.spark.api.java.JavaRDD in project incubator-systemml by apache.

the class MLContextUtil method displayInputs.

/**
 * Obtain a display of script inputs.
 *
 * <p>Produces one line per input of the form
 * {@code [n] (Type[ as Matrix| as Frame]) key: value}, with values
 * abbreviated to 100 characters.</p>
 *
 * @param name
 *            the title to display for the inputs
 * @param map
 *            the map of inputs
 * @param symbolTable
 *            the symbol table
 * @return the script inputs represented as a String
 */
public static String displayInputs(String name, Map<String, Object> map, LocalVariableMap symbolTable) {
    StringBuilder sb = new StringBuilder();
    sb.append(name);
    sb.append(":\n");
    Set<String> keys = map.keySet();
    if (keys.isEmpty()) {
        sb.append("None\n");
    } else {
        int count = 0;
        for (String key : keys) {
            Object object = map.get(key);
            // Class<?> avoids the raw type (and the rawtypes suppression the
            // original carried).
            Class<?> clazz = object.getClass();
            String type = clazz.getSimpleName();
            // Collapse parameterized RDD class names to a friendly label.
            if (object instanceof JavaRDD<?>) {
                type = "JavaRDD";
            } else if (object instanceof RDD<?>) {
                type = "RDD";
            }
            sb.append("  [");
            sb.append(++count);
            sb.append("]");
            sb.append(" (");
            sb.append(type);
            if (doesSymbolTableContainMatrixObject(symbolTable, key)) {
                sb.append(" as Matrix");
            } else if (doesSymbolTableContainFrameObject(symbolTable, key)) {
                sb.append(" as Frame");
            }
            sb.append(") ");
            sb.append(key);
            sb.append(": ");
            String str = null;
            if (object instanceof MatrixBlock) {
                // Summarize matrix blocks instead of printing their contents.
                MatrixBlock mb = (MatrixBlock) object;
                str = "MatrixBlock [sparse? = " + mb.isInSparseFormat() + ", nonzeros = " + mb.getNonZeros() + ", size: " + mb.getNumRows() + " X " + mb.getNumColumns() + "]";
            } else {
                // TODO: Deal with OOM for other objects such as Frame, etc
                str = object.toString();
            }
            str = StringUtils.abbreviate(str, 100);
            sb.append(str);
            sb.append("\n");
        }
    }
    return sb.toString();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) JavaRDD(org.apache.spark.api.java.JavaRDD)

Aggregations

JavaRDD (org.apache.spark.api.java.JavaRDD)63 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)33 List (java.util.List)24 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)24 Collectors (java.util.stream.Collectors)20 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)20 Tuple2 (scala.Tuple2)20 Argument (org.broadinstitute.barclay.argparser.Argument)17 Broadcast (org.apache.spark.broadcast.Broadcast)15 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)15 SAMFileHeader (htsjdk.samtools.SAMFileHeader)14 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 IOException (java.io.IOException)14 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 CommandLineProgramProperties (org.broadinstitute.barclay.argparser.CommandLineProgramProperties)13 GATKSparkTool (org.broadinstitute.hellbender.engine.spark.GATKSparkTool)13 Serializable (java.io.Serializable)12 IntervalUtils (org.broadinstitute.hellbender.utils.IntervalUtils)12 java.util (java.util)11 ArrayList (java.util.ArrayList)11