
Example 11 with SparkSession

Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.

The class MySQLToDatasetApp, method start:

private void start() {
    SparkSession spark = SparkSession.builder().appName("Dataset from MySQL JDBC Connection").master("local").getOrCreate();
    java.util.Properties props = new Properties();
    props.put("user", "root");
    props.put("password", "password");
    props.put("useSSL", "false");
    Dataset<Row> df = spark.read().jdbc("jdbc:mysql://localhost:3306/sakila?serverTimezone=EST", "actor", props);
    df = df.orderBy(df.col("last_name"));
    df.show();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Properties(java.util.Properties) Row(org.apache.spark.sql.Row)
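
For comparison, the same JDBC read can be expressed through explicit DataFrameReader options instead of a Properties object. This is only a sketch mirroring the example above: the method name is illustrative, the URL, table, and credentials are the same placeholders for a local MySQL instance with the Sakila sample database, and the MySQL Connector/J driver is assumed to be on the classpath.

private void startWithOptions() {
    SparkSession spark = SparkSession.builder().appName("Dataset from MySQL JDBC Connection").master("local").getOrCreate();
    // Pass the connection settings as reader options; extra options such as
    // useSSL are forwarded to the JDBC driver as connection properties.
    Dataset<Row> df = spark.read().format("jdbc").option("url", "jdbc:mysql://localhost:3306/sakila?serverTimezone=EST").option("dbtable", "actor").option("user", "root").option("password", "password").option("useSSL", "false").load();
    df.orderBy(df.col("last_name")).show();
}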

Example 12 with SparkSession

Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.

The class ArrayToDataframeApp, method start:

private void start() {
    SparkSession spark = SparkSession.builder().appName("Array to Dataframe").master("local").getOrCreate();
    String[] l = new String[] { "a", "b", "c", "d" };
    List<String> data = Arrays.asList(l);
    Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
    Dataset<Row> df = ds.toDF();
    df.show();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Row(org.apache.spark.sql.Row)
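
The single column produced by toDF() on a Dataset<String> is named value by default. The variant below is a sketch (the method and column names are chosen here) that names the column explicitly and prints the resulting schema:

private void startWithColumnName() {
    SparkSession spark = SparkSession.builder().appName("Array to Dataframe").master("local").getOrCreate();
    List<String> data = Arrays.asList("a", "b", "c", "d");
    // toDF(String...) renames the default "value" column in one step.
    Dataset<Row> df = spark.createDataset(data, Encoders.STRING()).toDF("letter");
    // root
    //  |-- letter: string (nullable = true)
    df.printSchema();
    df.show();
}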

Example 13 with SparkSession

Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.

The class StreamingIngestionFileSystemTextFileToDataframeApp, method start:

private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();
    // Create JavaRDD<Row>
    msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {

        private static final long serialVersionUID = -590010339928376829L;

        @Override
        public void call(JavaRDD<String> rdd) {
            JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {

                private static final long serialVersionUID = 5167089361335095997L;

                @Override
                public Row call(String msg) {
                    Row row = RowFactory.create(msg);
                    return row;
                }
            });
            // Create Schema
            StructType schema = DataTypes.createStructType(new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });
            // Get Spark 2.0 session
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
            Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
            msgDataFrame.show();
        }
    });
    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        // Interrupted while waiting for the streaming context to terminate
        e.printStackTrace();
    }
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) StructType(org.apache.spark.sql.types.StructType) Dataset(org.apache.spark.sql.Dataset) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Function(org.apache.spark.api.java.function.Function) StructField(org.apache.spark.sql.types.StructField) Row(org.apache.spark.sql.Row) SparkConf(org.apache.spark.SparkConf)
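
JavaSparkSessionSingleton is a small helper used above but not shown here. A common, lazily initialized version, adapted from the pattern in Spark's own Java streaming examples, looks roughly like the sketch below; the exact implementation in the project may differ.

class JavaSparkSessionSingleton {

    private static SparkSession instance = null;

    // Reuse one SparkSession across micro-batches instead of building a new
    // one in every foreachRDD call.
    public static SparkSession getInstance(SparkConf sparkConf) {
        if (instance == null) {
            instance = SparkSession.builder().config(sparkConf).getOrCreate();
        }
        return instance;
    }
}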

Example 14 with SparkSession

Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.

The class ReadLinesFromMultipleFileStreams, method start:

private void start() {
    log.debug("-> start()");
    SparkSession spark = SparkSession.builder().appName("Read lines over a file stream").master("local").getOrCreate();
    // @formatter:off
    Dataset<Row> df = spark.readStream().format("text").load(StreamingUtils.getInputDirectory());
    // @formatter:on
    StreamingQuery query = df.writeStream().outputMode(OutputMode.Update()).format("console").start();
    try {
        query.awaitTermination();
    } catch (StreamingQueryException e) {
        log.error("Exception while waiting for query to end {}.", e.getMessage(), e);
    }
    // The text source produces a single string column, so everything here is a string.
    // Note: show() cannot be called on a streaming Dataset, so only the schema is printed.
    df.printSchema();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) StreamingQuery(org.apache.spark.sql.streaming.StreamingQuery) Row(org.apache.spark.sql.Row) StreamingQueryException(org.apache.spark.sql.streaming.StreamingQueryException)
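
Because show() is not available on a streaming Dataset, any schema inspection has to happen before the query starts. The sketch below is a variant of the same pipeline (method name is mine) that also caps the ingestion rate with the file source's maxFilesPerTrigger option; the value of 1 is purely illustrative.

private void startThrottled() {
    SparkSession spark = SparkSession.builder().appName("Read lines over a file stream").master("local").getOrCreate();
    // Process at most one new file per micro-batch.
    Dataset<Row> df = spark.readStream().format("text").option("maxFilesPerTrigger", 1).load(StreamingUtils.getInputDirectory());
    // The text source exposes a single string column named "value".
    df.printSchema();
    StreamingQuery query = df.writeStream().outputMode(OutputMode.Append()).format("console").start();
    try {
        query.awaitTermination();
    } catch (StreamingQueryException e) {
        log.error("Exception while waiting for query to end {}.", e.getMessage(), e);
    }
}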

Example 15 with SparkSession

Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.

The class ConnectLocally, method main:

public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("Hello Spark").master("local").getOrCreate();
    System.out.println("Hello, Spark v." + spark.version());
}
Also used : SparkSession(org.apache.spark.sql.SparkSession)
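
A slightly extended variant of the same hello-world, sketched here rather than taken from the project, that also reports the master URL and releases local resources when done:

public static void main(String[] args) {
    // local[*] uses all available local cores instead of a single one.
    SparkSession spark = SparkSession.builder().appName("Hello Spark").master("local[*]").getOrCreate();
    // version() and sparkContext().master() report the running Spark version
    // and the master this session is connected to.
    System.out.println("Hello, Spark v." + spark.version() + " on " + spark.sparkContext().master());
    // Stop the session so the local JVM can exit cleanly.
    spark.stop();
}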

Aggregations

SparkSession (org.apache.spark.sql.SparkSession): 53
Row (org.apache.spark.sql.Row): 43
StructType (org.apache.spark.sql.types.StructType): 11
ArrayList (java.util.ArrayList): 6
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 6
StructField (org.apache.spark.sql.types.StructField): 6
SparkConf (org.apache.spark.SparkConf): 4
JavaRDD (org.apache.spark.api.java.JavaRDD): 3
Script (org.apache.sysml.api.mlcontext.Script): 3
Test (org.junit.Test): 3
Dataset (org.apache.spark.sql.Dataset): 2
StreamingQuery (org.apache.spark.sql.streaming.StreamingQuery): 2
StreamingQueryException (org.apache.spark.sql.streaming.StreamingQueryException): 2
DMLScript (org.apache.sysml.api.DMLScript): 2
RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 2
MLContext (org.apache.sysml.api.mlcontext.MLContext): 2
Matrix (org.apache.sysml.api.mlcontext.Matrix): 2
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 2
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 2
File (java.io.File): 1