Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The start() method of the MySQLToDatasetApp class reads the actor table of the MySQL sakila database over JDBC into a Dataset<Row> and sorts it by last name.
private void start() {
    SparkSession spark = SparkSession.builder()
        .appName("Dataset from MySQL JDBC Connection")
        .master("local")
        .getOrCreate();

    java.util.Properties props = new Properties();
    props.put("user", "root");
    props.put("password", "password");
    props.put("useSSL", "false");

    Dataset<Row> df = spark.read().jdbc(
        "jdbc:mysql://localhost:3306/sakila?serverTimezone=EST", "actor", props);
    df = df.orderBy(df.col("last_name"));
    df.show();
}
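The same read can also be expressed with the option-based form of the DataFrameReader, which keeps the connection settings inline instead of in a Properties object. A minimal sketch, assuming the MySQL Connector/J driver is on the classpath and the same local sakila database:

// Option-based equivalent of the jdbc() call above.
Dataset<Row> df = spark.read()
    .format("jdbc")
    .option("url", "jdbc:mysql://localhost:3306/sakila?serverTimezone=EST")
    .option("dbtable", "actor")
    .option("user", "root")
    .option("password", "password")
    .option("useSSL", "false")
    .load();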
Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The start() method of the ArrayToDataframeApp class turns a Java String array into a Dataset<String> and converts it to a DataFrame.
private void start() {
    SparkSession spark = SparkSession.builder()
        .appName("Array to Dataframe")
        .master("local")
        .getOrCreate();

    String[] l = new String[] { "a", "b", "c", "d" };
    List<String> data = Arrays.asList(l);
    Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
    Dataset<Row> df = ds.toDF();
    df.show();
}
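By default, toDF() names the resulting single column value. Dataset.toDF also accepts replacement column names; a minimal sketch (the name letter is an arbitrary choice):

// Rename the default "value" column while converting to a DataFrame.
Dataset<Row> df = ds.toDF("letter");
df.printSchema(); // letter: string (nullable = true)
df.show();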
Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The start() method of the StreamingIngestionFileSystemTextFileToDataframeApp class ingests text files from a directory with the DStream API and converts each micro-batch to a DataFrame.
private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of five seconds
    SparkConf conf = new SparkConf()
        .setMaster("local[2]")
        .setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    // Convert each RDD of the stream to a JavaRDD<Row>, then to a DataFrame
    msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
        private static final long serialVersionUID = -590010339928376829L;

        @Override
        public void call(JavaRDD<String> rdd) {
            JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
                private static final long serialVersionUID = 5167089361335095997L;

                @Override
                public Row call(String msg) {
                    return RowFactory.create(msg);
                }
            });

            // Create the schema: a single nullable string column named "Message"
            StructType schema = DataTypes.createStructType(new StructField[] {
                DataTypes.createStructField("Message", DataTypes.StringType, true) });

            // Get (or lazily create) the singleton Spark 2.0 session
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
            Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
            msgDataFrame.show();
        }
    });

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        // Interrupted while waiting for the streaming context to terminate
        e.printStackTrace();
    }
}
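Since the project targets Spark 2.x on Java 8, the same foreachRDD body can be written with lambdas instead of anonymous inner classes. A minimal sketch under the same assumptions (StreamingUtils and JavaSparkSessionSingleton are helpers from the project):

// Lambda form of the micro-batch conversion above.
msgDataStream.foreachRDD(rdd -> {
    JavaRDD<Row> rowRDD = rdd.map(msg -> RowFactory.create(msg));
    StructType schema = DataTypes.createStructType(new StructField[] {
        DataTypes.createStructField("Message", DataTypes.StringType, true) });
    SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
    spark.createDataFrame(rowRDD, schema).show();
});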
Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The start() method of the ReadLinesFromMultipleFileStreams class reads lines arriving in a directory with Structured Streaming and writes each update to the console.
private void start() {
    log.debug("-> start()");

    SparkSession spark = SparkSession.builder()
        .appName("Read lines over a file stream")
        .master("local")
        .getOrCreate();

    Dataset<Row> df = spark.readStream()
        .format("text")
        .load(StreamingUtils.getInputDirectory());

    // In this case everything is a string: the text source yields a single
    // string column. Note that show() cannot be called on a streaming
    // Dataset (it throws an AnalysisException), but printSchema() can.
    df.printSchema();

    StreamingQuery query = df.writeStream()
        .outputMode(OutputMode.Update())
        .format("console")
        .start();
    try {
        query.awaitTermination();
    } catch (StreamingQueryException e) {
        log.error("Exception while waiting for query to end {}.", e.getMessage(), e);
    }
}
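Structured Streaming also lets the sink run on a fixed cadence rather than as fast as data arrives. A minimal sketch of the same console query with a processing-time trigger, assuming org.apache.spark.sql.streaming.Trigger is imported (the five-second interval is an arbitrary choice):

// Same console sink, but emit a micro-batch at most every five seconds.
StreamingQuery query = df.writeStream()
    .outputMode(OutputMode.Update())
    .format("console")
    .trigger(Trigger.ProcessingTime("5 seconds"))
    .start();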
Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The main() method of the ConnectLocally class opens a local session and prints the Spark version.
public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("Hello Spark")
        .master("local")
        .getOrCreate();
    System.out.println("Hello, Spark v." + spark.version());
}
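When the application is done with the session, its resources can be released explicitly rather than relying on JVM exit; a minimal addition:

// Release the session's resources once the work is finished.
spark.stop();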