Use of org.apache.spark.sql.SparkSession in the project net.jgp.labs.spark by jgperrin:
the start() method of the FirstPrediction class.
private void start() {
  // Local session for the prediction example.
  SparkSession session = SparkSession.builder()
      .appName("First Prediction")
      .master("local")
      .getOrCreate();

  // Two-column schema: a double label and a vector of features,
  // both mandatory (nullable = false).
  StructField labelField = new StructField("label", DataTypes.DoubleType, false, Metadata.empty());
  StructField featuresField = new StructField("features", new VectorUDT(), false, Metadata.empty());
  StructType schema = new StructType(new StructField[] { labelField, featuresField });
  // TODO this example is not working yet
}
Use of org.apache.spark.sql.SparkSession in the project net.jgp.labs.spark by jgperrin:
the start() method of the Loader class.
/**
 * Loads a CSV file into a dataframe, registers it as a global temporary view,
 * then keeps the application alive for ten seconds so that concurrent
 * sessions can query the shared view.
 */
private void start() {
  SparkConf conf = new SparkConf()
      .setAppName("Concurrency Lab 001")
      .setMaster(Config.MASTER)
      .set("hello", "world"); // demo of an arbitrary custom key/value setting
  // Context created explicitly; the session below reuses it via the same conf.
  JavaSparkContext sc = new JavaSparkContext(conf);
  SparkSession spark = SparkSession.builder().config(conf).getOrCreate();

  String filename = "data/tuple-data-file.csv";
  Dataset<Row> df = spark.read()
      .format("csv")
      .option("inferSchema", "true")
      .option("header", "false")
      .load(filename);
  df.show();

  try {
    // Global temp views are tied to the application, not the session,
    // so other sessions can see "myView" while this app is alive.
    df.createGlobalTempView("myView");
  } catch (AnalysisException e) {
    // Creation can fail e.g. if the view name already exists.
    System.err.println("Could not create global temp view: " + e.getMessage());
    e.printStackTrace();
  }

  try {
    // Keep the application (and therefore the global view) alive briefly.
    Thread.sleep(10000);
  } catch (InterruptedException e) {
    // Restore the interrupt status so callers can observe the interruption.
    Thread.currentThread().interrupt();
    System.out.println("Hmmm... Something interrupted the thread: " + e.getMessage());
  }
}
Use of org.apache.spark.sql.SparkSession in the project net.jgp.labs.spark by jgperrin:
the main() method of the ListNCSchoolDistricts class.
/**
 * Downloads the North Carolina school performance dataset as JSON into /tmp,
 * then uses Spark to count and list the rows per school district.
 *
 * @param args unused
 */
public static void main(String[] args) {
  String filename = "/tmp/" + System.currentTimeMillis() + ".json";
  try {
    FileUtils.copyURLToFile(new URL("https://opendurham.nc.gov/explore/dataset/north-carolina-school-performance-data/download/?format=json&timezone=America/New_York"), new File(filename));
  } catch (IOException e) {
    // MalformedURLException is a subclass of IOException; one catch covers both.
    System.err.println("Could not download dataset: " + e.getMessage());
    e.printStackTrace();
    return; // no file to analyze — abort instead of failing later on a missing file
  }
  System.out.println("File " + filename + " downloaded");

  SparkSession spark = SparkSession.builder().appName("NC Schools").master("local").getOrCreate();

  // Fix: filename already starts with "/tmp/"; the original code prepended
  // "/tmp/" a second time and pointed Spark at a non-existent path.
  String fileToAnalyze = filename;
  System.out.println("File to analyze: " + fileToAnalyze);

  // Fix: date pattern "mm" means minutes; months are "MM".
  Dataset<Row> df = spark.read().option("dateFormat", "yyyy-MM-dd").json(fileToAnalyze);
  // Promote the nested district field to a top-level column,
  // then count rows per district in district order.
  df = df.withColumn("district", df.col("fields.district"));
  df = df.groupBy("district").count().orderBy(df.col("district"));
  df.show(150, false);
}
Aggregations