Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The class PiApp, method start.
private void start() {
  // Session against a standalone cluster; the executor classpath points to the packaged job jar
  SparkSession spark = SparkSession.builder()
      .appName("JavaSparkPi")
      .master("spark://10.0.100.81:7077")
      .config("spark.executor.memory", "1g")
      .config("spark.executor.cores", "1")
      .config("spark.cores.max", "2")
      .config("spark.driver.host", "10.0.100.182")
      .config("spark.executor.extraClassPath", "/home/jgp/net.jgp.labs.spark/target/labs-spark-2.2.0-jar-with-dependencies.jar")
      .getOrCreate();
  JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

  // Monte Carlo estimation of Pi: draw random points in [-1, 1] x [-1, 1] and count those inside the unit circle
  int n = 1 * NUM_SAMPLES;
  List<Integer> l = new ArrayList<>(n);
  for (int i = 0; i < n; i++) {
    l.add(i);
  }
  JavaRDD<Integer> dataSet = jsc.parallelize(l, NUM_SAMPLES);

  long t0 = System.currentTimeMillis();
  long count = dataSet.map(integer -> {
    double x = Math.random() * 2 - 1;
    double y = Math.random() * 2 - 1;
    return (x * x + y * y <= 1) ? 1 : 0;
  }).reduce((integer, integer2) -> integer + integer2);
  long t1 = System.currentTimeMillis();

  log.info("Pi is roughly ..... {}", 4.0 * count / n);
  log.info("Processing time ... {} ms", t1 - t0);
  spark.stop();
}
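The estimate works because the fraction of uniformly drawn points that land inside the unit circle approaches the area ratio of circle to enclosing square, π/4, so 4.0 * count / n converges to π. The method also relies on class members not shown on this page (NUM_SAMPLES and log); a minimal, self-contained sketch of the surrounding class could look like the following, where the SLF4J logger, the local master, and the sample count are assumptions rather than values taken from the original project.

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PiApp {
  private static final Logger log = LoggerFactory.getLogger(PiApp.class);
  private static final int NUM_SAMPLES = 100000; // assumed sample count

  public static void main(String[] args) {
    new PiApp().start();
  }

  private void start() {
    // local[*] keeps the sketch runnable without the standalone cluster used above
    SparkSession spark = SparkSession.builder().appName("JavaSparkPi").master("local[*]").getOrCreate();
    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    List<Integer> l = new ArrayList<>(NUM_SAMPLES);
    for (int i = 0; i < NUM_SAMPLES; i++) {
      l.add(i);
    }
    long count = jsc.parallelize(l).map(i -> {
      double x = Math.random() * 2 - 1;
      double y = Math.random() * 2 - 1;
      return (x * x + y * y <= 1) ? 1 : 0;
    }).reduce((a, b) -> a + b);
    log.info("Pi is roughly {}", 4.0 * count / NUM_SAMPLES);
    spark.stop();
  }
}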
Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The class Reader, method start.
private void start() {
  SparkConf conf = new SparkConf()
      .setAppName("Concurrency Lab 001")
      .setMaster(Config.MASTER);
  JavaSparkContext sc = new JavaSparkContext(conf);
  SparkSession spark = SparkSession.builder().config(conf).getOrCreate();

  // Read the configuration back from the live context and print the "hello" key
  conf = spark.sparkContext().conf();
  System.out.println(conf.get("hello"));

  // Query a view that is expected to have been registered elsewhere
  Dataset<Row> df = spark.sql("SELECT * from myView");
  df.show();
}
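This reader assumes that some other piece of the lab has already put a "hello" key into the Spark configuration and registered a view named myView; neither is shown on this page. A plausible companion writer, sketched below purely for illustration, only works if both programs end up sharing the same SparkSession (within one JVM, getOrCreate() returns the existing session). The config value and the input path are made up.

// Hypothetical writer side of the concurrency lab; data source and values are assumptions.
SparkSession spark = SparkSession.builder()
    .appName("Concurrency Lab 001")
    .master(Config.MASTER)
    .config("hello", "world")              // value later printed by the reader
    .getOrCreate();

Dataset<Row> df = spark.read()
    .format("csv")
    .option("header", "true")
    .load("data/sample.csv");              // hypothetical input file
df.createOrReplaceTempView("myView");      // view queried by the reader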
Use of org.apache.spark.sql.SparkSession in project incubator-systemml by apache.
The class FrameConverterTest, method runConverter.
@SuppressWarnings("unchecked")
private static void runConverter(ConvType type, MatrixCharacteristics mc, MatrixCharacteristics mcMatrix, List<ValueType> schema, String fnameIn, String fnameOut) throws IOException {
  SparkExecutionContext sec = (SparkExecutionContext) ExecutionContextFactory.createContext();
  JavaSparkContext sc = sec.getSparkContext();
  ValueType[] lschema = schema.toArray(new ValueType[0]);
  MapReduceTool.deleteFileIfExistOnHDFS(fnameOut);
  switch (type) {
    case CSV2BIN:
      {
        InputInfo iinfo = InputInfo.CSVInputInfo;
        OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
        JavaPairRDD<LongWritable, Text> rddIn = (JavaPairRDD<LongWritable, Text>) sc.hadoopFile(fnameIn, iinfo.inputFormatClass, iinfo.inputKeyClass, iinfo.inputValueClass);
        JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils.csvToBinaryBlock(sc, rddIn, mc, null, false, separator, false, 0).mapToPair(new LongFrameToLongWritableFrameFunction());
        rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass);
        break;
      }
    case BIN2CSV:
      {
        InputInfo iinfo = InputInfo.BinaryBlockInputInfo;
        JavaPairRDD<LongWritable, FrameBlock> rddIn = sc.hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class);
        JavaPairRDD<Long, FrameBlock> rddIn2 = rddIn.mapToPair(new CopyFrameBlockPairFunction(false));
        CSVFileFormatProperties fprop = new CSVFileFormatProperties();
        JavaRDD<String> rddOut = FrameRDDConverterUtils.binaryBlockToCsv(rddIn2, mc, fprop, true);
        rddOut.saveAsTextFile(fnameOut);
        break;
      }
    case TXTCELL2BIN:
      {
        InputInfo iinfo = InputInfo.TextCellInputInfo;
        OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
        JavaPairRDD<LongWritable, Text> rddIn = (JavaPairRDD<LongWritable, Text>) sc.hadoopFile(fnameIn, iinfo.inputFormatClass, iinfo.inputKeyClass, iinfo.inputValueClass);
        JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils.textCellToBinaryBlock(sc, rddIn, mc, lschema).mapToPair(new LongFrameToLongWritableFrameFunction());
        rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass);
        break;
      }
    case BIN2TXTCELL:
      {
        InputInfo iinfo = InputInfo.BinaryBlockInputInfo;
        JavaPairRDD<LongWritable, FrameBlock> rddIn = sc.hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class);
        JavaPairRDD<Long, FrameBlock> rddIn2 = rddIn.mapToPair(new CopyFrameBlockPairFunction(false));
        JavaRDD<String> rddOut = FrameRDDConverterUtils.binaryBlockToTextCell(rddIn2, mc);
        rddOut.saveAsTextFile(fnameOut);
        break;
      }
    case MAT2BIN:
      {
        InputInfo iinfo = InputInfo.BinaryBlockInputInfo;
        OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
        JavaPairRDD<MatrixIndexes, MatrixBlock> rddIn = (JavaPairRDD<MatrixIndexes, MatrixBlock>) sc.hadoopFile(fnameIn, iinfo.inputFormatClass, iinfo.inputKeyClass, iinfo.inputValueClass);
        JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils.matrixBlockToBinaryBlock(sc, rddIn, mcMatrix);
        rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass);
        break;
      }
    case BIN2MAT:
      {
        InputInfo iinfo = InputInfo.BinaryBlockInputInfo;
        OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
        JavaPairRDD<Long, FrameBlock> rddIn = sc.hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class).mapToPair(new LongWritableFrameToLongFrameFunction());
        JavaPairRDD<MatrixIndexes, MatrixBlock> rddOut = FrameRDDConverterUtils.binaryBlockToMatrixBlock(rddIn, mc, mcMatrix);
        rddOut.saveAsHadoopFile(fnameOut, MatrixIndexes.class, MatrixBlock.class, oinfo.outputFormatClass);
        break;
      }
    case DFRM2BIN:
      {
        OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
        // Create DataFrame: wrap the existing SparkContext in a SparkSession
        SparkSession sparkSession = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();
        StructType dfSchema = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschema, false);
        JavaRDD<Row> rowRDD = FrameRDDConverterUtils.csvToRowRDD(sc, fnameIn, separator, lschema);
        Dataset<Row> df = sparkSession.createDataFrame(rowRDD, dfSchema);
        JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc, false).mapToPair(new LongFrameToLongWritableFrameFunction());
        rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass);
        break;
      }
    case BIN2DFRM:
      {
        InputInfo iinfo = InputInfo.BinaryBlockInputInfo;
        OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
        JavaPairRDD<Long, FrameBlock> rddIn = sc.hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class).mapToPair(new LongWritableFrameToLongFrameFunction());
        SparkSession sparkSession = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();
        Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(sparkSession, rddIn, mc, lschema);
        // Convert the DataFrame back to binary blocks so the original binary data can be compared against the binary -> DataFrame -> binary round trip
        JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc, true).mapToPair(new LongFrameToLongWritableFrameFunction());
        rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass);
        break;
      }
    default:
      throw new RuntimeException("Unsupported converter type: " + type.toString());
  }
  sec.close();
}
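Across all of these branches, the reason a SparkSession appears at all is the DFRM2BIN and BIN2DFRM cases: an already-running JavaSparkContext is wrapped via SparkSession.builder().sparkContext(sc.sc()).getOrCreate() so that DataFrame conversions become available. Stripped of the SystemML-specific types, that pattern looks roughly like the sketch below; the schema and rows are invented for illustration.

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class SessionFromContext {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "session-from-context");

    // Reuse the existing SparkContext instead of creating a second one
    SparkSession spark = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();

    StructType schema = DataTypes.createStructType(Arrays.asList(
        DataTypes.createStructField("id", DataTypes.LongType, false),
        DataTypes.createStructField("name", DataTypes.StringType, true)));

    JavaRDD<Row> rows = sc.parallelize(Arrays.asList(
        RowFactory.create(1L, "alice"),
        RowFactory.create(2L, "bob")));

    Dataset<Row> df = spark.createDataFrame(rows, schema);
    df.show();

    spark.stop();
  }
}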
Use of org.apache.spark.sql.SparkSession in project incubator-systemml by apache.
The class MLContextScratchCleanupTest, method runMLContextTestMultipleScript.
private static void runMLContextTestMultipleScript(RUNTIME_PLATFORM platform, boolean wRead) {
  RUNTIME_PLATFORM oldplatform = DMLScript.rtplatform;
  DMLScript.rtplatform = platform;

  // create MLContext
  SparkSession spark = createSystemMLSparkSession("MLContextScratchCleanupTest", "local");
  MLContext ml = new MLContext(spark);
  ml.setExplain(true);

  String dml1 = baseDirectory + File.separator + "ScratchCleanup1.dml";
  String dml2 = baseDirectory + File.separator + (wRead ? "ScratchCleanup2b.dml" : "ScratchCleanup2.dml");
  try {
    Script script1 = dmlFromFile(dml1).in("$rows", rows).in("$cols", cols).out("X");
    Matrix X = ml.execute(script1).getMatrix("X");
    // clear in-memory/cached data to emulate on-disk storage
    X.toMatrixObject().clearData();
    Script script2 = dmlFromFile(dml2).in("X", X).out("z");
    String z = ml.execute(script2).getString("z");
    System.out.println(z);
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  } finally {
    DMLScript.rtplatform = oldplatform;
    // stop the underlying Spark context to allow single-JVM tests (otherwise the
    // next test that tries to create a SparkContext would fail)
    spark.stop();
    // clear the MLContext status and the Spark execution context
    ml.close();
  }
}
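createSystemMLSparkSession is a helper from the SystemML test utilities and its body is not shown on this page. In spirit it produces a session for the requested app name and master; the following is a minimal hedged sketch, not the actual helper.

// Sketch only: the real SystemML helper may set additional SystemML-specific configuration.
private static SparkSession createSystemMLSparkSession(String appName, String master) {
  return SparkSession.builder().appName(appName).master(master).getOrCreate();
}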
Use of org.apache.spark.sql.SparkSession in project net.jgp.labs.spark by jgperrin.
The class S3CsvToDataset2, method start.
private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("CSV on S3 to Dataset<Row>")
      .master("spark://10.0.100.81:7077")
      .config("spark.executor.memory", "1g")
      .config("spark.executor.cores", "1")
      .config("spark.cores.max", "2")
      .config("spark.driver.host", "10.0.100.182")
      .config("spark.executor.extraClassPath", "/home/jgp/net.jgp.labs.spark/target/labs-spark-2.2.0-jar-with-dependencies.jar")
      .getOrCreate();
  // S3A credentials (placeholders here) go into the underlying Hadoop configuration
  spark.sparkContext().hadoopConfiguration().set("fs.s3a.access.key", "xxx");
  spark.sparkContext().hadoopConfiguration().set("fs.s3a.secret.key", "xxx");
  // spark.sparkContext().hadoopConfiguration().set("fs.s3n.endpoint",
  // "us-east-2");
  String bucket = "bucket_name";
  String key = "key";
  String filename = "s3a://" + bucket + "/" + key;
  Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true").option("header", "false").option("sep", "|").load(filename);
  df.show();
  df.printSchema();
}
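Hard-coding S3 keys next to the code (even as xxx placeholders) is easy to leak. An alternative sketch, assuming the hadoop-aws module is on the classpath and the credentials live in environment variables, passes the S3A settings through the builder with the spark.hadoop. prefix, which Spark copies into the Hadoop configuration.

// Sketch: credentials come from the environment instead of the source file.
SparkSession spark = SparkSession.builder()
    .appName("CSV on S3 to Dataset<Row>")
    .master("local[*]")  // assumption: local run for the sketch
    .config("spark.hadoop.fs.s3a.access.key", System.getenv("AWS_ACCESS_KEY_ID"))
    .config("spark.hadoop.fs.s3a.secret.key", System.getenv("AWS_SECRET_ACCESS_KEY"))
    .getOrCreate();

Dataset<Row> df = spark.read()
    .format("csv")
    .option("inferSchema", "true")
    .option("header", "false")
    .option("sep", "|")
    .load("s3a://bucket_name/key");
df.show();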