Use of org.apache.spark.SparkConf in project Gaffer by gchq.
The class ImportRDDOfElementsHandlerTest, method checkImportRDDOfElements.
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("tests")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath().toString() + "load";
    final String failurePath = this.getClass().getResource("/").getPath().toString() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final RDD<Element> elementRDD = sparkContext.parallelize(elements, 8, ELEMENT_CLASS_TAG);
    final ImportRDDOfElements addRdd = new ImportRDDOfElements.Builder()
            .sparkContext(sparkContext)
            .input(elementRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .sparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
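The test serialises the Hadoop Configuration to a String so that it can be passed through the operation's options (see the HADOOP_CONFIGURATION_KEY option above). The receiving side can rebuild the Configuration by reversing the same Writable round trip; a minimal sketch, assuming UTF-8 encoding as above (the helper name is illustrative, not part of the Gaffer API):

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;

// Rebuilds a Configuration from the string produced by Configuration.write(...) above.
// Helper name is illustrative, not part of the Gaffer API.
public static Configuration deserialiseConfiguration(final String configurationString) throws IOException {
    final Configuration configuration = new Configuration();
    final byte[] bytes = configurationString.getBytes(StandardCharsets.UTF_8);
    configuration.readFields(new DataInputStream(new ByteArrayInputStream(bytes)));
    return configuration;
}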
Use of org.apache.spark.SparkConf in project Gaffer by gchq.
The class GetJavaRDDOfElementsExample, method runExamples.
@Override
public void runExamples() {
    // Need to actively turn logging on and off as needed as Spark produces some logs
    // even when the log level is set to off.
    ROOT_LOGGER.setLevel(Level.OFF);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("GetJavaRDDOfElementsExample")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sc = new JavaSparkContext(sparkConf);
    sc.setLogLevel("OFF");
    final Graph graph = getGraph();
    try {
        getJavaRddOfElements(sc, graph);
        getJavaRddOfElementsReturningEdgesOnly(sc, graph);
    } catch (final OperationException e) {
        throw new RuntimeException(e);
    }
    sc.stop();
    ROOT_LOGGER.setLevel(Level.INFO);
}
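Both Gaffer snippets above build the same Kryo-enabled local SparkConf, so the configuration can be factored into a small helper; a minimal sketch using only the settings shown above (the class name, method name and appName parameter are illustrative):

import org.apache.spark.SparkConf;

public final class GafferSparkConfFactory {

    private GafferSparkConfFactory() {
    }

    // Builds a local SparkConf with the Kryo serialiser and Gaffer registrator used in the
    // snippets above. Name and signature are illustrative, not part of the Gaffer API.
    public static SparkConf createLocalConf(final String appName) {
        return new SparkConf()
                .setMaster("local")
                .setAppName(appName)
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
                .set("spark.driver.allowMultipleContexts", "true");
    }
}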
Use of org.apache.spark.SparkConf in project azure-tools-for-java by Microsoft.
The class JavaSparkPi, method main.
public static void main(String[] args) throws Exception {
    // Use this line if you want to run your application in the cluster:
    // SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPi");
    SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPi").setMaster("local[2]");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
    int n = 100000 * slices;
    List<Integer> l = new ArrayList<Integer>(n);
    for (int i = 0; i < n; i++) {
        l.add(i);
    }
    JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);
    int count = dataSet.map(new Function<Integer, Integer>() {

        @Override
        public Integer call(Integer integer) {
            double x = Math.random() * 2 - 1;
            double y = Math.random() * 2 - 1;
            return (x * x + y * y < 1) ? 1 : 0;
        }
    }).reduce(new Function2<Integer, Integer, Integer>() {

        @Override
        public Integer call(Integer integer, Integer integer2) {
            return integer + integer2;
        }
    });
    System.out.println("Pi is roughly " + 4.0 * count / n);
    jsc.stop();
}
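Function and Function2 are single-abstract-method interfaces, so the same Monte Carlo estimate can also be written with Java 8 lambdas; a minimal, self-contained sketch under that assumption (the class name is illustrative):

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public final class JavaSparkPiLambda {

    public static void main(final String[] args) {
        // Same local configuration as the example above.
        SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPiLambda").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
        int n = 100000 * slices;
        List<Integer> l = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {
            l.add(i);
        }
        // Lambda equivalents of the anonymous Function/Function2 classes above:
        // sample one random point per element and count how many fall inside the unit circle.
        int count = jsc.parallelize(l, slices)
                .map(i -> {
                    double x = Math.random() * 2 - 1;
                    double y = Math.random() * 2 - 1;
                    return (x * x + y * y < 1) ? 1 : 0;
                })
                .reduce((a, b) -> a + b);
        System.out.println("Pi is roughly " + 4.0 * count / n);
        jsc.stop();
    }
}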
Use of org.apache.spark.SparkConf in project geode by apache.
The class OQLJavaDemo, method main.
public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: OQLJavaDemo <locators>\n");
        return;
    }
    SparkConf conf = new SparkConf().setAppName("OQLJavaDemo");
    // "192.168.1.47[10335]"
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);
    SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
    DataFrame df = javaFunctions(sqlContext).geodeOQL("select * from /str_str_region");
    System.out.println("======= DataFrame =======\n");
    df.show();
    sc.stop();
}
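The DataFrame returned by geodeOQL is an ordinary Spark SQL DataFrame, so the result can be queried further with Spark SQL; a minimal sketch, assuming the df and sqlContext variables from the example above and the Spark 1.x API it uses (the temporary table name is illustrative):

// Register the Geode query result as a temporary table and query it with Spark SQL.
// Column names depend on the region's key/value types, so the "*" projection is kept here.
df.registerTempTable("str_str");
DataFrame firstFive = sqlContext.sql("SELECT * FROM str_str LIMIT 5");
firstFive.show();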
Use of org.apache.spark.SparkConf in project geode by apache.
The class RDDSaveJavaDemo, method main.
public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: RDDSaveJavaDemo <locators>\n");
        return;
    }
    SparkConf conf = new SparkConf().setAppName("RDDSaveJavaDemo");
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);
    List<String> data = new ArrayList<String>();
    data.add("abcdefg");
    data.add("abcdefgh");
    data.add("abcdefghi");
    JavaRDD<String> rdd = sc.parallelize(data);
    GeodeConnectionConf connConf = GeodeConnectionConf.apply(conf);
    PairFunction<String, String, Integer> func = new PairFunction<String, String, Integer>() {

        @Override
        public Tuple2<String, Integer> call(String s) throws Exception {
            return new Tuple2<String, Integer>(s, s.length());
        }
    };
    javaFunctions(rdd).saveToGeode("str_int_region", func, connConf);
    sc.stop();
}
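PairFunction is also a single-abstract-method interface, so the key/value mapping above can be written as a lambda; a minimal sketch, assuming the rdd and connConf variables from the example above and the same region name:

// Lambda equivalent of the anonymous PairFunction above: key each string by itself
// and use its length as the value before writing to the "str_int_region" region.
PairFunction<String, String, Integer> toPair = s -> new Tuple2<>(s, s.length());
javaFunctions(rdd).saveToGeode("str_int_region", toPair, connConf);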