Use of org.apache.spark.SparkConf in project hbase by apache.
The class JavaHBaseBulkPutExample, method main.
public static void main(String[] args) {
  if (args.length < 2) {
    System.out.println("JavaHBaseBulkPutExample {tableName} {columnFamily}");
    return;
  }
  String tableName = args[0];
  String columnFamily = args[1];
  SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkPutExample " + tableName);
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);
  try {
    List<String> list = new ArrayList<>(5);
    list.add("1," + columnFamily + ",a,1");
    list.add("2," + columnFamily + ",a,2");
    list.add("3," + columnFamily + ",a,3");
    list.add("4," + columnFamily + ",a,4");
    list.add("5," + columnFamily + ",a,5");
    JavaRDD<String> rdd = jsc.parallelize(list);
    Configuration conf = HBaseConfiguration.create();
    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
    hbaseContext.bulkPut(rdd, TableName.valueOf(tableName), new PutFunction());
  } finally {
    jsc.stop();
  }
}
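The bulkPut call above relies on a PutFunction that is not shown in this snippet. A minimal sketch of what such a function could look like, assuming the comma-separated "rowKey,columnFamily,qualifier,value" format used for the list entries, the HBase client's Put and Bytes classes, and Spark's java Function interface:

public static class PutFunction implements Function<String, Put> {
  private static final long serialVersionUID = 1L;

  @Override
  public Put call(String v) throws Exception {
    // Each input string is "rowKey,columnFamily,qualifier,value".
    String[] cells = v.split(",");
    Put put = new Put(Bytes.toBytes(cells[0]));
    put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]), Bytes.toBytes(cells[3]));
    return put;
  }
}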
Use of org.apache.spark.SparkConf in project zeppelin by apache.
The class SparkInterpreter, method createHttpServer.
private Object createHttpServer(File outputDir) {
  SparkConf conf = new SparkConf();
  try {
    // Try the newer HttpServer constructor, which takes a SparkConf.
    Constructor<?> constructor = getClass().getClassLoader()
        .loadClass("org.apache.spark.HttpServer")
        .getConstructor(new Class[] { SparkConf.class, File.class, SecurityManager.class, int.class, String.class });
    Object securityManager = createSecurityManager(conf);
    return constructor.newInstance(new Object[] { conf, outputDir, securityManager, 0, "HTTP Server" });
  } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException
      | InstantiationException | InvocationTargetException e) {
    // Fall back to the old constructor, which has no SparkConf argument.
    Constructor<?> constructor = null;
    try {
      constructor = getClass().getClassLoader()
          .loadClass("org.apache.spark.HttpServer")
          .getConstructor(new Class[] { File.class, SecurityManager.class, int.class, String.class });
      return constructor.newInstance(new Object[] { outputDir, createSecurityManager(conf), 0, "HTTP Server" });
    } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException
        | InstantiationException | InvocationTargetException e1) {
      logger.error(e1.getMessage(), e1);
      return null;
    }
  }
}
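The helper createSecurityManager(conf) is referenced but not shown. Because org.apache.spark.SecurityManager also changed its constructor across Spark versions, a matching helper would presumably use the same reflection pattern. A hypothetical sketch (the per-version constructor signatures are assumptions, not Zeppelin's actual code):

private Object createSecurityManager(SparkConf conf) throws ClassNotFoundException,
    NoSuchMethodException, IllegalAccessException, InstantiationException, InvocationTargetException {
  Class<?> smClass = getClass().getClassLoader().loadClass("org.apache.spark.SecurityManager");
  try {
    // Newer Spark (assumed): SecurityManager(SparkConf, Option<byte[]> ioEncryptionKey)
    Constructor<?> c = smClass.getConstructor(new Class[] { SparkConf.class, scala.Option.class });
    return c.newInstance(new Object[] { conf, scala.Option.apply(null) }); // Option.apply(null) == None
  } catch (NoSuchMethodException e) {
    // Older Spark (assumed): SecurityManager(SparkConf)
    Constructor<?> c = smClass.getConstructor(new Class[] { SparkConf.class });
    return c.newInstance(new Object[] { conf });
  }
}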
Use of org.apache.spark.SparkConf in project learning-spark by databricks.
The class LogAnalyzerAppMain, method main.
public static void main(String[] args) throws IOException {
  Flags.setFromCommandLineArgs(THE_OPTIONS, args);
  // Start up the Spark configuration and streaming context.
  SparkConf conf = new SparkConf().setAppName("A Databricks Reference Application: Logs Analysis with Spark");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Flags.getInstance().getSlideInterval());
  // Checkpointing must be enabled to use updateStateByKey and windowed operations.
  jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());
  // This method monitors a directory for new files to read in for streaming.
  JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());
  JavaDStream<ApacheAccessLog> accessLogsDStream = logData.map(new Functions.ParseFromLogLine()).cache();
  final LogAnalyzerTotal logAnalyzerTotal = new LogAnalyzerTotal();
  final LogAnalyzerWindowed logAnalyzerWindowed = new LogAnalyzerWindowed();
  // Process the DStream which gathers stats for all of time.
  logAnalyzerTotal.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);
  // Calculate statistics for the last time interval.
  logAnalyzerWindowed.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);
  // Render the output each time there is a new RDD in the accessLogsDStream.
  final Renderer renderer = new Renderer();
  accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
    public Void call(JavaRDD<ApacheAccessLog> rdd) {
      // Output the current statistics.
      try {
        renderer.render(logAnalyzerTotal.getLogStatistics(), logAnalyzerWindowed.getLogStatistics());
      } catch (Exception e) {
        // Swallow rendering errors so the streaming job keeps running.
      }
      return null;
    }
  });
  // Start the computation.
  jssc.start();
  // Wait for the computation to terminate.
  jssc.awaitTermination();
}
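The Function<JavaRDD<ApacheAccessLog>, Void> form of foreachRDD shown above is the older DStream API. On Spark 1.6 and later the same step can be written with VoidFunction (from org.apache.spark.api.java.function), which avoids the dummy null return; a sketch assuming the surrounding code is unchanged:

accessLogsDStream.foreachRDD(new VoidFunction<JavaRDD<ApacheAccessLog>>() {
  @Override
  public void call(JavaRDD<ApacheAccessLog> rdd) {
    try {
      renderer.render(logAnalyzerTotal.getLogStatistics(), logAnalyzerWindowed.getLogStatistics());
    } catch (Exception e) {
      // Log and keep the streaming job running.
      e.printStackTrace();
    }
  }
});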
Use of org.apache.spark.SparkConf in project learning-spark by databricks.
The class KafkaInput, method main.
public static void main(String[] args) throws Exception {
  String zkQuorum = args[0];
  String group = args[1];
  SparkConf conf = new SparkConf().setAppName("KafkaInput");
  // Create a StreamingContext with a 1 second batch size.
  JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
  Map<String, Integer> topics = new HashMap<String, Integer>();
  topics.put("pandas", 1);
  JavaPairDStream<String, String> input = KafkaUtils.createStream(jssc, zkQuorum, group, topics);
  input.print();
  // Start our streaming context and wait for it to "finish".
  jssc.start();
  // Wait for 10 seconds then exit. To run forever, call without a timeout.
  jssc.awaitTermination(10000);
  // Stop the streaming context.
  jssc.stop();
}
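This example only prints the raw (key, message) pairs received from the "pandas" topic. Further DStream transformations can be chained on input before jssc.start(); a small illustrative sketch using the Spark 1.x Java API (the per-batch count is not part of the original example, and scala.Tuple2 plus Spark's Function are assumed to be imported):

// Extract just the message payload from each (key, message) pair.
JavaDStream<String> messages = input.map(
    new Function<Tuple2<String, String>, String>() {
      @Override
      public String call(Tuple2<String, String> kv) {
        return kv._2();
      }
    });
// Print how many messages arrived in each batch.
messages.count().print();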
Use of org.apache.spark.SparkConf in project learning-spark by databricks.
The class MLlib, method main.
public static void main(String[] args) {
  SparkConf sparkConf = new SparkConf().setAppName("JavaBookExample");
  JavaSparkContext sc = new JavaSparkContext(sparkConf);
  // Load 2 types of emails from text files: spam and ham (non-spam).
  // Each line has text from one email.
  JavaRDD<String> spam = sc.textFile("files/spam.txt");
  JavaRDD<String> ham = sc.textFile("files/ham.txt");
  // Create a HashingTF instance to map email text to vectors of 100 features.
  final HashingTF tf = new HashingTF(100);
  // Each email is split into words, and each word is mapped to one feature.
  // Create LabeledPoint datasets for positive (spam) and negative (ham) examples.
  JavaRDD<LabeledPoint> positiveExamples = spam.map(new Function<String, LabeledPoint>() {
    @Override
    public LabeledPoint call(String email) {
      return new LabeledPoint(1, tf.transform(Arrays.asList(email.split(" "))));
    }
  });
  JavaRDD<LabeledPoint> negativeExamples = ham.map(new Function<String, LabeledPoint>() {
    @Override
    public LabeledPoint call(String email) {
      return new LabeledPoint(0, tf.transform(Arrays.asList(email.split(" "))));
    }
  });
  JavaRDD<LabeledPoint> trainingData = positiveExamples.union(negativeExamples);
  // Cache the data, since logistic regression is an iterative algorithm.
  trainingData.cache();
  // Create a logistic regression learner that uses the SGD optimizer.
  LogisticRegressionWithSGD lrLearner = new LogisticRegressionWithSGD();
  // Run the actual learning algorithm on the training data.
  LogisticRegressionModel model = lrLearner.run(trainingData.rdd());
  // Test on a positive example (spam) and a negative one (ham).
  // First apply the same HashingTF feature transformation used on the training data.
  Vector posTestExample = tf.transform(Arrays.asList("O M G GET cheap stuff by sending money to ...".split(" ")));
  Vector negTestExample = tf.transform(Arrays.asList("Hi Dad, I started studying Spark the other ...".split(" ")));
  // Now use the learned model to predict spam/ham for new emails.
  System.out.println("Prediction for positive test example: " + model.predict(posTestExample));
  System.out.println("Prediction for negative test example: " + model.predict(negTestExample));
  sc.stop();
}
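Note that the class instantiated here is LogisticRegressionWithSGD, which optimizes with stochastic gradient descent. If an LBFGS optimizer is actually wanted, MLlib also provides LogisticRegressionWithLBFGS in org.apache.spark.mllib.classification with the same run interface; a sketch reusing the trainingData and test vectors above:

// LBFGS-based logistic regression as an alternative to the SGD learner above.
LogisticRegressionWithLBFGS lbfgsLearner = new LogisticRegressionWithLBFGS();
lbfgsLearner.setNumClasses(2); // binary spam/ham classification
LogisticRegressionModel lbfgsModel = lbfgsLearner.run(trainingData.rdd());
System.out.println("LBFGS prediction for positive test example: " + lbfgsModel.predict(posTestExample));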