use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class SparkWordCount method wordCount.
/**
 * Runs the Spark word-count job: reads {@link WebPage} records from {@code inStore},
 * maps and reduces them with {@code mapFunc} / {@code redFunc}, logs the reduced
 * pairs and writes the result through the Hadoop output configuration generated
 * for {@code outStore}.
 *
 * <p>The Spark context created here is closed before the method returns, so the
 * method can be invoked again in the same JVM.
 *
 * @param inStore  data store the web pages are read from
 * @param outStore data store the reduced result is written to
 * @return always {@code 1}
 *         (NOTE(review): callers treating this as an exit code would see a
 *         non-zero value even on success - confirm the intended contract)
 * @throws IOException declared by the original signature; propagated from the
 *         Gora/Spark calls
 */
public int wordCount(DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws IOException {
  // Spark engine initialization
  GoraSparkEngine<String, WebPage> goraSparkEngine = new GoraSparkEngine<>(String.class, WebPage.class);
  SparkConf sparkConf = new SparkConf().setAppName("Gora Spark Word Count Application").setMaster("local");

  // Register the persistent class of the input store with Kryo.
  Class<?>[] kryoClasses = { inStore.getPersistentClass() };
  sparkConf.registerKryoClasses(kryoClasses);

  // try-with-resources stops the Spark context even when the job fails;
  // leaving it open leaks the context for the lifetime of the JVM.
  try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
    JavaPairRDD<String, WebPage> goraRDD = goraSparkEngine.initialize(sc, inStore);
    long count = goraRDD.count();
    log.info("Total Web page count: {}", count);

    JavaRDD<Tuple2<String, Long>> mappedGoraRdd = goraRDD.values().map(mapFunc);
    JavaPairRDD<String, Long> reducedGoraRdd = JavaPairRDD.fromJavaRDD(mappedGoraRdd).reduceByKey(redFunc);

    // Print output for debug purpose
    log.info("SparkWordCount debug purpose TokenDatum print starts:");
    Map<String, Long> tokenDatumMap = reducedGoraRdd.collectAsMap();
    // Iterate entries directly instead of looking every key up again.
    for (Map.Entry<String, Long> entry : tokenDatumMap.entrySet()) {
      log.info(entry.getKey());
      log.info(entry.getValue().toString());
    }
    log.info("SparkWordCount debug purpose TokenDatum print ends:");

    // Write output to datastore
    Configuration sparkHadoopConf = goraSparkEngine.generateOutputConf(outStore);
    reducedGoraRdd.saveAsNewAPIHadoopDataset(sparkHadoopConf);
  }
  return 1;
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class SparkWordCount method run.
/**
 * Resolves the input and output data stores from the command-line arguments and
 * delegates to {@code wordCount}.
 *
 * <p>With no arguments both stores come from the factory defaults. Otherwise
 * {@code args[0]} names the input store class and {@code args[1]}, when present,
 * names the output store class; if it is absent the input store class is reused.
 *
 * @param args optional data store class names: {@code [inStoreClass [outStoreClass]]}
 * @return the value returned by {@code wordCount}
 * @throws Exception propagated from data store creation or the word count run
 */
public int run(String[] args) throws Exception {
  final Configuration hadoopConf = new Configuration();
  final DataStore<String, WebPage> inStore;
  final DataStore<String, TokenDatum> outStore;

  if (args.length == 0) {
    // No store class given: use the factory defaults for both stores.
    inStore = DataStoreFactory.getDataStore(String.class, WebPage.class, hadoopConf);
    outStore = DataStoreFactory.getDataStore(String.class, TokenDatum.class, hadoopConf);
  } else {
    final String inStoreClass = args[0];
    // The output store falls back to the input store class when not specified.
    final String outStoreClass = (args.length > 1) ? args[1] : inStoreClass;
    inStore = DataStoreFactory.getDataStore(inStoreClass, String.class, WebPage.class, hadoopConf);
    outStore = DataStoreFactory.getDataStore(outStoreClass, String.class, TokenDatum.class, hadoopConf);
  }

  return wordCount(inStore, outStore);
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class MapReduceSerialization method mapReduceSerialization.
/**
 * Builds a query on {@code inStore} restricted to the {@code "url"} field, creates
 * a job from it via {@code createJob} and waits for the job to finish.
 *
 * @param inStore  data store the query is created on
 * @param outStore data store passed to {@code createJob} as the output store
 * @return {@code 0} when the job completes successfully, {@code 1} otherwise
 * @throws IOException            propagated from job creation or execution
 * @throws InterruptedException   propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
public int mapReduceSerialization(DataStore<String, WebPage> inStore, DataStore<String, WebPage> outStore) throws IOException, InterruptedException, ClassNotFoundException {
  Query<String, WebPage> urlQuery = inStore.newQuery();
  urlQuery.setFields("url");

  Job job = createJob(inStore, urlQuery, outStore);
  boolean succeeded = job.waitForCompletion(true);
  if (succeeded) {
    return 0;
  }
  return 1;
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class WordCount method run.
/**
 * Picks the input and output data stores based on the command-line arguments and
 * runs {@code wordCount} on them.
 *
 * <p>{@code args[0]}, if given, is the data store class for the input store;
 * {@code args[1]}, if given, is the data store class for the output store and
 * otherwise the input store class is used for both. Without arguments the
 * factory's default data store is used.
 *
 * @param args optional data store class names
 * @return the value returned by {@code wordCount}
 * @throws Exception propagated from data store creation or the word count run
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  DataStore<String, WebPage> inStore;
  DataStore<String, TokenDatum> outStore;

  boolean storeClassGiven = args.length > 0;
  if (storeClassGiven) {
    String inputStoreClass = args[0];
    inStore = DataStoreFactory.getDataStore(inputStoreClass, String.class, WebPage.class, conf);

    // A second argument overrides the store class for the output only.
    String outputStoreClass = inputStoreClass;
    if (args.length > 1) {
      outputStoreClass = args[1];
    }
    outStore = DataStoreFactory.getDataStore(outputStoreClass, String.class, TokenDatum.class, conf);
  } else {
    inStore = DataStoreFactory.getDataStore(String.class, WebPage.class, conf);
    outStore = DataStoreFactory.getDataStore(String.class, TokenDatum.class, conf);
  }

  return wordCount(inStore, outStore);
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class WebPageDataCreator method run.
/**
 * Creates a {@link WebPage} data store and populates it via
 * {@code createWebPageData}.
 *
 * @param args optional; {@code args[0]} overrides the data store class, which
 *             otherwise is {@code org.apache.gora.hbase.store.HBaseStore}
 * @return always {@code 0}
 * @throws Exception propagated from data store creation or data generation
 */
public int run(String[] args) throws Exception {
  final String storeClassName = (args.length > 0)
      ? args[0]
      : "org.apache.gora.hbase.store.HBaseStore";

  final Configuration conf = new Configuration();
  DataStore<String, WebPage> store =
      DataStoreFactory.getDataStore(storeClassName, String.class, WebPage.class, conf);

  createWebPageData(store);
  return 0;
}
Aggregations