Use of co.cask.cdap.api.data.stream.Stream in project cdap by caskdata.
Class WordCountMinusFlowApp, method configure.
/**
 * Configures the word count application: sets its name and description, registers the
 * "text" stream, creates the "mydataset" dataset of type {@code KeyValueTable}, and adds
 * the word frequency service and the MapReduce job.
 */
@Override
public void configure() {
  // Application identity.
  setName("WordCountApp");
  setDescription("Application for counting words");

  // Stream registered with the application.
  final Stream textStream = new Stream("text");
  addStream(textStream);

  // Dataset declared as a KeyValueTable.
  createDataset("mydataset", KeyValueTable.class);

  // Programs: the service is added before the MapReduce job.
  final WordFrequencyService frequencyService = new WordFrequencyService();
  addService(frequencyService);

  final VoidMapReduceJob mapReduceJob = new VoidMapReduceJob();
  addMapReduce(mapReduceJob);
}
Use of co.cask.cdap.api.data.stream.Stream in project cdap by caskdata.
Class WebCrawlApp, method configure.
/**
 * Configures the web crawler application: sets its name and description, registers the
 * "urls" stream, creates the "crawled-pages" dataset of type {@code KeyValueTable}, and
 * adds the crawl flow.
 */
@Override
public void configure() {
  // Application identity.
  setName("WebCrawlerApp");
  setDescription("Web Crawler Application");

  // Stream registered with the application.
  final Stream urlStream = new Stream("urls");
  addStream(urlStream);

  // Dataset declared as a KeyValueTable.
  createDataset("crawled-pages", KeyValueTable.class);

  // Flow added to the application.
  final CrawlFlow crawlFlow = new CrawlFlow();
  addFlow(crawlFlow);
}
Use of co.cask.cdap.api.data.stream.Stream in project cdap by caskdata.
Class SparkKMeansApp, method configure.
/**
 * Configures the SparkKMeans application: sets its name and description, registers the
 * "pointsStream" stream, adds the points flow, the Spark program and the centers service,
 * and creates the "points" and "centers" ObjectStore datasets.
 *
 * @throws RuntimeException wrapping the {@code UnsupportedTypeException} if either
 *     ObjectStore dataset cannot be created for its declared element type
 */
@Override
public void configure() {
  setName("SparkKMeans");
  setDescription("Spark KMeans app");
  // Ingest data into the Application via a Stream
  addStream(new Stream("pointsStream"));
  // Process points data in real-time using a Flow
  addFlow(new PointsFlow());
  // Run a Spark program on the acquired data
  addSpark(new SparkKMeansSpecification());
  // Retrieve the processed data using a Service
  addService(new CentersService());
  // Store input and processed data in ObjectStore Datasets
  try {
    ObjectStores.createObjectStore(getConfigurer(), "points", Point.class,
        DatasetProperties.builder().setDescription("Store points data").build());
    ObjectStores.createObjectStore(getConfigurer(), "centers", String.class,
        DatasetProperties.builder().setDescription("Store centers data").build());
  } catch (UnsupportedTypeException e) {
    // Not expected for the "centers" store, because String is an actual class.
    // NOTE(review): Point is presumably a concrete class as well - confirm.
    // Rethrown unchecked, with context and the original cause, because configure()
    // declares no checked exceptions.
    throw new RuntimeException(
        "Unable to create ObjectStore dataset 'points' or 'centers'", e);
  }
}
Use of co.cask.cdap.api.data.stream.Stream in project cdap by caskdata.
Class SparkPageRankApp, method configure.
/**
 * Configures the SparkPageRank application: sets its name and description, registers the
 * stream named by {@code BACKLINK_URL_STREAM}, adds the Spark program, the MapReduce
 * program, the workflow and the service handler, and creates the "ranks" and
 * "rankscount" ObjectStore datasets.
 *
 * @throws RuntimeException wrapping the {@code UnsupportedTypeException} if either
 *     ObjectStore dataset cannot be created for its declared element type
 */
@Override
public void configure() {
  setName("SparkPageRank");
  setDescription("Spark page rank application.");
  // Ingest data into the Application via a Stream
  addStream(new Stream(BACKLINK_URL_STREAM));
  // Run a Spark program on the acquired data
  addSpark(new PageRankSpark());
  // Runs MapReduce program on data emitted by Spark program
  addMapReduce(new RanksCounter());
  // Runs Spark followed by a MapReduce in a Workflow
  addWorkflow(new PageRankWorkflow());
  // Service to retrieve processed data
  addService(SERVICE_HANDLERS, new SparkPageRankServiceHandler());
  // Store input and processed data in ObjectStore Datasets
  try {
    ObjectStores.createObjectStore(getConfigurer(), "ranks", Integer.class,
        DatasetProperties.builder().setDescription("Ranks Dataset").build());
    ObjectStores.createObjectStore(getConfigurer(), "rankscount", Integer.class,
        DatasetProperties.builder().setDescription("Ranks Count Dataset").build());
  } catch (UnsupportedTypeException e) {
    // Not expected: both stores are declared with Integer, which is an actual class.
    // Rethrown unchecked, with context and the original cause, because configure()
    // declares no checked exceptions.
    throw new RuntimeException(
        "Unable to create ObjectStore dataset 'ranks' or 'rankscount'", e);
  }
}
Use of co.cask.cdap.api.data.stream.Stream in project cdap by caskdata.
Class StreamBatchSource, method configurePipeline.
/**
 * Validates the stream batch configuration, then registers a stream named by
 * {@code streamBatchConfig.name} with the given pipeline configurer.
 *
 * @param pipelineConfigurer configurer the stream is added to
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
  // Validation runs before the configured stream name is read.
  streamBatchConfig.validate();

  final Stream sourceStream = new Stream(streamBatchConfig.name);
  pipelineConfigurer.addStream(sourceStream);
}
Aggregations