Use of org.apache.heron.streamlet.StreamletReducers in project heron by twitter.
The class StreamletWithKeybyCountAndReduce, method buildTopology.
@Override
protected TestTopologyBuilder buildTopology(TestTopologyBuilder testTopologyBuilder) {
  Builder streamletBuilder = Builder.newBuilder();
  Streamlet<String> monthStreamlet = streamletBuilder.newSource(() -> MONTHS)
      .setName("months-text")
      .flatMap((String m) -> Arrays.asList(m.split(" - "))).setName("months")
      .filter((month) -> incomingMonths.add(month.toLowerCase()))
      .setName("unique-months");
  SerializableFunction<String, String> getSeason = month -> {
    if (SPRING_MONTHS.contains(month)) {
      return "spring";
    } else if (SUMMER_MONTHS.contains(month)) {
      return "summer";
    } else if (FALL_MONTHS.contains(month)) {
      return "fall";
    } else if (WINTER_MONTHS.contains(month)) {
      return "winter";
    } else {
      return "really?";
    }
  };
  SerializableFunction<String, Integer> getNumberOfDays = month -> {
    switch (month) {
      case "january":
        return 31;
      case "february":
        // Don't use this in real projects: it ignores leap years (29 days)
        return 28;
      case "march":
        return 31;
      case "april":
        return 30;
      case "may":
        return 31;
      case "june":
        return 30;
      case "july":
        return 31;
      case "august":
        return 31;
      case "september":
        return 30;
      case "october":
        return 31;
      case "november":
        return 30;
      case "december":
        return 31;
      default:
        // Unknown month; shouldn't be reached
        return -1;
    }
  };
  // Count months per season
  monthStreamlet.keyBy(getSeason, getNumberOfDays).setName("key-by-season")
      .countByKey(x -> x.getKey()).setName("key-by-and-count")
      .map(x -> String.format("%s: %d months", x.getKey(), x.getValue()))
      .setName("to-string");
  // Sum days per season
  monthStreamlet
      .<String, Integer>reduceByKey(getSeason, getNumberOfDays, StreamletReducers::sum)
      .setName("sum-by-season")
      .map(x -> String.format("%s: %d days", x.getKey(), x.getValue()))
      .setName("to-string-2");
  BuilderImpl streamletBuilderImpl = (BuilderImpl) streamletBuilder;
  TestTopologyBuilder topology =
      (TestTopologyBuilder) streamletBuilderImpl.build(testTopologyBuilder);
  return topology;
}
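The piece shared by both usages on this page is StreamletReducers::sum, a static helper whose shape matches the reducer argument of reduceByKey and reduceByKeyAndWindow. A minimal standalone sketch of what the method reference resolves to (the class name ReducerSketch is hypothetical; it assumes the SerializableBinaryOperator type and the Integer overload of StreamletReducers.sum from the Heron streamlet API):
import org.apache.heron.streamlet.SerializableBinaryOperator;
import org.apache.heron.streamlet.StreamletReducers;

public final class ReducerSketch {
  public static void main(String[] args) {
    // The method reference resolves to a serializable (T, T) -> T function,
    // which is the reducer shape reduceByKey/reduceByKeyAndWindow expect.
    SerializableBinaryOperator<Integer> sum = StreamletReducers::sum;
    System.out.println(sum.apply(30, 31)); // prints 61
  }
}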
Use of org.apache.heron.streamlet.StreamletReducers in project heron by twitter.
The class WindowedWordCountTopology, method main.
public static void main(String[] args) throws Exception {
  Builder processingGraphBuilder = Builder.newBuilder();
  processingGraphBuilder.newSource(() -> StreamletUtils.randomFromList(SENTENCES))
      .setName("random-sentences-source")
      .flatMap(sentence -> Arrays.asList(sentence.toLowerCase().split("\\s+")))
      .setName("flatten-into-individual-words")
      .reduceByKeyAndWindow(
          word -> word,  // The key extractor (the word is left unchanged)
          word -> 1,     // The value extractor (the value is always 1)
          WindowConfig.TumblingCountWindow(50), StreamletReducers::sum)
      .setName("reduce-operation")
      .consume(kv -> {
        String logMessage = String.format("(word: %s, count: %d)",
            kv.getKey().getKey(), kv.getValue());
        LOG.info(logMessage);
      });
  // The topology's parallelism (the number of containers across which the topology's
  // processing instances will be split) can be defined via the second command-line
  // argument (or else the default of 2 will be used).
  int topologyParallelism = StreamletUtils.getParallelism(args, 2);
  Config config = Config.newBuilder().setNumContainers(topologyParallelism).build();
  // Fetches the topology name from the first command-line argument
  String topologyName = StreamletUtils.getTopologyName(args);
  // Finally, the processing graph and configuration are passed to the Runner, which converts
  // the graph into a Heron topology that can be run in a Heron cluster.
  new Runner().run(topologyName, config, processingGraphBuilder);
}
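One detail worth noting in the consumer above: reduceByKeyAndWindow emits KeyValue pairs whose key is a KeyedWindow, which is why the word is retrieved with kv.getKey().getKey(). Below is a hedged sketch of a consumer helper that also reports the window bounds; it assumes the KeyedWindow#getWindow() and Window#getStartTime()/getEndTime() accessors from the Heron streamlet API, so verify the exact names against your Heron version.
import java.util.logging.Logger;
import org.apache.heron.streamlet.KeyValue;
import org.apache.heron.streamlet.KeyedWindow;
import org.apache.heron.streamlet.Window;

final class WindowedCountLogger {
  private static final Logger LOG = Logger.getLogger(WindowedCountLogger.class.getName());

  // Could be passed to consume(...) as the method reference WindowedCountLogger::log.
  static void log(KeyValue<KeyedWindow<String>, Integer> kv) {
    Window window = kv.getKey().getWindow();
    LOG.info(String.format("window [%d, %d) word: %s, count: %d",
        window.getStartTime(), window.getEndTime(),
        kv.getKey().getKey(), kv.getValue()));
  }
}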