Use of org.apache.flink.util.Collector in the Apache Flink project.
Class WordCount, method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Runs the batch word-count example.
 *
 * <p>Each input line is lower-cased and split on runs of non-word characters; every
 * non-empty token is emitted as a {@code (word, 1)} pair, and the pairs are summed per word.
 * When {@code fileOutput} is set the result is written as CSV to {@code outputPath},
 * otherwise it is printed.
 *
 * @param args command-line arguments, handed to {@code parseParameters}; the method returns
 *             without doing anything when that call reports failure
 * @throws Exception propagated from building or running the program
 */
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = getTextDataSet(env);

    DataSet<Tuple2<String, Integer>> counts = text
        // normalize and split each line
        .map(line -> line.toLowerCase().split("\\W+"))
        .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
            // emit the pairs with non-zero-length words
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        })
        // group by the word (tuple field 0) and sum the counts (tuple field 1)
        .groupBy(0)
        .sum(1);

    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
        // The file sink is lazy, so the program has to be executed explicitly.
        env.execute("WordCount Example");
    } else {
        // DataSet#print() triggers execution on its own. Calling execute() afterwards
        // would fail because no new sink has been defined since that execution.
        counts.print();
    }
}
Use of org.apache.flink.util.Collector in the Apache Flink project.
Class WordCount (streaming variant), method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Runs the streaming word-count example.
 *
 * <p>Each incoming line is lower-cased and split on runs of non-word characters; every
 * non-empty token is emitted as a {@code (word, 1)} pair, keyed by the word and summed.
 * When {@code fileOutput} is set the result is written as CSV to {@code outputPath},
 * otherwise it is printed. The job is then executed.
 *
 * @param args command-line arguments, handed to {@code parseParameters}; the method returns
 *             without doing anything when that call reports failure
 * @throws Exception propagated from building or running the program
 */
public static void main(String[] args) throws Exception {
    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
        return;
    }

    // execution environment for the streaming job
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // input lines
    DataStream<String> text = getTextDataStream(env);

    DataStream<Tuple2<String, Integer>> counts = text
        // lower-case the line and break it into tokens
        .map(line -> line.toLowerCase().split("\\W+"))
        .flatMap((String[] words, Collector<Tuple2<String, Integer>> collector) -> {
            // only tokens with at least one character produce a pair
            for (String word : words) {
                if (word.length() > 0) {
                    collector.collect(new Tuple2<>(word, 1));
                }
            }
        })
        // key by the word (tuple field 0) and sum the counts (tuple field 1)
        .keyBy(0)
        .sum(1);

    // choose the sink
    if (!fileOutput) {
        counts.print();
    } else {
        counts.writeAsCsv(outputPath);
    }

    // run the job
    env.execute("Streaming WordCount Example");
}
Use of org.apache.flink.util.Collector in the Apache Flink project.
Class StreamingJobGraphGeneratorTest, method testResourcesForIteration.
/**
 * Checks that resource specs assigned to individual operators are merged correctly on the
 * vertices of the generated job graph, covering operators chained in the middle of the
 * pipeline as well as an iteration.
 */
@Test
public void testResourcesForIteration() throws Exception {
    // setResources is looked up reflectively and made accessible on both operator kinds
    Method setOperatorResources = SingleOutputStreamOperator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    setOperatorResources.setAccessible(true);
    Method setSinkResources = DataStreamSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    setSinkResources.setAccessible(true);

    // one distinct spec per operator so the assertions can tell them apart
    ResourceSpec sourceSpec = new ResourceSpec(0.1, 100);
    ResourceSpec iterationSpec = new ResourceSpec(0.2, 200);
    ResourceSpec flatMapSpec = new ResourceSpec(0.3, 300);
    ResourceSpec filterSpec = new ResourceSpec(0.4, 400);
    ResourceSpec sinkSpec = new ResourceSpec(0.5, 500);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // source that never emits anything
    DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).name("test_source");
    setOperatorResources.invoke(source, sourceSpec);

    IterativeStream<Integer> iteration = source.iterate(3000);
    setOperatorResources.invoke(iteration, iterationSpec);

    // identity flatMap inside the iteration
    DataStream<Integer> flatMapped = iteration.flatMap(new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value);
        }
    }).name("test_flatMap");
    setOperatorResources.invoke(flatMapped, flatMapSpec);

    // filter that follows the flatMap; the two are expected to share one vertex (flatMap -> filter)
    DataStream<Integer> feedback = flatMapped.filter(new FilterFunction<Integer>() {
        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    }).name("test_filter");
    setOperatorResources.invoke(feedback, filterSpec);

    // close the loop and attach an unchained no-op sink
    DataStreamSink<Integer> sink = iteration.closeWith(feedback).addSink(new SinkFunction<Integer>() {
        @Override
        public void invoke(Integer value) throws Exception {
        }
    }).disableChaining().name("test_sink");
    setSinkResources.invoke(sink, sinkSpec);

    JobGraph jobGraph = new StreamingJobGraphGenerator(env.getStreamGraph(), 1).createJobGraph();

    // vertices are identified by a fragment of their name
    for (JobVertex vertex : jobGraph.getVertices()) {
        final String vertexName = vertex.getName();
        if (vertexName.contains("test_source")) {
            assertTrue(vertex.getMinResources().equals(sourceSpec));
        } else if (vertexName.contains("Iteration_Source")) {
            assertTrue(vertex.getPreferredResources().equals(iterationSpec));
        } else if (vertexName.contains("test_flatMap")) {
            // chained vertex carries the merged spec of flatMap and filter
            assertTrue(vertex.getMinResources().equals(flatMapSpec.merge(filterSpec)));
        } else if (vertexName.contains("Iteration_Tail")) {
            assertTrue(vertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
        } else if (vertexName.contains("test_sink")) {
            assertTrue(vertex.getMinResources().equals(sinkSpec));
        }
    }
}
Use of org.apache.flink.util.Collector in the Apache Flink project.
Class InternalWindowFunctionTest, method testInternalIterableWindowFunction.
/**
 * Verifies that {@code InternalIterableWindowFunction} forwards {@code setOutputType},
 * {@code open}, {@code setRuntimeContext}, {@code apply} and {@code close} to the window
 * function it wraps.
 */
@SuppressWarnings("unchecked")
@Test
public void testInternalIterableWindowFunction() throws Exception {
    WindowFunctionMock wrapped = mock(WindowFunctionMock.class);
    InternalIterableWindowFunction<Long, String, Long, TimeWindow> internalFunction = new InternalIterableWindowFunction<>(wrapped);

    // setOutputType must reach the wrapped function
    ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setParallelism(42);
    TypeInformation<String> outputType = BasicTypeInfo.STRING_TYPE_INFO;
    StreamingFunctionUtils.setOutputType(internalFunction, outputType, executionConfig);
    verify(wrapped).setOutputType(outputType, executionConfig);

    // open must reach the wrapped function
    Configuration configuration = new Configuration();
    internalFunction.open(configuration);
    verify(wrapped).open(configuration);

    // setRuntimeContext must reach the wrapped function
    RuntimeContext runtimeContext = mock(RuntimeContext.class);
    internalFunction.setRuntimeContext(runtimeContext);
    verify(wrapped).setRuntimeContext(runtimeContext);

    // apply must be forwarded with the same key, window, input and collector
    TimeWindow window = mock(TimeWindow.class);
    Iterable<Long> input = (Iterable<Long>) mock(Iterable.class);
    Collector<String> collector = (Collector<String>) mock(Collector.class);
    internalFunction.apply(42L, window, input, collector);
    verify(wrapped).apply(eq(42L), eq(window), eq(input), eq(collector));

    // close must reach the wrapped function
    internalFunction.close();
    verify(wrapped).close();
}
Use of org.apache.flink.util.Collector in the Apache Flink project.
Class InternalWindowFunctionTest, method testInternalAggregateProcessAllWindowFunction.
/**
 * Verifies that {@code InternalAggregateProcessAllWindowFunction} forwards the lifecycle calls
 * to the wrapped process function and hands it the aggregated result of the window contents
 * when {@code apply} is invoked.
 */
@SuppressWarnings("unchecked")
@Test
public void testInternalAggregateProcessAllWindowFunction() throws Exception {
    AggregateProcessAllWindowFunctionMock wrapped = mock(AggregateProcessAllWindowFunctionMock.class);

    // aggregate: collect the values in a set, then map every value to itself
    AggregateFunction<Long, Set<Long>, Map<Long, Long>> aggregate = new AggregateFunction<Long, Set<Long>, Map<Long, Long>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Set<Long> createAccumulator() {
            return new HashSet<>();
        }

        @Override
        public void add(Long value, Set<Long> accumulator) {
            accumulator.add(value);
        }

        @Override
        public Map<Long, Long> getResult(Set<Long> accumulator) {
            Map<Long, Long> identity = new HashMap<>();
            for (Long element : accumulator) {
                identity.put(element, element);
            }
            return identity;
        }

        @Override
        public Set<Long> merge(Set<Long> a, Set<Long> b) {
            a.addAll(b);
            return a;
        }
    };
    InternalAggregateProcessAllWindowFunction<Long, Set<Long>, Map<Long, Long>, String, TimeWindow> internalFunction = new InternalAggregateProcessAllWindowFunction<>(aggregate, wrapped);

    // setOutputType must reach the wrapped function
    ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setParallelism(42);
    TypeInformation<String> outputType = BasicTypeInfo.STRING_TYPE_INFO;
    StreamingFunctionUtils.setOutputType(internalFunction, outputType, executionConfig);
    verify(wrapped).setOutputType(outputType, executionConfig);

    // open must reach the wrapped function
    Configuration configuration = new Configuration();
    internalFunction.open(configuration);
    verify(wrapped).open(configuration);

    // setRuntimeContext must reach the wrapped function
    RuntimeContext runtimeContext = mock(RuntimeContext.class);
    internalFunction.setRuntimeContext(runtimeContext);
    verify(wrapped).setRuntimeContext(runtimeContext);

    // apply: the wrapped function must see a map holding 23->23 and 24->24
    TimeWindow window = mock(TimeWindow.class);
    Collector<String> collector = (Collector<String>) mock(Collector.class);
    List<Long> windowContents = new LinkedList<>();
    windowContents.add(23L);
    windowContents.add(24L);
    internalFunction.apply(((byte) 0), window, windowContents, collector);
    verify(wrapped).process((AggregateProcessAllWindowFunctionMock.Context) anyObject(), (Iterable) argThat(containsInAnyOrder(allOf(hasEntry(is(23L), is(23L)), hasEntry(is(24L), is(24L))))), eq(collector));

    // close must reach the wrapped function
    internalFunction.close();
    verify(wrapped).close();
}
Aggregations