Search in sources :

Example 41 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class WordCount method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
/**
 * Runs the batch WordCount example.
 *
 * <p>Each input line is lower-cased and split on runs of non-word characters; every
 * non-empty token is emitted as a {@code (word, 1)} pair, the pairs are grouped on
 * field 0 and field 1 is summed. The result is written as CSV when {@code fileOutput}
 * is set and printed otherwise.
 *
 * @param args command-line arguments, handed to {@code parseParameters}; the method
 *             returns without doing any work when that call yields {@code false}
 * @throws Exception propagated from the calls made in this method
 */
public static void main(String[] args) throws Exception {
    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
        return;
    }

    // execution environment and input lines
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<String> lines = getTextDataSet(env);

    // normalize each line and break it into tokens
    final DataSet<String[]> tokenized = lines.map(line -> line.toLowerCase().split("\\W+"));

    // one (word, 1) pair per token, skipping zero-length tokens
    final DataSet<Tuple2<String, Integer>> counts = tokenized
        .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        })
        .groupBy(0)
        .sum(1);

    // hand the result to the selected sink
    if (!fileOutput) {
        counts.print();
    } else {
        counts.writeAsCsv(outputPath, "\n", " ");
    }

    // trigger the job
    env.execute("WordCount Example");
}
Also used : Arrays(java.util.Arrays) DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WordCountData(org.apache.flink.examples.java.wordcount.util.WordCountData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector)

Example 42 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class WordCount method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
/**
 * Runs the streaming WordCount example.
 *
 * <p>Each input line is lower-cased and split on runs of non-word characters; every
 * non-empty token is emitted as a {@code (word, 1)} pair, the stream is keyed on
 * field 0 and field 1 is summed. The result is written as CSV when {@code fileOutput}
 * is set and printed otherwise.
 *
 * @param args command-line arguments, handed to {@code parseParameters}; the method
 *             returns without doing any work when that call yields {@code false}
 * @throws Exception propagated from the calls made in this method
 */
public static void main(String[] args) throws Exception {
    final boolean parametersAccepted = parseParameters(args);
    if (!parametersAccepted) {
        return;
    }

    // execution environment and input lines
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<String> lines = getTextDataStream(env);

    // normalize each line and break it into tokens
    final DataStream<String[]> tokenized = lines.map(line -> line.toLowerCase().split("\\W+"));

    // one (word, 1) pair per token, skipping zero-length tokens
    final DataStream<Tuple2<String, Integer>> counts = tokenized
        .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        })
        .keyBy(0)
        .sum(1);

    // hand the result to the selected sink
    if (!fileOutput) {
        counts.print();
    } else {
        counts.writeAsCsv(outputPath);
    }

    // trigger the job
    env.execute("Streaming WordCount Example");
}
Also used : DataStream(org.apache.flink.streaming.api.datastream.DataStream) Arrays(java.util.Arrays) WordCountData(org.apache.flink.examples.java.wordcount.util.WordCountData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 43 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class StreamingJobGraphGeneratorTest method testResourcesForIteration.

/**
 * Verifies that the resources are merged correctly for chained operators (covers the
 * middle-chaining and iteration cases) when the job graph is generated.
 *
 * <p>Builds source -> iteration -> flatMap -> filter -> sink, assigns a distinct
 * {@link ResourceSpec} to each stage through reflective calls to {@code setResources},
 * and then checks the resources reported by the job vertices, matched by vertex name.
 */
@Test
public void testResourcesForIteration() throws Exception {
    // one distinct spec per pipeline stage
    ResourceSpec sourceSpec = new ResourceSpec(0.1, 100);
    ResourceSpec iterationSpec = new ResourceSpec(0.2, 200);
    ResourceSpec flatMapSpec = new ResourceSpec(0.3, 300);
    ResourceSpec filterSpec = new ResourceSpec(0.4, 400);
    ResourceSpec sinkSpec = new ResourceSpec(0.5, 500);

    // setResources is looked up reflectively and made accessible before use
    Method setOperatorResources =
        SingleOutputStreamOperator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    setOperatorResources.setAccessible(true);
    Method setSinkResources =
        DataStreamSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    setSinkResources.setAccessible(true);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // source with empty run/cancel bodies
    DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).name("test_source");
    setOperatorResources.invoke(source, sourceSpec);

    IterativeStream<Integer> iteration = source.iterate(3000);
    setOperatorResources.invoke(iteration, iterationSpec);

    // flatMap that forwards each value unchanged
    DataStream<Integer> flatMap = iteration.flatMap(new FlatMapFunction<Integer, Integer>() {

        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value);
        }
    }).name("test_flatMap");
    setOperatorResources.invoke(flatMap, flatMapSpec);

    // CHAIN(flatMap -> Filter); the filter rejects every value
    DataStream<Integer> filtered = flatMap.filter(new FilterFunction<Integer>() {

        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    }).name("test_filter");
    setOperatorResources.invoke(filtered, filterSpec);

    // close the iteration with the filtered stream and attach an unchained sink
    DataStreamSink<Integer> sink = iteration.closeWith(filtered).addSink(new SinkFunction<Integer>() {

        @Override
        public void invoke(Integer value) throws Exception {
        }
    }).disableChaining().name("test_sink");
    setSinkResources.invoke(sink, sinkSpec);

    JobGraph jobGraph = new StreamingJobGraphGenerator(env.getStreamGraph(), 1).createJobGraph();

    // vertices are recognised by name; names matching none of the cases are not checked
    for (JobVertex vertex : jobGraph.getVertices()) {
        final String vertexName = vertex.getName();
        if (vertexName.contains("test_source")) {
            assertTrue(vertex.getMinResources().equals(sourceSpec));
        } else if (vertexName.contains("Iteration_Source")) {
            assertTrue(vertex.getPreferredResources().equals(iterationSpec));
        } else if (vertexName.contains("test_flatMap")) {
            // the assertion expects the flatMap vertex to carry the merged flatMap + filter spec
            assertTrue(vertex.getMinResources().equals(flatMapSpec.merge(filterSpec)));
        } else if (vertexName.contains("Iteration_Tail")) {
            assertTrue(vertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
        } else if (vertexName.contains("test_sink")) {
            assertTrue(vertex.getMinResources().equals(sinkSpec));
        }
    }
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) Method(java.lang.reflect.Method) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 44 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class InternalWindowFunctionTest method testInternalIterableWindowFunction.

/**
 * Checks that {@code InternalIterableWindowFunction} forwards {@code setOutputType},
 * {@code open}, {@code setRuntimeContext}, {@code apply} and {@code close} to the
 * window function it wraps, using a mocked {@code WindowFunctionMock} as the delegate.
 */
@SuppressWarnings("unchecked")
@Test
public void testInternalIterableWindowFunction() throws Exception {
    WindowFunctionMock delegate = mock(WindowFunctionMock.class);
    InternalIterableWindowFunction<Long, String, Long, TimeWindow> windowFunction =
        new InternalIterableWindowFunction<>(delegate);

    // setOutputType is forwarded with the same type info and config
    TypeInformation<String> outputType = BasicTypeInfo.STRING_TYPE_INFO;
    ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setParallelism(42);
    StreamingFunctionUtils.setOutputType(windowFunction, outputType, executionConfig);
    verify(delegate).setOutputType(outputType, executionConfig);

    // open is forwarded with the same configuration
    Configuration configuration = new Configuration();
    windowFunction.open(configuration);
    verify(delegate).open(configuration);

    // setRuntimeContext is forwarded with the same context
    RuntimeContext runtimeContext = mock(RuntimeContext.class);
    windowFunction.setRuntimeContext(runtimeContext);
    verify(delegate).setRuntimeContext(runtimeContext);

    // apply is forwarded with key, window, input and collector unchanged
    TimeWindow window = mock(TimeWindow.class);
    Iterable<Long> input = (Iterable<Long>) mock(Iterable.class);
    Collector<String> collector = (Collector<String>) mock(Collector.class);
    windowFunction.apply(42L, window, input, collector);
    verify(delegate).apply(eq(42L), eq(window), eq(input), eq(collector));

    // close is forwarded
    windowFunction.close();
    verify(delegate).close();
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InternalIterableWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalIterableWindowFunction) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) Collector(org.apache.flink.util.Collector) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Test(org.junit.Test)

Example 45 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class InternalWindowFunctionTest method testInternalAggregateProcessAllWindowFunction.

/**
 * Checks that {@code InternalAggregateProcessAllWindowFunction} forwards
 * {@code setOutputType}, {@code open}, {@code setRuntimeContext} and {@code close} to
 * the wrapped process function, and that {@code apply} results in a {@code process}
 * call on it whose second argument holds a map with the entries 23 -> 23 and 24 -> 24.
 */
@SuppressWarnings("unchecked")
@Test
public void testInternalAggregateProcessAllWindowFunction() throws Exception {
    AggregateProcessAllWindowFunctionMock delegate = mock(AggregateProcessAllWindowFunctionMock.class);

    // aggregate: collect values into a set, then expose each value mapped to itself
    InternalAggregateProcessAllWindowFunction<Long, Set<Long>, Map<Long, Long>, String, TimeWindow> windowFunction =
        new InternalAggregateProcessAllWindowFunction<>(new AggregateFunction<Long, Set<Long>, Map<Long, Long>>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Set<Long> createAccumulator() {
                return new HashSet<>();
            }

            @Override
            public void add(Long element, Set<Long> seen) {
                seen.add(element);
            }

            @Override
            public Map<Long, Long> getResult(Set<Long> seen) {
                Map<Long, Long> identity = new HashMap<>();
                for (Long element : seen) {
                    identity.put(element, element);
                }
                return identity;
            }

            @Override
            public Set<Long> merge(Set<Long> left, Set<Long> right) {
                // the left set is extended in place and returned
                left.addAll(right);
                return left;
            }
        }, delegate);

    // setOutputType is forwarded with the same type info and config
    TypeInformation<String> outputType = BasicTypeInfo.STRING_TYPE_INFO;
    ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setParallelism(42);
    StreamingFunctionUtils.setOutputType(windowFunction, outputType, executionConfig);
    verify(delegate).setOutputType(outputType, executionConfig);

    // open is forwarded with the same configuration
    Configuration configuration = new Configuration();
    windowFunction.open(configuration);
    verify(delegate).open(configuration);

    // setRuntimeContext is forwarded with the same context
    RuntimeContext runtimeContext = mock(RuntimeContext.class);
    windowFunction.setRuntimeContext(runtimeContext);
    verify(delegate).setRuntimeContext(runtimeContext);

    // apply with inputs 23 and 24 must reach process with the aggregated map
    TimeWindow window = mock(TimeWindow.class);
    Collector<String> collector = (Collector<String>) mock(Collector.class);
    List<Long> inputs = new LinkedList<>();
    inputs.add(23L);
    inputs.add(24L);
    windowFunction.apply((byte) 0, window, inputs, collector);
    verify(delegate).process((AggregateProcessAllWindowFunctionMock.Context) anyObject(), (Iterable) argThat(containsInAnyOrder(allOf(hasEntry(is(23L), is(23L)), hasEntry(is(24L), is(24L))))), eq(collector));

    // close is forwarded
    windowFunction.close();
    verify(delegate).close();
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) Configuration(org.apache.flink.configuration.Configuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) InternalAggregateProcessAllWindowFunction(org.apache.flink.streaming.runtime.operators.windowing.functions.InternalAggregateProcessAllWindowFunction) LinkedList(java.util.LinkedList) Collector(org.apache.flink.util.Collector) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Aggregations

Collector (org.apache.flink.util.Collector): 51, Test (org.junit.Test): 38, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 20, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 16, Configuration (org.apache.flink.configuration.Configuration): 16, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 15, RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 14, TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 11, HashMap (java.util.HashMap): 9, ArrayList (java.util.ArrayList): 8, FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 8, Plan (org.apache.flink.api.common.Plan): 7, HashSet (java.util.HashSet): 6, RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction): 6, Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 6, Map (java.util.Map): 5, GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction): 5, TaskInfo (org.apache.flink.api.common.TaskInfo): 4, CoGroupFunction (org.apache.flink.api.common.functions.CoGroupFunction): 4