
Example 31 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

The class AsyncIOExample, method main:

public static void main(String[] args) throws Exception {
    // obtain execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // parse parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String statePath;
    final String cpMode;
    final int maxCount;
    final long sleepFactor;
    final float failRatio;
    final String mode;
    final int taskNum;
    final String timeType;
    final long shutdownWaitTS;
    final long timeout;
    try {
        // check the configuration for the job
        statePath = params.get("fsStatePath", null);
        cpMode = params.get("checkpointMode", "exactly_once");
        maxCount = params.getInt("maxCount", 100000);
        sleepFactor = params.getLong("sleepFactor", 100);
        failRatio = params.getFloat("failRatio", 0.001f);
        mode = params.get("waitMode", "ordered");
        taskNum = params.getInt("waitOperatorParallelism", 1);
        timeType = params.get("eventType", "EventTime");
        shutdownWaitTS = params.getLong("shutdownWaitTS", 20000);
        timeout = params.getLong("timeout", 10000L);
    } catch (Exception e) {
        printUsage();
        throw e;
    }
    StringBuilder configStringBuilder = new StringBuilder();
    final String lineSeparator = System.getProperty("line.separator");
    configStringBuilder
            .append("Job configuration").append(lineSeparator)
            .append("FS state path=").append(statePath).append(lineSeparator)
            .append("Checkpoint mode=").append(cpMode).append(lineSeparator)
            .append("Max count of input from source=").append(maxCount).append(lineSeparator)
            .append("Sleep factor=").append(sleepFactor).append(lineSeparator)
            .append("Fail ratio=").append(failRatio).append(lineSeparator)
            .append("Waiting mode=").append(mode).append(lineSeparator)
            .append("Parallelism for async wait operator=").append(taskNum).append(lineSeparator)
            .append("Event type=").append(timeType).append(lineSeparator)
            .append("Shutdown wait timestamp=").append(shutdownWaitTS);
    LOG.info(configStringBuilder.toString());
    if (statePath != null) {
        // setup state and checkpoint mode
        env.setStateBackend(new FsStateBackend(statePath));
    }
    if (EXACTLY_ONCE_MODE.equals(cpMode)) {
        env.enableCheckpointing(1000L, CheckpointingMode.EXACTLY_ONCE);
    } else {
        env.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE);
    }
    // enable watermark or not
    if (EVENT_TIME.equals(timeType)) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    } else if (INGESTION_TIME.equals(timeType)) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    }
    // create input stream of a single integer
    DataStream<Integer> inputStream = env.addSource(new SimpleSource(maxCount));
    // create async function, which will *wait* for a while to simulate the process of async i/o
    AsyncFunction<Integer, String> function = new SampleAsyncFunction(sleepFactor, failRatio, shutdownWaitTS);
    // add async operator to streaming job
    DataStream<String> result;
    if (ORDERED.equals(mode)) {
        result = AsyncDataStream.orderedWait(inputStream, function, timeout, TimeUnit.MILLISECONDS, 20).setParallelism(taskNum);
    } else {
        result = AsyncDataStream.unorderedWait(inputStream, function, timeout, TimeUnit.MILLISECONDS, 20).setParallelism(taskNum);
    }
    // add a flat map, keyBy and sum to count the occurrences of each key
    result.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = -938116068682344455L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            out.collect(new Tuple2<>(value, 1));
        }
    }).keyBy(0).sum(1).print();
    // execute the program
    env.execute("Async IO Example");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) AsyncCollector(org.apache.flink.streaming.api.functions.async.collector.AsyncCollector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend)
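
The SampleAsyncFunction wired into the job above is not reproduced here. The following is a minimal sketch under the assumption that the example targets the Flink 1.2/1.3 async I/O API, where AsyncCollector exposes collect(Collection&lt;OUT&gt;) for results and collect(Throwable) for errors; the class name, thread-pool size and key format are illustrative and differ from the real SampleAsyncFunction in the Flink repository.

import java.util.Collections;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector;

// Illustrative async function: simulates a slow external lookup on a private thread pool
// and completes the AsyncCollector either with a result or with a simulated failure.
public class SampleAsyncFunctionSketch extends RichAsyncFunction<Integer, String> {

    private static final long serialVersionUID = 1L;

    private final long sleepFactor;
    private final float failRatio;

    private transient ExecutorService executor;

    public SampleAsyncFunctionSketch(long sleepFactor, float failRatio) {
        this.sleepFactor = sleepFactor;
        this.failRatio = failRatio;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        executor = Executors.newFixedThreadPool(4);
    }

    @Override
    public void asyncInvoke(final Integer input, final AsyncCollector<String> collector) {
        executor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    // simulate the latency of an external service call
                    Thread.sleep((long) (ThreadLocalRandom.current().nextFloat() * sleepFactor));
                    if (ThreadLocalRandom.current().nextFloat() < failRatio) {
                        collector.collect(new Exception("simulated async failure"));
                    } else {
                        collector.collect(Collections.singletonList("key-" + (input % 10)));
                    }
                } catch (InterruptedException e) {
                    collector.collect(e);
                }
            }
        });
    }

    @Override
    public void close() throws Exception {
        executor.shutdownNow();
    }
}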

Example 32 with Collector

Use of org.apache.flink.util.Collector in project beam by apache.

The class FlinkDoFnFunction, method mapPartition:

@Override
public void mapPartition(Iterable<WindowedValue<InputT>> values, Collector<WindowedValue<OutputT>> out) throws Exception {
    RuntimeContext runtimeContext = getRuntimeContext();
    DoFnRunners.OutputManager outputManager;
    if (outputMap.size() == 1) {
        outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
    } else {
        // it has some additional outputs
        outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
    }
    List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());
    DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(serializedOptions.getPipelineOptions(), doFn, new FlinkSideInputReader(sideInputs, runtimeContext), outputManager, mainOutputTag, additionalOutputTags, new FlinkNoOpStepContext(), windowingStrategy);
    if ((serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
    }
    doFnRunner.startBundle();
    for (WindowedValue<InputT> value : values) {
        doFnRunner.processElement(value);
    }
    doFnRunner.finishBundle();
}
Also used : DoFnRunners(org.apache.beam.runners.core.DoFnRunners) TupleTag(org.apache.beam.sdk.values.TupleTag) Collector(org.apache.flink.util.Collector) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext)
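
The single-output branch delegates to FlinkDoFnFunction.DoFnOutputManager, which is not shown above. Below is a minimal sketch of such an output manager, assuming the Beam runners-core DoFnRunners.OutputManager interface with a single method <T> void output(TupleTag<T>, WindowedValue<T>); the class name and the unchecked cast are illustrative, and the multi-output variant would instead dispatch on the TupleTag via the outputMap.

import org.apache.beam.runners.core.DoFnRunners;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.flink.util.Collector;

// Illustrative single-output manager: every element emitted by the DoFn is forwarded
// to the wrapped Flink Collector, regardless of its output tag.
class DoFnOutputManagerSketch<OutputT> implements DoFnRunners.OutputManager {

    private final Collector<WindowedValue<OutputT>> collector;

    DoFnOutputManagerSketch(Collector<WindowedValue<OutputT>> collector) {
        this.collector = collector;
    }

    @Override
    @SuppressWarnings("unchecked")
    public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
        // with a single (main) output the tag can be ignored
        collector.collect((WindowedValue<OutputT>) value);
    }
}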

Example 33 with Collector

Use of org.apache.flink.util.Collector in project beam by apache.

The class FlinkStatefulDoFnFunction, method reduce:

@Override
public void reduce(Iterable<WindowedValue<KV<K, V>>> values, Collector<WindowedValue<OutputT>> out) throws Exception {
    RuntimeContext runtimeContext = getRuntimeContext();
    DoFnRunners.OutputManager outputManager;
    if (outputMap.size() == 1) {
        outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
    } else {
        // it has some additional outputs
        outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
    }
    final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();
    // get the first value, we need this for initializing the state internals with the key.
    // we are guaranteed to have a first value, otherwise reduce() would not have been called.
    WindowedValue<KV<K, V>> currentValue = iterator.next();
    final K key = currentValue.getValue().getKey();
    final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);
    // Used with Batch, we know that all the data is available for this key. We can't use the
    // timer manager from the context because it doesn't exist. So we create one and advance
    // time to the end after processing all elements.
    final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
    timerInternals.advanceProcessingTime(Instant.now());
    timerInternals.advanceSynchronizedProcessingTime(Instant.now());
    List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());
    DoFnRunner<KV<K, V>, OutputT> doFnRunner = DoFnRunners.simpleRunner(serializedOptions.getPipelineOptions(), dofn, new FlinkSideInputReader(sideInputs, runtimeContext), outputManager, mainOutputTag, additionalOutputTags, new FlinkNoOpStepContext() {

        @Override
        public StateInternals stateInternals() {
            return stateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return timerInternals;
        }
    }, windowingStrategy);
    if ((serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
    }
    doFnRunner.startBundle();
    doFnRunner.processElement(currentValue);
    while (iterator.hasNext()) {
        currentValue = iterator.next();
        doFnRunner.processElement(currentValue);
    }
    // Finish any pending windows by advancing the input watermark to infinity.
    timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
    // Finally, advance the processing time to infinity to fire any timers.
    timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    fireEligibleTimers(timerInternals, doFnRunner);
    doFnRunner.finishBundle();
}
Also used : DoFnRunners(org.apache.beam.runners.core.DoFnRunners) TupleTag(org.apache.beam.sdk.values.TupleTag) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) Collector(org.apache.flink.util.Collector) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext)
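
The fireEligibleTimers call near the end is a private helper of FlinkStatefulDoFnFunction and is not shown above. A rough sketch of what such a helper might look like, assuming the Beam runners-core API where InMemoryTimerInternals hands out due timers via removeNextEventTimer(), removeNextProcessingTimer() and removeNextSynchronizedProcessingTimer(), and where a timer's window is recovered from its state namespace; the method shapes and names here are illustrative, not the actual implementation.

import org.apache.beam.runners.core.DoFnRunner;
import org.apache.beam.runners.core.InMemoryTimerInternals;
import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.TimerInternals;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;

// Illustrative helper: after the watermarks have been advanced to the end of time,
// drain every timer that has become eligible and hand it to the DoFnRunner.
static <InputT, OutputT> void fireEligibleTimers(
        InMemoryTimerInternals timerInternals,
        DoFnRunner<InputT, OutputT> runner) throws Exception {
    TimerInternals.TimerData timer;
    while ((timer = timerInternals.removeNextEventTimer()) != null) {
        fire(runner, timer);
    }
    while ((timer = timerInternals.removeNextProcessingTimer()) != null) {
        fire(runner, timer);
    }
    while ((timer = timerInternals.removeNextSynchronizedProcessingTimer()) != null) {
        fire(runner, timer);
    }
}

static <InputT, OutputT> void fire(
        DoFnRunner<InputT, OutputT> runner, TimerInternals.TimerData timer) {
    // the window a timer was set in is encoded in its state namespace
    BoundedWindow window =
            ((StateNamespaces.WindowNamespace) timer.getNamespace()).getWindow();
    runner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
}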

Example 34 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

The class TransitiveClosureNaive, method main:

public static void main(String... args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    final int maxIterations = params.getInt("iterations", 10);
    DataSet<Tuple2<Long, Long>> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges")).fieldDelimiter(" ").types(Long.class, Long.class);
    } else {
        System.out.println("Executing TransitiveClosureNaive example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }
    IterativeDataSet<Tuple2<Long, Long>> paths = edges.iterate(maxIterations);
    DataSet<Tuple2<Long, Long>> nextPaths = paths.join(edges).where(1).equalTo(0).with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        /**
         * left: Path (z,x) - x is reachable by z
         * right: Edge (x,y) - edge x-->y exists
         * out: Path (z,y) - y is reachable by z
         */
        @Override
        public Tuple2<Long, Long> join(Tuple2<Long, Long> left, Tuple2<Long, Long> right) throws Exception {
            return new Tuple2<Long, Long>(left.f0, right.f1);
        }
    }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("1").union(paths).groupBy(0, 1).reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        @Override
        public void reduce(Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) throws Exception {
            out.collect(values.iterator().next());
        }
    }).withForwardedFields("0;1");
    DataSet<Tuple2<Long, Long>> newPaths = paths.coGroup(nextPaths).where(0).equalTo(0).with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        Set<Tuple2<Long, Long>> prevSet = new HashSet<Tuple2<Long, Long>>();

        @Override
        public void coGroup(Iterable<Tuple2<Long, Long>> prevPaths, Iterable<Tuple2<Long, Long>> nextPaths, Collector<Tuple2<Long, Long>> out) throws Exception {
            for (Tuple2<Long, Long> prev : prevPaths) {
                prevSet.add(prev);
            }
            for (Tuple2<Long, Long> next : nextPaths) {
                if (!prevSet.contains(next)) {
                    out.collect(next);
                }
            }
        }
    }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
    DataSet<Tuple2<Long, Long>> transitiveClosure = paths.closeWith(nextPaths, newPaths);
    // emit result
    if (params.has("output")) {
        transitiveClosure.writeAsCsv(params.get("output"), "\n", " ");
        // execute program explicitly, because file sinks are lazy
        env.execute("Transitive Closure Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        transitiveClosure.print();
    }
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) HashSet(java.util.HashSet)
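
To see what the iteration does concretely: starting from the edge set {(1,2), (2,3), (3,4)}, the join adds the two-hop paths (1,3) and (2,4) in the first iteration and the three-hop path (1,4) in the second; the third iteration discovers nothing new, so newPaths (the termination criterion passed to closeWith) becomes empty and the iteration stops even if maxIterations has not been reached.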

Example 35 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

The class SocketWindowWordCount, method main:

public static void main(String[] args) throws Exception {
    // the host and the port to connect to
    final String hostname;
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        hostname = params.has("hostname") ? params.get("hostname") : "localhost";
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount " + "--hostname <hostname> --port <port>', where hostname (localhost by default) " + "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and " + "type the input text into the command line");
        return;
    }
    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");
    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text.flatMap(new FlatMapFunction<String, WordWithCount>() {

        @Override
        public void flatMap(String value, Collector<WordWithCount> out) {
            for (String word : value.split("\\s")) {
                out.collect(new WordWithCount(word, 1L));
            }
        }
    }).keyBy("word").timeWindow(Time.seconds(5)).reduce(new ReduceFunction<WordWithCount>() {

        @Override
        public WordWithCount reduce(WordWithCount a, WordWithCount b) {
            return new WordWithCount(a.word, a.count + b.count);
        }
    });
    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);
    env.execute("Socket Window WordCount");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
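
The WordWithCount type used above is not shown. The sketch below assumes it is a plain Flink POJO, which keyBy("word") requires: a public class with a public no-argument constructor and public fields (or getters/setters) so the word field can be addressed by name; the toString() only affects the output of print().

// Illustrative POJO for the word counts; public fields and a no-arg constructor
// make it a Flink POJO so that keyBy("word") can reference the field by name.
public static class WordWithCount {

    public String word;
    public long count;

    public WordWithCount() {
    }

    public WordWithCount(String word, long count) {
        this.word = word;
        this.count = count;
    }

    @Override
    public String toString() {
        return word + " : " + count;
    }
}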

Aggregations

Collector (org.apache.flink.util.Collector): 50
Test (org.junit.Test): 38
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 20
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 16
Configuration (org.apache.flink.configuration.Configuration): 16
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 14
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 11
ArrayList (java.util.ArrayList): 8
HashMap (java.util.HashMap): 8
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 8
Plan (org.apache.flink.api.common.Plan): 7
HashSet (java.util.HashSet): 6
RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction): 6
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 6
GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction): 5
Map (java.util.Map): 4
TaskInfo (org.apache.flink.api.common.TaskInfo): 4
CoGroupFunction (org.apache.flink.api.common.functions.CoGroupFunction): 4