Use of org.apache.flink.util.Collector in project flink by apache.
From the class AsyncIOExample, method main().
public static void main(String[] args) throws Exception {
    // obtain execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // parse parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String statePath;
    final String cpMode;
    final int maxCount;
    final long sleepFactor;
    final float failRatio;
    final String mode;
    final int taskNum;
    final String timeType;
    final long shutdownWaitTS;
    final long timeout;
    try {
        // check the configuration for the job
        statePath = params.get("fsStatePath", null);
        cpMode = params.get("checkpointMode", "exactly_once");
        maxCount = params.getInt("maxCount", 100000);
        sleepFactor = params.getLong("sleepFactor", 100);
        failRatio = params.getFloat("failRatio", 0.001f);
        mode = params.get("waitMode", "ordered");
        taskNum = params.getInt("waitOperatorParallelism", 1);
        timeType = params.get("eventType", "EventTime");
        shutdownWaitTS = params.getLong("shutdownWaitTS", 20000);
        timeout = params.getLong("timeout", 10000L);
    } catch (Exception e) {
        printUsage();
        throw e;
    }
    StringBuilder configStringBuilder = new StringBuilder();
    final String lineSeparator = System.getProperty("line.separator");
    configStringBuilder
        .append("Job configuration").append(lineSeparator)
        .append("FS state path=").append(statePath).append(lineSeparator)
        .append("Checkpoint mode=").append(cpMode).append(lineSeparator)
        .append("Max count of input from source=").append(maxCount).append(lineSeparator)
        .append("Sleep factor=").append(sleepFactor).append(lineSeparator)
        .append("Fail ratio=").append(failRatio).append(lineSeparator)
        .append("Waiting mode=").append(mode).append(lineSeparator)
        .append("Parallelism for async wait operator=").append(taskNum).append(lineSeparator)
        .append("Event type=").append(timeType).append(lineSeparator)
        .append("Shutdown wait timestamp=").append(shutdownWaitTS);
    LOG.info(configStringBuilder.toString());
    if (statePath != null) {
        // setup state and checkpoint mode
        env.setStateBackend(new FsStateBackend(statePath));
    }
    if (EXACTLY_ONCE_MODE.equals(cpMode)) {
        env.enableCheckpointing(1000L, CheckpointingMode.EXACTLY_ONCE);
    } else {
        env.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE);
    }
    // enable watermarks or not
    if (EVENT_TIME.equals(timeType)) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    } else if (INGESTION_TIME.equals(timeType)) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    }
    // create input stream of a single integer
    DataStream<Integer> inputStream = env.addSource(new SimpleSource(maxCount));
    // create async function, which will *wait* for a while to simulate the process of async i/o
    AsyncFunction<Integer, String> function =
        new SampleAsyncFunction(sleepFactor, failRatio, shutdownWaitTS);
    // add async operator to streaming job
    DataStream<String> result;
    if (ORDERED.equals(mode)) {
        result = AsyncDataStream.orderedWait(inputStream, function, timeout, TimeUnit.MILLISECONDS, 20)
            .setParallelism(taskNum);
    } else {
        result = AsyncDataStream.unorderedWait(inputStream, function, timeout, TimeUnit.MILLISECONDS, 20)
            .setParallelism(taskNum);
    }
    // add a reduce to get the sum for each key
    result.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = -938116068682344455L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            out.collect(new Tuple2<>(value, 1));
        }
    }).keyBy(0).sum(1).print();
    // execute the program
    env.execute("Async IO Example");
}
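The SampleAsyncFunction used above is not shown in this snippet. A minimal sketch of an async function in the same spirit is given below; it assumes the ResultFuture-based API of Flink 1.5+ (earlier releases used AsyncCollector instead), and the class name, thread pool, and result format are illustrative rather than taken from the example's source.

// Illustrative sketch, not the actual SampleAsyncFunction: simulate a slow external
// call by completing the ResultFuture from a separate thread after a short sleep.
public class SimulatedAsyncFunction extends RichAsyncFunction<Integer, String> {

    private static final long serialVersionUID = 1L;

    private final long sleepMillis;
    private transient ExecutorService executor;

    public SimulatedAsyncFunction(long sleepMillis) {
        this.sleepMillis = sleepMillis;
    }

    @Override
    public void open(Configuration parameters) {
        // thread pool standing in for an asynchronous client
        executor = Executors.newFixedThreadPool(4);
    }

    @Override
    public void asyncInvoke(Integer input, ResultFuture<String> resultFuture) {
        executor.submit(() -> {
            try {
                Thread.sleep(sleepMillis); // simulate I/O latency
                resultFuture.complete(Collections.singletonList("key-" + (input % 10)));
            } catch (InterruptedException e) {
                resultFuture.completeExceptionally(e);
            }
        });
    }

    @Override
    public void close() {
        executor.shutdown();
    }
}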
Use of org.apache.flink.util.Collector in project beam by apache.
From the class FlinkDoFnFunction, method mapPartition().
@Override
public void mapPartition(Iterable<WindowedValue<InputT>> values, Collector<WindowedValue<OutputT>> out) throws Exception {
    RuntimeContext runtimeContext = getRuntimeContext();
    DoFnRunners.OutputManager outputManager;
    if (outputMap.size() == 1) {
        outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
    } else {
        // it has some additional outputs
        outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
    }
    List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());
    DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(
        serializedOptions.getPipelineOptions(),
        doFn,
        new FlinkSideInputReader(sideInputs, runtimeContext),
        outputManager,
        mainOutputTag,
        additionalOutputTags,
        new FlinkNoOpStepContext(),
        windowingStrategy);
    if (serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class).getEnableMetrics()) {
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
    }
    doFnRunner.startBundle();
    for (WindowedValue<InputT> value : values) {
        doFnRunner.processElement(value);
    }
    doFnRunner.finishBundle();
}
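The DoFnOutputManager that wraps the Flink Collector is referenced but not shown above. A simplified sketch of such a bridge between Beam's OutputManager and the Flink Collector might look like the following; the class name and details are illustrative, not the actual Beam runner code.

// Illustrative sketch only: forward every output of the DoFnRunner to the
// surrounding Flink Collector (single-output case, so all tags go to one collector).
static class CollectorOutputManager<OutputT> implements DoFnRunners.OutputManager {

    private final Collector<WindowedValue<OutputT>> collector;

    CollectorOutputManager(Collector<WindowedValue<OutputT>> collector) {
        this.collector = collector;
    }

    @Override
    @SuppressWarnings("unchecked")
    public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
        collector.collect((WindowedValue<OutputT>) output);
    }
}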
Use of org.apache.flink.util.Collector in project beam by apache.
From the class FlinkStatefulDoFnFunction, method reduce().
@Override
public void reduce(Iterable<WindowedValue<KV<K, V>>> values, Collector<WindowedValue<OutputT>> out) throws Exception {
    RuntimeContext runtimeContext = getRuntimeContext();
    DoFnRunners.OutputManager outputManager;
    if (outputMap.size() == 1) {
        outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
    } else {
        // it has some additional outputs
        outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
    }
    final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();
    // Get the first value; we need it to initialize the state internals with the key.
    // We are guaranteed to have a first value, otherwise reduce() would not have been called.
    WindowedValue<KV<K, V>> currentValue = iterator.next();
    final K key = currentValue.getValue().getKey();
    final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);
    // Used in batch execution, where we know that all the data is available for this key.
    // We can't use the timer manager from the context because it doesn't exist, so we create
    // one and advance time to the end after processing all elements.
    final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
    timerInternals.advanceProcessingTime(Instant.now());
    timerInternals.advanceSynchronizedProcessingTime(Instant.now());
    List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());
    DoFnRunner<KV<K, V>, OutputT> doFnRunner = DoFnRunners.simpleRunner(
        serializedOptions.getPipelineOptions(),
        dofn,
        new FlinkSideInputReader(sideInputs, runtimeContext),
        outputManager,
        mainOutputTag,
        additionalOutputTags,
        new FlinkNoOpStepContext() {

            @Override
            public StateInternals stateInternals() {
                return stateInternals;
            }

            @Override
            public TimerInternals timerInternals() {
                return timerInternals;
            }
        },
        windowingStrategy);
    if (serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class).getEnableMetrics()) {
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
    }
    doFnRunner.startBundle();
    doFnRunner.processElement(currentValue);
    while (iterator.hasNext()) {
        currentValue = iterator.next();
        doFnRunner.processElement(currentValue);
    }
    // Finish any pending windows by advancing the input watermark to infinity.
    timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
    // Finally, advance the processing time to infinity to fire any timers.
    timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    fireEligibleTimers(timerInternals, doFnRunner);
    doFnRunner.finishBundle();
}
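fireEligibleTimers is a private helper of FlinkStatefulDoFnFunction and is not included in this snippet. One plausible shape for it, based on the expired-timer accessors of InMemoryTimerInternals, is sketched below; the exact DoFnRunner.onTimer signature differs across Beam versions, so treat this strictly as an illustration.

// Illustrative sketch of a timer-draining loop, not the exact Beam runner code.
// Assumes a Beam version where DoFnRunner.onTimer takes (timerId, window, timestamp, timeDomain).
private void fireEligibleTimers(InMemoryTimerInternals timerInternals, DoFnRunner<KV<K, V>, OutputT> runner) {
    TimerInternals.TimerData timer;
    // event-time timers became eligible when the input watermark was advanced to infinity
    while ((timer = timerInternals.removeNextEventTimer()) != null) {
        fireTimer(timer, runner);
    }
    // processing-time timers became eligible when processing time was advanced to infinity
    while ((timer = timerInternals.removeNextProcessingTimer()) != null) {
        fireTimer(timer, runner);
    }
    while ((timer = timerInternals.removeNextSynchronizedProcessingTimer()) != null) {
        fireTimer(timer, runner);
    }
}

private void fireTimer(TimerInternals.TimerData timer, DoFnRunner<KV<K, V>, OutputT> runner) {
    // the timer's namespace encodes the window it belongs to
    BoundedWindow window = ((StateNamespaces.WindowNamespace) timer.getNamespace()).getWindow();
    runner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
}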
Use of org.apache.flink.util.Collector in project flink by apache.
From the class TransitiveClosureNaive, method main().
public static void main(String... args) throws Exception {
    // checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    final int maxIterations = params.getInt("iterations", 10);
    DataSet<Tuple2<Long, Long>> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges")).fieldDelimiter(" ").types(Long.class, Long.class);
    } else {
        System.out.println("Executing TransitiveClosureNaive example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }
    IterativeDataSet<Tuple2<Long, Long>> paths = edges.iterate(maxIterations);
    DataSet<Tuple2<Long, Long>> nextPaths = paths
        .join(edges).where(1).equalTo(0)
        .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            /**
             * left: Path (z,x) - x is reachable by z
             * right: Edge (x,y) - edge x-->y exists
             * out: Path (z,y) - y is reachable by z
             */
            @Override
            public Tuple2<Long, Long> join(Tuple2<Long, Long> left, Tuple2<Long, Long> right) throws Exception {
                return new Tuple2<Long, Long>(left.f0, right.f1);
            }
        })
        .withForwardedFieldsFirst("0").withForwardedFieldsSecond("1")
        .union(paths)
        .groupBy(0, 1)
        .reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void reduce(Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) throws Exception {
                out.collect(values.iterator().next());
            }
        })
        .withForwardedFields("0;1");
    DataSet<Tuple2<Long, Long>> newPaths = paths
        .coGroup(nextPaths).where(0).equalTo(0)
        .with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            Set<Tuple2<Long, Long>> prevSet = new HashSet<Tuple2<Long, Long>>();

            @Override
            public void coGroup(Iterable<Tuple2<Long, Long>> prevPaths, Iterable<Tuple2<Long, Long>> nextPaths, Collector<Tuple2<Long, Long>> out) throws Exception {
                for (Tuple2<Long, Long> prev : prevPaths) {
                    prevSet.add(prev);
                }
                for (Tuple2<Long, Long> next : nextPaths) {
                    if (!prevSet.contains(next)) {
                        out.collect(next);
                    }
                }
            }
        })
        .withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
    DataSet<Tuple2<Long, Long>> transitiveClosure = paths.closeWith(nextPaths, newPaths);
    // emit result
    if (params.has("output")) {
        transitiveClosure.writeAsCsv(params.get("output"), "\n", " ");
        // execute program explicitly, because file sinks are lazy
        env.execute("Transitive Closure Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        transitiveClosure.print();
    }
}
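ConnectedComponentsData.getDefaultEdgeDataSet is one of the Flink examples' utility classes and is not shown here. For a quick local run without that helper, a hand-rolled edge set could stand in for it in the else-branch above; the edge values below are made up purely for illustration.

// Hypothetical stand-in for ConnectedComponentsData.getDefaultEdgeDataSet(env):
// a tiny hard-coded edge set for local experimentation.
edges = env.fromElements(
    new Tuple2<Long, Long>(1L, 2L),
    new Tuple2<Long, Long>(2L, 3L),
    new Tuple2<Long, Long>(3L, 4L),
    new Tuple2<Long, Long>(5L, 6L));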
Use of org.apache.flink.util.Collector in project flink by apache.
From the class SocketWindowWordCount, method main().
public static void main(String[] args) throws Exception {
    // the host and the port to connect to
    final String hostname;
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        hostname = params.has("hostname") ? params.get("hostname") : "localhost";
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount " +
            "--hostname <hostname> --port <port>', where hostname (localhost by default) " +
            "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
            "type the input text into the command line");
        return;
    }
    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");
    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text
        .flatMap(new FlatMapFunction<String, WordWithCount>() {

            @Override
            public void flatMap(String value, Collector<WordWithCount> out) {
                for (String word : value.split("\\s")) {
                    out.collect(new WordWithCount(word, 1L));
                }
            }
        })
        .keyBy("word")
        .timeWindow(Time.seconds(5))
        .reduce(new ReduceFunction<WordWithCount>() {

            @Override
            public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                return new WordWithCount(a.word, a.count + b.count);
            }
        });
    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);
    env.execute("Socket Window WordCount");
}
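The WordWithCount type used above is the example's data holder. Because the stream is keyed by the field name "word", it has to be a Flink POJO with public fields and a no-argument constructor; a minimal version along those lines is sketched below (reconstructed for illustration, not copied from the example's source).

// Minimal POJO for the word counts. keyBy("word") requires public fields
// (or getters/setters) and a public no-argument constructor.
public static class WordWithCount {

    public String word;
    public long count;

    public WordWithCount() {
        // required for Flink's POJO serialization
    }

    public WordWithCount(String word, long count) {
        this.word = word;
        this.count = count;
    }

    @Override
    public String toString() {
        return word + " : " + count;
    }
}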