Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class FlinkTopology, method translateTopology.
/**
* Creates a Flink program that uses the specified spouts and bolts.
*/
private void translateTopology() {
    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();

    // Storm defaults to parallelism 1
    env.setParallelism(1);

    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
        final String spoutId = spout.getKey();
        final IRichSpout userSpout = spout.getValue();

        final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
        userSpout.declareOutputFields(declarer);
        final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
        this.outputStreams.put(spoutId, sourceStreams);
        declarers.put(spoutId, declarer);

        final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
        final DataStreamSource<?> source;

        if (sourceStreams.size() == 1) {
            final SpoutWrapper<Tuple> spoutWrapperSingleOutput =
                    new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
            spoutWrapperSingleOutput.setStormTopology(stormTopology);

            final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];

            DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId,
                    declarer.getOutputType(outputStreamId));

            outputStreams.put(outputStreamId, src);
            source = src;
        } else {
            final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs =
                    new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
            spoutWrapperMultipleOutputs.setStormTopology(stormTopology);

            @SuppressWarnings({ "unchecked", "rawtypes" })
            DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(
                    spoutWrapperMultipleOutputs, spoutId,
                    (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));

            SplitStream<SplitStreamType<Tuple>> splitSource =
                    multiSource.split(new StormStreamSelector<Tuple>());
            for (String streamId : sourceStreams.keySet()) {
                SingleOutputStreamOperator<Tuple> outStream =
                        splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
                outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
                outputStreams.put(streamId, outStream);
            }
            source = multiSource;
        }
        availableInputs.put(spoutId, outputStreams);

        final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
        if (common.is_set_parallelism_hint()) {
            int dop = common.get_parallelism_hint();
            source.setParallelism(dop);
        } else {
            common.set_parallelism_hint(1);
        }
    }
    /**
     * 1. Connect all spout streams with bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set, we might process a consumer
     * before its producer; thus, we might need to repeat multiple times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
        if (!makeProgress) {
            StringBuilder strBld = new StringBuilder();
            strBld.append("Unable to build Topology. Could not connect the following bolts:");
            for (String boltId : bolts.keySet()) {
                strBld.append("\n ");
                strBld.append(boltId);
                strBld.append(": missing input streams [");
                for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
                    strBld.append("'");
                    strBld.append(streams.getKey().get_streamId());
                    strBld.append("' from '");
                    strBld.append(streams.getKey().get_componentId());
                    strBld.append("'; ");
                }
                strBld.append("]");
            }
            throw new RuntimeException(strBld.toString());
        }
        makeProgress = false;

        final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
        while (boltsIterator.hasNext()) {
            final Entry<String, IRichBolt> bolt = boltsIterator.next();
            final String boltId = bolt.getKey();
            final IRichBolt userBolt = copyObject(bolt.getValue());

            final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();

            Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
            if (unprocessedBoltInputs == null) {
                unprocessedBoltInputs = new HashSet<>();
                unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
                unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
            }

            // check if all inputs are available
            final int numberOfInputs = unprocessedBoltInputs.size();
            int inputsAvailable = 0;
            for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
                final String producerId = entry.getKey().get_componentId();
                final String streamId = entry.getKey().get_streamId();
                final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
                if (streams != null && streams.get(streamId) != null) {
                    inputsAvailable++;
                }
            }

            if (inputsAvailable != numberOfInputs) {
                // traverse other bolts first until inputs are available
                continue;
            } else {
                makeProgress = true;
                boltsIterator.remove();
            }

            final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
            for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
                final GlobalStreamId streamId = input.getKey();
                final Grouping grouping = input.getValue();
                final String producerId = streamId.get_componentId();
                final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
                inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
            }

            final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);

            if (common.is_set_parallelism_hint()) {
                int dop = common.get_parallelism_hint();
                outputStream.setParallelism(dop);
            } else {
                common.set_parallelism_hint(1);
            }
        }
    }
}
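For context, translateTopology() rewrites an ordinary Storm topology into a Flink job graph: each spout becomes a Flink source (split into several streams when the spout declares more than one output stream), and the bolt loop keeps retrying until every bolt's inputs are available. A minimal sketch of the Storm side that would be fed into this translation; MySpout/MyBolt are hypothetical, and FlinkTopology.createTopology as the entry point is an assumption not shown in the snippet above:

// Hypothetical driver; MySpout/MyBolt and the createTopology entry point are assumptions.
TopologyBuilder builder = new TopologyBuilder();
// the parallelism hints below surface as common.get_parallelism_hint() / setParallelism(dop) above
builder.setSpout("sentences", new MySpout(), 2);
builder.setBolt("splitter", new MyBolt(), 4).shuffleGrouping("sentences");

FlinkTopology flinkTopology = FlinkTopology.createTopology(builder);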
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class FlinkTopology, method createOutput.
@SuppressWarnings({ "unchecked", "rawtypes" })
private SingleOutputStreamOperator<?> createOutput(String boltId, IRichBolt bolt,
        Map<GlobalStreamId, DataStream<Tuple>> inputStreams) {
    assert (boltId != null);
    assert (bolt != null);
    assert (inputStreams != null);

    Iterator<Entry<GlobalStreamId, DataStream<Tuple>>> iterator = inputStreams.entrySet().iterator();

    Entry<GlobalStreamId, DataStream<Tuple>> input1 = iterator.next();
    GlobalStreamId streamId1 = input1.getKey();
    String inputStreamId1 = streamId1.get_streamId();
    String inputComponentId1 = streamId1.get_componentId();
    Fields inputSchema1 = this.outputStreams.get(inputComponentId1).get(inputStreamId1);
    DataStream<Tuple> singleInputStream = input1.getValue();

    DataStream<StormTuple<Tuple>> mergedInputStream = null;
    while (iterator.hasNext()) {
        Entry<GlobalStreamId, DataStream<Tuple>> input2 = iterator.next();
        GlobalStreamId streamId2 = input2.getKey();
        DataStream<Tuple> inputStream2 = input2.getValue();

        if (mergedInputStream == null) {
            mergedInputStream = singleInputStream
                    .connect(inputStream2)
                    .flatMap(new TwoFlinkStreamsMerger(streamId1, inputSchema1, streamId2,
                            this.outputStreams.get(streamId2.get_componentId()).get(streamId2.get_streamId())))
                    .returns(StormTuple.class);
        } else {
            mergedInputStream = mergedInputStream
                    .connect(inputStream2)
                    .flatMap(new StormFlinkStreamMerger(streamId2,
                            this.outputStreams.get(streamId2.get_componentId()).get(streamId2.get_streamId())))
                    .returns(StormTuple.class);
        }
    }

    final HashMap<String, Fields> boltOutputs = this.outputStreams.get(boltId);
    final FlinkOutputFieldsDeclarer declarer = this.declarers.get(boltId);

    final SingleOutputStreamOperator<?> outputStream;

    if (boltOutputs.size() < 2) {
        // single output stream or sink
        String outputStreamId;
        if (boltOutputs.size() == 1) {
            outputStreamId = (String) boltOutputs.keySet().toArray()[0];
        } else {
            outputStreamId = null;
        }

        final TypeInformation<Tuple> outType = declarer.getOutputType(outputStreamId);

        final SingleOutputStreamOperator<Tuple> outStream;

        // only one input
        if (inputStreams.entrySet().size() == 1) {
            BoltWrapper<Tuple, Tuple> boltWrapper =
                    new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
            boltWrapper.setStormTopology(stormTopology);
            outStream = singleInputStream.transform(boltId, outType, boltWrapper);
        } else {
            MergedInputsBoltWrapper<Tuple, Tuple> boltWrapper =
                    new MergedInputsBoltWrapper<Tuple, Tuple>(bolt, boltId, null);
            boltWrapper.setStormTopology(stormTopology);
            outStream = mergedInputStream.transform(boltId, outType, boltWrapper);
        }

        if (outType != null) {
            // only for non-sink nodes
            final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
            op.put(outputStreamId, outStream);
            availableInputs.put(boltId, op);
        }
        outputStream = outStream;
    } else {
        final TypeInformation<SplitStreamType<Tuple>> outType =
                (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class);

        final SingleOutputStreamOperator<SplitStreamType<Tuple>> multiStream;

        // only one input
        if (inputStreams.entrySet().size() == 1) {
            final BoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs =
                    new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
            boltWrapperMultipleOutputs.setStormTopology(stormTopology);
            multiStream = singleInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
        } else {
            final MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs =
                    new MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>>(bolt, boltId, null);
            boltWrapperMultipleOutputs.setStormTopology(stormTopology);
            multiStream = mergedInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
        }

        final SplitStream<SplitStreamType<Tuple>> splitStream = multiStream.split(new StormStreamSelector<Tuple>());
        final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
        for (String outputStreamId : boltOutputs.keySet()) {
            SingleOutputStreamOperator<Tuple> outStream =
                    splitStream.select(outputStreamId).map(new SplitStreamMapper<Tuple>());
            outStream.getTransformation().setOutputType(declarer.getOutputType(outputStreamId));
            op.put(outputStreamId, outStream);
        }
        availableInputs.put(boltId, op);
        outputStream = multiStream;
    }
    return outputStream;
}
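The split/select branch above is taken whenever a bolt declares more than one output stream on the Storm side. A minimal sketch of such a bolt, using Storm's OutputFieldsDeclarer; the class, stream names, and routing logic are hypothetical, and the usual Storm imports (BaseRichBolt, OutputCollector, Fields, Values, etc.) are assumed:

// Hypothetical bolt for illustration; not taken from the snippet above.
public class SplittingBolt extends BaseRichBolt {
    private OutputCollector collector;

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        int value = input.getInteger(0);
        // route each tuple to one of the two declared streams
        collector.emit(value % 2 == 0 ? "evens" : "odds", new Values(value));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // two named output streams; boltOutputs.size() == 2 triggers the split/select path above
        declarer.declareStream("evens", new Fields("value"));
        declarer.declareStream("odds", new Fields("value"));
    }
}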
Use of org.apache.flink.streaming.api.datastream.DataStream in project camel by apache.
The class DataStreamFlinkProducer, method process.
@Override
public void process(Exchange exchange) throws Exception {
    DataStream ds = resolveDataStream(exchange);
    DataStreamCallback dataStreamCallback = resolveDataStreamCallback(exchange);
    Object body = exchange.getIn().getBody();
    Object result = body instanceof List
            ? dataStreamCallback.onDataStream(ds, ((List) body).toArray(new Object[0]))
            : dataStreamCallback.onDataStream(ds, body);
    collectResults(exchange, result);
}
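Note how a List body is unpacked into the payloads varargs before the callback is invoked. A hedged caller-side sketch of what that means, reusing the VoidDataStreamCallback style from the test below; the endpoint URI field and payload values are placeholders:

// Hypothetical caller: a List body arrives at the callback as individual payloads.
template.sendBodyAndHeader(flinkDataStreamUri, Arrays.asList("first", "second"),
        FlinkConstants.FLINK_DATASTREAM_CALLBACK_HEADER, new VoidDataStreamCallback() {
            @Override
            public void doOnDataStream(DataStream ds, Object... payloads) throws Exception {
                // payloads[0] == "first", payloads[1] == "second"
                ds.print();
            }
        });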
Use of org.apache.flink.streaming.api.datastream.DataStream in project camel by apache.
The class FlinkProducerTest, method shouldExecuteVoidDataStreamCallback.
@Test
public void shouldExecuteVoidDataStreamCallback() throws IOException {
    final File output = File.createTempFile("camel", "flink");
    output.delete();

    template.sendBodyAndHeader(flinkDataStreamUri, null, FlinkConstants.FLINK_DATASTREAM_CALLBACK_HEADER,
            new VoidDataStreamCallback() {
                @Override
                public void doOnDataStream(DataStream ds, Object... payloads) throws Exception {
                    ds.writeAsText(output.getAbsolutePath());
                }
            });

    Truth.assertThat(output.length()).isAtLeast(0L);
}
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class KafkaConsumerTestBase, method runCollectingSchemaTest.
/**
* Test that ensures that DeserializationSchema can emit multiple records via a Collector.
*
* @throws Exception
*/
public void runCollectingSchemaTest() throws Exception {
    final int elementCount = 20;
    final String topic = writeSequence("testCollectingSchema", elementCount, 1, 1);

    // read using custom schema
    final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.getExecutionEnvironment();
    env1.setParallelism(1);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);

    DataStream<Tuple2<Integer, String>> fromKafka = env1.addSource(
            kafkaServer.getConsumer(topic, new CollectingDeserializationSchema(elementCount), props)
                    .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<Integer, String>>() {
                        @Override
                        public long extractAscendingTimestamp(Tuple2<Integer, String> element) {
                            String string = element.f1;
                            return Long.parseLong(string.substring(0, string.length() - 1));
                        }
                    }));

    fromKafka.keyBy(t -> t.f0)
            .process(new KeyedProcessFunction<Integer, Tuple2<Integer, String>, Void>() {
                private boolean registered = false;

                @Override
                public void processElement(Tuple2<Integer, String> value, Context ctx, Collector<Void> out)
                        throws Exception {
                    if (!registered) {
                        ctx.timerService().registerEventTimeTimer(elementCount - 2);
                        registered = true;
                    }
                }

                @Override
                public void onTimer(long timestamp, OnTimerContext ctx, Collector<Void> out) throws Exception {
                    throw new SuccessException();
                }
            });

    tryExecute(env1, "Consume " + elementCount + " elements from Kafka");

    deleteTestTopic(topic);
}
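The CollectingDeserializationSchema used above is not included in this excerpt. A minimal sketch of what such a schema could look like, assuming it builds on the Collector-based deserialize variant of Flink's DeserializationSchema; the field layout and the emitted record shapes are guesses for illustration only:

// Hypothetical reconstruction; the real test class may differ.
public static class CollectingDeserializationSchema implements DeserializationSchema<Tuple2<Integer, String>> {

    // mirrors the constructor argument used in the test; not needed in this sketch
    private final int finalCount;

    public CollectingDeserializationSchema(int finalCount) {
        this.finalCount = finalCount;
    }

    @Override
    public Tuple2<Integer, String> deserialize(byte[] message) {
        throw new UnsupportedOperationException("only the Collector-based variant is used");
    }

    @Override
    public void deserialize(byte[] message, Collector<Tuple2<Integer, String>> out) throws IOException {
        // emit more than one record per Kafka message via the Collector
        int value = Integer.parseInt(new String(message, StandardCharsets.UTF_8));
        out.collect(new Tuple2<>(value, value + "a"));
        out.collect(new Tuple2<>(value, value + "b"));
    }

    @Override
    public boolean isEndOfStream(Tuple2<Integer, String> nextElement) {
        return false;
    }

    @Override
    public TypeInformation<Tuple2<Integer, String>> getProducedType() {
        return TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {});
    }
}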