use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class WikipediaEditsSourceTest method testWikipediaEditsSource.
/**
 * Runs a job that reads from a {@link WikipediaEditsSource} into a sink that always throws,
 * and verifies that the job fails with that exception as the cause.
 *
 * <p>NOTE: if you are behind a firewall you may need to use a SOCKS Proxy for this test.
 *
 * <p>We first check the connection to the IRC server (up to {@code numRetries} attempts).
 * If it never succeeds, this test is effectively ignored: it only logs that it was skipped
 * and does not fail.
 *
 * @throws Exception if the wait between connection attempts is interrupted
 * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/net/proxies.html">Socks Proxy</a>
 */
@Test(timeout = 120 * 1000)
public void testWikipediaEditsSource() throws Exception {
    final int numRetries = 5;
    final int waitBetweenRetriesMillis = 2000;
    final int connectTimeout = 1000;

    boolean success = false;

    for (int i = 0; i < numRetries && !success; i++) {
        // Check connection
        boolean canConnect = false;

        String host = WikipediaEditsSource.DEFAULT_HOST;
        int port = WikipediaEditsSource.DEFAULT_PORT;

        try (Socket s = new Socket()) {
            s.connect(new InetSocketAddress(host, port), connectTimeout);
            canConnect = s.isConnected();
        } catch (Throwable ignored) {
            // Best-effort probe: any failure simply means the server is not reachable
            // right now, which is handled by the retry/skip logic below.
        }

        if (canConnect) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.getConfig().disableSysoutLogging();

            DataStream<WikipediaEditEvent> edits = env.addSource(new WikipediaEditsSource());

            // The sink throws for every element it receives, so the job is expected to fail.
            edits.addSink(new SinkFunction<WikipediaEditEvent>() {
                @Override
                public void invoke(WikipediaEditEvent value) throws Exception {
                    throw new Exception("Expected test exception");
                }
            });

            try {
                env.execute();
                fail("Did not throw expected Exception.");
            } catch (Exception e) {
                assertNotNull(e.getCause());
                assertEquals("Expected test exception", e.getCause().getMessage());
            }

            success = true;
        } else if (i + 1 < numRetries) {
            LOG.info("Failed to connect to IRC server ({}/{}). Retrying in {} ms.", i + 1, numRetries, waitBetweenRetriesMillis);
            Thread.sleep(waitBetweenRetriesMillis);
        } else {
            // Last attempt: no retry follows, so do not announce one and do not sleep.
            LOG.info("Failed to connect to IRC server ({}/{}). Giving up.", i + 1, numRetries);
        }
    }

    if (success) {
        LOG.info("Successfully ran test.");
    } else {
        LOG.info("Skipped test, because not able to connect to IRC server.");
    }
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class SpoutSplitExample method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Entry point of the spout split-stream example.
 *
 * <p>Wraps a {@code RandomSpout} that declares the even and odd streams as a source,
 * splits its output with a {@code StormStreamSelector}, and processes the two selected
 * streams separately: the even stream through {@code SplitStreamMapper} and {@code Enrich},
 * the odd stream through {@code SplitStreamMapper} and a wrapped {@code VerifyAndEnrichBolt}.
 * Both results are written as text below {@code outputPath} when
 * {@code parseParameters} returns {@code true}; otherwise they are printed.
 *
 * @param args program arguments, handed to {@code SpoutSplitExample.parseParameters}
 * @throws Exception if job execution fails
 */
public static void main(final String[] args) throws Exception {
    final boolean writeToFile = SpoutSplitExample.parseParameters(args);

    // set up the execution environment
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // source: one wrapped spout declaring both output streams
    final String[] declaredStreams = { RandomSpout.EVEN_STREAM, RandomSpout.ODD_STREAM };
    final SpoutWrapper<SplitStreamType<Integer>> wrappedSpout =
            new SpoutWrapper<SplitStreamType<Integer>>(new RandomSpout(true, seed), declaredStreams, 1000);
    final DataStream<SplitStreamType<Integer>> numbers =
            environment.addSource(wrappedSpout, TypeExtractor.getForObject(new SplitStreamType<Integer>()));

    // split by stream id and pick each side
    final SplitStream<SplitStreamType<Integer>> split = numbers.split(new StormStreamSelector<Integer>());
    final DataStream<SplitStreamType<Integer>> evens = split.select(RandomSpout.EVEN_STREAM);
    final DataStream<SplitStreamType<Integer>> odds = split.select(RandomSpout.ODD_STREAM);

    // even side: unwrap, then enrich with a map function
    final SplitStreamMapper<Integer> evenUnwrapper = new SplitStreamMapper<Integer>();
    final DataStream<Tuple2<String, Integer>> evenOutput = evens
            .map(evenUnwrapper)
            .returns(Integer.class)
            .map(new Enrich(true));

    // odd side: unwrap, then run the wrapped bolt
    final SplitStreamMapper<Integer> oddUnwrapper = new SplitStreamMapper<Integer>();
    final DataStream<Tuple2<String, Integer>> oddOutput = odds
            .map(oddUnwrapper)
            .transform(
                    "oddBolt",
                    TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                    new BoltWrapper<Integer, Tuple2<String, Integer>>(new VerifyAndEnrichBolt(false)));

    // emit result
    if (!writeToFile) {
        evenOutput.print();
        oddOutput.print();
    } else {
        evenOutput.writeAsText(outputPath + "/even");
        oddOutput.writeAsText(outputPath + "/odd");
    }

    // execute program
    environment.execute("Spout split stream example");
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class BoltTokenizerWordCountPojo method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Entry point of the streaming word count that tokenizes {@code Sentence} inputs
 * with a wrapped {@code BoltTokenizerByName}.
 *
 * <p>Returns immediately when {@code parseParameters} rejects the arguments. Otherwise the
 * tokenizer output is keyed by tuple field 0 and summed over tuple field 1, and the result
 * is written to {@code outputPath} if {@code fileOutput} is set or printed if it is not.
 *
 * @param args program arguments, handed to {@code parseParameters}
 * @throws Exception if job execution fails
 */
public static void main(final String[] args) throws Exception {
    final boolean argumentsAccepted = parseParameters(args);
    if (!argumentsAccepted) {
        return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<Sentence> sentences = getTextDataStream(environment);

    // tokenize with the wrapped bolt, then count per word
    final DataStream<Tuple2<String, Integer>> wordCounts = sentences
            .transform(
                    "BoltTokenizerPojo",
                    TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                    new BoltWrapper<Sentence, Tuple2<String, Integer>>(new BoltTokenizerByName()))
            .keyBy(0)
            .sum(1);

    // emit result
    if (!fileOutput) {
        wordCounts.print();
    } else {
        wordCounts.writeAsText(outputPath);
    }

    // execute program
    environment.execute("Streaming WordCount with POJO bolt tokenizer");
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class BoltTokenizerWordCountWithNames method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Entry point of the streaming word count that tokenizes {@code Tuple1<String>} inputs
 * with a wrapped {@code BoltTokenizerByName}, passing {@code Fields("sentence")} to the
 * wrapper as the input schema.
 *
 * <p>Returns immediately when {@code parseParameters} rejects the arguments. Otherwise the
 * tokenizer output is keyed by tuple field 0 and summed over tuple field 1, and the result
 * is written to {@code outputPath} if {@code fileOutput} is set or printed if it is not.
 *
 * @param args program arguments, handed to {@code parseParameters}
 * @throws Exception if job execution fails
 */
public static void main(final String[] args) throws Exception {
    final boolean argumentsAccepted = parseParameters(args);
    if (!argumentsAccepted) {
        return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<Tuple1<String>> lines = getTextDataStream(environment);

    // tokenize with the wrapped bolt, then count per word
    final DataStream<Tuple2<String, Integer>> wordCounts = lines
            .transform(
                    "BoltTokenizerWithNames",
                    TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                    new BoltWrapper<Tuple1<String>, Tuple2<String, Integer>>(
                            new BoltTokenizerByName(), new Fields("sentence")))
            .keyBy(0)
            .sum(1);

    // emit result
    if (!fileOutput) {
        wordCounts.print();
    } else {
        wordCounts.writeAsText(outputPath);
    }

    // execute program
    environment.execute("Streaming WordCount with schema bolt tokenizer");
}
use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.
the class DataStreamUtils method collect.
/**
 * Returns an iterator to iterate over the elements of the DataStream.
 *
 * <p>A {@code CollectSink} with parallelism 1 is added to the stream and pointed at the
 * address of this client and the port of the returned iterator. Execution is then started
 * through a {@code CallExecute} instance, and the iterator is returned right away.
 *
 * @param stream the stream whose elements should be iterated over
 * @return The iterator
 * @throws IOException if no network address for receiving the data could be determined,
 *         or if creating the iterator fails
 */
public static <OUT> Iterator<OUT> collect(DataStream<OUT> stream) throws IOException {
    StreamExecutionEnvironment env = stream.getExecutionEnvironment();
    TypeSerializer<OUT> serializer = stream.getType().createSerializer(env.getConfig());

    // Resolve the address before creating the iterator: address resolution can fail, and
    // an iterator created beforehand would be abandoned without anybody being able to
    // release it. NOTE(review): the iterator presumably holds a bound server socket
    // (it exposes getPort()) - confirm against SocketStreamIterator.
    InetAddress clientAddress = findClientAddress(env);

    SocketStreamIterator<OUT> iter = new SocketStreamIterator<OUT>(serializer);

    DataStreamSink<OUT> sink = stream.addSink(new CollectSink<OUT>(clientAddress, iter.getPort(), serializer));
    // It would not work if multiple instances would connect to the same port
    sink.setParallelism(1);

    (new CallExecute(env, iter)).start();

    return iter;
}

/**
 * Finds out what IP of us should be given to CollectSink, that it will be able to connect to.
 *
 * @param env the environment the streaming program runs in
 * @return the address of this client as seen from the streaming program
 * @throws IOException if no suitable address could be determined
 */
private static InetAddress findClientAddress(StreamExecutionEnvironment env) throws IOException {
    if (env instanceof RemoteStreamEnvironment) {
        // Remote execution: probe for a local address that can reach the remote host/port.
        String host = ((RemoteStreamEnvironment) env).getHost();
        int port = ((RemoteStreamEnvironment) env).getPort();
        try {
            return ConnectionUtils.findConnectingAddress(new InetSocketAddress(host, port), 2000, 400);
        } catch (Exception e) {
            throw new IOException("Could not determine an suitable network address to " + "receive back data from the streaming program.", e);
        }
    }

    if (env instanceof LocalStreamEnvironment) {
        return InetAddress.getLoopbackAddress();
    }

    try {
        return InetAddress.getLocalHost();
    } catch (UnknownHostException e) {
        throw new IOException("Could not determine this machines own local address to " + "receive back data from the streaming program.", e);
    }
}
Aggregations