Search in sources :

Example 41 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class WikipediaEditsSourceTest method testWikipediaEditsSource.

/**
 * Runs a job that reads from {@link WikipediaEditsSource} into a sink that always throws, and
 * verifies that the sink's exception surfaces as the cause of the failure of
 * {@code env.execute()}.
 *
 * <p>NOTE: if you are behind a firewall you may need to use a SOCKS Proxy for this test.
 *
 * <p>We first check the connection to the IRC server, retrying a fixed number of times. If the
 * server cannot be reached, this test is effectively ignored: it only logs a message and
 * returns without failing.
 *
 * @throws Exception if setting up the job fails unexpectedly or the thread is interrupted
 *         while waiting between connection attempts
 * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/net/proxies.html">Socks Proxy</a>
 */
@Test(timeout = 120 * 1000)
public void testWikipediaEditsSource() throws Exception {
    final int numRetries = 5;
    final int waitBetweenRetriesMillis = 2000;
    final int connectTimeout = 1000;
    // The endpoint does not change between attempts, so look it up once.
    final String host = WikipediaEditsSource.DEFAULT_HOST;
    final int port = WikipediaEditsSource.DEFAULT_PORT;
    boolean success = false;
    for (int i = 0; i < numRetries && !success; i++) {
        // Check connection
        boolean canConnect = false;
        try (Socket s = new Socket()) {
            s.connect(new InetSocketAddress(host, port), connectTimeout);
            canConnect = s.isConnected();
        } catch (Exception ignored) {
            // Best effort probe: any failure to connect just means "not reachable".
            // Only Exception is caught so that JVM Errors are not silently swallowed.
        }
        if (canConnect) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.getConfig().disableSysoutLogging();
            DataStream<WikipediaEditEvent> edits = env.addSource(new WikipediaEditsSource());
            // The sink fails on the first element, which proves that the source emitted data.
            edits.addSink(new SinkFunction<WikipediaEditEvent>() {

                @Override
                public void invoke(WikipediaEditEvent value) throws Exception {
                    throw new Exception("Expected test exception");
                }
            });
            try {
                env.execute();
                // fail() raises an AssertionError, which the catch below does not intercept.
                fail("Did not throw expected Exception.");
            } catch (Exception e) {
                assertNotNull(e.getCause());
                assertEquals("Expected test exception", e.getCause().getMessage());
            }
            success = true;
        } else if (i + 1 < numRetries) {
            LOG.info("Failed to connect to IRC server ({}/{}). Retrying in {} ms.", i + 1, numRetries, waitBetweenRetriesMillis);
            Thread.sleep(waitBetweenRetriesMillis);
        } else {
            // Last attempt: there is nothing left to retry, so do not sleep.
            LOG.info("Failed to connect to IRC server ({}/{}). Giving up.", i + 1, numRetries);
        }
    }
    if (success) {
        LOG.info("Successfully ran test.");
    } else {
        LOG.info("Skipped test, because not able to connect to IRC server.");
    }
}
Also used : InetSocketAddress(java.net.InetSocketAddress) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Socket(java.net.Socket) Test(org.junit.Test)

Example 42 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class SpoutSplitExample method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Builds and runs the "Spout split stream example" job: a wrapped {@code RandomSpout} is split
 * into the streams named {@code RandomSpout.EVEN_STREAM} and {@code RandomSpout.ODD_STREAM},
 * each of which is enriched separately and then either written as text or printed.
 *
 * @param args command line arguments, forwarded to {@code parseParameters}
 * @throws Exception if building or executing the job fails
 */
public static void main(final String[] args) throws Exception {
    final boolean writeToFile = SpoutSplitExample.parseParameters(args);

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // source: the spout declares one raw output per split stream
    final String[] streamNames = { RandomSpout.EVEN_STREAM, RandomSpout.ODD_STREAM };
    final SpoutWrapper<SplitStreamType<Integer>> source = new SpoutWrapper<SplitStreamType<Integer>>(new RandomSpout(true, seed), streamNames, 1000);
    final DataStream<SplitStreamType<Integer>> numbers = env.addSource(source, TypeExtractor.getForObject(new SplitStreamType<Integer>()));

    // split by stream name
    final SplitStream<SplitStreamType<Integer>> bySelector = numbers.split(new StormStreamSelector<Integer>());
    final DataStream<SplitStreamType<Integer>> evens = bySelector.select(RandomSpout.EVEN_STREAM);
    final DataStream<SplitStreamType<Integer>> odds = bySelector.select(RandomSpout.ODD_STREAM);

    // even branch: unwrap, then enrich with a plain map function
    final DataStream<Integer> evenValues = evens.map(new SplitStreamMapper<Integer>()).returns(Integer.class);
    final DataStream<Tuple2<String, Integer>> evenResult = evenValues.map(new Enrich(true));

    // odd branch: unwrap, then enrich with a wrapped bolt
    final DataStream<Integer> oddValues = odds.map(new SplitStreamMapper<Integer>());
    final DataStream<Tuple2<String, Integer>> oddResult = oddValues.transform("oddBolt", TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)), new BoltWrapper<Integer, Tuple2<String, Integer>>(new VerifyAndEnrichBolt(false)));

    // emit result
    if (!writeToFile) {
        evenResult.print();
        oddResult.print();
    } else {
        evenResult.writeAsText(outputPath + "/even");
        oddResult.writeAsText(outputPath + "/odd");
    }

    // execute program
    env.execute("Spout split stream example");
}
Also used : RandomSpout(org.apache.flink.storm.split.operators.RandomSpout) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SplitStreamMapper(org.apache.flink.storm.util.SplitStreamMapper) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) VerifyAndEnrichBolt(org.apache.flink.storm.split.operators.VerifyAndEnrichBolt) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)

Example 43 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class BoltTokenizerWordCountPojo method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Builds and runs the "Streaming WordCount with POJO bolt tokenizer" job: sentences are
 * tokenized by a wrapped {@code BoltTokenizerByName}, keyed by tuple field 0 and summed on
 * field 1; the counts are either written as text or printed.
 *
 * @param args command line arguments, forwarded to {@code parseParameters}
 * @throws Exception if building or executing the job fails
 */
public static void main(final String[] args) throws Exception {
    final boolean parametersOk = parseParameters(args);
    if (!parametersOk) {
        return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<Sentence> sentences = getTextDataStream(env);

    // tokenize with the wrapped bolt, then count per word
    final DataStream<Tuple2<String, Integer>> tokens = sentences.transform("BoltTokenizerPojo", TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)), new BoltWrapper<Sentence, Tuple2<String, Integer>>(new BoltTokenizerByName()));
    final DataStream<Tuple2<String, Integer>> wordCounts = tokens.keyBy(0).sum(1);

    // emit result
    if (!fileOutput) {
        wordCounts.print();
    } else {
        wordCounts.writeAsText(outputPath);
    }

    // execute program
    env.execute("Streaming WordCount with POJO bolt tokenizer");
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) BoltTokenizerByName(org.apache.flink.storm.wordcount.operators.BoltTokenizerByName) Sentence(org.apache.flink.storm.wordcount.operators.WordCountDataPojos.Sentence)

Example 44 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class BoltTokenizerWordCountWithNames method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Builds and runs the "Streaming WordCount with schema bolt tokenizer" job: single-field
 * tuples are tokenized by a wrapped {@code BoltTokenizerByName} whose input schema names the
 * field "sentence", keyed by tuple field 0 and summed on field 1; the counts are either
 * written as text or printed.
 *
 * @param args command line arguments, forwarded to {@code parseParameters}
 * @throws Exception if building or executing the job fails
 */
public static void main(final String[] args) throws Exception {
    final boolean parametersOk = parseParameters(args);
    if (!parametersOk) {
        return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    final DataStream<Tuple1<String>> sentences = getTextDataStream(env);

    // tokenize with the wrapped bolt, then count per word
    final DataStream<Tuple2<String, Integer>> tokens = sentences.transform("BoltTokenizerWithNames", TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)), new BoltWrapper<Tuple1<String>, Tuple2<String, Integer>>(new BoltTokenizerByName(), new Fields("sentence")));
    final DataStream<Tuple2<String, Integer>> wordCounts = tokens.keyBy(0).sum(1);

    // emit result
    if (!fileOutput) {
        wordCounts.print();
    } else {
        wordCounts.writeAsText(outputPath);
    }

    // execute program
    env.execute("Streaming WordCount with schema bolt tokenizer");
}
Also used : Fields(org.apache.storm.tuple.Fields) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) BoltTokenizerByName(org.apache.flink.storm.wordcount.operators.BoltTokenizerByName)

Example 45 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class DataStreamUtils method collect.

/**
 * Returns an iterator to iterate over the elements of the DataStream.
 *
 * <p>A {@code CollectSink} with parallelism 1 is added to the stream, pointed at the address
 * and port of a {@code SocketStreamIterator}, and a {@code CallExecute} is started for the
 * stream's environment before the iterator is returned.
 *
 * @param stream the stream whose elements should be collected
 * @param <OUT> the element type of the stream
 * @return The iterator
 * @throws IOException if no address for receiving the data back could be determined
 */
public static <OUT> Iterator<OUT> collect(DataStream<OUT> stream) throws IOException {
    final StreamExecutionEnvironment env = stream.getExecutionEnvironment();
    TypeSerializer<OUT> serializer = stream.getType().createSerializer(env.getConfig());
    // Resolve the address before creating the iterator, so that a failed lookup does not
    // leave an already created iterator behind.
    // NOTE(review): the iterator presumably holds a bound socket (it exposes getPort()) - confirm.
    InetAddress clientAddress = resolveClientAddress(env);
    SocketStreamIterator<OUT> iter = new SocketStreamIterator<OUT>(serializer);
    DataStreamSink<OUT> sink = stream.addSink(new CollectSink<OUT>(clientAddress, iter.getPort(), serializer));
    // It would not work if multiple instances would connect to the same port
    sink.setParallelism(1);
    (new CallExecute(env, iter)).start();
    return iter;
}

/**
 * Finds out what IP of us should be given to CollectSink, that it will be able to connect to.
 *
 * @param env the execution environment of the stream being collected
 * @return the connecting address for a remote environment, the loopback address for a local
 *         environment, and this machine's local host address otherwise
 * @throws IOException if the address could not be determined
 */
private static InetAddress resolveClientAddress(StreamExecutionEnvironment env) throws IOException {
    if (env instanceof RemoteStreamEnvironment) {
        RemoteStreamEnvironment remoteEnv = (RemoteStreamEnvironment) env;
        String host = remoteEnv.getHost();
        int port = remoteEnv.getPort();
        try {
            return ConnectionUtils.findConnectingAddress(new InetSocketAddress(host, port), 2000, 400);
        } catch (Exception e) {
            throw new IOException("Could not determine an suitable network address to " + "receive back data from the streaming program.", e);
        }
    }
    if (env instanceof LocalStreamEnvironment) {
        return InetAddress.getLoopbackAddress();
    }
    try {
        return InetAddress.getLocalHost();
    } catch (UnknownHostException e) {
        throw new IOException("Could not determine this machines own local address to " + "receive back data from the streaming program.", e);
    }
}
Also used : LocalStreamEnvironment(org.apache.flink.streaming.api.environment.LocalStreamEnvironment) RemoteStreamEnvironment(org.apache.flink.streaming.api.environment.RemoteStreamEnvironment) UnknownHostException(java.net.UnknownHostException) InetSocketAddress(java.net.InetSocketAddress) IOException(java.io.IOException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) InetAddress(java.net.InetAddress)

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 383, Test (org.junit.Test): 286, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 192, OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 81, TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 75, Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 48, EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger): 42, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 34, Properties (java.util.Properties): 32, ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 31, TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows): 30, TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 27, SuccessException (org.apache.flink.test.util.SuccessException): 27, IOException (java.io.IOException): 24, Configuration (org.apache.flink.configuration.Configuration): 24, SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows): 24, KeySelector (org.apache.flink.api.java.functions.KeySelector): 22, ProcessingTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.ProcessingTimeTrigger): 21, ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor): 20, MapFunction (org.apache.flink.api.common.functions.MapFunction): 19