Example 66 with Tuple2

use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

the class ElasticsearchSinkTestBase method runTransportClientTest.

/**
 * Tests that the Elasticsearch sink works properly using a {@link TransportClient}.
 */
public void runTransportClientTest() throws Exception {
    final String index = "transport-client-test-index";
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());
    Map<String, String> userConfig = new HashMap<>();
    // This instructs the sink to emit after every element; otherwise, elements would be buffered
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", CLUSTER_NAME);
    source.addSink(createElasticsearchSinkForEmbeddedNode(userConfig, new SourceSinkDataTestKit.TestElasticsearchSinkFunction(index)));
    env.execute("Elasticsearch TransportClient Test");
    // verify the results
    Client client = embeddedNodeEnv.getClient();
    SourceSinkDataTestKit.verifyProducedSinkData(client, index);
    client.close();
}
Also used : HashMap(java.util.HashMap) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourceSinkDataTestKit(org.apache.flink.streaming.connectors.elasticsearch.testutils.SourceSinkDataTestKit) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Client(org.elasticsearch.client.Client) TransportClient(org.elasticsearch.client.transport.TransportClient)
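
The helper classes above come from Flink's test utilities and are not shown here. A minimal sketch of what SourceSinkDataTestKit.TestDataSourceFunction might look like, assuming it emits 20 (id, "message #id") pairs that verifyProducedSinkData later checks for (the count and message format are assumptions):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

// A hypothetical stand-in for SourceSinkDataTestKit.TestDataSourceFunction;
// the element count (20) and message format are assumptions.
public static class TestDataSourceFunction implements SourceFunction<Tuple2<Integer, String>> {

    private static final long serialVersionUID = 1L;

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, String>> ctx) throws Exception {
        for (int i = 0; i < 20 && running; i++) {
            ctx.collect(Tuple2.of(i, "message #" + i));
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}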

Example 67 with Tuple2

use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

the class ElasticsearchSinkITCase method testDeprecatedIndexRequestBuilderVariant.

/**
 * Tests that the deprecated {@link IndexRequestBuilder} constructor variant works properly.
 */
@Test
public void testDeprecatedIndexRequestBuilderVariant() throws Exception {
    final String index = "index-req-builder-test-index";
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());
    Map<String, String> userConfig = new HashMap<>();
    // This instructs the sink to emit after every element; otherwise, elements would be buffered
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", CLUSTER_NAME);
    userConfig.put("node.local", "true");
    List<TransportAddress> transports = Lists.newArrayList();
    transports.add(new LocalTransportAddress("1"));
    source.addSink(new ElasticsearchSink<>(userConfig, transports, new TestIndexRequestBuilder(index)));
    env.execute("Elasticsearch Deprecated IndexRequestBuilder Bridge Test");
    // verify the results
    Client client = embeddedNodeEnv.getClient();
    SourceSinkDataTestKit.verifyProducedSinkData(client, index);
    client.close();
}
Also used : LocalTransportAddress(org.elasticsearch.common.transport.LocalTransportAddress) HashMap(java.util.HashMap) TransportAddress(org.elasticsearch.common.transport.TransportAddress) LocalTransportAddress(org.elasticsearch.common.transport.LocalTransportAddress) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourceSinkDataTestKit(org.apache.flink.streaming.connectors.elasticsearch.testutils.SourceSinkDataTestKit) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Client(org.elasticsearch.client.Client) Test(org.junit.Test)
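
The TestIndexRequestBuilder passed to the sink is not shown above. A hedged sketch of an implementation of the deprecated IndexRequestBuilder interface; the "data" field name and the document type string are assumptions for illustration, not the actual test code:

import java.util.HashMap;
import java.util.Map;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.elasticsearch.IndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

// A sketch of TestIndexRequestBuilder; the "data" field name and the
// document type are assumptions.
public static class TestIndexRequestBuilder implements IndexRequestBuilder<Tuple2<Integer, String>> {

    private static final long serialVersionUID = 1L;

    private final String index;

    public TestIndexRequestBuilder(String index) {
        this.index = index;
    }

    @Override
    public IndexRequest createIndexRequest(Tuple2<Integer, String> element, RuntimeContext ctx) {
        Map<String, Object> json = new HashMap<>();
        json.put("data", element.f1);
        return Requests.indexRequest()
                .index(index)
                .type("flink-es-test-type")
                .id(element.f0.toString())
                .source(json);
    }
}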

Example 68 with Tuple2

use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

the class WordCount method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataSet<String> text = getTextDataSet(env);
    // split up the lines into (word, 1) pairs, then group by the word
    // (tuple field 0) and sum the counts (tuple field 1)
    DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer())
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);
    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
        // execute program
        env.execute("WordCount Example");
    } else {
        counts.print();
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2)
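
The Tokenizer used in the flatMap is not shown above. In Flink's WordCount examples it is typically implemented as a FlatMapFunction along the following lines (a sketch; this project's version may differ in detail):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize the line and split it into words
        String[] tokens = value.toLowerCase().split("\\W+");
        // emit each word paired with a count of 1
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}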

Example 69 with Tuple2

use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

the class RollingSinkITCase method testNonRollingStringWriter.

/**
 * This tests {@link StringWriter} with non-rolling output.
 */
@Test
public void testNonRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());
    RollingSink<String> sink = new RollingSink<String>(outPath).setBucketer(new NonRollingBucketer()).setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");
    source.map(new MapFunction<Tuple2<Integer, String>, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Tuple2<Integer, String> value) throws Exception {
            return value.f1;
        }
    }).addSink(sink);
    env.execute("RollingSink String Write Test");
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
}
Also used : Path(org.apache.hadoop.fs.Path) InputStreamReader(java.io.InputStreamReader) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
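
The OddEvenFilter is not shown above. Since the broadcast source is filtered so that part-0-0 holds the even-numbered messages and part-1-0 the odd ones, a plausible sketch looks like this (the parity-by-subtask logic is an assumption inferred from the assertions):

import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.api.java.tuple.Tuple2;

public static class OddEvenFilter extends RichFilterFunction<Tuple2<Integer, String>> {

    private static final long serialVersionUID = 1L;

    @Override
    public boolean filter(Tuple2<Integer, String> value) {
        // subtask 0 keeps even-numbered elements, subtask 1 keeps odd ones
        if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
            return value.f0 % 2 == 0;
        } else {
            return value.f0 % 2 == 1;
        }
    }
}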

Example 70 with Tuple2

use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

the class RollingSinkITCase method testDateTimeRollingStringWriter.

/**
 * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to
 * produce rolling files. The clock of the DateTimeBucketer is set to a
 * {@link ModifyableClock} so that the test can advance time in lockstep with
 * the processing of elements, using latches.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS)).broadcast();
    // the parallel flatMap is chained to the sink; once a subtask has seen 5 elements
    // it fires its latch
    DataStream<String> mapped = source.flatMap(new RichFlatMapFunction<Tuple2<Integer, String>, String>() {

        private static final long serialVersionUID = 1L;

        int count = 0;

        @Override
        public void flatMap(Tuple2<Integer, String> value, Collector<String> out) throws Exception {
            out.collect(value.f1);
            count++;
            if (count >= 5) {
                if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                } else {
                    latch2.trigger();
                }
                count = 0;
            }
        }
    });
    RollingSink<String> sink = new RollingSink<String>(outPath).setBucketer(new DateTimeBucketer("ss")).setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");
    mapped.addSink(sink);
    env.execute("RollingSink String Write Test");
    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);
    // we expect 8 rolling files: 4 time intervals times a parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }
    Assert.assertEquals(8, numFiles);
}
Also used : Path(org.apache.hadoop.fs.Path) InputStreamReader(java.io.InputStreamReader) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
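
The ModifyableClock that the test installs on the DateTimeBucketer is not shown above. A minimal sketch, assuming the connector's Clock interface exposes a single currentTimeMillis() method:

import org.apache.flink.streaming.connectors.fs.Clock;

// The static setter lets the test (or its source function) advance time
// deterministically between the latches.
public static class ModifyableClock implements Clock {

    private static volatile long currentTime = 0;

    public static void setCurrentTime(long time) {
        currentTime = time;
    }

    @Override
    public long currentTimeMillis() {
        return currentTime;
    }
}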

Aggregations

Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 1159
Test (org.junit.Test) 871
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 486
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 266
Tuple3 (org.apache.flink.api.java.tuple.Tuple3) 195
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow) 137
ArrayList (java.util.ArrayList) 136
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 103
Plan (org.apache.flink.api.common.Plan) 103
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint) 103
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan) 99
Configuration (org.apache.flink.configuration.Configuration) 87
List (java.util.List) 82
IOException (java.io.IOException) 79
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation) 77
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor) 74
HashMap (java.util.HashMap) 72
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode) 66
Collection (java.util.Collection) 61
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 60