use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class ElasticsearchSinkTestBase method runTransportClientTest.
/**
 * Tests that the Elasticsearch sink works properly using a {@link TransportClient}.
 */
public void runTransportClientTest() throws Exception {
    final String index = "transport-client-test-index";

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Tuple2<Integer, String>> source =
        env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

    Map<String, String> userConfig = new HashMap<>();
    // This instructs the sink to emit after every element, otherwise they would be buffered
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", CLUSTER_NAME);

    source.addSink(createElasticsearchSinkForEmbeddedNode(
        userConfig, new SourceSinkDataTestKit.TestElasticsearchSinkFunction(index)));

    env.execute("Elasticsearch TransportClient Test");

    // verify the results
    Client client = embeddedNodeEnv.getClient();
    SourceSinkDataTestKit.verifyProducedSinkData(client, index);

    client.close();
}
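The SourceSinkDataTestKit.TestElasticsearchSinkFunction used above is part of the test kit and is not reproduced in this listing. A minimal sketch of an ElasticsearchSinkFunction in the same spirit, assuming each Tuple2<Integer, String> is indexed with f0 as the document id and f1 as the payload (the type name and field name below are illustrative, not the test kit's actual values), could look like this:

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

// Illustrative sink function: one IndexRequest per incoming element.
public class ExampleElasticsearchSinkFunction implements ElasticsearchSinkFunction<Tuple2<Integer, String>> {

    private static final long serialVersionUID = 1L;

    private final String index;

    public ExampleElasticsearchSinkFunction(String index) {
        this.index = index;
    }

    @Override
    public void process(Tuple2<Integer, String> element, RuntimeContext ctx, RequestIndexer indexer) {
        Map<String, Object> json = new HashMap<>();
        json.put("data", element.f1);          // "data" is an assumed field name

        IndexRequest request = Requests.indexRequest()
            .index(index)
            .type("example-type")              // assumed type name
            .id(element.f0.toString())
            .source(json);

        indexer.add(request);
    }
}

Because CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS is set to "1" in the test, every such request is flushed immediately instead of being buffered in a bulk batch.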
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class ElasticsearchSinkITCase method testDeprecatedIndexRequestBuilderVariant.
/**
 * Tests that the deprecated {@link IndexRequestBuilder} constructor variant works properly.
 */
@Test
public void testDeprecatedIndexRequestBuilderVariant() throws Exception {
    final String index = "index-req-builder-test-index";

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Tuple2<Integer, String>> source =
        env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

    Map<String, String> userConfig = new HashMap<>();
    // This instructs the sink to emit after every element, otherwise they would be buffered
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", CLUSTER_NAME);
    userConfig.put("node.local", "true");

    List<TransportAddress> transports = Lists.newArrayList();
    transports.add(new LocalTransportAddress("1"));

    source.addSink(new ElasticsearchSink<>(userConfig, transports, new TestIndexRequestBuilder(index)));

    env.execute("Elasticsearch Deprecated IndexRequestBuilder Bridge Test");

    // verify the results
    Client client = embeddedNodeEnv.getClient();
    SourceSinkDataTestKit.verifyProducedSinkData(client, index);

    client.close();
}
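TestIndexRequestBuilder is defined in the test class and not shown here. Assuming the deprecated IndexRequestBuilder interface exposes a single createIndexRequest(element, ctx) method (the import path, field name, and type name below are illustrative), a sketch of such a builder might be:

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.elasticsearch.IndexRequestBuilder; // package assumed for illustration
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

// Illustrative builder for the deprecated IndexRequestBuilder interface:
// every element becomes exactly one IndexRequest against the given index.
public class ExampleIndexRequestBuilder implements IndexRequestBuilder<Tuple2<Integer, String>> {

    private static final long serialVersionUID = 1L;

    private final String index;

    public ExampleIndexRequestBuilder(String index) {
        this.index = index;
    }

    @Override
    public IndexRequest createIndexRequest(Tuple2<Integer, String> element, RuntimeContext ctx) {
        Map<String, Object> json = new HashMap<>();
        json.put("data", element.f1);          // "data" is an assumed field name

        return Requests.indexRequest()
            .index(index)
            .type("example-type")              // assumed type name
            .id(element.f0.toString())
            .source(json);
    }
}

The test exercises the bridge that adapts this older per-element builder onto the newer ElasticsearchSinkFunction-based sink shown in the previous example.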
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class WordCount method main.
// *************************************************************************
// PROGRAM
// *************************************************************************

public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = getTextDataSet(env);

    // split up the lines in pairs (2-tuples) containing: (word,1)
    DataSet<Tuple2<String, Integer>> counts =
        text.flatMap(new Tokenizer())
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
        // execute program
        env.execute("WordCount Example");
    } else {
        counts.print();
    }
}
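The Tokenizer referenced in the plan above is the usual WordCount user function: it lower-cases each line, splits it on non-word characters, and emits a (word, 1) tuple per token, which groupBy(0) and the SUM aggregation then reduce to per-word counts. Roughly:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Splits each line into words and emits a (word, 1) tuple per word.
public final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split the line into words
        String[] tokens = value.toLowerCase().split("\\W+");

        // emit the pairs
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}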
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class RollingSinkITCase method testNonRollingStringWriter.
/**
 * This tests {@link StringWriter} with non-rolling output.
 */
@Test
public void testNonRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source = env
        .addSource(new TestSourceFunction(NUM_ELEMENTS))
        .broadcast()
        .filter(new OddEvenFilter());

    RollingSink<String> sink = new RollingSink<String>(outPath)
        .setBucketer(new NonRollingBucketer())
        .setPartPrefix("part")
        .setPendingPrefix("")
        .setPendingSuffix("");

    source.map(new MapFunction<Tuple2<Integer, String>, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(Tuple2<Integer, String> value) throws Exception {
            return value.f1;
        }
    }).addSink(sink);

    env.execute("RollingSink String Write Test");

    // subtask 0 wrote the even-numbered messages to part-0-0
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();

    // subtask 1 wrote the odd-numbered messages to part-1-0
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
}
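TestSourceFunction and OddEvenFilter come from the test class itself and are not shown here. The assertions above (even message numbers in part-0-0, odd ones in part-1-0) only hold if the filter splits the broadcast stream by subtask index; a hypothetical filter along those lines, assuming the integer field f0 carries the message number, would be:

import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical filter: subtask 0 keeps even-numbered elements, subtask 1 keeps odd-numbered
// ones, so each parallel RollingSink instance writes a disjoint half of the broadcast stream.
public class ExampleOddEvenFilter extends RichFilterFunction<Tuple2<Integer, String>> {

    private static final long serialVersionUID = 1L;

    @Override
    public boolean filter(Tuple2<Integer, String> value) {
        if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
            return value.f0 % 2 == 0;
        } else {
            return value.f0 % 2 == 1;
        }
    }
}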
use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
the class RollingSinkITCase method testDateTimeRollingStringWriter.
/**
 * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to
 * produce rolling files. The clock of DateTimeBucketer is set to
 * {@link ModifyableClock} to keep the time in lockstep with the processing of elements using
 * latches.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";

    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source = env
        .addSource(new WaitingTestSourceFunction(NUM_ELEMENTS))
        .broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped = source.flatMap(new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
        private static final long serialVersionUID = 1L;

        int count = 0;

        @Override
        public void flatMap(Tuple2<Integer, String> value, Collector<String> out) throws Exception {
            out.collect(value.f1);
            count++;
            if (count >= 5) {
                if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                } else {
                    latch2.trigger();
                }
                count = 0;
            }
        }
    });

    RollingSink<String> sink = new RollingSink<String>(outPath)
        .setBucketer(new DateTimeBucketer("ss"))
        .setPartPrefix("part")
        .setPendingPrefix("")
        .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(8, numFiles);
}
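ModifyableClock is defined in the test class and not reproduced in this listing. Assuming the Clock abstraction used by DateTimeBucketer exposes a single currentTimeMillis() method, a sketch of such a manually advanced clock could be:

import org.apache.flink.streaming.connectors.fs.Clock;

// Hypothetical manually-advanced clock: time only moves when the test sets it, so the
// DateTimeBucketer ("ss" pattern above) rolls to a new bucket exactly when the test decides.
public class ExampleModifyableClock implements Clock {

    private static volatile long currentTime = 0;

    public static void setCurrentTime(long time) {
        currentTime = time;
    }

    @Override
    public long currentTimeMillis() {
        return currentTime;
    }
}

With the time under test control, the test can advance the clock after each batch of 5 elements, which is what produces the four "ss"-based buckets (00, 05, 10, 15) checked above.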