Use of org.apache.flink.test.testfunctions.Tokenizer in project flink by apache.
The class TextOutputFormatITCase, method testProgram.
@Override
protected void testProgram() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> text = env.fromElements(WordCountData.TEXT);

    // tokenize into (word, 1) pairs, key by the word, and keep a running sum per word
    DataStream<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer())
            .keyBy(0)
            .sum(1);

    counts.writeAsText(resultPath);

    env.execute("WriteAsTextTest");
}
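Every snippet on this page plugs the same Tokenizer into flatMap, but the class itself is not shown. A minimal sketch, assuming the canonical WordCount tokenizer behavior (lowercase the line, split on non-word characters, emit one (word, 1) pair per non-empty token):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Sketch of the shared test Tokenizer; the exact implementation in
// org.apache.flink.test.testfunctions may differ in details.
public class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split the line into tokens
        String[] tokens = value.toLowerCase().split("\\W+");

        // emit a (word, 1) pair for every non-empty token
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}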
Use of org.apache.flink.test.testfunctions.Tokenizer in project flink by apache.
The class WordCountMapredITCase, method internalRun.
private void internalRun(boolean isTestDeprecatedAPI) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<LongWritable, Text>> input;
    if (isTestDeprecatedAPI) {
        input = env.readHadoopFile(new TextInputFormat(), LongWritable.class, Text.class, textPath);
    } else {
        input = env.createInput(readHadoopFile(new TextInputFormat(), LongWritable.class, Text.class, textPath));
    }

    DataSet<String> text = input.map(new MapFunction<Tuple2<LongWritable, Text>, String>() {
        @Override
        public String map(Tuple2<LongWritable, Text> value) throws Exception {
            return value.f1.toString();
        }
    });

    // split up the lines into pairs (2-tuples) containing (word, 1), then count per word
    DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1);

    DataSet<Tuple2<Text, LongWritable>> words = counts.map(new MapFunction<Tuple2<String, Integer>, Tuple2<Text, LongWritable>>() {
        @Override
        public Tuple2<Text, LongWritable> map(Tuple2<String, Integer> value) throws Exception {
            return new Tuple2<Text, LongWritable>(new Text(value.f0), new LongWritable(value.f1));
        }
    });

    // set up the Hadoop (mapred API) output format
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
    hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(resultPath));

    // output and execute
    words.output(hadoopOutputFormat);
    env.execute("Hadoop Compat WordCount");
}
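The unqualified readHadoopFile(...) in the non-deprecated branch is presumably a static import of HadoopInputs.readHadoopFile from the flink-hadoop-compatibility module. The isTestDeprecatedAPI flag suggests the test class drives internalRun once per input-format API; a plausible sketch of those drivers (method names are hypothetical, not from the original):

// Hypothetical test drivers: the same pipeline runs once against each API.
@Test
public void testDeprecatedReadHadoopFile() throws Exception {
    internalRun(true);   // deprecated ExecutionEnvironment#readHadoopFile
}

@Test
public void testReadHadoopFileViaCreateInput() throws Exception {
    internalRun(false);  // HadoopInputs.readHadoopFile + env.createInput
}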
Use of org.apache.flink.test.testfunctions.Tokenizer in project flink by apache.
The class WordCountMapreduceITCase, method internalRun.
private void internalRun(boolean isTestDeprecatedAPI) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<LongWritable, Text>> input;
    if (isTestDeprecatedAPI) {
        input = env.readHadoopFile(new TextInputFormat(), LongWritable.class, Text.class, textPath);
    } else {
        input = env.createInput(readHadoopFile(new TextInputFormat(), LongWritable.class, Text.class, textPath));
    }

    DataSet<String> text = input.map(new MapFunction<Tuple2<LongWritable, Text>, String>() {
        @Override
        public String map(Tuple2<LongWritable, Text> value) throws Exception {
            return value.f1.toString();
        }
    });

    // split up the lines into pairs (2-tuples) containing (word, 1), then count per word
    DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1);

    DataSet<Tuple2<Text, LongWritable>> words = counts.map(new MapFunction<Tuple2<String, Integer>, Tuple2<Text, LongWritable>>() {
        @Override
        public Tuple2<Text, LongWritable> map(Tuple2<String, Integer> value) throws Exception {
            return new Tuple2<Text, LongWritable>(new Text(value.f0), new LongWritable(value.f1));
        }
    });

    // set up the Hadoop (mapreduce API) output format
    Job job = Job.getInstance();
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), job);
    job.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(resultPath));

    // output and execute
    words.output(hadoopOutputFormat);
    env.execute("Hadoop Compat WordCount");
}
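One detail worth noting: this mapreduce-API variant still sets the old mapred.textoutputformat.separator key. Hadoop 2's new-API TextOutputFormat reads mapreduce.output.textoutputformat.separator instead, so a version-portable setup might set both keys (an illustrative addition, not part of the original test):

// Illustrative only: the original test sets just the old mapred.* key.
// Setting both should keep the single-space separator in effect whether the
// TextOutputFormat resolves the old or the new configuration name.
job.getConfiguration().set("mapred.textoutputformat.separator", " ");
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");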
Use of org.apache.flink.test.testfunctions.Tokenizer in project flink by apache.
The class LocalExecutorITCase, method getWordCountPlan.
private Plan getWordCountPlan(File inFile, File outFile, int parallelism) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.readTextFile(inFile.getAbsolutePath())
            .flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1)
            .writeAsCsv(outFile.getAbsolutePath());
    return env.createProgramPlan();
}
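Unlike the other snippets, this method never calls env.execute(); it returns a Plan for the test to run through a local executor. A minimal sketch of that call site, assuming the legacy PlanExecutor-style LocalExecutor lifecycle (method names may vary across Flink versions):

// Sketch only: assumes the legacy LocalExecutor lifecycle
// (start / executePlan / stop); exact signatures differ between versions.
LocalExecutor executor = new LocalExecutor();
executor.setDefaultOverwriteFiles(true);
executor.start();
try {
    Plan plan = getWordCountPlan(inFile, outFile, 4);
    executor.executePlan(plan);
} finally {
    executor.stop();
}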
Use of org.apache.flink.test.testfunctions.Tokenizer in project flink by apache.
The class CsvOutputFormatITCase, method testProgram.
@Override
protected void testProgram() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> text = env.fromElements(WordCountData.TEXT);

    // tokenize into (word, 1) pairs, key by the word, and keep a running sum per word
    DataStream<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer())
            .keyBy(0)
            .sum(1);

    counts.writeAsCsv(resultPath);

    env.execute("WriteAsCsvTest");
}
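In these ITCases the assertion typically happens after the job finishes, by comparing the files under resultPath with the expected output. A sketch, assuming the TestBaseUtils helper compareResultsByLinesInMemory and a hypothetical expected-counts constant:

// Sketch only: the constant name WordCountData.COUNTS_AS_TUPLES is assumed
// for illustration; compareResultsByLinesInMemory checks the produced
// file(s) line by line against the expected string, ignoring line order.
@Override
protected void postSubmit() throws Exception {
    compareResultsByLinesInMemory(WordCountData.COUNTS_AS_TUPLES, resultPath);
}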