Search in sources:

Example 1 with TableOutputFormat

Use of org.apache.hadoop.hbase.mapreduce.TableOutputFormat in project flink by apache.

The method main of the class HBaseWriteExample.

// *************************************************************************
//     PROGRAM
// *************************************************************************
/**
 * Entry point of the word-count example whose result is emitted through a
 * Hadoop {@code TableOutputFormat} (job name: "WordCount (HBase sink) Example").
 *
 * <p>Steps, in order: validate the arguments via {@code parseParameters}, read the
 * input lines, count words, convert every {@code (word, count)} pair into a
 * {@code (Text, Mutation)} pair and hand it to the Hadoop output format.
 *
 * @param args command-line arguments, forwarded to {@code parseParameters}
 * @throws Exception propagated from job setup or from {@code env.execute}
 */
public static void main(String[] args) throws Exception {
    // Nothing to do when the arguments were rejected.
    if (!parseParameters(args)) {
        return;
    }

    // Execution environment and input data.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> lines = getTextDataSet(env);

    // Split up the lines in pairs (2-tuples) containing (word,1),
    // then group on the word (field 0) and sum up the counts (field 1).
    DataSet<Tuple2<String, Integer>> counts = lines
            .flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1);

    // Hadoop job carrying the output-table configuration.
    Job job = Job.getInstance();
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
    // TODO is "mapred.output.dir" really useful?
    job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);

    // Converts (word, count) into (row key as Text, Put carrying the count).
    RichMapFunction<Tuple2<String, Integer>, Tuple2<Text, Mutation>> toMutation =
            new RichMapFunction<Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {

                // One output tuple per function instance, created in open() and
                // refilled on every map() call.
                private transient Tuple2<Text, Mutation> outTuple;

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    outTuple = new Tuple2<Text, Mutation>();
                }

                @Override
                public Tuple2<Text, Mutation> map(Tuple2<String, Integer> wordCount) throws Exception {
                    // The word is used both as the Text key and as the Put's row key.
                    Put row = new Put(wordCount.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
                    row.add(
                            HBaseFlinkTestConstants.CF_SOME,
                            HBaseFlinkTestConstants.Q_SOME,
                            Bytes.toBytes(wordCount.f1));
                    outTuple.f0 = new Text(wordCount.f0);
                    outTuple.f1 = row;
                    return outTuple;
                }
            };

    // Emit the result through the wrapped Hadoop output format.
    counts
            .map(toMutation)
            .output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));

    // Execute the program.
    env.execute("WordCount (HBase sink) Example");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) Text(org.apache.hadoop.io.Text) Put(org.apache.hadoop.hbase.client.Put) TableOutputFormat(org.apache.hadoop.hbase.mapreduce.TableOutputFormat) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Mutation(org.apache.hadoop.hbase.client.Mutation) Job(org.apache.hadoop.mapreduce.Job)

Aggregations

RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)1 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)1 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)1 Configuration (org.apache.flink.configuration.Configuration)1 Mutation (org.apache.hadoop.hbase.client.Mutation)1 Put (org.apache.hadoop.hbase.client.Put)1 TableOutputFormat (org.apache.hadoop.hbase.mapreduce.TableOutputFormat)1 Text (org.apache.hadoop.io.Text)1 Job (org.apache.hadoop.mapreduce.Job)1