
Example 1 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.

The class HFileOutputFormat2, method configureIncrementalLoad.

static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }
    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName());
    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
        // Record the table name so writers can later be created on favored nodes
        LOG.info("bulkload locality sensitive enabled");
        conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }
    // Use the table's region boundaries as TotalOrderPartitioner (TOP) split points.
    LOG.info("Looking up current regions for table " + regionLocator.getName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());
    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
Also used: ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), Configuration (org.apache.hadoop.conf.Configuration), Put (org.apache.hadoop.hbase.client.Put)
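
For context, a bulk-load driver calls this method while setting up its job. The snippet below is a minimal, hypothetical driver sketch assuming HBase 1.x-era APIs; the table name, mapper class, and paths are illustrative, and imports are omitted as in the examples on this page.

// Hypothetical driver sketch; "mytable", MyHFileMapper and the paths are assumptions.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "prepare-hfiles");
job.setMapperClass(MyHFileMapper.class); // assumed mapper emitting <ImmutableBytesWritable, Put>
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
FileInputFormat.addInputPath(job, new Path("/input"));
FileOutputFormat.setOutputPath(job, new Path("/hfile-out"));
try (Connection connection = ConnectionFactory.createConnection(conf);
     Table table = connection.getTable(TableName.valueOf("mytable"));
     RegionLocator locator = connection.getRegionLocator(table.getName())) {
    // Wires up the output classes, reducer, partitioner and per-family settings shown above.
    HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
}
job.waitForCompletion(true);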

Example 2 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.

The class MultiHFileOutputFormat, method createMultiHFileRecordWriter.

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createMultiHFileRecordWriter(final TaskAttemptContext context) throws IOException {
    // Get the path of the output directory
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);
    // Map of tables to writers
    final Map<ImmutableBytesWritable, RecordWriter<ImmutableBytesWritable, V>> tableWriters = new HashMap<>();
    return new RecordWriter<ImmutableBytesWritable, V>() {

        @Override
        public void write(ImmutableBytesWritable tableName, V cell) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, V> tableWriter = tableWriters.get(tableName);
            // First time we see this table: create its output directory and a dedicated writer
            if (tableWriter == null) {
                // using table name as directory name
                final Path tableOutputDir = new Path(outputDir, Bytes.toString(tableName.copyBytes()));
                fs.mkdirs(tableOutputDir);
                LOG.info("Writing Table '" + tableName.toString() + "' data into following directory" + tableOutputDir.toString());
                // Create writer for one specific table
                tableWriter = new HFileOutputFormat2.HFileRecordWriter<>(context, tableOutputDir);
                // Put table into map
                tableWriters.put(tableName, tableWriter);
            }
            // Write the cell through the per-table writer; the row key is not used here, so pass null
            tableWriter.write(null, cell);
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, V> writer : tableWriters.values()) {
                writer.close(c);
            }
        }
    };
}
Also used: Path (org.apache.hadoop.fs.Path), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), Configuration (org.apache.hadoop.conf.Configuration), HashMap (java.util.HashMap), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), FileSystem (org.apache.hadoop.fs.FileSystem), HFileOutputFormat2 (org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2)
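
A mapper feeding this record writer keys each cell by its destination table. The sketch below is a hypothetical mapper, not part of the HBase source; the input format, column family and qualifier are assumptions.

// Hypothetical mapper emitting (table name, KeyValue) pairs; input lines are assumed to be "table,rowkey,value".
public class MultiTableMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

    @Override
    protected void map(LongWritable offset, Text line, Context context) throws IOException, InterruptedException {
        String[] fields = line.toString().split(",");
        ImmutableBytesWritable table = new ImmutableBytesWritable(Bytes.toBytes(fields[0]));
        KeyValue kv = new KeyValue(Bytes.toBytes(fields[1]), // row key
                Bytes.toBytes("cf"),                         // assumed column family
                Bytes.toBytes("q"),                          // assumed qualifier
                Bytes.toBytes(fields[2]));                   // value
        // The table-name key tells the record writer above which per-table writer to use.
        context.write(table, kv);
    }
}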

Example 3 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.

The class HFileOutputFormat2, method writePartitions.

/**
   * Write out a {@link SequenceFile} that can be read by
   * {@link TotalOrderPartitioner} that contains the split points in startKeys.
   */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: " + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
Also used: ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), SequenceFile (org.apache.hadoop.io.SequenceFile), TreeSet (java.util.TreeSet), FileSystem (org.apache.hadoop.fs.FileSystem), HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem)
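
The partitions file written here is consumed by Hadoop's TotalOrderPartitioner. The HBase method that does that wiring (configurePartitioner) is not shown on this page, so the following is only an approximation of the step, not the actual HBase code.

// Approximate wiring of the partitions file into TotalOrderPartitioner (temp path is an assumption).
Path partitionsPath = new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
writePartitions(conf, partitionsPath, startKeys);
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);

Dropping the first (empty) start key means a table with N regions yields N-1 split points, which TotalOrderPartitioner turns back into N reduce partitions, matching the job.setNumReduceTasks(startKeys.size()) call in Example 1.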

Example 4 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project tdi-studio-se by Talend.

The class HBaseStore, method run.

public static void run(String zookeeperHost, String zookeeperPort, String table, final String columns, Map<String, String> properties, TalendRDD<List<Object>> rdd, final List<Integer> keyList) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", zookeeperHost);
    conf.set("hbase.zookeeper.property.clientPort", zookeeperPort);
    conf.set("hbase.mapred.tablecolumns", columns);
    for (Entry<String, String> e : properties.entrySet()) {
        conf.set(e.getKey(), e.getValue());
    }
    TalendPairRDD<ImmutableBytesWritable, Put> hbaseRdd = rdd.mapToPair(new PairFunction<List<Object>, ImmutableBytesWritable, Put>() {

        private static final long serialVersionUID = 1L;

        public Tuple2<ImmutableBytesWritable, Put> call(List<Object> t) throws Exception {
            String key = "";
            for (int i : keyList) {
                key = key + t.get(i);
            }
            org.apache.hadoop.hbase.client.Put put = new org.apache.hadoop.hbase.client.Put(DigestUtils.md5("".equals(key) ? t.toString() : key));
            String[] cols = columns.split(" ");
            int i = 0;
            for (Object o : t) {
                if (cols.length > i) {
                    put.add(org.apache.hadoop.hbase.util.Bytes.toBytes(cols[i].split(":")[0]), org.apache.hadoop.hbase.util.Bytes.toBytes(cols[i].split(":")[1]), (o != null ? org.apache.hadoop.hbase.util.Bytes.toBytes(o.toString()) : null));
                }
                i++;
            }
            return new Tuple2<ImmutableBytesWritable, Put>(new ImmutableBytesWritable(), put);
        }
    });
    JobConf config = new JobConf(conf);
    config.set(TableOutputFormat.OUTPUT_TABLE, table);
    config.setOutputFormat(TableOutputFormat.class);
    hbaseRdd.saveAsHadoopDataset(config);
}
Also used: ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration), Configuration (org.apache.hadoop.conf.Configuration), Put (org.apache.hadoop.hbase.client.Put), IOException (java.io.IOException), Tuple2 (scala.Tuple2), List (java.util.List), JobConf (org.apache.hadoop.mapred.JobConf)
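
Outside of Talend's RDD wrappers, the same write path with a plain Spark JavaPairRDD looks roughly like the sketch below; the input RDD, column family, qualifier and table name are assumptions, not part of the Talend code.

// Rough plain-Spark equivalent of the pattern above (hypothetical names).
JavaPairRDD<String, String> rows = ...; // assumed input of (row key, value) pairs
JavaPairRDD<ImmutableBytesWritable, Put> hbaseRdd = rows.mapToPair(row -> {
    Put put = new Put(Bytes.toBytes(row._1()));
    put.add(Bytes.toBytes("cf"), Bytes.toBytes("col"), Bytes.toBytes(row._2())); // assumed family:qualifier
    return new Tuple2<>(new ImmutableBytesWritable(), put);
});
JobConf config = new JobConf(HBaseConfiguration.create());
config.set(TableOutputFormat.OUTPUT_TABLE, "target_table"); // assumed table name
config.setOutputFormat(TableOutputFormat.class);
hbaseRdd.saveAsHadoopDataset(config);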

Example 5 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project crunch by cloudera.

The class WordCountHBaseTest, method run.

public void run(Pipeline pipeline) throws IOException {
    Random rand = new Random();
    int postFix = Math.abs(rand.nextInt());
    String inputTableName = "crunch_words_" + postFix;
    String outputTableName = "crunch_counts_" + postFix;
    try {
        HTable inputTable = hbaseTestUtil.createTable(Bytes.toBytes(inputTableName), WORD_COLFAM);
        HTable outputTable = hbaseTestUtil.createTable(Bytes.toBytes(outputTableName), COUNTS_COLFAM);
        int key = 0;
        key = put(inputTable, key, "cat");
        key = put(inputTable, key, "cat");
        key = put(inputTable, key, "dog");
        Scan scan = new Scan();
        scan.addColumn(WORD_COLFAM, null);
        HBaseSourceTarget source = new HBaseSourceTarget(inputTableName, scan);
        PTable<ImmutableBytesWritable, Result> shakespeare = pipeline.read(source);
        pipeline.write(wordCount(shakespeare), new HBaseTarget(outputTableName));
        pipeline.done();
        assertIsLong(outputTable, "cat", 2);
        assertIsLong(outputTable, "dog", 1);
    } finally {
        // no explicit cleanup; the test tables are left in place
    }
}
Also used: ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), Random (java.util.Random), HBaseSourceTarget (org.apache.crunch.io.hbase.HBaseSourceTarget), HBaseTarget (org.apache.crunch.io.hbase.HBaseTarget), Scan (org.apache.hadoop.hbase.client.Scan), HTable (org.apache.hadoop.hbase.client.HTable), Result (org.apache.hadoop.hbase.client.Result)
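
The put() helper called above is defined elsewhere in the test class and is not shown on this page; a plausible sketch of it, assuming the test's WORD_COLFAM constant, follows.

// Plausible sketch of the test's put() helper (not the actual crunch source):
// writes one word per row and returns the next row key to use.
private static int put(HTable table, int key, String word) throws IOException {
    Put put = new Put(Bytes.toBytes(key));
    put.add(WORD_COLFAM, null, Bytes.toBytes(word)); // null qualifier, matching scan.addColumn(WORD_COLFAM, null) above
    table.put(put);
    return key + 1;
}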

Aggregations

ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 296 usages
Test (org.junit.Test): 86 usages
Expression (org.apache.phoenix.expression.Expression): 36 usages
IOException (java.io.IOException): 33 usages
PhoenixArray (org.apache.phoenix.schema.types.PhoenixArray): 30 usages
ArrayList (java.util.ArrayList): 28 usages
Configuration (org.apache.hadoop.conf.Configuration): 28 usages
Result (org.apache.hadoop.hbase.client.Result): 28 usages
Cell (org.apache.hadoop.hbase.Cell): 27 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 27 usages
LiteralExpression (org.apache.phoenix.expression.LiteralExpression): 27 usages
PTable (org.apache.phoenix.schema.PTable): 27 usages
PDataType (org.apache.phoenix.schema.types.PDataType): 26 usages
PSmallint (org.apache.phoenix.schema.types.PSmallint): 25 usages
PTinyint (org.apache.phoenix.schema.types.PTinyint): 23 usages
Put (org.apache.hadoop.hbase.client.Put): 20 usages
PUnsignedSmallint (org.apache.phoenix.schema.types.PUnsignedSmallint): 20 usages
PUnsignedTinyint (org.apache.phoenix.schema.types.PUnsignedTinyint): 20 usages
PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection): 19 usages
List (java.util.List): 18 usages