Example 86 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hive by apache.

the class TestHBaseSerDe method testHBaseSerDeWithHiveMapToHBaseColumnFamily.

public void testHBaseSerDeWithHiveMapToHBaseColumnFamily() throws SerDeException {
    byte[] cfint = "cf-int".getBytes();
    byte[] cfbyte = "cf-byte".getBytes();
    byte[] cfshort = "cf-short".getBytes();
    byte[] cflong = "cf-long".getBytes();
    byte[] cffloat = "cf-float".getBytes();
    byte[] cfdouble = "cf-double".getBytes();
    byte[] cfbool = "cf-bool".getBytes();
    byte[][] columnFamilies = new byte[][] { cfint, cfbyte, cfshort, cflong, cffloat, cfdouble, cfbool };
    byte[][] rowKeys = new byte[][] { Integer.toString(1).getBytes(), Integer.toString(Integer.MIN_VALUE).getBytes(), Integer.toString(Integer.MAX_VALUE).getBytes() };
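    // Three rows by seven column families; each cell below reuses the same
    // byte pattern for both its qualifier and its value (see the loop further down).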
    byte[][][] columnQualifiersAndValues = new byte[][][] {
        { Bytes.toBytes(1), new byte[] { 1 }, Bytes.toBytes((short) 1),
          Bytes.toBytes((long) 1), Bytes.toBytes(1.0F), Bytes.toBytes(1.0),
          Bytes.toBytes(true) },
        { Bytes.toBytes(Integer.MIN_VALUE), new byte[] { Byte.MIN_VALUE },
          Bytes.toBytes(Short.MIN_VALUE), Bytes.toBytes(Long.MIN_VALUE),
          Bytes.toBytes(Float.MIN_VALUE), Bytes.toBytes(Double.MIN_VALUE),
          Bytes.toBytes(false) },
        { Bytes.toBytes(Integer.MAX_VALUE), new byte[] { Byte.MAX_VALUE },
          Bytes.toBytes(Short.MAX_VALUE), Bytes.toBytes(Long.MAX_VALUE),
          Bytes.toBytes(Float.MAX_VALUE), Bytes.toBytes(Double.MAX_VALUE),
          Bytes.toBytes(true) } };
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    Result[] r = new Result[] { null, null, null };
    Put[] p = new Put[] { null, null, null };
    for (int i = 0; i < r.length; i++) {
        kvs.clear();
        p[i] = new Put(rowKeys[i]);
        for (int j = 0; j < columnQualifiersAndValues[i].length; j++) {
            kvs.add(new KeyValue(rowKeys[i], columnFamilies[j], columnQualifiersAndValues[i][j], columnQualifiersAndValues[i][j]));
            p[i].add(columnFamilies[j], columnQualifiersAndValues[i][j], columnQualifiersAndValues[i][j]);
        }
        r[i] = new Result(kvs);
    }
    Object[][] expectedData = {
        { new Text(Integer.toString(1)), new IntWritable(1), new ByteWritable((byte) 1),
          new ShortWritable((short) 1), new LongWritable(1), new FloatWritable(1.0F),
          new DoubleWritable(1.0), new BooleanWritable(true) },
        { new Text(Integer.toString(Integer.MIN_VALUE)), new IntWritable(Integer.MIN_VALUE),
          new ByteWritable(Byte.MIN_VALUE), new ShortWritable(Short.MIN_VALUE),
          new LongWritable(Long.MIN_VALUE), new FloatWritable(Float.MIN_VALUE),
          new DoubleWritable(Double.MIN_VALUE), new BooleanWritable(false) },
        { new Text(Integer.toString(Integer.MAX_VALUE)), new IntWritable(Integer.MAX_VALUE),
          new ByteWritable(Byte.MAX_VALUE), new ShortWritable(Short.MAX_VALUE),
          new LongWritable(Long.MAX_VALUE), new FloatWritable(Float.MAX_VALUE),
          new DoubleWritable(Double.MAX_VALUE), new BooleanWritable(true) } };
    HBaseSerDe hbaseSerDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveMapHBaseColumnFamily();
    SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);
    deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues);
    hbaseSerDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesForHiveMapHBaseColumnFamilyII();
    SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);
    deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues);
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) AvroTableProperties(org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties) Properties(java.util.Properties) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)
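
For context: mapping an entire HBase column family to a Hive MAP column is driven by the hbase.columns.mapping table property, where a family name with an empty qualifier (e.g. "cf-int:") pulls the whole family into a map keyed by qualifier. The helper createPropertiesForHiveMapHBaseColumnFamily is not shown in this excerpt; below is a minimal sketch of what such a properties builder could look like, assuming binary storage via hbase.table.default.storage.type. The Hive column names and map types here are illustrative, not the test's actual values.

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

// Hypothetical sketch of the properties helper; the real method in
// TestHBaseSerDe may use different column names, types, and storage settings.
static Properties createMapToFamilyPropertiesSketch() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,map_int,map_byte,map_short,map_long,map_float,map_double,map_bool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string,map<int,int>,map<tinyint,tinyint>,map<smallint,smallint>,"
            + "map<bigint,bigint>,map<float,float>,map<double,double>,map<boolean,boolean>");
    // ":key" maps the row key; a family with an empty qualifier maps to a MAP.
    tbl.setProperty("hbase.columns.mapping",
        ":key,cf-int:,cf-byte:,cf-short:,cf-long:,cf-float:,cf-double:,cf-bool:");
    // The raw Bytes.toBytes cell values above imply binary storage (assumption).
    tbl.setProperty("hbase.table.default.storage.type", "binary");
    return tbl;
}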

Example 87 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hive by apache.

the class TestHBaseSerDe method testHBaseSerDeI.

/**
   * Test the default behavior of the Lazy family of objects and object inspectors.
   */
public void testHBaseSerDeI() throws SerDeException {
    byte[] cfa = "cola".getBytes();
    byte[] cfb = "colb".getBytes();
    byte[] cfc = "colc".getBytes();
    byte[] qualByte = "byte".getBytes();
    byte[] qualShort = "short".getBytes();
    byte[] qualInt = "int".getBytes();
    byte[] qualLong = "long".getBytes();
    byte[] qualFloat = "float".getBytes();
    byte[] qualDouble = "double".getBytes();
    byte[] qualString = "string".getBytes();
    byte[] qualBool = "boolean".getBytes();
    byte[] rowKey = Bytes.toBytes("test-row1");
    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    kvs.add(new KeyValue(rowKey, cfa, qualByte, Bytes.toBytes("123")));
    kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes("456")));
    kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes("789")));
    kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes("1000")));
    kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes("-0.01")));
    kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes("5.3")));
    kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive")));
    kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes("true")));
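    // Result expects its cells in sorted order, so sort with
    // KeyValue.COMPARATOR before wrapping the list.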
    Collections.sort(kvs, KeyValue.COMPARATOR);
    Result r = new Result(kvs);
    Put p = new Put(rowKey);
    p.add(cfa, qualByte, Bytes.toBytes("123"));
    p.add(cfb, qualShort, Bytes.toBytes("456"));
    p.add(cfc, qualInt, Bytes.toBytes("789"));
    p.add(cfa, qualLong, Bytes.toBytes("1000"));
    p.add(cfb, qualFloat, Bytes.toBytes("-0.01"));
    p.add(cfc, qualDouble, Bytes.toBytes("5.3"));
    p.add(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive"));
    p.add(cfb, qualBool, Bytes.toBytes("true"));
    Object[] expectedFieldsData = {
        new Text("test-row1"), new ByteWritable((byte) 123), new ShortWritable((short) 456),
        new IntWritable(789), new LongWritable(1000), new FloatWritable(-0.01F),
        new DoubleWritable(5.3), new Text("Hadoop, HBase, and Hive"),
        new BooleanWritable(true) };
    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesI_I();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_II();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_III();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_IV();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) AvroTableProperties(org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties) Properties(java.util.Properties) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)
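
The round-trip helper deserializeAndSerialize is not shown in this excerpt. The sketch below follows the standard SerDe test pattern: deserialize the Result, compare each struct field against the expected data, then serialize back and compare against the expected Put. The ResultWritable/PutWritable wrapping matches hive-hbase-handler's API, but the exact assertions are assumptions, not the test's real code.

import java.util.List;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hive.hbase.PutWritable;
import org.apache.hadoop.hive.hbase.ResultWritable;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import static org.junit.Assert.assertEquals;

// Sketch of the round-trip helper; the real method in TestHBaseSerDe may differ.
private void deserializeAndSerializeSketch(HBaseSerDe serDe, Result r, Put p,
        Object[] expectedFieldsData) throws SerDeException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    // Deserialize: HBaseSerDe consumes the Result wrapped in a ResultWritable.
    Object row = serDe.deserialize(new ResultWritable(r));
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
        // The real helper unwraps lazy objects to their Writable form before
        // comparing; this sketch falls back to a string comparison.
        assertEquals(expectedFieldsData[i].toString(), String.valueOf(fieldData));
    }
    // Serialize back: the SerDe emits a PutWritable; the string form is used
    // here as a simple structural check against the expected Put.
    PutWritable serialized = (PutWritable) serDe.serialize(row, soi);
    assertEquals(p.toString(), serialized.getPut().toString());
}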

Example 88 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hive by apache.

the class TestHBaseSerDe method testHBaseSerDeWithAvroSchemaUrl.

public void testHBaseSerDeWithAvroSchemaUrl() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();
    byte[] qualAvro = "avro".getBytes();
    byte[] rowKey = Bytes.toBytes("test-row1");
    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
    Result r = new Result(kvs);
    Put p = new Put(rowKey);
    // Post serialization, separators are automatically inserted between different fields in the
    // struct. Currently there is no way to disable that, so the workaround here is to pad the
    // data with the separator bytes before creating a "Put" object
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
    Object[] expectedFieldsData = { new String("test-row1"), new String("[[42, true, 42432234234]]") };
    MiniDFSCluster miniDfs = null;
    try {
        // MiniDFSCluster litters files and folders all over the place.
        miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);
        miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
        FSDataOutputStream out = miniDfs.getFileSystem().create(new Path("/path/to/schema/schema.avsc"));
        out.writeBytes(RECORD_SCHEMA);
        out.close();
        String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";
        // Create, initialize, and test the SerDe
        HBaseSerDe serDe = new HBaseSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
        serDe.initialize(conf, tbl);
        deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING);
    } finally {
        // Teardown the cluster
        if (miniDfs != null) {
            miniDfs.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) KeyValue(org.apache.hadoop.hbase.KeyValue) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) AvroTableProperties(org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties) Properties(java.util.Properties) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)
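
RECORD_SCHEMA itself is defined elsewhere in the test class. Judging from the expected output "[[42, true, 42432234234]]", it is an Avro record wrapping one nested record with an int, a boolean, and a long field. The reconstruction below is hypothetical; the field and record names are assumptions.

// Hypothetical reconstruction of RECORD_SCHEMA; the real constant lives
// elsewhere in TestHBaseSerDe and its names may differ.
static final String RECORD_SCHEMA_SKETCH = "{\n"
    + "  \"namespace\": \"testing\",\n"
    + "  \"name\": \"oneRecord\",\n"
    + "  \"type\": \"record\",\n"
    + "  \"fields\": [ {\n"
    + "    \"name\": \"aRecord\",\n"
    + "    \"type\": { \"type\": \"record\", \"name\": \"recordWithinARecord\", \"fields\": [\n"
    + "      { \"name\": \"int1\", \"type\": \"int\" },\n"
    + "      { \"name\": \"boolean1\", \"type\": \"boolean\" },\n"
    + "      { \"name\": \"long1\", \"type\": \"long\" }\n"
    + "    ] }\n"
    + "  } ]\n"
    + "}";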

Example 89 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hive by apache.

the class TestHBaseSerDe method testHBaseSerDeWithAvroSerClass.

public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();
    byte[] qualAvro = "avro".getBytes();
    byte[] rowKey = Bytes.toBytes("test-row1");
    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    byte[] avroData = getTestAvroBytesFromClass1(1);
    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
    Result r = new Result(kvs);
    Put p = new Put(rowKey);
    // Post serialization, separators are automatically inserted between different fields in the
    // struct. Currently there is no way to disable that, so the workaround here is to pad the
    // data with the separator bytes before creating a "Put" object
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
    Object[] expectedFieldsData = { new String("test-row1"),
        new String("[Avro Employee1, 11111, 25, FEMALE, "
            + "[[[Avro First Address1, Avro Second Address1, Avro City1, 123456, "
            + "0:[999, 1234567890], null, {testkey=testvalue}], "
            + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, "
            + "0:[999, 1234567890], null, {testkey=testvalue}]], "
            + "[999, 1234567890], [999, 1234455555]]]") };
    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveAvroSerClass();
    serDe.initialize(conf, tbl);
    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_2);
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) AvroTableProperties(org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties) Properties(java.util.Properties) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result)
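
getTestAvroBytesFromClass1 is also defined elsewhere in the test class. Encoding an Avro specific-record instance to bytes generally follows the pattern below; the generic helper, and the assumption that the test's record class (an "Employee"-like generated type, judging by the expected output) implements SpecificRecord, are both illustrative rather than taken from the test.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecord;

// Standard Avro binary encoding of a specific-record instance; a sketch of
// how a helper like getTestAvroBytesFromClass1 could produce its bytes.
static <T extends SpecificRecord> byte[] toAvroBytes(T record, Class<T> clazz)
        throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    SpecificDatumWriter<T> writer = new SpecificDatumWriter<>(clazz);
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    writer.write(record, encoder);
    encoder.flush();   // the binary encoder buffers; flush before reading
    return out.toByteArray();
}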

Example 90 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hive by apache.

the class TestHBaseSerDe method testHBaseSerDeCustomStructValue.

public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException {
    byte[] cfa = "cola".getBytes();
    byte[] qualStruct = "struct".getBytes();
    TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0);
    byte[] key = testStruct.getBytes();
    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    byte[] testData = testStruct.getBytes();
    kvs.add(new KeyValue(key, cfa, qualStruct, testData));
    Result r = new Result(kvs);
    byte[] putKey = testStruct.getBytesWithDelimiters();
    Put p = new Put(putKey);
    // Post serialization, separators are automatically inserted between different fields in the
    // struct. Currently there is no way to disable that, so the workaround here is to pad the
    // data with the separator bytes before creating a "Put" object
    p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2)));
    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForValueStruct();
    serDe.initialize(conf, tbl);
    deserializeAndSerializeHBaseValueStruct(serDe, r, p);
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) AvroTableProperties(org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties) Properties(java.util.Properties) Put(org.apache.hadoop.hbase.client.Put) Result(org.apache.hadoop.hbase.client.Result)
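
On the Bytes.padTail call above: padTail returns a copy of the array with the requested number of zero bytes appended, which is how the test makes the hand-built Put byte-compatible with the SerDe output and its trailing separator bytes. A minimal illustration follows; the count of 2 is taken from the test, not derived here, and whether zero bytes match the configured separators depends on the SerDe setup.

// Minimal demonstration of Bytes.padTail semantics (zero-byte tail padding).
byte[] raw = Bytes.toBytes("AB");
byte[] padded = Bytes.padTail(raw, 2);      // "AB" + {0x00, 0x00}
assert padded.length == raw.length + 2;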

Aggregations

KeyValue (org.apache.hadoop.hbase.KeyValue): 552
Test (org.junit.Test): 289
Cell (org.apache.hadoop.hbase.Cell): 193
ArrayList (java.util.ArrayList): 172
Put (org.apache.hadoop.hbase.client.Put): 98
Scan (org.apache.hadoop.hbase.client.Scan): 85
Result (org.apache.hadoop.hbase.client.Result): 70
Configuration (org.apache.hadoop.conf.Configuration): 64
Path (org.apache.hadoop.fs.Path): 55
ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag): 36
Tag (org.apache.hadoop.hbase.Tag): 35
ByteBuffer (java.nio.ByteBuffer): 34
List (java.util.List): 34
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 34
IOException (java.io.IOException): 32
TableName (org.apache.hadoop.hbase.TableName): 32
TreeMap (java.util.TreeMap): 29
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 28
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 28
WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit): 27