Use of org.apache.hadoop.hbase.KeyValue in project Hive by Apache.
Class TestHBaseSerDe, method testHBaseSerDeWithHiveMapToHBaseColumnFamily.
public void testHBaseSerDeWithHiveMapToHBaseColumnFamily() throws SerDeException {
byte[] cfint = "cf-int".getBytes();
byte[] cfbyte = "cf-byte".getBytes();
byte[] cfshort = "cf-short".getBytes();
byte[] cflong = "cf-long".getBytes();
byte[] cffloat = "cf-float".getBytes();
byte[] cfdouble = "cf-double".getBytes();
byte[] cfbool = "cf-bool".getBytes();
byte[][] columnFamilies = new byte[][] { cfint, cfbyte, cfshort, cflong, cffloat, cfdouble, cfbool };
byte[][] rowKeys = new byte[][] {
    Integer.toString(1).getBytes(),
    Integer.toString(Integer.MIN_VALUE).getBytes(),
    Integer.toString(Integer.MAX_VALUE).getBytes() };
byte[][][] columnQualifiersAndValues = new byte[][][] {
    { Bytes.toBytes(1), new byte[] { 1 }, Bytes.toBytes((short) 1), Bytes.toBytes((long) 1),
      Bytes.toBytes(1.0F), Bytes.toBytes(1.0), Bytes.toBytes(true) },
    { Bytes.toBytes(Integer.MIN_VALUE), new byte[] { Byte.MIN_VALUE }, Bytes.toBytes(Short.MIN_VALUE),
      Bytes.toBytes(Long.MIN_VALUE), Bytes.toBytes(Float.MIN_VALUE), Bytes.toBytes(Double.MIN_VALUE),
      Bytes.toBytes(false) },
    { Bytes.toBytes(Integer.MAX_VALUE), new byte[] { Byte.MAX_VALUE }, Bytes.toBytes(Short.MAX_VALUE),
      Bytes.toBytes(Long.MAX_VALUE), Bytes.toBytes(Float.MAX_VALUE), Bytes.toBytes(Double.MAX_VALUE),
      Bytes.toBytes(true) } };
List<KeyValue> kvs = new ArrayList<KeyValue>();
Result[] r = new Result[] { null, null, null };
Put[] p = new Put[] { null, null, null };
for (int i = 0; i < r.length; i++) {
kvs.clear();
p[i] = new Put(rowKeys[i]);
for (int j = 0; j < columnQualifiersAndValues[i].length; j++) {
kvs.add(new KeyValue(rowKeys[i], columnFamilies[j], columnQualifiersAndValues[i][j], columnQualifiersAndValues[i][j]));
p[i].add(columnFamilies[j], columnQualifiersAndValues[i][j], columnQualifiersAndValues[i][j]);
}
r[i] = new Result(kvs);
}
Object[][] expectedData = {
    { new Text(Integer.toString(1)), new IntWritable(1), new ByteWritable((byte) 1),
      new ShortWritable((short) 1), new LongWritable(1), new FloatWritable(1.0F),
      new DoubleWritable(1.0), new BooleanWritable(true) },
    { new Text(Integer.toString(Integer.MIN_VALUE)), new IntWritable(Integer.MIN_VALUE),
      new ByteWritable(Byte.MIN_VALUE), new ShortWritable(Short.MIN_VALUE),
      new LongWritable(Long.MIN_VALUE), new FloatWritable(Float.MIN_VALUE),
      new DoubleWritable(Double.MIN_VALUE), new BooleanWritable(false) },
    { new Text(Integer.toString(Integer.MAX_VALUE)), new IntWritable(Integer.MAX_VALUE),
      new ByteWritable(Byte.MAX_VALUE), new ShortWritable(Short.MAX_VALUE),
      new LongWritable(Long.MAX_VALUE), new FloatWritable(Float.MAX_VALUE),
      new DoubleWritable(Double.MAX_VALUE), new BooleanWritable(true) } };
HBaseSerDe hbaseSerDe = new HBaseSerDe();
Configuration conf = new Configuration();
Properties tbl = createPropertiesForHiveMapHBaseColumnFamily();
SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);
deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues);
hbaseSerDe = new HBaseSerDe();
conf = new Configuration();
tbl = createPropertiesForHiveMapHBaseColumnFamilyII();
SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);
deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues);
}
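The createPropertiesForHiveMapHBaseColumnFamily helpers referenced above are not shown in this listing. Below is a hedged sketch of what the first variant plausibly sets; hbase.columns.mapping, columns, and columns.types are real Hive property keys, and a bare "cf:" entry maps a whole column family to a Hive map, but the column names and the "#b:b" binary storage specifiers here are assumptions (the qualifiers and values above are Bytes.toBytes-encoded primitives, so binary storage is implied):
private Properties createPropertiesForHiveMapHBaseColumnFamily() {
    Properties tbl = new Properties();
    // One Hive map column per HBase column family (names invented for illustration).
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>"
            + ":map<float,float>:map<double,double>:map<boolean,boolean>");
    // "#b:b" requests binary storage for both map keys and map values (assumed syntax).
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b,"
            + "cf-float:#b:b,cf-double:#b:b,cf-bool:#b:b");
    return tbl;
}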
Use of org.apache.hadoop.hbase.KeyValue in project Hive by Apache.
Class TestHBaseSerDe, method testHBaseSerDeI.
/**
* Test the default behavior of the Lazy family of objects and object inspectors.
*/
public void testHBaseSerDeI() throws SerDeException {
byte[] cfa = "cola".getBytes();
byte[] cfb = "colb".getBytes();
byte[] cfc = "colc".getBytes();
byte[] qualByte = "byte".getBytes();
byte[] qualShort = "short".getBytes();
byte[] qualInt = "int".getBytes();
byte[] qualLong = "long".getBytes();
byte[] qualFloat = "float".getBytes();
byte[] qualDouble = "double".getBytes();
byte[] qualString = "string".getBytes();
byte[] qualBool = "boolean".getBytes();
byte[] rowKey = Bytes.toBytes("test-row1");
// Data
List<KeyValue> kvs = new ArrayList<KeyValue>();
kvs.add(new KeyValue(rowKey, cfa, qualByte, Bytes.toBytes("123")));
kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes("456")));
kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes("789")));
kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes("1000")));
kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes("-0.01")));
kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes("5.3")));
kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive")));
kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes("true")));
Collections.sort(kvs, KeyValue.COMPARATOR);
Result r = new Result(kvs);
Put p = new Put(rowKey);
p.add(cfa, qualByte, Bytes.toBytes("123"));
p.add(cfb, qualShort, Bytes.toBytes("456"));
p.add(cfc, qualInt, Bytes.toBytes("789"));
p.add(cfa, qualLong, Bytes.toBytes("1000"));
p.add(cfb, qualFloat, Bytes.toBytes("-0.01"));
p.add(cfc, qualDouble, Bytes.toBytes("5.3"));
p.add(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive"));
p.add(cfb, qualBool, Bytes.toBytes("true"));
Object[] expectedFieldsData = { new Text("test-row1"), new ByteWritable((byte) 123),
    new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000),
    new FloatWritable(-0.01F), new DoubleWritable(5.3), new Text("Hadoop, HBase, and Hive"),
    new BooleanWritable(true) };
// Create, initialize, and test the SerDe
HBaseSerDe serDe = new HBaseSerDe();
Configuration conf = new Configuration();
Properties tbl = createPropertiesI_I();
SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
deserializeAndSerialize(serDe, r, p, expectedFieldsData);
serDe = new HBaseSerDe();
conf = new Configuration();
tbl = createPropertiesI_II();
SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
deserializeAndSerialize(serDe, r, p, expectedFieldsData);
serDe = new HBaseSerDe();
conf = new Configuration();
tbl = createPropertiesI_III();
SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
deserializeAndSerialize(serDe, r, p, expectedFieldsData);
serDe = new HBaseSerDe();
conf = new Configuration();
tbl = createPropertiesI_IV();
SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
deserializeAndSerialize(serDe, r, p, expectedFieldsData);
}
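The four createPropertiesI_* helpers are not included in this listing; they differ only in details such as storage-type defaults. A minimal sketch of the first, assuming standard Hive table properties (the Hive column names are invented for illustration, while the mapping follows the cola/colb/colc qualifiers used above; values in this test are written as UTF-8 strings, so the default string storage type is what the Lazy object inspectors parse):
private Properties createPropertiesI_I() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cola:byte,colb:short,colc:int,cola:long,colb:float,colc:double,cola:string,colb:boolean");
    return tbl;
}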
Use of org.apache.hadoop.hbase.KeyValue in project Hive by Apache.
Class TestHBaseSerDe, method testHBaseSerDeWithAvroSchemaUrl.
public void testHBaseSerDeWithAvroSchemaUrl() throws SerDeException, IOException {
byte[] cfa = "cola".getBytes();
byte[] qualAvro = "avro".getBytes();
byte[] rowKey = Bytes.toBytes("test-row1");
// Data
List<KeyValue> kvs = new ArrayList<KeyValue>();
byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
Result r = new Result(kvs);
Put p = new Put(rowKey);
// Post serialization, separators are automatically inserted between different fields in the
// struct. Currently there is no way to disable that, so the workaround here is to pad the
// data with the separator bytes before creating a "Put" object.
p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
Object[] expectedFieldsData = { "test-row1", "[[42, true, 42432234234]]" };
MiniDFSCluster miniDfs = null;
try {
// MiniDFSCluster litters files and folders all over the place.
miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);
miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
FSDataOutputStream out = miniDfs.getFileSystem().create(new Path("/path/to/schema/schema.avsc"));
out.writeBytes(RECORD_SCHEMA);
out.close();
String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";
// Create, initialize, and test the SerDe
HBaseSerDe serDe = new HBaseSerDe();
Configuration conf = new Configuration();
Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
serDe.initialize(conf, tbl);
deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING);
} finally {
// Tear down the cluster
if (miniDfs != null) {
miniDfs.shutdown();
}
}
}
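createPropertiesForHiveAvroSchemaUrl is likewise omitted. A sketch under stated assumptions: the per-column "cola.avro." property prefix follows the hbase-handler's "<family>.<qualifier>." convention, but treat the exact keys, especially the schema-URL and struct-autogeneration ones, as guesses rather than the project's actual helper:
private Properties createPropertiesForHiveAvroSchemaUrl(String schemaUrl) {
    Properties tbl = new Properties();
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    // Per-column Avro configuration, keyed by "<family>.<qualifier>." (assumed keys).
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty("cola.avro.avro.schema.url", schemaUrl);
    // Let the SerDe derive the Hive struct type from the Avro schema (assumed key).
    tbl.setProperty("hbase.struct.autogenerate", "true");
    return tbl;
}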
Use of org.apache.hadoop.hbase.KeyValue in project Hive by Apache.
Class TestHBaseSerDe, method testHBaseSerDeWithAvroSerClass.
public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException {
byte[] cfa = "cola".getBytes();
byte[] qualAvro = "avro".getBytes();
byte[] rowKey = Bytes.toBytes("test-row1");
// Data
List<KeyValue> kvs = new ArrayList<KeyValue>();
byte[] avroData = getTestAvroBytesFromClass1(1);
kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
Result r = new Result(kvs);
Put p = new Put(rowKey);
// Post serialization, separators are automatically inserted between different fields in the
// struct. Currently there is no way to disable that, so the workaround here is to pad the
// data with the separator bytes before creating a "Put" object.
p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
Object[] expectedFieldsData = { "test-row1",
    "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], "
        + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
        + "[999, 1234567890], [999, 1234455555]]]" };
// Create, initialize, and test the SerDe
HBaseSerDe serDe = new HBaseSerDe();
Configuration conf = new Configuration();
Properties tbl = createPropertiesForHiveAvroSerClass();
serDe.initialize(conf, tbl);
deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_2);
}
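Both Avro tests delegate to deserializeAndSerializeHiveAvro, which this listing omits. A rough sketch of its likely shape, assuming the hbase-handler's ResultWritable/PutWritable wrappers and plain ObjectInspector traversal; the assertion details are guesses, not the project's actual helper:
private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p,
    Object[] expectedFieldsData, String expectedDeserialized) throws SerDeException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    // Deserialize the HBase Result into a lazy row object.
    Object row = serDe.deserialize(new ResultWritable(r));
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
        assertNotNull(fieldData);
        assertEquals(expectedFieldsData[i], fieldData.toString().trim());
    }
    // Compare the JSON rendering of the row against the expected deserialized string.
    assertEquals(expectedDeserialized, SerDeUtils.getJSONString(row, soi));
    // Serialize back and spot-check against the expected Put (full comparison elided).
    PutWritable serialized = (PutWritable) serDe.serialize(row, soi);
    assertTrue(Bytes.equals(p.getRow(), serialized.getPut().getRow()));
}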
Use of org.apache.hadoop.hbase.KeyValue in project Hive by Apache.
Class TestHBaseSerDe, method testHBaseSerDeCustomStructValue.
public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException {
byte[] cfa = "cola".getBytes();
byte[] qualStruct = "struct".getBytes();
TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0);
byte[] key = testStruct.getBytes();
// Data
List<KeyValue> kvs = new ArrayList<KeyValue>();
byte[] testData = testStruct.getBytes();
kvs.add(new KeyValue(key, cfa, qualStruct, testData));
Result r = new Result(kvs);
byte[] putKey = testStruct.getBytesWithDelimiters();
Put p = new Put(putKey);
// Post serialization, separators are automatically inserted between different fields in the
// struct. Currently there is no way to disable that, so the workaround here is to pad the
// data with the separator bytes before creating a "Put" object.
p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2)));
// Create, initialize, and test the SerDe
HBaseSerDe serDe = new HBaseSerDe();
Configuration conf = new Configuration();
Properties tbl = createPropertiesForValueStruct();
serDe.initialize(conf, tbl);
deserializeAndSerializeHBaseValueStruct(serDe, r, p);
}
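For reference on the Bytes.padTail workaround above: padTail returns a copy of the array extended with the requested number of zero bytes, and the (byte) 0 passed to the TestStruct constructor is presumably the struct's field separator, which is why zero padding reproduces what the SerDe emits. A standalone illustration:
// Bytes.padTail appends `length` zero bytes to a copy of the input array.
byte[] data = Bytes.toBytes("ABC");        // { 'A', 'B', 'C' }
byte[] padded = Bytes.padTail(data, 2);    // { 'A', 'B', 'C', 0, 0 }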