Example 6 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class VectorAssignRow, the method assignRowColumn:

private void assignRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, Object object) {
    if (object == null) {
        assignNullRowColumn(columnVector, batchIndex, targetTypeInfo);
        return;
    }
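    // Dispatch on the target type's category. Primitive values may arrive either as
    // plain Java objects or as Hadoop/Hive Writables, so each primitive case handles both.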
    switch(targetTypeInfo.getCategory()) {
        case PRIMITIVE:
            {
                final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
                switch(targetPrimitiveCategory) {
                    case VOID:
                        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
                        return;
                    case BOOLEAN:
                        if (object instanceof Boolean) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = (((Boolean) object) ? 1 : 0);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = (((BooleanWritable) object).get() ? 1 : 0);
                        }
                        break;
                    case BYTE:
                        if (object instanceof Byte) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Byte) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((ByteWritable) object).get();
                        }
                        break;
                    case SHORT:
                        if (object instanceof Short) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Short) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((ShortWritable) object).get();
                        }
                        break;
                    case INT:
                        if (object instanceof Integer) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Integer) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((IntWritable) object).get();
                        }
                        break;
                    case LONG:
                        if (object instanceof Long) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((Long) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((LongWritable) object).get();
                        }
                        break;
                    case TIMESTAMP:
                        if (object instanceof Timestamp) {
                            ((TimestampColumnVector) columnVector).set(batchIndex, ((Timestamp) object).toSqlTimestamp());
                        } else {
                            ((TimestampColumnVector) columnVector).set(batchIndex, ((TimestampWritableV2) object).getTimestamp().toSqlTimestamp());
                        }
                        break;
                    case DATE:
                        if (object instanceof Date) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = DateWritableV2.dateToDays((Date) object);
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((DateWritableV2) object).getDays();
                        }
                        break;
                    case FLOAT:
                        if (object instanceof Float) {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Float) object);
                        } else {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((FloatWritable) object).get();
                        }
                        break;
                    case DOUBLE:
                        if (object instanceof Double) {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Double) object);
                        } else {
                            ((DoubleColumnVector) columnVector).vector[batchIndex] = ((DoubleWritable) object).get();
                        }
                        break;
                    case BINARY:
                        {
                            if (object instanceof byte[]) {
                                byte[] bytes = (byte[]) object;
                                ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                            } else {
                                BytesWritable bw = (BytesWritable) object;
                                ((BytesColumnVector) columnVector).setVal(batchIndex, bw.getBytes(), 0, bw.getLength());
                            }
                        }
                        break;
                    case STRING:
                        {
                            if (object instanceof String) {
                                String string = (String) object;
                                byte[] bytes = string.getBytes();
                                ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                            } else {
                                Text tw = (Text) object;
                                ((BytesColumnVector) columnVector).setVal(batchIndex, tw.getBytes(), 0, tw.getLength());
                            }
                        }
                        break;
                    case VARCHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            // We store VARCHAR type stripped of pads.
                            HiveVarchar hiveVarchar;
                            if (object instanceof HiveVarchar) {
                                hiveVarchar = (HiveVarchar) object;
                            } else {
                                hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
                            }
                            // TODO: HIVE-13624 Do we need maxLength checking?
                            byte[] bytes = hiveVarchar.getValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case CHAR:
                        {
                            // UNDONE: Performance problem with conversion to String, then bytes...
                            // We store CHAR type stripped of pads.
                            HiveChar hiveChar;
                            if (object instanceof HiveChar) {
                                hiveChar = (HiveChar) object;
                            } else {
                                hiveChar = ((HiveCharWritable) object).getHiveChar();
                            }
                            // TODO: HIVE-13624 Do we need maxLength checking?
                            // We store CHAR in vector row batch with padding stripped.
                            byte[] bytes = hiveChar.getStrippedValue().getBytes();
                            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
                        }
                        break;
                    case DECIMAL:
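                        // DECIMAL columns are vectorized either as DecimalColumnVector or, for
                        // small precisions, as the long-backed Decimal64ColumnVector.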
                        if (columnVector instanceof DecimalColumnVector) {
                            if (object instanceof HiveDecimal) {
                                ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimal) object);
                            } else {
                                ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimalWritable) object);
                            }
                        } else {
                            if (object instanceof HiveDecimal) {
                                ((Decimal64ColumnVector) columnVector).set(batchIndex, (HiveDecimal) object);
                            } else {
                                ((Decimal64ColumnVector) columnVector).set(batchIndex, (HiveDecimalWritable) object);
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        if (object instanceof HiveIntervalYearMonth) {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonth) object).getTotalMonths();
                        } else {
                            ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths();
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        if (object instanceof HiveIntervalDayTime) {
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, (HiveIntervalDayTime) object);
                        } else {
                            ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime());
                        }
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
                }
            }
            break;
        case LIST:
            {
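                // Append the list's elements to the shared child vector, recording this
                // row's slice into the child as an (offset, length) pair.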
                final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
                final ListTypeInfo listTypeInfo = (ListTypeInfo) targetTypeInfo;
                final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
                final List list = (List) object;
                final int size = list.size();
                final int childCount = listColumnVector.childCount;
                listColumnVector.offsets[batchIndex] = childCount;
                listColumnVector.lengths[batchIndex] = size;
                listColumnVector.childCount = childCount + size;
                listColumnVector.child.ensureSize(childCount + size, true);
                for (int i = 0; i < size; i++) {
                    assignRowColumn(listColumnVector.child, childCount + i, elementTypeInfo, list.get(i));
                }
            }
            break;
        case MAP:
            {
                final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
                final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
                final Map<Object, Object> map = (Map<Object, Object>) object;
                final int size = map.size();
                int childCount = mapColumnVector.childCount;
                mapColumnVector.offsets[batchIndex] = childCount;
                mapColumnVector.lengths[batchIndex] = size;
                mapColumnVector.keys.ensureSize(childCount + size, true);
                mapColumnVector.values.ensureSize(childCount + size, true);
                for (Map.Entry<Object, Object> entry : map.entrySet()) {
                    assignRowColumn(mapColumnVector.keys, childCount, mapTypeInfo.getMapKeyTypeInfo(), entry.getKey());
                    assignRowColumn(mapColumnVector.values, childCount, mapTypeInfo.getMapValueTypeInfo(), entry.getValue());
                    childCount++;
                }
                mapColumnVector.childCount = childCount;
            }
            break;
        case STRUCT:
            {
                final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
                final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
                final List<TypeInfo> targetFieldTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
                final int size = targetFieldTypeInfos.size();
                if (object instanceof List) {
                    final List struct = (List) object;
                    for (int i = 0; i < size; i++) {
                        assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), struct.get(i));
                    }
                } else {
                    final Object[] array = (Object[]) object;
                    for (int i = 0; i < size; i++) {
                        assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), array[i]);
                    }
                }
            }
            break;
        case UNION:
            {
                final StandardUnion union = (StandardUnion) object;
                final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
                final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
                final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final byte tag = union.getTag();
                unionColumnVector.tags[batchIndex] = tag;
                assignRowColumn(unionColumnVector.fields[tag], batchIndex, objectTypeInfos.get(tag), union.getObject());
            }
            break;
        default:
            throw new RuntimeException("Category " + targetTypeInfo.getCategory().name() + " not supported");
    }
    /*
     * We always set the null flag to false when there is a value.
     */
    columnVector.isNull[batchIndex] = false;
}
Also used: HiveChar (org.apache.hadoop.hive.common.type.HiveChar), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), Timestamp (org.apache.hadoop.hive.common.type.Timestamp), HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal), List (java.util.List), ArrayList (java.util.ArrayList), LongWritable (org.apache.hadoop.io.LongWritable), PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), HiveIntervalDayTime (org.apache.hadoop.hive.common.type.HiveIntervalDayTime), HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable), DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2), BytesWritable (org.apache.hadoop.io.BytesWritable), Text (org.apache.hadoop.io.Text), HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable), HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar), MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo), ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo), CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo), Date (org.apache.hadoop.hive.common.type.Date), FloatWritable (org.apache.hadoop.io.FloatWritable), HiveIntervalYearMonth (org.apache.hadoop.hive.common.type.HiveIntervalYearMonth), BooleanWritable (org.apache.hadoop.io.BooleanWritable), StandardUnion (org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion), Map (java.util.Map)
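
The method above is private; callers reach it through VectorAssignRow's public surface. Below is a minimal usage sketch, not taken from the Hive sources: the init(List<String>) overload and the public four-argument assignRowColumn(batch, batchIndex, logicalColumnIndex, object) wrapper are assumptions about that surface.

// Hypothetical usage sketch: exercising the BYTE branch above through
// VectorAssignRow's public API. The init(List<String>) overload and the public
// assignRowColumn(batch, batchIndex, logicalColumnIndex, object) wrapper are
// assumed, based on how Hive's vectorization tests typically drive this class.
import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.ByteWritable;

public class VectorAssignRowSketch {
    public static void main(String[] args) throws Exception {
        VectorAssignRow assignRow = new VectorAssignRow();
        // One TINYINT column; tinyint values land in a LongColumnVector.
        assignRow.init(Arrays.asList("tinyint"));

        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        batch.cols[0] = new LongColumnVector();

        // A ByteWritable and a plain Byte both take the BYTE case shown above.
        assignRow.assignRowColumn(batch, 0, 0, new ByteWritable((byte) 42));
        assignRow.assignRowColumn(batch, 1, 0, (byte) 43);
        batch.size = 2;

        LongColumnVector col = (LongColumnVector) batch.cols[0];
        System.out.println(col.vector[0] + ", " + col.vector[1]); // 42, 43
    }
}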

Example 7 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TestHBaseSerDe, the method testHBaseSerDeWithHiveMapToHBaseColumnFamily:

@Test
public void testHBaseSerDeWithHiveMapToHBaseColumnFamily() throws SerDeException {
    byte[] cfint = "cf-int".getBytes();
    byte[] cfbyte = "cf-byte".getBytes();
    byte[] cfshort = "cf-short".getBytes();
    byte[] cflong = "cf-long".getBytes();
    byte[] cffloat = "cf-float".getBytes();
    byte[] cfdouble = "cf-double".getBytes();
    byte[] cfbool = "cf-bool".getBytes();
    byte[][] columnFamilies = new byte[][] { cfint, cfbyte, cfshort, cflong, cffloat, cfdouble, cfbool };
    byte[][] rowKeys = new byte[][] {
        Integer.toString(1).getBytes(),
        Integer.toString(Integer.MIN_VALUE).getBytes(),
        Integer.toString(Integer.MAX_VALUE).getBytes() };
    byte[][][] columnQualifiersAndValues = new byte[][][] {
        { Bytes.toBytes(1), new byte[] { 1 }, Bytes.toBytes((short) 1), Bytes.toBytes((long) 1),
            Bytes.toBytes(1.0F), Bytes.toBytes(1.0), Bytes.toBytes(true) },
        { Bytes.toBytes(Integer.MIN_VALUE), new byte[] { Byte.MIN_VALUE }, Bytes.toBytes(Short.MIN_VALUE),
            Bytes.toBytes(Long.MIN_VALUE), Bytes.toBytes(Float.MIN_VALUE), Bytes.toBytes(Double.MIN_VALUE),
            Bytes.toBytes(false) },
        { Bytes.toBytes(Integer.MAX_VALUE), new byte[] { Byte.MAX_VALUE }, Bytes.toBytes(Short.MAX_VALUE),
            Bytes.toBytes(Long.MAX_VALUE), Bytes.toBytes(Float.MAX_VALUE), Bytes.toBytes(Double.MAX_VALUE),
            Bytes.toBytes(true) } };
    List<Cell> kvs = new ArrayList<Cell>();
    Result[] r = new Result[] { null, null, null };
    Put[] p = new Put[] { null, null, null };
    for (int i = 0; i < r.length; i++) {
        kvs.clear();
        p[i] = new Put(rowKeys[i]);
        for (int j = 0; j < columnQualifiersAndValues[i].length; j++) {
            kvs.add(new KeyValue(rowKeys[i], columnFamilies[j], columnQualifiersAndValues[i][j], columnQualifiersAndValues[i][j]));
            p[i].addColumn(columnFamilies[j], columnQualifiersAndValues[i][j], columnQualifiersAndValues[i][j]);
        }
        r[i] = Result.create(kvs);
    }
    Object[][] expectedData = {
        { new Text(Integer.toString(1)), new IntWritable(1), new ByteWritable((byte) 1),
            new ShortWritable((short) 1), new LongWritable(1), new FloatWritable(1.0F),
            new DoubleWritable(1.0), new BooleanWritable(true) },
        { new Text(Integer.toString(Integer.MIN_VALUE)), new IntWritable(Integer.MIN_VALUE),
            new ByteWritable(Byte.MIN_VALUE), new ShortWritable(Short.MIN_VALUE),
            new LongWritable(Long.MIN_VALUE), new FloatWritable(Float.MIN_VALUE),
            new DoubleWritable(Double.MIN_VALUE), new BooleanWritable(false) },
        { new Text(Integer.toString(Integer.MAX_VALUE)), new IntWritable(Integer.MAX_VALUE),
            new ByteWritable(Byte.MAX_VALUE), new ShortWritable(Short.MAX_VALUE),
            new LongWritable(Long.MAX_VALUE), new FloatWritable(Float.MAX_VALUE),
            new DoubleWritable(Double.MAX_VALUE), new BooleanWritable(true) } };
    HBaseSerDe hbaseSerDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveMapHBaseColumnFamily();
    hbaseSerDe.initialize(conf, tbl, null);
    deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues);
    hbaseSerDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesForHiveMapHBaseColumnFamilyII();
    hbaseSerDe.initialize(conf, tbl, null);
    deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, columnFamilies, columnQualifiersAndValues);
}
Also used: KeyValue (org.apache.hadoop.hbase.KeyValue), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), AvroTableProperties (org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties), Properties (java.util.Properties), Put (org.apache.hadoop.hbase.client.Put), Result (org.apache.hadoop.hbase.client.Result), FloatWritable (org.apache.hadoop.io.FloatWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), LongWritable (org.apache.hadoop.io.LongWritable), Cell (org.apache.hadoop.hbase.Cell), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
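
The helpers createPropertiesForHiveMapHBaseColumnFamily and its II variant are not shown in this excerpt, so the exact table definition is elided. The sketch below is a plausible reconstruction only: the property keys are the standard serdeConstants/HBaseSerDe constants, while the Hive column names, the type list, and the "#b:b" (binary map key, binary value) storage specifiers are assumptions matching the binary cells the test writes.

import java.util.Properties;

import org.apache.hadoop.hive.hbase.HBaseSerDe;
import org.apache.hadoop.hive.serde.serdeConstants;

/**
 * Hypothetical reconstruction of the elided createPropertiesForHiveMapHBaseColumnFamily
 * helper; column names and the exact mapping spec are illustrative assumptions.
 */
private static Properties createPropertiesForHiveMapHBaseColumnFamilySketch() {
    Properties tbl = new Properties();
    // Hive schema: a string row key plus one map column per HBase column family.
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,m_int,m_byte,m_short,m_long,m_float,m_double,m_bool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>"
        + ":map<bigint,bigint>:map<float,float>:map<double,double>:map<boolean,boolean>");
    // "cf-int:" (family with no qualifier) maps the whole family onto the Hive map;
    // "#b:b" marks both the map key (qualifier) and the value as binary-encoded.
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b"
        + ",cf-float:#b:b,cf-double:#b:b,cf-bool:#b:b");
    return tbl;
}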

Example 8 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TestHBaseSerDe, the method testHBaseSerDeWithHiveMapToHBaseColumnFamilyII:

@Test
public void testHBaseSerDeWithHiveMapToHBaseColumnFamilyII() throws SerDeException {
    byte[] cfbyte = "cf-byte".getBytes();
    byte[] cfshort = "cf-short".getBytes();
    byte[] cfint = "cf-int".getBytes();
    byte[] cflong = "cf-long".getBytes();
    byte[] cffloat = "cf-float".getBytes();
    byte[] cfdouble = "cf-double".getBytes();
    byte[] cfstring = "cf-string".getBytes();
    byte[] cfbool = "cf-bool".getBytes();
    byte[][] columnFamilies = new byte[][] { cfbyte, cfshort, cfint, cflong, cffloat, cfdouble, cfstring, cfbool };
    byte[] rowKey = Bytes.toBytes("row-key");
    byte[][] columnQualifiersAndValues = new byte[][] {
        Bytes.toBytes("123"), Bytes.toBytes("456"), Bytes.toBytes("789"), Bytes.toBytes("1000"),
        Bytes.toBytes("-0.01"), Bytes.toBytes("5.3"), Bytes.toBytes("Hive"), Bytes.toBytes("true") };
    Put p = new Put(rowKey);
    List<Cell> kvs = new ArrayList<Cell>();
    for (int j = 0; j < columnQualifiersAndValues.length; j++) {
        kvs.add(new KeyValue(rowKey, columnFamilies[j], columnQualifiersAndValues[j], columnQualifiersAndValues[j]));
        p.addColumn(columnFamilies[j], columnQualifiersAndValues[j], columnQualifiersAndValues[j]);
    }
    Result r = Result.create(kvs);
    Object[] expectedData = {
        new Text("row-key"), new ByteWritable((byte) 123), new ShortWritable((short) 456),
        new IntWritable(789), new LongWritable(1000), new FloatWritable(-0.01F),
        new DoubleWritable(5.3), new Text("Hive"), new BooleanWritable(true) };
    HBaseSerDe hbaseSerDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveMapHBaseColumnFamilyII_I();
    hbaseSerDe.initialize(conf, tbl, null);
    deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData, columnFamilies, columnQualifiersAndValues);
    hbaseSerDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesForHiveMapHBaseColumnFamilyII_II();
    hbaseSerDe.initialize(conf, tbl, null);
    deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData, columnFamilies, columnQualifiersAndValues);
}
Also used: KeyValue (org.apache.hadoop.hbase.KeyValue), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), AvroTableProperties (org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties), Properties (java.util.Properties), Put (org.apache.hadoop.hbase.client.Put), Result (org.apache.hadoop.hbase.client.Result), FloatWritable (org.apache.hadoop.io.FloatWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), LongWritable (org.apache.hadoop.io.LongWritable), Cell (org.apache.hadoop.hbase.Cell), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)

Example 9 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TestHBaseSerDe, the method testHBaseSerDeI:

/**
 * Test the default behavior of the Lazy family of objects and object inspectors.
 */
@Test
public void testHBaseSerDeI() throws SerDeException {
    byte[] cfa = "cola".getBytes();
    byte[] cfb = "colb".getBytes();
    byte[] cfc = "colc".getBytes();
    byte[] qualByte = "byte".getBytes();
    byte[] qualShort = "short".getBytes();
    byte[] qualInt = "int".getBytes();
    byte[] qualLong = "long".getBytes();
    byte[] qualFloat = "float".getBytes();
    byte[] qualDouble = "double".getBytes();
    byte[] qualString = "string".getBytes();
    byte[] qualBool = "boolean".getBytes();
    byte[] rowKey = Bytes.toBytes("test-row1");
    // Data
    List<Cell> kvs = new ArrayList<Cell>();
    kvs.add(new KeyValue(rowKey, cfa, qualByte, Bytes.toBytes("123")));
    kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes("456")));
    kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes("789")));
    kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes("1000")));
    kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes("-0.01")));
    kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes("5.3")));
    kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive")));
    kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes("true")));
    Collections.sort(kvs, KeyValue.COMPARATOR);
    Result r = Result.create(kvs);
    Put p = new Put(rowKey);
    p.addColumn(cfa, qualByte, Bytes.toBytes("123"));
    p.addColumn(cfb, qualShort, Bytes.toBytes("456"));
    p.addColumn(cfc, qualInt, Bytes.toBytes("789"));
    p.addColumn(cfa, qualLong, Bytes.toBytes("1000"));
    p.addColumn(cfb, qualFloat, Bytes.toBytes("-0.01"));
    p.addColumn(cfc, qualDouble, Bytes.toBytes("5.3"));
    p.addColumn(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive"));
    p.addColumn(cfb, qualBool, Bytes.toBytes("true"));
    Object[] expectedFieldsData = {
        new Text("test-row1"), new ByteWritable((byte) 123), new ShortWritable((short) 456),
        new IntWritable(789), new LongWritable(1000), new FloatWritable(-0.01F),
        new DoubleWritable(5.3), new Text("Hadoop, HBase, and Hive"), new BooleanWritable(true) };
    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesI_I();
    serDe.initialize(conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_II();
    serDe.initialize(conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_III();
    serDe.initialize(conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_IV();
    serDe.initialize(conf, tbl, null);
    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
}
Also used: KeyValue (org.apache.hadoop.hbase.KeyValue), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), AvroTableProperties (org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties), Properties (java.util.Properties), Put (org.apache.hadoop.hbase.client.Put), Result (org.apache.hadoop.hbase.client.Result), FloatWritable (org.apache.hadoop.io.FloatWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), LongWritable (org.apache.hadoop.io.LongWritable), Cell (org.apache.hadoop.hbase.Cell), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
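
The createPropertiesI_* helpers are likewise elided. Every cell in this test is string-encoded, so a plausible reconstruction needs no binary specifiers; in the sketch below the family:qualifier pairs mirror the Puts above, while the Hive column names and property layout are illustrative assumptions.

/**
 * Hypothetical reconstruction of the elided createPropertiesI_I helper. String
 * storage is HBaseSerDe's default, so no "#b" specifiers appear; column names
 * are illustrative.
 */
private static Properties createPropertiesI_Sketch() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:tinyint:smallint:int:bigint:float:double:string:boolean");
    // Each Hive column maps to one family:qualifier pair from the test data above.
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cola:byte,colb:short,colc:int,cola:long,colb:float"
        + ",colc:double,cola:string,colb:boolean");
    return tbl;
}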

Example 10 with ByteWritable

Use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

From the class TestLazyHBaseObject, the method testLazyHBaseRow3:

/**
 * Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
 * HBase column family/column qualifier pairs. The column types are primitive and fields
 * are stored in binary format in HBase.
 * @throws SerDeException
 */
@Test
public void testLazyHBaseRow3() throws SerDeException {
    List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
    List<String> fieldNames = Arrays.asList(new String[] { "key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool" });
    Text nullSequence = new Text("\\N");
    String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin,"
        + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin,"
        + "cf-string:cq-string#str,cf-bool:cq-bool#bin";
    ColumnMappings columnMappings = null;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
    } catch (SerDeException e) {
        fail(e.toString());
    }
    ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
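    // Fill each column's storage flag by hand, mirroring the #str/#bin suffixes in
    // the mapping spec: columns 0 (key) and 7 (c_string) stay string-encoded.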
    for (int i = 0; i < columnsMapping.length; i++) {
        ColumnMapping colMap = columnsMapping[i];
        if (i == 0 || i == 7) {
            colMap.binaryStorage.add(false);
        } else {
            colMap.binaryStorage.add(true);
        }
    }
    ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
    LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
    byte[] rowKey = "row-key".getBytes();
    List<Cell> kvs = new ArrayList<Cell>();
    byte[] value;
    for (int i = 1; i < columnsMapping.length; i++) {
        switch(i) {
            case 1:
                value = Bytes.toBytes(1);
                break;
            case 2:
                value = new byte[] { (byte) 1 };
                break;
            case 3:
                value = Bytes.toBytes((short) 1);
                break;
            case 4:
                value = Bytes.toBytes((long) 1);
                break;
            case 5:
                value = Bytes.toBytes(1.0F);
                break;
            case 6:
                value = Bytes.toBytes(1.0);
                break;
            case 7:
                value = "Hadoop, Hive, with HBase storage handler.".getBytes();
                break;
            case 8:
                value = Bytes.toBytes(true);
                break;
            default:
                throw new RuntimeException("Not expected: " + i);
        }
        ColumnMapping colMap = columnsMapping[i];
        kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
    }
    Collections.sort(kvs, KeyValue.COMPARATOR);
    Result result = Result.create(kvs);
    o.init(result);
    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
        assert (fieldData != null);
        assert (fieldData instanceof LazyPrimitive<?, ?>);
        Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        switch(i) {
            case 0:
                Text text = new Text("row-key");
                assertEquals(text, writable);
                break;
            case 1:
                IntWritable iw = new IntWritable(1);
                assertEquals(iw, writable);
                break;
            case 2:
                ByteWritable bw = new ByteWritable((byte) 1);
                assertEquals(bw, writable);
                break;
            case 3:
                ShortWritable sw = new ShortWritable((short) 1);
                assertEquals(sw, writable);
                break;
            case 4:
                LongWritable lw = new LongWritable(1);
                assertEquals(lw, writable);
                break;
            case 5:
                FloatWritable fw = new FloatWritable(1.0F);
                assertEquals(fw, writable);
                break;
            case 6:
                DoubleWritable dw = new DoubleWritable(1.0);
                assertEquals(dw, writable);
                break;
            case 7:
                Text t = new Text("Hadoop, Hive, with HBase storage handler.");
                assertEquals(t, writable);
                break;
            case 8:
                BooleanWritable boolWritable = new BooleanWritable(true);
                assertEquals(boolWritable, writable);
                break;
            default:
                fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
                break;
        }
    }
}
Also used: KeyValue (org.apache.hadoop.hbase.KeyValue), ArrayList (java.util.ArrayList), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), IntWritable (org.apache.hadoop.io.IntWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), FloatWritable (org.apache.hadoop.io.FloatWritable), LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), LazyPrimitive (org.apache.hadoop.hive.serde2.lazy.LazyPrimitive), Result (org.apache.hadoop.hbase.client.Result), Cell (org.apache.hadoop.hbase.Cell), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping), LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Text (org.apache.hadoop.io.Text), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), Test (org.junit.Test)

Aggregations

ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 81
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 56
IntWritable (org.apache.hadoop.io.IntWritable): 56
LongWritable (org.apache.hadoop.io.LongWritable): 52
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 47
Text (org.apache.hadoop.io.Text): 47
Test (org.junit.Test): 44
FloatWritable (org.apache.hadoop.io.FloatWritable): 40
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 38
BytesWritable (org.apache.hadoop.io.BytesWritable): 30
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 29
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 27
ArrayList (java.util.ArrayList): 22
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 22
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 21
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 21
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 18
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 17
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 17
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 17