Example 31 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class TestAvroDeserializer method canDeserializeArrays.

@Test
public void canDeserializeArrays() throws SerDeException, IOException {
    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
    GenericData.Record record = new GenericData.Record(s);
    List<String> list = new ArrayList<String>();
    list.add("Eccleston");
    list.add("Tennant");
    list.add("Smith");
    record.put("anArray", list);
    assertTrue(GENERIC_DATA.validate(s, record));
    System.out.println("Array-backed record = " + record);
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    AvroDeserializer de = new AvroDeserializer();
    ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    Object theArrayObject = row.get(0);
    assertTrue(theArrayObject instanceof List);
    List theList = (List) theArrayObject;
    // Verify the raw object that's been created
    assertEquals("Eccleston", theList.get(0));
    assertEquals("Tennant", theList.get(1));
    assertEquals("Smith", theList.get(2));
    // Now go the correct way, through objectinspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    StructField fieldRefToArray = oi.getStructFieldRef("anArray");
    Object anArrayData = oi.getStructFieldData(row, fieldRefToArray);
    StandardListObjectInspector anArrayOI = (StandardListObjectInspector) fieldRefToArray.getFieldObjectInspector();
    assertEquals(3, anArrayOI.getListLength(anArrayData));
    JavaStringObjectInspector elementOI = (JavaStringObjectInspector) anArrayOI.getListElementObjectInspector();
    Object firstElement = anArrayOI.getListElement(anArrayData, 0);
    assertEquals("Eccleston", elementOI.getPrimitiveJavaObject(firstElement));
    assertTrue(firstElement instanceof String);
    Object secondElement = anArrayOI.getListElement(anArrayData, 1);
    assertEquals("Tennant", elementOI.getPrimitiveJavaObject(secondElement));
    assertTrue(secondElement instanceof String);
    Object thirdElement = anArrayOI.getListElement(anArrayData, 2);
    assertEquals("Smith", elementOI.getPrimitiveJavaObject(thirdElement));
    assertTrue(thirdElement instanceof String);
}
Also used : JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) List(java.util.List) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) Test(org.junit.Test)
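
The ObjectInspector round trip in this test generalizes to any struct-typed row: resolve a StructField by name (or enumerate them all), pull the field data out through the StructObjectInspector, then interpret that data with the field's own inspector. A minimal sketch of that generic walk, assuming only a StructObjectInspector and a deserialized row object such as the ones produced above (the class and method names here are illustrative, not part of Hive):

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class StructFieldWalkSketch {

    // Print every top-level field of a deserialized row. The row and its
    // StructObjectInspector are assumed to come from some SerDe, e.g. the
    // AvroDeserializer / AvroObjectInspectorGenerator pair used in the test.
    public static void dumpFields(StructObjectInspector soi, Object row) {
        List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
        for (StructField fieldRef : fieldRefs) {
            Object fieldData = soi.getStructFieldData(row, fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            System.out.println(fieldRef.getFieldName() + " (" + fieldOI.getTypeName() + ") = " + fieldData);
        }
    }
}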

Example 32 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class SQLOperation method decodeFromString.

private RowSet decodeFromString(List<Object> rows, RowSet rowSet) throws SQLException, SerDeException {
    getSerDe();
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    Object[] deserializedFields = new Object[fieldRefs.size()];
    Object rowObj;
    ObjectInspector fieldOI;
    int protocol = getProtocolVersion().getValue();
    for (Object rowString : rows) {
        try {
            rowObj = serde.deserialize(new BytesWritable(((String) rowString).getBytes("UTF-8")));
        } catch (UnsupportedEncodingException e) {
            throw new SerDeException(e);
        }
        for (int i = 0; i < fieldRefs.size(); i++) {
            StructField fieldRef = fieldRefs.get(i);
            fieldOI = fieldRef.getFieldObjectInspector();
            Object fieldData = soi.getStructFieldData(rowObj, fieldRef);
            deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol);
        }
        rowSet.addRow(deserializedFields);
    }
    return rowSet;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) UnsupportedEncodingException(java.io.UnsupportedEncodingException) BytesWritable(org.apache.hadoop.io.BytesWritable) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 33 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class HBaseRowSerializer method serialize.

private boolean serialize(Object obj, ObjectInspector objInspector, int level, ByteStream.Output ss) throws IOException {
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            LazyUtils.writePrimitiveUTF8(ss, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape);
            return true;
        case LIST:
            char separator = (char) separators[level];
            ListObjectInspector loi = (ListObjectInspector) objInspector;
            List<?> list = loi.getList(obj);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                return false;
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        ss.write(separator);
                    }
                    serialize(list.get(i), eoi, level + 1, ss);
                }
            }
            return true;
        case MAP:
            char sep = (char) separators[level];
            char keyValueSeparator = (char) separators[level + 1];
            MapObjectInspector moi = (MapObjectInspector) objInspector;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(obj);
            if (map == null) {
                return false;
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        ss.write(sep);
                    }
                    serialize(entry.getKey(), koi, level + 2, ss);
                    if (entry.getValue() != null) {
                        ss.write(keyValueSeparator);
                        serialize(entry.getValue(), voi, level + 2, ss);
                    }
                }
            }
            return true;
        case STRUCT:
            sep = (char) separators[level];
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                return false;
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        ss.write(sep);
                    }
                    serialize(list.get(i), fields.get(i).getFieldObjectInspector(), level + 1, ss);
                }
            }
            return true;
        case UNION:
            {
                // union type currently not totally supported. See HIVE-2390
                return false;
            }
        default:
            throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
    }
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) Map(java.util.Map)
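
The level argument indexes into the separators array so that each nesting depth gets its own delimiter; map entries consume two levels, one for the entry separator and one for the key-value separator, which is why keys and values recurse with level + 2. A simplified sketch of the same idea on plain Java collections (this is not Hive code, and the separator characters are just placeholders):

import java.util.List;
import java.util.Map;

public class LevelSeparatorSketch {

    // Placeholder delimiters standing in for the SerDe's configured separators.
    private static final char[] SEPARATORS = { ',', ':', ';', '|' };

    // Flatten nested lists/maps into a single string, choosing the delimiter
    // by nesting depth the same way HBaseRowSerializer.serialize does.
    static String flatten(Object value, int level) {
        StringBuilder sb = new StringBuilder();
        if (value instanceof List) {
            char sep = SEPARATORS[level];
            List<?> list = (List<?>) value;
            for (int i = 0; i < list.size(); i++) {
                if (i > 0) {
                    sb.append(sep);
                }
                sb.append(flatten(list.get(i), level + 1));
            }
        } else if (value instanceof Map) {
            char sep = SEPARATORS[level];
            char keyValueSep = SEPARATORS[level + 1];
            boolean first = true;
            for (Map.Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
                if (!first) {
                    sb.append(sep);
                }
                first = false;
                // Keys and values sit two levels deeper: this level and the
                // next are already taken by the entry and key-value separators.
                sb.append(flatten(entry.getKey(), level + 2));
                sb.append(keyValueSep);
                sb.append(flatten(entry.getValue(), level + 2));
            }
        } else {
            sb.append(value);
        }
        return sb.toString();
    }
}

With the placeholder delimiters above, flatten(Map.of("a", List.of(1, 2), "b", List.of(3)), 0) yields something like a:1;2,b:3 (entry order depends on the map implementation): the inner list uses the level-2 separator because the entry and key-value separators already claimed levels 0 and 1.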

Example 34 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class HBaseRowSerializer method serializeKeyField.

byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping) throws IOException {
    if (keyValue == null) {
        throw new IOException("HBase row key cannot be NULL");
    }
    ObjectInspector keyFieldOI = keyField.getFieldObjectInspector();
    if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) {
        // we always serialize the String type using the escaped algorithm for LazyString
        return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
    }
    // use the serialization option switch to write primitive values as either a variable
    // length UTF8 string or a fixed width bytes if serializing in binary format
    boolean writeBinary = keyMapping.binaryStorage.get(0);
    return serialize(keyValue, keyFieldOI, 1, writeBinary);
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IOException(java.io.IOException)

Example 35 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class HBaseRowSerializer method serializeField.

private void serializeField(Object value, StructField field, ColumnMapping colMap, Put put) throws IOException {
    if (value == null) {
        // a null object, we do not serialize it
        return;
    }
    // Get the field objectInspector and the field object.
    ObjectInspector foi = field.getFieldObjectInspector();
    // If the field corresponds to a column family in HBase
    if (colMap.qualifierName == null) {
        MapObjectInspector moi = (MapObjectInspector) foi;
        Map<?, ?> map = moi.getMap(value);
        if (map == null) {
            return;
        }
        ObjectInspector koi = moi.getMapKeyObjectInspector();
        ObjectInspector voi = moi.getMapValueObjectInspector();
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            // Get the Key
            // Map keys are required to be primitive and may be serialized in binary format
            byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0));
            if (columnQualifierBytes == null) {
                continue;
            }
            // Map values may be serialized in binary format when they are primitive and binary
            // serialization is the option selected
            byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1));
            if (bytes == null) {
                continue;
            }
            put.add(colMap.familyNameBytes, columnQualifierBytes, bytes);
        }
    } else {
        byte[] bytes;
        // If the field is not a primitive but the column mapping declares a
        // primitive, serialize the value as a JSON string; otherwise serialize
        // it the delimited (or binary) way.
        if (!foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && colMap.isCategory(ObjectInspector.Category.PRIMITIVE)) {
            // we always serialize the String type using the escaped algorithm for LazyString
            bytes = serialize(SerDeUtils.getJSONString(value, foi), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
        } else {
            // use the serialization option switch to write primitive values as either a variable
            // length UTF8 string or a fixed width bytes if serializing in binary format
            bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0));
        }
        if (bytes == null) {
            return;
        }
        put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes);
    }
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) Map(java.util.Map)
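
Taken together, the two branches mirror the two kinds of column mappings: when the Hive column is mapped to a whole column family (colMap.qualifierName == null), the field must be a map and every entry becomes its own HBase cell, with the map key serialized as the column qualifier and the map value as the cell value; when the column is mapped to a single family:qualifier pair, the entire serialized field goes into that one cell.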

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 136
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 97
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 86
ArrayList (java.util.ArrayList): 67
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 48
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 37
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 33
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 30
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 26
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 25
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 25
Test (org.junit.Test): 24
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 22
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 21
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 21
List (java.util.List): 19
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 19
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 19
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 18
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector): 18