Search in sources :

Example 96 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class JsonSerDe method initialize.

@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);
    jsonSerde.initialize(configuration, tableProperties, partitionProperties, false);
    StructTypeInfo rowTypeInfo = jsonSerde.getTypeInfo();
    cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
    try {
        schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
        log.debug("schema : {}", schema);
        log.debug("fields : {}", schema.getFieldNames());
    } catch (HCatException e) {
        throw new SerDeException(e);
    }
}
Also used : HCatException(org.apache.hive.hcatalog.common.HCatException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 97 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class HCatSchemaUtils method getHCatFieldSchema.

private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo, String comment) throws HCatException {
    Category typeCategory = fieldTypeInfo.getCategory();
    HCatFieldSchema hCatFieldSchema;
    if (Category.PRIMITIVE == typeCategory) {
        hCatFieldSchema = new HCatFieldSchema(fieldName, (PrimitiveTypeInfo) fieldTypeInfo, comment);
    } else if (Category.STRUCT == typeCategory) {
        HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo);
        hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, comment);
    } else if (Category.LIST == typeCategory) {
        HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo());
        hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, comment);
    } else if (Category.MAP == typeCategory) {
        HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo());
        hCatFieldSchema = HCatFieldSchema.createMapTypeFieldSchema(fieldName, (PrimitiveTypeInfo) ((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo(), subSchema, comment);
    } else {
        throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null);
    }
    return hCatFieldSchema;
}
Also used : Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 98 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class TestAvroObjectInspectorGenerator method canHandleRecords.

@Test
public void canHandleRecords() throws SerDeException {
    Schema s = AvroSerdeUtils.getSchemaFor(RECORD_SCHEMA);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("aRecord", aoig.getColumnNames().get(0));
    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertEquals(ObjectInspector.Category.STRUCT, typeInfo.getCategory());
    assertTrue(typeInfo instanceof StructTypeInfo);
    StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    // Check individual elements of subrecord
    List<String> allStructFieldNames = structTypeInfo.getAllStructFieldNames();
    List<TypeInfo> allStructFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    assertEquals(allStructFieldNames.size(), 3);
    String[] names = new String[] { "int1", "boolean1", "long1" };
    String[] typeInfoStrings = new String[] { "int", "boolean", "bigint" };
    for (int i = 0; i < allStructFieldNames.size(); i++) {
        assertEquals("Fieldname " + allStructFieldNames.get(i) + " doesn't match expected " + names[i], names[i], allStructFieldNames.get(i));
        assertEquals("Typeinfo " + allStructFieldTypeInfos.get(i) + " doesn't match expected " + typeInfoStrings[i], typeInfoStrings[i], allStructFieldTypeInfos.get(i).getTypeName());
    }
}
Also used : Schema(org.apache.avro.Schema) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Test(org.junit.Test)

Example 99 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class TestTypeInfoToSchema method createAvroStructSchema.

@Test
public void createAvroStructSchema() throws IOException {
    StructTypeInfo structTypeInfo = new StructTypeInfo();
    ArrayList<String> names = new ArrayList<String>();
    names.add("field1");
    names.add("field2");
    names.add("field3");
    names.add("field4");
    names.add("field5");
    names.add("field6");
    names.add("field7");
    names.add("field8");
    names.add("field9");
    names.add("field10");
    names.add("field11");
    names.add("field12");
    names.add("field13");
    names.add("field14");
    structTypeInfo.setAllStructFieldNames(names);
    ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
    typeInfos.add(STRING);
    typeInfos.add(CHAR);
    typeInfos.add(VARCHAR);
    typeInfos.add(BINARY);
    typeInfos.add(BYTE);
    typeInfos.add(SHORT);
    typeInfos.add(INT);
    typeInfos.add(LONG);
    typeInfos.add(FLOAT);
    typeInfos.add(DOUBLE);
    typeInfos.add(BOOLEAN);
    typeInfos.add(DECIMAL);
    typeInfos.add(DATE);
    typeInfos.add(VOID);
    structTypeInfo.setAllStructFieldTypeInfos(typeInfos);
    LOGGER.info("structTypeInfo is " + structTypeInfo);
    final String specificSchema = IOUtils.toString(Resources.getResource("avro-struct.avsc").openStream()).replace(lineSeparator, "");
    String expectedSchema = genSchema(specificSchema);
    Assert.assertEquals("Test for struct's avro schema failed", expectedSchema, getAvroSchemaString(structTypeInfo));
}
Also used : ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Test(org.junit.Test)

Example 100 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

the class VerifyFast method serializeWrite.

public static void serializeWrite(SerializeWrite serializeWrite, TypeInfo typeInfo, Object object) throws IOException {
    if (object == null) {
        serializeWrite.writeNull();
        return;
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch(primitiveTypeInfo.getPrimitiveCategory()) {
                    case BOOLEAN:
                        {
                            boolean value = ((BooleanWritable) object).get();
                            serializeWrite.writeBoolean(value);
                        }
                        break;
                    case BYTE:
                        {
                            byte value = ((ByteWritable) object).get();
                            serializeWrite.writeByte(value);
                        }
                        break;
                    case SHORT:
                        {
                            short value = ((ShortWritable) object).get();
                            serializeWrite.writeShort(value);
                        }
                        break;
                    case INT:
                        {
                            int value = ((IntWritable) object).get();
                            serializeWrite.writeInt(value);
                        }
                        break;
                    case LONG:
                        {
                            long value = ((LongWritable) object).get();
                            serializeWrite.writeLong(value);
                        }
                        break;
                    case FLOAT:
                        {
                            float value = ((FloatWritable) object).get();
                            serializeWrite.writeFloat(value);
                        }
                        break;
                    case DOUBLE:
                        {
                            double value = ((DoubleWritable) object).get();
                            serializeWrite.writeDouble(value);
                        }
                        break;
                    case STRING:
                        {
                            Text value = (Text) object;
                            byte[] stringBytes = value.getBytes();
                            int stringLength = stringBytes.length;
                            serializeWrite.writeString(stringBytes, 0, stringLength);
                        }
                        break;
                    case CHAR:
                        {
                            HiveChar value = ((HiveCharWritable) object).getHiveChar();
                            serializeWrite.writeHiveChar(value);
                        }
                        break;
                    case VARCHAR:
                        {
                            HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar();
                            serializeWrite.writeHiveVarchar(value);
                        }
                        break;
                    case DECIMAL:
                        {
                            HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal();
                            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                            serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
                        }
                        break;
                    case DATE:
                        {
                            Date value = ((DateWritableV2) object).get();
                            serializeWrite.writeDate(value);
                        }
                        break;
                    case TIMESTAMP:
                        {
                            Timestamp value = ((TimestampWritableV2) object).getTimestamp();
                            serializeWrite.writeTimestamp(value);
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
                            serializeWrite.writeHiveIntervalYearMonth(value);
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
                            serializeWrite.writeHiveIntervalDayTime(value);
                        }
                        break;
                    case BINARY:
                        {
                            BytesWritable byteWritable = (BytesWritable) object;
                            byte[] binaryBytes = byteWritable.getBytes();
                            int length = byteWritable.getLength();
                            serializeWrite.writeBinary(binaryBytes, 0, length);
                        }
                        break;
                    default:
                        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
                }
            }
            break;
        case LIST:
            {
                ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
                TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
                ArrayList<Object> elements = (ArrayList<Object>) object;
                serializeWrite.beginList(elements);
                boolean isFirst = true;
                for (Object elementObject : elements) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateList();
                    }
                    if (elementObject == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, elementTypeInfo, elementObject);
                    }
                }
                serializeWrite.finishList();
            }
            break;
        case MAP:
            {
                MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
                TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
                HashMap<Object, Object> hashMap = (HashMap<Object, Object>) object;
                serializeWrite.beginMap(hashMap);
                boolean isFirst = true;
                for (Entry<Object, Object> entry : hashMap.entrySet()) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateKeyValuePair();
                    }
                    if (entry.getKey() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, keyTypeInfo, entry.getKey());
                    }
                    serializeWrite.separateKey();
                    if (entry.getValue() == null) {
                        serializeWrite.writeNull();
                    } else {
                        serializeWrite(serializeWrite, valueTypeInfo, entry.getValue());
                    }
                }
                serializeWrite.finishMap();
            }
            break;
        case STRUCT:
            {
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
                List<Object> fieldValues = (List<Object>) object;
                final int size = fieldValues.size();
                serializeWrite.beginStruct(fieldValues);
                boolean isFirst = true;
                for (int i = 0; i < size; i++) {
                    if (isFirst) {
                        isFirst = false;
                    } else {
                        serializeWrite.separateStruct();
                    }
                    serializeWrite(serializeWrite, fieldTypeInfos.get(i), fieldValues.get(i));
                }
                serializeWrite.finishStruct();
            }
            break;
        case UNION:
            {
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
                final int size = fieldTypeInfos.size();
                StandardUnion standardUnion = (StandardUnion) object;
                byte tag = standardUnion.getTag();
                serializeWrite.beginUnion(tag);
                serializeWrite(serializeWrite, fieldTypeInfos.get(tag), standardUnion.getObject());
                serializeWrite.finishUnion();
            }
            break;
        default:
            throw new Error("Unknown category " + typeInfo.getCategory().name());
    }
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Entry(java.util.Map.Entry) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Date(org.apache.hadoop.hive.common.type.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StandardUnion(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Aggregations

StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)100 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)78 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)59 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)54 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)54 ArrayList (java.util.ArrayList)42 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)32 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)30 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)24 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)23 List (java.util.List)21 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)21 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)17 IntWritable (org.apache.hadoop.io.IntWritable)12 Text (org.apache.hadoop.io.Text)12 BytesWritable (org.apache.hadoop.io.BytesWritable)11 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)10 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)10 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)10 BooleanWritable (org.apache.hadoop.io.BooleanWritable)10