Search in sources :

Example 21 with TypeInfoUtils.getTypeInfoFromTypeString

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

The class AccumuloRowSerializer, method serializeRowId.

/**
 * Serializes the Accumulo row ID into a byte array via the shared {@code output} buffer.
 *
 * <p>When the field's object inspector is not primitive but the column mapping declares a
 * primitive type, the value is rendered with {@code SerDeUtils.getJSONString} and written
 * as a string. In every other case the value is handed to {@code getSerializedValue}.
 *
 * @param rowId the row ID value; must not be {@code null}
 * @param rowIdField the struct field supplying the object inspector for {@code rowId}
 * @param rowIdMapping the column mapping supplying the declared type of the row ID
 * @return the bytes held by {@code output} after the value has been written
 * @throws IOException if {@code rowId} is {@code null}
 */
protected byte[] serializeRowId(Object rowId, StructField rowIdField, ColumnMapping rowIdMapping) throws IOException {
    if (null == rowId) {
        throw new IOException("Accumulo rowId cannot be NULL");
    }
    // Start from an empty buffer; it is reused between calls.
    output.reset();
    final ObjectInspector inspector = rowIdField.getFieldObjectInspector();
    final TypeInfo mappedType = TypeInfoUtils.getTypeInfoFromTypeString(rowIdMapping.getColumnType());
    final boolean inspectorIsPrimitive = inspector.getCategory().equals(ObjectInspector.Category.PRIMITIVE);
    if (!inspectorIsPrimitive && mappedType.getCategory() == ObjectInspector.Category.PRIMITIVE) {
        // we always serialize the String type using the escaped algorithm for LazyString
        final String json = SerDeUtils.getJSONString(rowId, inspector);
        writeString(output, json, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    } else {
        // The serialization option on the mapping decides between a variable-length UTF8
        // string and fixed-width bytes (binary format) for primitive values.
        getSerializedValue(inspector, rowId, output, rowIdMapping);
    }
    return output.toByteArray();
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) IOException(java.io.IOException) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 22 with TypeInfoUtils.getTypeInfoFromTypeString

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

The class TestHCatSchemaUtils, method testSimpleOperation.

/**
 * Parses a nested struct type string both with {@code TypeInfoUtils} and with
 * {@code HCatSchemaUtils} and checks that the two results describe the same type.
 *
 * <p>Assertions use the (expected, actual) argument order so that a failure message
 * reports the values the right way round.
 *
 * @throws Exception if parsing the type string or reading the schema fails
 */
public void testSimpleOperation() throws Exception {
    String typeString = "struct<name:string,studentid:int," + "contact:struct<phNo:string,email:string>," + "currently_registered_courses:array<string>," + "current_grades:map<string,string>," + "phNos:array<struct<phNo:string,type:string>>,blah:array<int>>";
    TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
    HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString);
    LOG.info("Type name : {}", ti.getTypeName());
    LOG.info("HCatSchema : {}", hsch);
    // The whole type string is one top-level struct, so the schema holds a single field.
    assertEquals(1, hsch.size());
    // Looks like HCatFieldSchema.getTypeString() lower-cases its results
    assertEquals(ti.getTypeName().toLowerCase(), hsch.get(0).getTypeString());
    assertEquals(typeString.toLowerCase(), hsch.get(0).getTypeString());
}
Also used : TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 23 with TypeInfoUtils.getTypeInfoFromTypeString

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project drill by apache.

The class HiveAbstractReader, method init.

/**
 * Sets up this reader for the configured Hive table and, if present, partition.
 *
 * <p>In order, the method:
 * <ol>
 *   <li>builds a {@code JobConf} from {@code hiveConf} and reads the default partition name;</li>
 *   <li>creates the table (and partition) SerDe and their struct object inspectors, and sets
 *       the job's input format;</li>
 *   <li>works out the projected column ids/names and registers them through
 *       {@code ColumnProjectionUtils.appendReadColumns};</li>
 *   <li>records, per selected column, the field ref, object inspector, {@code TypeInfo} and
 *       field converter;</li>
 *   <li>records the type (and, when a partition is set, the converted value) of every
 *       selected partition column;</li>
 *   <li>unless {@code empty} is set, opens the record reader for {@code inputSplit} and calls
 *       {@code internalInit}.</li>
 * </ol>
 *
 * @throws ExecutionSetupException if the metadata/SerDe setup fails or the record reader
 *     cannot be obtained from the input format
 */
private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);
    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);
        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);
        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);
            // Rows are read with the partition inspector and converted to the table's layout.
            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }
        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            // One placeholder for the one argument (the original format string had two).
            logger.trace("partitionSerDe class is {}", partitionSerDe.getClass().getName());
        }
        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }
        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            // Star query: every table column and every partition column is selected.
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            // Explicit projection: split requested columns into partition and regular columns.
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }
        // Guarded like the trace block above so the arguments are not built when trace is off.
        if (logger.isTraceEnabled()) {
            for (int i = 0; i < selectedColumnNames.size(); ++i) {
                logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
            }
        }
        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);
                if (partition != null) {
                    // Partition values are looked up by the partition key's position.
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
    }
    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        internalInit(tableProperties, reader);
    }
}
Also used : SerDe(org.apache.hadoop.hive.serde2.SerDe) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Properties(java.util.Properties) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ExecutionException(java.util.concurrent.ExecutionException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SchemaPath(org.apache.drill.common.expression.SchemaPath) JobConf(org.apache.hadoop.mapred.JobConf) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 24 with TypeInfoUtils.getTypeInfoFromTypeString

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

The class TypeInfoUtils, method typeInfosFromStructObjectInspector.

/**
 * Returns the {@code TypeInfo} of every field of the given struct object inspector, in the
 * order the fields are reported by {@code getAllStructFieldRefs()}.
 *
 * <p>Each {@code TypeInfo} is obtained by parsing the type name of the field's object
 * inspector.
 *
 * @param structObjectInspector the struct inspector whose fields are examined
 * @return a new list holding one {@code TypeInfo} per struct field
 */
public static ArrayList<TypeInfo> typeInfosFromStructObjectInspector(StructObjectInspector structObjectInspector) {
    final List<? extends StructField> structFields = structObjectInspector.getAllStructFieldRefs();
    final int fieldCount = structFields.size();
    final ArrayList<TypeInfo> result = new ArrayList<TypeInfo>(fieldCount);
    for (int i = 0; i < fieldCount; i++) {
        final String typeName = structFields.get(i).getFieldObjectInspector().getTypeName();
        result.add(TypeInfoUtils.getTypeInfoFromTypeString(typeName));
    }
    return result;
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList)

Example 25 with TypeInfoUtils.getTypeInfoFromTypeString

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

The class TestStandardObjectInspectors, method testStandardUnionObjectInspector.

/**
 * Exercises {@code StandardUnionObjectInspector} over a union of six alternatives, in tag
 * order: int, string, boolean, list of int, map of int to string, and a struct with fields
 * {@code myDouble} (double) and {@code myLong} (long).
 *
 * <p>The test checks equality of two inspectors built from the same alternatives, the
 * reported category and type name, the round trip through {@code TypeInfo}, the handling
 * of {@code null}, and then, for each tag, the tag/field accessors, the JSON rendering,
 * comparison and copy-to-standard-object.
 *
 * <p>The unchecked warning is suppressed because the results of {@code clone()} are cast
 * to {@code ArrayList<ObjectInspector>}.
 *
 * @throws Throwable any failure, after its stack trace has been printed
 */
@SuppressWarnings("unchecked")
public void testStandardUnionObjectInspector() throws Throwable {
    try {
        ArrayList<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
        // add primitive types
        objectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        objectInspectors.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
        // add a list
        objectInspectors.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
        // add a map
        objectInspectors.add(ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        // add a struct
        List<String> fieldNames = new ArrayList<String>();
        fieldNames.add("myDouble");
        fieldNames.add("myLong");
        ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>();
        fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
        fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        objectInspectors.add(ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors));
        // Two inspectors built from equal lists of alternatives must be equal to each other.
        StandardUnionObjectInspector uoi1 = ObjectInspectorFactory.getStandardUnionObjectInspector(objectInspectors);
        StandardUnionObjectInspector uoi2 = ObjectInspectorFactory.getStandardUnionObjectInspector((ArrayList<ObjectInspector>) objectInspectors.clone());
        assertEquals(uoi1, uoi2);
        assertEquals(ObjectInspectorUtils.getObjectInspectorName(uoi1), ObjectInspectorUtils.getObjectInspectorName(uoi2));
        assertTrue(ObjectInspectorUtils.compareTypes(uoi1, uoi2));
        // compareSupported returns false because Union can contain
        // an object of Map
        assertFalse(ObjectInspectorUtils.compareSupported(uoi1));
        // construct unionObjectInspector without Map field.
        ArrayList<ObjectInspector> ois = (ArrayList<ObjectInspector>) objectInspectors.clone();
        // Index 4 is the map alternative; replace it with an int inspector.
        ois.set(4, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        assertTrue(ObjectInspectorUtils.compareSupported(ObjectInspectorFactory.getStandardUnionObjectInspector(ois)));
        // metadata
        assertEquals(Category.UNION, uoi1.getCategory());
        List<? extends ObjectInspector> uois = uoi1.getObjectInspectors();
        assertEquals(6, uois.size());
        for (int i = 0; i < 6; i++) {
            assertEquals(objectInspectors.get(i), uois.get(i));
        }
        // Expected type name: "uniontype<" + comma-separated alternative type names + ">".
        StringBuilder unionTypeName = new StringBuilder();
        unionTypeName.append("uniontype<");
        for (int i = 0; i < uois.size(); i++) {
            if (i > 0) {
                unionTypeName.append(",");
            }
            unionTypeName.append(uois.get(i).getTypeName());
        }
        unionTypeName.append(">");
        assertEquals(unionTypeName.toString(), uoi1.getTypeName());
        // TypeInfo
        TypeInfo typeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi1);
        assertEquals(Category.UNION, typeInfo1.getCategory());
        assertEquals(UnionTypeInfo.class.getName(), typeInfo1.getClass().getName());
        assertEquals(typeInfo1.getTypeName(), uoi1.getTypeName());
        // Parsing the inspector's type name must give back an equal TypeInfo.
        assertEquals(typeInfo1, TypeInfoUtils.getTypeInfoFromTypeString(uoi1.getTypeName()));
        TypeInfo typeInfo2 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi2);
        assertEquals(typeInfo1, typeInfo2);
        assertEquals(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo2));
        assertEquals(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo2));
        // null
        assertNull(uoi1.getField(null));
        assertEquals(-1, uoi1.getTag(null));
        // Union
        // Tag 0: the int alternative.
        UnionObject union = new StandardUnion((byte) 0, 1);
        assertEquals(0, uoi1.getTag(union));
        assertEquals(1, uoi1.getField(union));
        assertEquals("{0:1}", SerDeUtils.getJSONString(union, uoi1));
        assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 0, 1), uoi2));
        assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(1));
        // Tag 1: the string alternative.
        union = new StandardUnion((byte) 1, "two");
        assertEquals(1, uoi1.getTag(union));
        assertEquals("two", uoi1.getField(union));
        assertEquals("{1:\"two\"}", SerDeUtils.getJSONString(union, uoi1));
        assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 1, "two"), uoi2));
        assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals("two"));
        // Tag 2: the boolean alternative.
        union = new StandardUnion((byte) 2, true);
        assertEquals(2, uoi1.getTag(union));
        assertEquals(true, uoi1.getField(union));
        assertEquals("{2:true}", SerDeUtils.getJSONString(union, uoi1));
        assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 2, true), uoi2));
        assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(true));
        // Tag 3: the list-of-int alternative.
        ArrayList<Integer> iList = new ArrayList<Integer>();
        iList.add(4);
        iList.add(5);
        union = new StandardUnion((byte) 3, iList);
        assertEquals(3, uoi1.getTag(union));
        assertEquals(iList, uoi1.getField(union));
        assertEquals("{3:[4,5]}", SerDeUtils.getJSONString(union, uoi1));
        assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 3, iList.clone()), uoi2));
        assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(iList));
        // Tag 4: the map alternative.
        HashMap<Integer, String> map = new HashMap<Integer, String>();
        map.put(6, "six");
        map.put(7, "seven");
        map.put(8, "eight");
        union = new StandardUnion((byte) 4, map);
        assertEquals(4, uoi1.getTag(union));
        assertEquals(map, uoi1.getField(union));
        assertEquals("{4:{6:\"six\",7:\"seven\",8:\"eight\"}}", SerDeUtils.getJSONString(union, uoi1));
        // Comparing map values is expected to fail; capture the throwable and check its message.
        Throwable th = null;
        try {
            ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 4, map.clone()), uoi2, null);
        } catch (Throwable t) {
            th = t;
        }
        assertNotNull(th);
        assertEquals("Compare on map type not supported!", th.getMessage());
        assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(map));
        // Tag 5: the struct alternative, represented as a list of field values.
        ArrayList<Object> struct = new ArrayList<Object>(2);
        struct.add(9.0);
        struct.add(10L);
        union = new StandardUnion((byte) 5, struct);
        assertEquals(5, uoi1.getTag(union));
        assertEquals(struct, uoi1.getField(union));
        // Note the expected JSON carries the struct field names in lower case.
        assertEquals("{5:{\"mydouble\":9.0,\"mylong\":10}}", SerDeUtils.getJSONString(union, uoi1));
        assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 5, struct.clone()), uoi2));
        assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(struct));
    } catch (Throwable e) {
        // Print the stack trace before rethrowing so the failure is visible in the test output.
        e.printStackTrace();
        throw e;
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) StandardUnion(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)41 ArrayList (java.util.ArrayList)22 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)22 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)20 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)14 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)9 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)9 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)9 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)8 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)7 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)7 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)7 HashMap (java.util.HashMap)6 Properties (java.util.Properties)5 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)5 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)5 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)5 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)5 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)4 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)4