Search in sources :

Example 1 with StandardMapObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector in project hive by apache.

the class StatsUtils method getSizeOfComplexTypes.

/**
 * Get the size of complex data types
 * @return raw data size
 */
public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) {
    long result = 0;
    int length = 0;
    int listEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_LIST_NUM_ENTRIES);
    int mapEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_NUM_ENTRIES);
    switch(oi.getCategory()) {
        case PRIMITIVE:
            String colTypeLowerCase = oi.getTypeName().toLowerCase();
            if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
                int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase);
                result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
            } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
                int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase);
                result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
            } else {
                result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
            }
            break;
        case LIST:
            if (oi instanceof StandardConstantListObjectInspector) {
                // constant list projection of known length
                StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi;
                List<?> value = scloi.getWritableConstantValue();
                if (null == value) {
                    length = 0;
                } else {
                    length = value.size();
                }
                // check if list elements are primitive or Objects
                ObjectInspector leoi = scloi.getListElementObjectInspector();
                if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
                    int maxVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
                    result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length, maxVarLen);
                } else {
                    result += JavaDataModel.get().lengthForObjectArrayOfSize(length);
                }
            } else {
                StandardListObjectInspector sloi = (StandardListObjectInspector) oi;
                // list overhead + (configured number of element in list * size of element)
                long elemSize = getSizeOfComplexTypes(conf, sloi.getListElementObjectInspector());
                result += JavaDataModel.get().arrayList() + (listEntries * elemSize);
            }
            break;
        case MAP:
            if (oi instanceof StandardConstantMapObjectInspector) {
                // constant map projection of known length
                StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi;
                result += getSizeOfMap(scmoi);
            } else {
                StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi;
                result += getSizeOfComplexTypes(conf, smoi.getMapKeyObjectInspector());
                result += getSizeOfComplexTypes(conf, smoi.getMapValueObjectInspector());
                // hash map overhead
                result += JavaDataModel.get().hashMap(mapEntries);
            }
            break;
        case STRUCT:
            if (oi instanceof StandardConstantStructObjectInspector) {
                // constant map projection of known length
                StandardConstantStructObjectInspector scsoi = (StandardConstantStructObjectInspector) oi;
                result += getSizeOfStruct(scsoi);
            } else {
                StructObjectInspector soi = (StructObjectInspector) oi;
                // add constant object overhead for struct
                result += JavaDataModel.get().object();
                // add constant struct field names references overhead
                result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref();
                for (StructField field : soi.getAllStructFieldRefs()) {
                    result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector());
                }
            }
            break;
        case UNION:
            UnionObjectInspector uoi = (UnionObjectInspector) oi;
            // add constant object overhead for union
            result += JavaDataModel.get().object();
            // add constant size for unions tags
            result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1();
            for (ObjectInspector foi : uoi.getObjectInspectors()) {
                result += getSizeOfComplexTypes(conf, foi);
            }
            break;
        default:
            break;
    }
    return result;
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) WritableTimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampLocalTZObjectInspector) StandardConstantListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector) StandardConstantMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) StandardConstantMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardConstantListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector)

Example 2 with StandardMapObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector in project cdap by caskdata.

the class StandardObjectInspectorsTest method testStandardMapObjectInspector.

@Test
public void testStandardMapObjectInspector() throws Throwable {
    try {
        StandardMapObjectInspector moi1 = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        StandardMapObjectInspector moi2 = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        Assert.assertEquals(moi1, moi2);
        // metadata
        Assert.assertEquals(Category.MAP, moi1.getCategory());
        Assert.assertEquals(moi1.getMapKeyObjectInspector(), PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        Assert.assertEquals(moi2.getMapValueObjectInspector(), PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        // null
        Assert.assertNull(moi1.getMap(null));
        Assert.assertNull(moi1.getMapValueElement(null, null));
        Assert.assertNull(moi1.getMapValueElement(null, "nokey"));
        Assert.assertEquals(-1, moi1.getMapSize(null));
        Assert.assertEquals("map<" + PrimitiveObjectInspectorFactory.javaStringObjectInspector.getTypeName() + "," + PrimitiveObjectInspectorFactory.javaIntObjectInspector.getTypeName() + ">", moi1.getTypeName());
        // HashMap
        HashMap<String, Integer> map = new HashMap<>();
        map.put("one", 1);
        map.put("two", 2);
        map.put("three", 3);
        Assert.assertEquals(map, moi1.getMap(map));
        Assert.assertEquals(3, moi1.getMapSize(map));
        Assert.assertEquals(1, moi1.getMapValueElement(map, "one"));
        Assert.assertEquals(2, moi1.getMapValueElement(map, "two"));
        Assert.assertEquals(3, moi1.getMapValueElement(map, "three"));
        Assert.assertNull(moi1.getMapValueElement(map, null));
        Assert.assertNull(moi1.getMapValueElement(map, "null"));
        // Settable
        Object map3 = moi1.create();
        moi1.put(map3, "one", 1);
        moi1.put(map3, "two", 2);
        moi1.put(map3, "three", 3);
        Assert.assertEquals(map, map3);
        moi1.clear(map3);
        Assert.assertEquals(0, moi1.getMapSize(map3));
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : HashMap(java.util.HashMap) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) Test(org.junit.Test)

Aggregations

HashMap (java.util.HashMap)1 ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 StandardConstantListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector)1 StandardConstantMapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector)1 StandardConstantStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector)1 StandardListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector)1 StandardMapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector)1 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1 UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject)1 UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector)1 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)1 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)1 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)1 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)1 WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector)1 WritableBooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector)1 WritableByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector)1 WritableDateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector)1