Search in sources :

Example 1 with ObjectEstimator

use of org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator in project hive by apache.

the class TestIncrementalObjectSizeEstimator method testMetadata.

/**
 * Smoke test for the incremental size estimator over ORC metadata objects.
 *
 * <p>Builds estimators from a dummy {@code OrcStripeMetadata}, then estimates a series of
 * stripe-metadata instances (empty / populated, with and without the reader's stream step,
 * with and without the row index), and finally repeats the exercise for a dummy
 * {@code OrcFileMetadata}. The results are only logged; the test passes as long as nothing throws.
 *
 * @throws IOException if constructing the stripe metadata fails
 */
@Test
public void testMetadata() throws IOException {
    // Mostly tests that it doesn't crash.
    OrcStripeMetadata osm = OrcStripeMetadata.createDummy(0);
    HashMap<Class<?>, ObjectEstimator> map = IncrementalObjectSizeEstimator.createEstimators(osm);
    IncrementalObjectSizeEstimator.addEstimator("com.google.protobuf.LiteralByteString", map);
    ObjectEstimator root = map.get(OrcStripeMetadata.class);
    LOG.info("Estimated " + root.estimate(osm, map) + " for a dummy OSM");

    // Case 1: empty metadata, no stream step.
    OrcBatchKey stripeKey = null;
    DummyMetadataReader mr = new DummyMetadataReader();
    mr.doStreamStep = false;
    mr.isEmpty = true;
    StripeInformation si = Mockito.mock(StripeInformation.class);
    Mockito.when(si.getNumberOfRows()).thenReturn(0L);
    osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null);
    LOG.info("Estimated " + root.estimate(osm, map) + " for an empty OSM");

    // Case 2: empty metadata with the stream step enabled
    // (presumably a serialize/deserialize round trip in the dummy reader - confirm).
    mr.doStreamStep = true;
    osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null);
    LOG.info("Estimated " + root.estimate(osm, map) + " for an empty OSM after serde");

    // Case 3: populated metadata, no stream step. The flag must be reset here; otherwise it is
    // still true from case 2 and this case would be indistinguishable from case 4 below.
    mr.isEmpty = false;
    mr.doStreamStep = false;
    stripeKey = new OrcBatchKey(0, 0, 0);
    osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null);
    LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM");
    osm.resetRowIndex();
    LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM w/o row index");

    // Case 4: populated metadata with the stream step enabled.
    mr.doStreamStep = true;
    osm = new OrcStripeMetadata(stripeKey, mr, si, null, null, null, null);
    LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM after serde");
    osm.resetRowIndex();
    LOG.info("Estimated " + root.estimate(osm, map) + " for a test OSM w/o row index after serde");

    // Same smoke check for file-level metadata, using a freshly built estimator map.
    OrcFileMetadata ofm = OrcFileMetadata.createDummy(0);
    map = IncrementalObjectSizeEstimator.createEstimators(ofm);
    IncrementalObjectSizeEstimator.addEstimator("com.google.protobuf.LiteralByteString", map);
    root = map.get(OrcFileMetadata.class);
    LOG.info("Estimated " + root.estimate(ofm, map) + " for a dummy OFM");
}
Also used : OrcFileMetadata(org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata) ObjectEstimator(org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator) OrcBatchKey(org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey) StripeInformation(org.apache.orc.StripeInformation) OrcStripeMetadata(org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata) Test(org.junit.Test)

Example 2 with ObjectEstimator

use of org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator in project hive by apache.

the class IncrementalObjectSizeEstimator method createEstimators.

/**
 * Builds size estimators for {@code rootObj} and for every type reachable through its fields,
 * registering them in {@code byType}.
 *
 * <p>Work items are taken from a stack seeded by {@code createWorkStack}. Each item is either a
 * concrete instance or a bare {@link Class} (used when no instance is available). For every
 * not-yet-processed estimator the fields returned by {@code getAllFields} are classified as
 * primitive, enum, primitive array, object array, collection, map, or other; non-trivial field
 * types are queued for processing in turn.
 *
 * @param rootObj the object the traversal starts from
 * @param byType  estimator registry keyed by class; read and extended by this method
 */
public static void createEstimators(Object rootObj, HashMap<Class<?>, ObjectEstimator> byType) {
    // Code initially inspired by Google ObjectExplorer.
    // TODO: roll in the direct-only estimators from fields. Various other optimizations possible.
    final Deque<Object> pending = createWorkStack(rootObj, byType);
    while (!pending.isEmpty()) {
        final Object entry = pending.pop();
        // A work item is either a Class (no instance to inspect) or an actual instance.
        final boolean classOnly = entry instanceof Class<?>;
        final Class<?> type = classOnly ? (Class<?>) entry : entry.getClass();
        final Object instance = classOnly ? null : entry;

        final ObjectEstimator estimator = byType.get(type);
        assert estimator != null;
        if (!estimator.isFromClass && instance == null) {
            // The object was added later for the same class; see addToProcessing.
            continue;
        }
        if (estimator.isProcessed()) {
            continue;
        }
        estimator.init();

        for (Field field : getAllFields(type, LlapCacheableBuffer.class)) {
            Class<?> fieldType = field.getType();
            // Static fields and fields holding Class objects do not contribute to the estimate.
            if (Modifier.isStatic(field.getModifiers()) || Class.class.isAssignableFrom(fieldType)) {
                continue;
            }
            if (fieldType.isPrimitive()) {
                estimator.addPrimitive(fieldType);
                continue;
            }
            if (Enum.class.isAssignableFrom(fieldType)) {
                estimator.addEnum();
                continue;
            }
            // Array-ness is decided from the declared type, before the runtime class is resolved.
            final boolean declaredAsArray = fieldType.isArray();
            if (declaredAsArray && fieldType.getComponentType().isPrimitive()) {
                estimator.addField(FieldType.PRIMITIVE_ARRAY, field);
                continue;
            }

            // With an instance at hand, look at the actual field value and its runtime class.
            Object fieldValue = null;
            if (instance != null) {
                fieldValue = extractFieldObj(instance, field);
                fieldType = determineRealClass(byType, pending, field, fieldType, fieldValue);
            }

            if (declaredAsArray) {
                estimator.addField(FieldType.OBJECT_ARRAY, field);
                addArrayEstimator(byType, pending, field, fieldValue);
            } else if (Collection.class.isAssignableFrom(fieldType)) {
                estimator.addField(FieldType.COLLECTION, field);
                addCollectionEstimator(byType, pending, field, fieldType, fieldValue);
            } else if (Map.class.isAssignableFrom(fieldType)) {
                estimator.addField(FieldType.MAP, field);
                addMapEstimator(byType, pending, field, fieldType, fieldValue);
            } else {
                estimator.addField(FieldType.OTHER, field);
                addToProcessing(byType, pending, fieldValue, fieldType);
            }
        }

        // Pass the accumulated direct size through alignUp with the memory model's alignment.
        estimator.directSize = (int) JavaDataModel.alignUp(estimator.directSize, memoryModel.memoryAlign());
    }
}
Also used : Field(java.lang.reflect.Field) ObjectEstimator(org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator) Collection(java.util.Collection) AccessibleObject(java.lang.reflect.AccessibleObject)

Example 3 with ObjectEstimator

use of org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator in project hive by apache.

the class IncrementalObjectSizeEstimator method addToProcessing.

/**
 * Registers a new estimator for an element's class and queues the element (or, when there is no
 * instance, the class itself) on the work stack.
 *
 * <p>Nothing is done when an estimator is already registered for {@code elementClass} and either
 * that estimator was not created from a bare class or no instance is available now. An interface
 * type is replaced by the instance's concrete class; an interface with no instance is skipped.
 *
 * @param byType       estimator registry keyed by class
 * @param stack        work stack to push the new item onto
 * @param element      the instance to process, or {@code null} if only the class is known
 * @param elementClass the declared (or already resolved) class of the element
 */
private static void addToProcessing(HashMap<Class<?>, ObjectEstimator> byType, Deque<Object> stack, Object element, Class<?> elementClass) {
    final boolean hasInstance = element != null;

    final ObjectEstimator known = byType.get(elementClass);
    if (known != null) {
        // Only a class-derived estimator can be replaced, and only when an instance is at hand.
        if (!known.isFromClass || !hasInstance) {
            return;
        }
    }

    Class<?> targetClass = elementClass;
    if (targetClass.isInterface()) {
        if (!hasInstance) {
            return;
        }
        targetClass = element.getClass();
    }

    byType.put(targetClass, new ObjectEstimator(!hasInstance));
    if (hasInstance) {
        stack.push(element);
    } else {
        stack.push(targetClass);
    }
}
Also used : ObjectEstimator(org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator)

Example 4 with ObjectEstimator

use of org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator in project hive by apache.

the class IncrementalObjectSizeEstimator method addHardcodedEstimators.

/**
 * Pre-populates {@code byType} with fixed estimators for a few well-known types so they are not
 * estimated field by field.
 *
 * @param byType estimator registry keyed by class; entries for {@code ArrayList},
 *               {@code LinkedList}, {@code HashSet}, {@code HashMap} and {@code UnknownFieldSet}
 *               are added or overwritten
 */
private static void addHardcodedEstimators(HashMap<Class<?>, ObjectEstimator> byType) {
    // Add hacks for well-known collections and maps to avoid estimating them.
    // Each CollectionEstimator is built from two memory-model values for that container type.
    CollectionEstimator arrayListEstimator =
        new CollectionEstimator(memoryModel.arrayList(), memoryModel.ref());
    byType.put(ArrayList.class, arrayListEstimator);

    CollectionEstimator linkedListEstimator =
        new CollectionEstimator(memoryModel.linkedListBase(), memoryModel.linkedListEntry());
    byType.put(LinkedList.class, linkedListEstimator);

    CollectionEstimator hashSetEstimator =
        new CollectionEstimator(memoryModel.hashSetBase(), memoryModel.hashSetEntry());
    byType.put(HashSet.class, hashSetEstimator);

    CollectionEstimator hashMapEstimator =
        new CollectionEstimator(memoryModel.hashMapBase(), memoryModel.hashMapEntry());
    byType.put(HashMap.class, hashMapEstimator);

    // Add a hack for UnknownFieldSet because we assume it will never have anything (TODO: clear?)
    ObjectEstimator unknownFieldSetEstimator = new ObjectEstimator(false);
    unknownFieldSetEstimator.directSize = memoryModel.object() * 2 + memoryModel.ref();
    byType.put(UnknownFieldSet.class, unknownFieldSetEstimator);
    // TODO: 1-field hack for UnmodifiableCollection for protobuf too
}
Also used : ObjectEstimator(org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator)

Aggregations

ObjectEstimator (org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator)4 AccessibleObject (java.lang.reflect.AccessibleObject)1 Field (java.lang.reflect.Field)1 Collection (java.util.Collection)1 OrcFileMetadata (org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata)1 OrcStripeMetadata (org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata)1 OrcBatchKey (org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey)1 StripeInformation (org.apache.orc.StripeInformation)1 Test (org.junit.Test)1