
Example 31 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

From the class SparkDynamicPartitionPruner, method prunePartitionSingleSource.

private void prunePartitionSingleSource(SourceInfo info, MapWork work) throws HiveException {
    Set<Object> values = info.values;
    // strip the targetId prefix (everything up to ':') from the column name
    String columnName = info.columnName.substring(info.columnName.indexOf(':') + 1);
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(info.columnType));
    ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(Collections.singletonList(columnName), Collections.singletonList(oi));
    @SuppressWarnings("rawtypes") ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(info.partKey);
    eval.initialize(soi);
    applyFilterToPartitions(work, converter, eval, columnName, values);
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ObjectInspectorConverters (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters), ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)
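
A minimal standalone sketch of the same pattern (not taken from the Hive sources above; the partition column name "ds" and the type "string" are illustrative assumptions): build a writable primitive ObjectInspector for the column, a converter from plain Java strings to that writable form, and a single-field standard struct ObjectInspector of the kind an ExprNodeEvaluator would be initialized against.

import java.util.Collections;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SingleColumnStructOISketch {
    public static void main(String[] args) {
        // writable OI for the (assumed) partition column type "string"
        ObjectInspector colOI = PrimitiveObjectInspectorFactory
            .getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo("string"));
        // converter from plain Java strings to the column's writable representation
        ObjectInspectorConverters.Converter converter = ObjectInspectorConverters
            .getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, colOI);
        // one-field standard struct OI, mirroring the soi built in prunePartitionSingleSource
        StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
            Collections.singletonList("ds"), Collections.singletonList(colOI));
        System.out.println(soi.getTypeName());               // struct<ds:string>
        System.out.println(converter.convert("2024-01-01")); // writable Text value
    }
}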

Example 32 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

From the class ExecReducer, method configure.

@Override
public void configure(JobConf job) {
    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;
    if (LOG.isInfoEnabled()) {
        try {
            LOG.info("conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
            LOG.info("thread classpath = " + Arrays.asList(((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
        } catch (Exception e) {
            LOG.info("cannot get classpath: " + e.getMessage());
        }
    }
    jc = job;
    ReduceWork gWork = Utilities.getReduceWork(job);
    reducer = gWork.getReducer();
    // clear out any parents as reducer is the root
    reducer.setParentOperators(null);
    isTagged = gWork.getNeedsTagging();
    try {
        keyTableDesc = gWork.getKeyDesc();
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
        for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
            // We should initialize the SerDe with the TypeInfo when available.
            valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
            inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
            valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
            ois.add(keyObjectInspector);
            ois.add(valueObjectInspector[tag]);
            rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    MapredContext.init(false, new JobConf(jc));
    // initialize reduce operator tree
    try {
        LOG.info(reducer.dump(0));
        reducer.initialize(jc, rowObjectInspector);
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), ArrayList (java.util.ArrayList), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork), JobConf (org.apache.hadoop.mapred.JobConf), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
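
As a rough illustration of the per-tag row inspector shape built in the loop above (a sketch only; the nested column names and primitive types are invented placeholders, and the literal "KEY"/"VALUE" names stand in for Utilities.reduceFieldNameList):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowOISketch {
    public static void main(String[] args) {
        // stand-in for the key deserializer's ObjectInspector
        ObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("reducesinkkey0"),
            Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableIntObjectInspector));
        // stand-in for one tag's value deserializer ObjectInspector
        ObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("_col0", "_col1"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableLongObjectInspector));
        // same structure as rowObjectInspector[tag] in ExecReducer.configure
        StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("KEY", "VALUE"), Arrays.asList(keyOI, valueOI));
        // struct<KEY:struct<reducesinkkey0:int>,VALUE:struct<_col0:string,_col1:bigint>>
        System.out.println(rowOI.getTypeName());
    }
}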

Example 33 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

From the class Utilities, method constructVectorizedReduceRowOI.

/**
 * Create row key and value object inspectors for reduce vectorization.
 * The row object inspector used by ReduceWork needs to be a standard
 * struct object inspector, not just any struct object inspector.
 * @param keyInspector
 * @param valueInspector
 * @return OI
 * @throws HiveException
 */
public static StandardStructObjectInspector constructVectorizedReduceRowOI(StructObjectInspector keyInspector, StructObjectInspector valueInspector) throws HiveException {
    ArrayList<String> colNames = new ArrayList<String>();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    List<? extends StructField> fields = keyInspector.getAllStructFieldRefs();
    for (StructField field : fields) {
        colNames.add(Utilities.ReduceField.KEY.toString() + '.' + field.getFieldName());
        ois.add(field.getFieldObjectInspector());
    }
    fields = valueInspector.getAllStructFieldRefs();
    for (StructField field : fields) {
        colNames.add(Utilities.ReduceField.VALUE.toString() + '.' + field.getFieldName());
        ois.add(field.getFieldObjectInspector());
    }
    StandardStructObjectInspector rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
    return rowObjectInspector;
}
Also used: StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), ArrayList (java.util.ArrayList)
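
A small usage sketch of the method above (the key and value column names and types are illustrative assumptions): feed it two standard struct inspectors and inspect the flattened KEY.* / VALUE.* field names it produces.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class VectorizedReduceRowOISketch {
    public static void main(String[] args) throws HiveException {
        // key and value inspectors as a reduce sink might describe them (illustrative columns)
        StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("reducesinkkey0"),
            Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableIntObjectInspector));
        StructObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("_col0"),
            Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector));
        StandardStructObjectInspector rowOI =
            Utilities.constructVectorizedReduceRowOI(keyOI, valueOI);
        // field names come out flattened, e.g. KEY.reducesinkkey0 and VALUE._col0
        System.out.println(rowOI.getTypeName());
    }
}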

Example 34 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

From the class GroupByOperator, method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    numRowsInput = 0;
    numRowsHashTbl = 0;
    heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
    countAfterReport = 0;
    ObjectInspector rowInspector = inputObjInspectors[0];
    // init keyFields
    int numKeys = conf.getKeys().size();
    keyFields = new ExprNodeEvaluator[numKeys];
    keyObjectInspectors = new ObjectInspector[numKeys];
    currentKeyObjectInspectors = new ObjectInspector[numKeys];
    for (int i = 0; i < numKeys; i++) {
        keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
        keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
        currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
    }
    // Initialize the constants for the grouping sets, so that they can be re-used for
    // each row
    groupingSetsPresent = conf.isGroupingSetsPresent();
    if (groupingSetsPresent) {
        groupingSets = conf.getListGroupingSets();
        groupingSetsPosition = conf.getGroupingSetPosition();
        newKeysGroupingSets = new LongWritable[groupingSets.size()];
        groupingSetsBitSet = new FastBitSet[groupingSets.size()];
        int pos = 0;
        for (Long groupingSet : groupingSets) {
            // Create the mapping corresponding to the grouping set
            newKeysGroupingSets[pos] = new LongWritable(groupingSet);
            groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
            pos++;
        }
    }
    // initialize unionExpr for reduce-side
    // reduce KEY has union field as the last field if there are distinct
    // aggregates in group-by.
    List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
    if (sfs.size() > 0) {
        StructField keyField = sfs.get(0);
        if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
            ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
            if (keyObjInspector instanceof StructObjectInspector) {
                List<? extends StructField> keysfs = ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
                if (keysfs.size() > 0) {
                    // the last field is the union field, if any
                    StructField sf = keysfs.get(keysfs.size() - 1);
                    if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
                        unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
                        unionExprEval.initialize(rowInspector);
                    }
                }
            }
        }
    }
    // init aggregationParameterFields
    ArrayList<AggregationDesc> aggrs = conf.getAggregators();
    aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
    aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
    aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
    aggregationParameterObjects = new Object[aggrs.size()][];
    aggregationIsDistinct = new boolean[aggrs.size()];
    for (int i = 0; i < aggrs.size(); i++) {
        AggregationDesc aggr = aggrs.get(i);
        ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
        aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
        aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
        aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
        aggregationParameterObjects[i] = new Object[parameters.size()];
        for (int j = 0; j < parameters.size(); j++) {
            aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
            aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector);
            if (unionExprEval != null) {
                String[] names = parameters.get(j).getExprString().split("\\.");
                // parameters of the form : KEY.colx:t.coly
                if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
                    String name = names[names.length - 2];
                    int tag = Integer.parseInt(name.split("\\:")[1]);
                    if (aggr.getDistinct()) {
                        // is distinct
                        Set<Integer> set = distinctKeyAggrs.get(tag);
                        if (null == set) {
                            set = new HashSet<Integer>();
                            distinctKeyAggrs.put(tag, set);
                        }
                        if (!set.contains(i)) {
                            set.add(i);
                        }
                    } else {
                        Set<Integer> set = nonDistinctKeyAggrs.get(tag);
                        if (null == set) {
                            set = new HashSet<Integer>();
                            nonDistinctKeyAggrs.put(tag, set);
                        }
                        if (!set.contains(i)) {
                            set.add(i);
                        }
                    }
                } else {
                    // will be KEY._COLx or VALUE._COLx
                    if (!nonDistinctAggrs.contains(i)) {
                        nonDistinctAggrs.add(i);
                    }
                }
            } else {
                if (aggr.getDistinct()) {
                    aggregationIsDistinct[i] = true;
                }
            }
            aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils.getStandardObjectInspector(aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
            aggregationParameterObjects[i][j] = null;
        }
        if (parameters.size() == 0) {
            // for ex: count(*)
            if (!nonDistinctAggrs.contains(i)) {
                nonDistinctAggrs.add(i);
            }
        }
    }
    // init aggregationClasses
    aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
    for (int i = 0; i < aggregationEvaluators.length; i++) {
        AggregationDesc agg = conf.getAggregators().get(i);
        aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
    }
    MapredContext context = MapredContext.get();
    if (context != null) {
        for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
            context.setup(genericUDAFEvaluator);
        }
    }
    // grouping id should be pruned, which is the last of key columns
    // see ColumnPrunerGroupByProc
    outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
    // init objectInspectors
    ObjectInspector[] objectInspectors = new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
    for (int i = 0; i < outputKeyLength; i++) {
        objectInspectors[i] = currentKeyObjectInspectors[i];
    }
    for (int i = 0; i < aggregationEvaluators.length; i++) {
        objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
    }
    aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
    if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
        aggregations = newAggregations();
        hashAggr = false;
    } else {
        hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
        aggregations = newAggregations();
        hashAggr = true;
        keyPositionsSize = new ArrayList<Integer>();
        aggrPositions = new List[aggregations.length];
        groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
        // compare every groupbyMapAggrInterval rows
        numRowsCompareHashAggr = groupbyMapAggrInterval;
        minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    }
    List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
    KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
    newKeys = keyWrapperFactory.getKeyWrapper();
    isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
    isLlap = LlapDaemonInfo.INSTANCE.isLlap();
    numExecutors = isLlap ? LlapDaemonInfo.INSTANCE.getNumExecutors() : 1;
    firstRow = true;
    // the exact size of a hash table entry is not known up front, so estimate the maximum number of entries from the available memory
    if (hashAggr) {
        computeMaxEntriesHashAggr();
    }
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
    memoryThreshold = this.getConf().getMemoryThreshold();
    LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}", isTez, isLlap, numExecutors, maxMemory);
}
Also used: GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator), ArrayList (java.util.ArrayList), LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), LongWritable (org.apache.hadoop.io.LongWritable), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), LazyStringObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector), LazyBinaryObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector), AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc)
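
The last lines of initializeOp combine the key inspectors with each aggregation evaluator's output inspector into outputObjInspector. A minimal sketch of that final shape (the column names "_col0"/"_col1" and the string/bigint types are illustrative assumptions for a single-key GROUP BY with one count aggregate):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class GroupByOutputOISketch {
    public static void main(String[] args) {
        // one OI per group-by key, followed by one OI per aggregation result
        ObjectInspector[] objectInspectors = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.writableStringObjectInspector, // group-by key
            PrimitiveObjectInspectorFactory.writableLongObjectInspector    // count(*) result
        };
        StructObjectInspector outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("_col0", "_col1"), Arrays.asList(objectInspectors));
        System.out.println(outputOI.getTypeName()); // struct<_col0:string,_col1:bigint>
    }
}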

Example 35 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

From the class JoinUtil, method unflattenObjInspector.

/**
 * Checks the input object inspector to see if it is in for form of a flattened struct
 * like the ones generated by a vectorized reduce sink input:
 *   { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
 * If so, then it creates an "unflattened" struct that contains nested key/value
 * structs:
 *   { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
 *
 * @param oi
 * @return unflattened object inspector if unflattening is needed,
 *         otherwise the original object inspector
 */
private static ObjectInspector unflattenObjInspector(ObjectInspector oi) {
    if (oi instanceof StructObjectInspector) {
        // Check if all fields start with "key." or "value."
        // If so, then unflatten by adding an additional level of nested key and value structs
        // Example: { "key.reducesinkkey0":int, "key.reducesinkkey1": int, "value._col6":int }
        // Becomes
        // { "key": { "reducesinkkey0":int, "reducesinkkey1":int }, "value": { "_col6":int } }
        ArrayList<StructField> keyFields = new ArrayList<StructField>();
        ArrayList<StructField> valueFields = new ArrayList<StructField>();
        for (StructField field : ((StructObjectInspector) oi).getAllStructFieldRefs()) {
            String fieldNameLower = field.getFieldName().toLowerCase();
            if (fieldNameLower.startsWith(KEY_FIELD_PREFIX)) {
                keyFields.add(field);
            } else if (fieldNameLower.startsWith(VALUE_FIELD_PREFIX)) {
                valueFields.add(field);
            } else {
                // Not a flattened struct, no need to unflatten
                return oi;
            }
        }
        // All field names are of the form "key." or "value."
        // Create key/value structs and add the respective fields to each one
        ArrayList<ObjectInspector> reduceFieldOIs = new ArrayList<ObjectInspector>();
        reduceFieldOIs.add(createStructFromFields(keyFields, Utilities.ReduceField.KEY.toString()));
        reduceFieldOIs.add(createStructFromFields(valueFields, Utilities.ReduceField.VALUE.toString()));
        // Finally create the outer struct to contain the key, value structs
        return ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, reduceFieldOIs);
    }
    return oi;
}
Also used: PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), ArrayList (java.util.ArrayList)
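
Since unflattenObjInspector and createStructFromFields are private helpers, here is a hand-built sketch (field names and types are illustrative assumptions) contrasting the flattened input shape the method detects with the nested key/value struct it returns:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class UnflattenSketch {
    public static void main(String[] args) {
        // the flattened form produced by a vectorized reduce sink input
        StructObjectInspector flat = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("key.reducesinkkey0", "value._col0"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.writableIntObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector));
        // the unflattened form: nested KEY and VALUE structs under one outer struct
        ObjectInspector keyStruct = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("reducesinkkey0"),
            Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableIntObjectInspector));
        ObjectInspector valueStruct = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("_col0"),
            Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector));
        StructObjectInspector nested = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("KEY", "VALUE"), Arrays.asList(keyStruct, valueStruct));
        System.out.println(flat.getTypeName());   // struct<key.reducesinkkey0:int,value._col0:string>
        System.out.println(nested.getTypeName()); // struct<KEY:struct<...>,VALUE:struct<...>>
    }
}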

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 78 usages
ArrayList (java.util.ArrayList): 73 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 56 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 20 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 16 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 16 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 12 usages
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 11 usages
Text (org.apache.hadoop.io.Text): 11 usages
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 10 usages
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 10 usages
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 10 usages
Test (org.junit.Test): 10 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 usages
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 9 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 8 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 8 usages
IOException (java.io.IOException): 7 usages
StandardListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector): 7 usages
WritableHiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector): 7 usages