Search in sources :

Example 86 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class Utilities method constructVectorizedReduceRowOI.

/**
   * Builds the single row object inspector used for reduce vectorization by
   * flattening the fields of the key struct followed by the fields of the
   * value struct.
   * Each column is named {@code <prefix>.<fieldName>}, where the prefix is the
   * string form of {@code Utilities.ReduceField.KEY} or
   * {@code Utilities.ReduceField.VALUE}.
   * The row object inspector used by ReduceWork needs to be a **standard**
   * struct object inspector, not just any struct object inspector.
   * @param keyInspector struct inspector whose fields become the leading columns
   * @param valueInspector struct inspector whose fields become the trailing columns
   * @return standard struct object inspector covering key columns then value columns
   * @throws HiveException
   */
public static StandardStructObjectInspector constructVectorizedReduceRowOI(StructObjectInspector keyInspector, StructObjectInspector valueInspector) throws HiveException {
    ArrayList<String> rowColumnNames = new ArrayList<String>();
    ArrayList<ObjectInspector> rowColumnInspectors = new ArrayList<ObjectInspector>();

    // Key columns come first.
    String keyPrefix = Utilities.ReduceField.KEY.toString() + ".";
    for (StructField keyField : keyInspector.getAllStructFieldRefs()) {
        rowColumnNames.add(keyPrefix + keyField.getFieldName());
        rowColumnInspectors.add(keyField.getFieldObjectInspector());
    }

    // Value columns follow, in the order reported by the value inspector.
    String valuePrefix = Utilities.ReduceField.VALUE.toString() + ".";
    for (StructField valueField : valueInspector.getAllStructFieldRefs()) {
        rowColumnNames.add(valuePrefix + valueField.getFieldName());
        rowColumnInspectors.add(valueField.getFieldObjectInspector());
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNames, rowColumnInspectors);
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList)

Example 87 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class SkewJoinHandler method initiliaze.

/**
 * Prepares the skew-join bookkeeping for every join alias.
 *
 * For each position in the tag order this builds a standard struct inspector
 * for the join key columns, instantiates and initializes the serializer for
 * the alias' skew-key table, and registers a union struct inspector (value
 * columns first, key columns second) for the position. If a serializer fails
 * with a SerDeException, the error is logged, skew-join handling is switched
 * off on the join operator, and the per-alias setup stops.
 *
 * Afterwards every non-null row container in the join operator's storage is
 * handed the serializer, inspector and table descriptor prepared above.
 *
 * @param hconf configuration stored on this handler and used to derive the task id
 */
public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    final JoinDesc joinDesc = joinOp.getConf();
    skewKeyDefinition = joinDesc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
    tblDesc = joinDesc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);
    final int[][] filterMap = joinDesc.getFilterMap();

    for (int pos = 0; pos < numAliases; pos++) {
        final Byte tag = conf.getTagOrder()[pos];

        // Collect the inspectors of the individual key columns from the KEY field of the input row.
        final StructObjectInspector inputRowOI = (StructObjectInspector) joinOp.inputObjInspectors[tag];
        final StructField keyStructField = inputRowOI.getStructFieldRef(Utilities.ReduceField.KEY.toString());
        final StructObjectInspector keyStructOI = (StructObjectInspector) keyStructField.getFieldObjectInspector();
        final List<ObjectInspector> keyColumnInspectors = new ArrayList<ObjectInspector>();
        for (StructField keyColumn : keyStructOI.getAllStructFieldRefs()) {
            keyColumnInspectors.add(keyColumn.getFieldObjectInspector());
        }

        final TableDesc keyTableDesc = joinDesc.getKeyTableDesc();
        final List<String> keyColumnNames = Utilities.getColumnNames(keyTableDesc.getProperties());
        final StructObjectInspector keyTableOI = ObjectInspectorFactory.getStandardStructObjectInspector(keyColumnNames, keyColumnInspectors);

        try {
            final AbstractSerDe serDe = (AbstractSerDe) ReflectionUtils.newInstance(tblDesc.get(tag).getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(serDe, null, tblDesc.get(tag).getProperties(), null);
            // Serializers are keyed by position, not by tag.
            tblSerializers.put((byte) pos, serDe);
        } catch (SerDeException e) {
            LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
            joinOp.handleSkewJoin = false;
            break;
        }

        final boolean filtered = filterMap != null && filterMap[pos] != null;
        final TableDesc valueTableDesc = JoinUtil.getSpillTableDesc(tag, joinOp.spillTableDesc, conf, !filtered);
        final List<String> valueColumnNames = valueTableDesc != null
                ? Utilities.getColumnNames(valueTableDesc.getProperties())
                : new ArrayList<String>();

        final StructObjectInspector valueTableOI = ObjectInspectorFactory.getStandardStructObjectInspector(valueColumnNames, joinOp.joinValuesStandardObjectInspectors[pos]);
        // Value columns precede key columns in the combined inspector.
        final StructObjectInspector combinedTableOI = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays.asList(valueTableOI, keyTableOI));
        skewKeysTableObjectInspector.put((byte) pos, combinedTableOI);
    }

    // reset rowcontainer's serde, objectinspector, and tableDesc.
    for (int pos = 0; pos < numAliases; pos++) {
        final Byte tag = conf.getTagOrder()[pos];
        final RowContainer<ArrayList<Object>> container = (RowContainer) joinOp.storage[pos];
        if (container == null) {
            continue;
        }
        container.setSerDe(tblSerializers.get((byte) pos), skewKeysTableObjectInspector.get((byte) pos));
        container.setTableDesc(tblDesc.get(tag));
    }
}
Also used : RowContainer(org.apache.hadoop.hive.ql.exec.persistence.RowContainer) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 88 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class MapOperator method setChildren.

/**
 * Wires the operators registered for each input path as children of this operator.
 *
 * For every path/alias pair in the work description, a per-operator context is
 * created (at most once per operator and path) and stored in {@code opCtxMap}
 * keyed by the path string. Each distinct operator gets this operator as its only
 * parent and is added to the child list, which is then passed to
 * {@code initOperatorContext} and {@code setChildOperators}.
 *
 * @param hconf configuration handed to {@code cloneConfsForNestedColPruning}
 * @throws Exception propagated from the helpers invoked here
 */
public void setChildren(Configuration hconf) throws Exception {
    List<Operator<? extends OperatorDesc>> childOps = new ArrayList<Operator<? extends OperatorDesc>>();
    Map<String, Configuration> confByTableName = cloneConfsForNestedColPruning(hconf);
    Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(confByTableName);

    for (Map.Entry<Path, ArrayList<String>> pathEntry : conf.getPathToAliases().entrySet()) {
        Path inputPath = pathEntry.getKey();
        List<String> pathAliases = pathEntry.getValue();
        PartitionDesc partitionDesc = conf.getPathToPartitionInfo().get(inputPath);
        TableDesc tableDesc = partitionDesc.getTableDesc();
        Configuration tableConf = confByTableName.get(tableDesc.getTableName());

        for (String alias : pathAliases) {
            Operator<? extends OperatorDesc> aliasOp = conf.getAliasToWork().get(alias);
            if (isLogDebugEnabled) {
                LOG.debug("Adding alias " + alias + " to work list for file " + inputPath);
            }

            // Lazily create the per-path context map on first use.
            Map<Operator<?>, MapOpCtx> pathContexts = opCtxMap.get(inputPath.toString());
            if (pathContexts == null) {
                pathContexts = new LinkedHashMap<Operator<?>, MapOpCtx>();
                opCtxMap.put(inputPath.toString(), pathContexts);
            }

            // An operator already registered for this path needs no further setup.
            if (pathContexts.containsKey(aliasOp)) {
                continue;
            }

            MapOpCtx opContext = new MapOpCtx(alias, aliasOp, partitionDesc);
            StructObjectInspector tableRowOI = convertedOI.get(partitionDesc.getTableDesc());
            pathContexts.put(aliasOp, initObjectInspector(tableConf, opContext, tableRowOI));

            if (!childOps.contains(aliasOp)) {
                aliasOp.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(1));
                aliasOp.getParentOperators().add(this);
                childOps.add(aliasOp);
            }
        }
    }

    initOperatorContext(childOps);
    // we found all the operators that we are supposed to process.
    setChildOperators(childOps);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) HashMap(java.util.HashMap) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 89 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class VectorExpressionWriterFactory method genVectorStructExpressionWritables.

/**
   * Compiles the appropriate vector expression writers based on a struct object
   * inspector.
   * One writer is produced per struct field, in the order the fields are
   * reported by {@code oi.getAllStructFieldRefs()}.
   * @param oi struct object inspector whose fields need writers
   * @return array holding one writer per field of {@code oi}
   * @throws HiveException
   */
public static VectorExpressionWriter[] genVectorStructExpressionWritables(StructObjectInspector oi) throws HiveException {
    // Fetch the field list once so the array size and the iteration below
    // are guaranteed to be derived from the same list.
    final List<? extends StructField> fields = oi.getAllStructFieldRefs();
    final VectorExpressionWriter[] writers = new VectorExpressionWriter[fields.size()];
    int i = 0;
    for (StructField field : fields) {
        writers[i++] = genVectorExpressionWritable(field.getFieldObjectInspector());
    }
    return writers;
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField)

Example 90 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class VectorExpressionWriterFactory method processVectorInspector.

/**
   * Creates the value writers for a struct object inspector.
   * Creates an appropriate output object inspector.
   * For every field, a standard writable object inspector is derived from the
   * field's type info and a writer is generated for it; the writers and a
   * standard struct inspector built from the field names and the writers'
   * own inspectors are then passed to the closure.
   * @param structObjInspector struct inspector describing the fields to write
   * @param closure receives the generated writers and the output inspector
   * @throws HiveException
   */
public static void processVectorInspector(StructObjectInspector structObjInspector, SingleOIDClosure closure) throws HiveException {
    List<? extends StructField> structFields = structObjInspector.getAllStructFieldRefs();
    VectorExpressionWriter[] fieldWriters = new VectorExpressionWriter[structFields.size()];
    List<ObjectInspector> writerInspectors = new ArrayList<ObjectInspector>(fieldWriters.length);
    ArrayList<String> fieldNames = new ArrayList<String>();

    int slot = 0;
    for (StructField structField : structFields) {
        // Normalize the field's inspector to the standard writable inspector for its type.
        ObjectInspector standardWritableOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
                TypeInfoUtils.getTypeInfoFromObjectInspector(structField.getFieldObjectInspector()));
        VectorExpressionWriter fieldWriter = VectorExpressionWriterFactory.genVectorExpressionWritable(standardWritableOI);
        fieldWriters[slot] = fieldWriter;
        slot++;
        fieldNames.add(structField.getFieldName());
        writerInspectors.add(fieldWriter.getObjectInspector());
    }

    ObjectInspector outputInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, writerInspectors);
    closure.assign(fieldWriters, outputInspector);
}
Also used : VoidObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector) SettableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector) SettableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector) SettableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalDayTimeObjectInspector) SettableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector) SettableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector) SettableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector) SettableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SettableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) SettableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector) SettableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector) SettableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector) SettableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) 
SettableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalYearMonthObjectInspector) SettableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector) SettableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector) SettableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList)

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)232 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)113 ArrayList (java.util.ArrayList)84 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)69 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)46 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)42 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)42 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)40 Test (org.junit.Test)38 Properties (java.util.Properties)35 Text (org.apache.hadoop.io.Text)32 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)30 Path (org.apache.hadoop.fs.Path)29 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)27 IOException (java.io.IOException)25 Configuration (org.apache.hadoop.conf.Configuration)25 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)24 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)24 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)23 InputSplit (org.apache.hadoop.mapred.InputSplit)23