Search in sources :

Example 1 with ObjectInspectorFactory

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory in project hive by apache.

The method initializeOp of the class UnionOperator.

/**
 * Prepares the union operator's output schema and decides, per parent, whether
 * incoming rows have to be transformed.
 *
 * <p>The number of output columns is the smallest field count found among the
 * parents, the column names are read from the first parent, and every output
 * column's ObjectInspector is resolved by feeding each parent's field
 * ObjectInspector to a per-column resolver. A parent whose input
 * ObjectInspector is the very same instance as the output ObjectInspector
 * needs no transformation; the outcome of that check is stored in
 * needsTransform[].
 *
 * @param hconf configuration passed on to the superclass initialization
 * @throws HiveException if a column resolver rejects the field type of one of
 *         the parents
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    final int parentCount = parentOperators.size();
    parentObjInspectors = new StructObjectInspector[parentCount];
    parentFields = new List[parentCount];

    // Collect every parent's fields; the parent with the fewest fields
    // determines the number of output columns.
    int columnCount = 0;
    for (int parentIdx = 0; parentIdx < parentCount; parentIdx++) {
        StructObjectInspector parentOI = (StructObjectInspector) inputObjInspectors[parentIdx];
        parentObjInspectors[parentIdx] = parentOI;
        parentFields[parentIdx] = parentOI.getAllStructFieldRefs();
        int fieldCount = parentFields[parentIdx].size();
        columnCount = (parentIdx == 0) ? fieldCount : Math.min(columnCount, fieldCount);
    }

    // Output column names are taken from the first parent.
    List<String> columnNames = new ArrayList<String>(columnCount);
    for (int col = 0; col < columnCount; col++) {
        columnNames.add(parentFields[0].get(col).getFieldName());
    }

    // One type resolver per output column.
    columnTypeResolvers = new ReturnObjectInspectorResolver[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnTypeResolvers[col] = new ReturnObjectInspectorResolver(true);
    }

    // Feed each parent's field ObjectInspectors into the matching resolver.
    for (int parentIdx = 0; parentIdx < parentCount; parentIdx++) {
        // When columnCount is 0, the union operator is empty.
        assert (columnCount == 0 || parentFields[parentIdx].size() == columnCount);
        for (int col = 0; col < columnCount; col++) {
            boolean compatible = columnTypeResolvers[col].updateForUnionAll(
                    parentFields[parentIdx].get(col).getFieldObjectInspector());
            if (!compatible) {
                // checked in SemanticAnalyzer. Should not happen
                throw new HiveException("Incompatible types for union operator");
            }
        }
    }

    // Ask each resolver for the output ObjectInspector of its column.
    List<ObjectInspector> outputFieldOIs = new ArrayList<ObjectInspector>(columnCount);
    for (int col = 0; col < columnCount; col++) {
        // can be null for void type
        ObjectInspector firstParentFieldOI = parentFields[0].get(col).getFieldObjectInspector();
        outputFieldOIs.add(columnTypeResolvers[col].get(firstParentFieldOI));
    }

    // Build the output row ObjectInspector and a reusable row holding one
    // null slot per column.
    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, outputFieldOIs);
    outputRow = new ArrayList<Object>(columnCount);
    for (int col = 0; col < columnCount; col++) {
        outputRow.add(null);
    }

    // Record, for each parent, whether its rows must be transformed.
    needsTransform = new boolean[parentCount];
    for (int parentIdx = 0; parentIdx < parentCount; parentIdx++) {
        // A reference comparison is good enough here, because the
        // ObjectInspectors are created through ObjectInspectorFactory.
        boolean differsFromOutput = inputObjInspectors[parentIdx] != outputObjInspector;
        needsTransform[parentIdx] = differsFromOutput;
        if (differsFromOutput) {
            LOG.info("Union Operator needs to transform row from parent[" + parentIdx + "] from " + inputObjInspectors[parentIdx] + " to " + outputObjInspector);
        }
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ReturnObjectInspectorResolver(org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver) ArrayList(java.util.ArrayList)

Aggregations

ArrayList (java.util.ArrayList)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 ReturnObjectInspectorResolver (org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1