Example 41 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class ReduceRecordSource method init.

void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
    this.vectorizedVertexNum = vectorizedVertexNum;
    ObjectInspector keyObjectInspector;
    this.reducer = reducer;
    this.vectorized = vectorized;
    this.keyTableDesc = keyTableDesc;
    if (reader instanceof KeyValueReader) {
        this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
    } else {
        this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
    }
    this.handleGroupKey = handleGroupKey;
    this.tag = tag;
    try {
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        if (vectorized) {
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        }
        // We should initialize the SerDe with the TypeInfo when available.
        this.valueTableDesc = valueTableDesc;
        inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
        valueObjectInspector = inputValueDeserializer.getObjectInspector();
        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        if (vectorized) {
            /* vectorization only works with struct object inspectors */
            valueStructInspectors = (StructObjectInspector) valueObjectInspector;
            final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
            valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
            valueStringWriters.addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
            valueStringWriters.addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));
            rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
            batch = batchContext.createVectorizedRowBatch();
            // Setup vectorized deserialization for the key and value.
            BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
            keyBinarySortableDeserializeToRow =
                new VectorDeserializeRow<BinarySortableDeserializeRead>(
                    new BinarySortableDeserializeRead(
                        VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                        /* useExternalBuffer */ true,
                        binarySortableSerDe.getSortOrders()));
            keyBinarySortableDeserializeToRow.init(0);
            final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
            if (valuesSize > 0) {
                valueLazyBinaryDeserializeToRow =
                    new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                        new LazyBinaryDeserializeRead(
                            VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                            /* useExternalBuffer */ true));
                valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                // Create data buffers for value bytes column vectors.
                for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                    ColumnVector colVector = batch.cols[i];
                    if (colVector instanceof BytesColumnVector) {
                        BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                        bytesColumnVector.initBuffer();
                    }
                }
            }
        } else {
            ois.add(keyObjectInspector);
            ois.add(valueObjectInspector);
            rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Also used : ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader) ArrayList (java.util.ArrayList) BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) VectorExpressionWriter (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) KeyValuesReader (org.apache.tez.runtime.library.api.KeyValuesReader) LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
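The non-vectorized branch at the end shows the core pattern: collect the key and value inspectors into a list and wrap them in a standard struct. Below is a minimal, self-contained sketch of that pattern outside ReduceRecordSource; the field names and row values are illustrative, not taken from the Hive source.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StandardStructOIDemo {
    public static void main(String[] args) {
        // Two fields, mirroring the key/value pairing in the reducer example.
        List<String> names = Arrays.asList("key", "value");
        List<ObjectInspector> ois = new ArrayList<>();
        ois.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        ois.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        StructObjectInspector soi =
            ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

        // A standard struct is backed by a List (or Object[]) of field values.
        List<Object> row = Arrays.asList("k1", 7);
        for (StructField f : soi.getAllStructFieldRefs()) {
            System.out.println(f.getFieldName() + " = " + soi.getStructFieldData(row, f));
        }
    }
}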

Example 42 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class MapOperator method initObjectInspector.

private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
    PartitionDesc pd = opCtx.partDesc;
    TableDesc td = pd.getTableDesc();
    // Use table properties in case of unpartitioned tables,
    // and the union of table properties and partition properties, with partition
    // taking precedence, in the case of partitioned tables
    Properties overlayedProps = SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());
    Map<String, String> partSpec = pd.getPartSpec();
    opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
    opCtx.partName = String.valueOf(partSpec);
    opCtx.deserializer = pd.getDeserializer(hconf);
    StructObjectInspector partRawRowObjectInspector;
    boolean isAcid = AcidUtils.isTablePropertyTransactional(td.getProperties());
    if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
        partRawRowObjectInspector = tableRowOI;
    } else {
        partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
    }
    opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);
    // Next check if this table has partitions and if so
    // get the list of partition names as well as allocate
    // the serdes for the partition columns
    String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    if (pcols != null && pcols.length() > 0) {
        String[] partKeys = pcols.trim().split("/");
        String pcolTypes = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
        String[] partKeyTypes = pcolTypes.trim().split(":");
        if (partKeys.length > partKeyTypes.length) {
            throw new HiveException("Internal error : partKeys length, " + partKeys.length + " greater than partKeyTypes length, " + partKeyTypes.length);
        }
        List<String> partNames = new ArrayList<String>(partKeys.length);
        Object[] partValues = new Object[partKeys.length];
        List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);
        for (int i = 0; i < partKeys.length; i++) {
            String key = partKeys[i];
            partNames.add(key);
            ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
            // Partitions do not exist for this table
            if (partSpec == null) {
                // for partitionless table, initialize partValue to null
                partValues[i] = null;
            } else {
                partValues[i] = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partSpec.get(key));
            }
            partObjectInspectors.add(oi);
        }
        opCtx.rowWithPart = new Object[] { null, partValues };
        opCtx.partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
    }
    // The op may not be a TableScan (e.g. for map joins it can be a Select);
    // in that case the rowOI need not be amended
    if (opCtx.op instanceof TableScanOperator) {
        TableScanOperator tsOp = (TableScanOperator) opCtx.op;
        TableScanDesc tsDesc = tsOp.getConf();
        if (tsDesc != null && tsDesc.hasVirtualCols()) {
            opCtx.vcs = tsDesc.getVirtualCols();
            opCtx.vcValues = new Object[opCtx.vcs.size()];
            opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
            if (opCtx.isPartitioned()) {
                opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
            } else {
                opCtx.rowWithPartAndVC = new Object[2];
            }
        }
    }
    if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
        opCtx.rowObjectInspector = tableRowOI;
        return opCtx;
    }
    List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
    inspectors.add(tableRowOI);
    if (opCtx.isPartitioned()) {
        inspectors.add(opCtx.partObjectInspector);
    }
    if (opCtx.hasVC()) {
        inspectors.add(opCtx.vcsObjectInspector);
    }
    opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
    return opCtx;
}
Also used : ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList (java.util.ArrayList) TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc) Properties (java.util.Properties) PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)
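The partition-handling branch is the interesting part: one inspector per partition column, plus a parallel array of converted partition values. Here is a reduced sketch of that idea outside MapOperator; the two-column partition spec (ds/hr) and its types are assumptions for illustration.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionStructOISketch {
    public static void main(String[] args) {
        // Hypothetical partition spec: ds=2024-06-01/hr=12, column types string:int.
        Map<String, String> partSpec = new LinkedHashMap<>();
        partSpec.put("ds", "2024-06-01");
        partSpec.put("hr", "12");
        String[] partKeyTypes = { "string", "int" };

        List<String> partNames = new ArrayList<>();
        List<ObjectInspector> partOIs = new ArrayList<>();
        Object[] partValues = new Object[partSpec.size()];
        int i = 0;
        for (Map.Entry<String, String> e : partSpec.entrySet()) {
            partNames.add(e.getKey());
            ObjectInspector oi = PrimitiveObjectInspectorFactory
                .getPrimitiveWritableObjectInspector(
                    TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
            // Convert the string value from the spec into the column's writable type.
            partValues[i] = ObjectInspectorConverters.getConverter(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
                .convert(e.getValue());
            partOIs.add(oi);
            i++;
        }
        StructObjectInspector partOI =
            ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partOIs);
        System.out.println(partOI.getTypeName()); // struct<ds:string,hr:int>
    }
}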

Example 43 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class PartExprEvalUtils method prepareExpr.

public static synchronized ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(ExprNodeGenericFuncDesc expr, List<String> partColumnNames, List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
    // Create the row object
    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
    for (int i = 0; i < partColumnNames.size(); i++) {
        partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(partColumnTypeInfos.get(i)));
    }
    StructObjectInspector objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partColumnNames, partObjectInspectors);
    ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr);
    ObjectInspector evaluateResultOI = evaluator.initialize(objectInspector);
    return ObjectPair.create((PrimitiveObjectInspector) evaluateResultOI, evaluator);
}
Also used : StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList (java.util.ArrayList) ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)
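Callers use the returned pair to evaluate the predicate against candidate partition values, which is how partition pruning decides what to keep. A hedged usage sketch follows; the ExprNodeGenericFuncDesc is assumed to be built elsewhere (e.g. from a filter like ds > '2024-01-01'), and the single "ds" column is illustrative.

import java.util.Arrays;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartExprEvalUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PrepareExprSketch {
    // `expr` is assumed to be a boolean predicate over the single partition column "ds".
    static boolean matches(ExprNodeGenericFuncDesc expr, String dsValue) throws HiveException {
        ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> pair =
            PartExprEvalUtils.prepareExpr(expr,
                Arrays.asList("ds"),
                Arrays.asList(TypeInfoFactory.stringTypeInfo));
        // The row is just the list of partition values, in partColumnNames order.
        Object result = pair.getSecond().evaluate(Arrays.asList(dsValue));
        Object java = pair.getFirst().getPrimitiveJavaObject(result);
        return Boolean.TRUE.equals(java);
    }
}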

Example 44 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class GenericUDTFJSONTuple method initialize.

@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
    inputOIs = args;
    numCols = args.length - 1;
    jsonObjectCache = new HashCache<>();
    if (numCols < 1) {
        throw new UDFArgumentException("json_tuple() takes at least two arguments: " + "the json string and a path expression");
    }
    for (int i = 0; i < args.length; ++i) {
        if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE || !args[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
            throw new UDFArgumentException("json_tuple()'s arguments have to be string type");
        }
    }
    seenErrors = false;
    pathParsed = false;
    paths = new String[numCols];
    cols = new Text[numCols];
    retCols = new Text[numCols];
    nullCols = new Object[numCols];
    for (int i = 0; i < numCols; ++i) {
        cols[i] = new Text();
        retCols[i] = cols[i];
        nullCols[i] = null;
    }
    // construct output object inspector
    ArrayList<String> fieldNames = new ArrayList<String>(numCols);
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
    for (int i = 0; i < numCols; ++i) {
        // Column names can be anything; the UDTF's AS clause renames them.
        fieldNames.add("c" + i);
        // All returned columns are Text.
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text)
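Because the output inspector is built from generated names c0..c(n-1) and writable string inspectors, a caller can exercise initialize directly to see the schema a json_tuple invocation will produce. A small test-style sketch, with the argument count chosen for illustration:

import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class JsonTupleSchemaDemo {
    public static void main(String[] args) throws Exception {
        GenericUDTFJSONTuple udtf = new GenericUDTFJSONTuple();
        // One json-string argument plus two path expressions => two output columns.
        ObjectInspector stringOI =
            PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        StructObjectInspector out =
            udtf.initialize(new ObjectInspector[] { stringOI, stringOI, stringOI });
        // Expect struct<c0:string,c1:string>.
        System.out.println(out.getTypeName());
    }
}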

Example 45 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class GenericUDTFParseUrlTuple method initialize.

@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
    inputOIs = args;
    numCols = args.length - 1;
    if (numCols < 1) {
        throw new UDFArgumentException("parse_url_tuple() takes at least two arguments: " + "the url string and a part name");
    }
    for (int i = 0; i < args.length; ++i) {
        if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE || !args[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
            throw new UDFArgumentException("parse_url_tuple()'s arguments have to be string type");
        }
    }
    seenErrors = false;
    pathParsed = false;
    url = null;
    p = null;
    lastKey = null;
    paths = new String[numCols];
    partnames = new PARTNAME[numCols];
    cols = new Text[numCols];
    retCols = new Text[numCols];
    nullCols = new Object[numCols];
    for (int i = 0; i < numCols; ++i) {
        cols[i] = new Text();
        retCols[i] = cols[i];
        nullCols[i] = null;
    }
    // construct output object inspector
    ArrayList<String> fieldNames = new ArrayList<String>(numCols);
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
    for (int i = 0; i < numCols; ++i) {
        // Column names can be anything; the UDTF's AS clause renames them.
        fieldNames.add("c" + i);
        // All returned columns are Text.
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text)
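Both UDTFs build their output schema the same way, and the pattern generalizes to any fixed-arity, all-string-output UDTF. A hypothetical helper (not part of Hive) capturing the shared loop:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

final class UdtfSchemas {
    // Hypothetical helper: n Text columns named c0..c(n-1).
    static StructObjectInspector textColumns(int n) {
        List<String> names = new ArrayList<>(n);
        List<ObjectInspector> ois = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {
            names.add("c" + i);
            ois.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);
    }
}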

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 78
ArrayList (java.util.ArrayList): 73
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 56
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 20
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 16
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 16
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 12
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 11
Text (org.apache.hadoop.io.Text): 11
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 10
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 10
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 10
Test (org.junit.Test): 10
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 9
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 8
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 8
IOException (java.io.IOException): 7
StandardListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector): 7
WritableHiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector): 7