
Example 91 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class JoinOperator method process.

@Override
public void process(Object row, int tag) throws HiveException {
    try {
        reportProgress();
        lastAlias = alias;
        alias = (byte) tag;
        if (!alias.equals(lastAlias)) {
            nextSz = joinEmitInterval;
        }
        List<Object> nr = getFilteredValue(alias, row);
        if (handleSkewJoin) {
            skewJoinKeyContext.handleSkew(tag);
        }
        // number of rows for the key in the given table
        long sz = storage[alias].rowCount();
        StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
        StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
        List keyObject = (List) soi.getStructFieldData(row, sf);
        // Are we consuming too much memory?
        if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) && !hasLeftSemiJoin) {
            if (sz == joinEmitInterval && !hasFilter(condn[alias - 1].getLeft()) && !hasFilter(condn[alias - 1].getRight())) {
                // The input is sorted by alias, so if we are already in the last
                // join operand, we can emit some results now. Note this has to be
                // done before adding the current row to the storage, to preserve
                // correctness for outer joins.
                checkAndGenObject();
                storage[alias].clearRows();
            }
        } else {
            if (LOG.isInfoEnabled() && (sz == nextSz)) {
                // Print a message if we reached at least 1000 rows for a join operand.
                // We won't print a message for the last join operand, since its size
                // will never reach joinEmitInterval.
                LOG.info("table " + alias + " has " + sz + " rows for join key " + keyObject);
                nextSz = getNextSize(nextSz);
            }
        }
        // Add the value to the vector
        // If the join key is null, process each row in a different group.
        StructObjectInspector inspector = (StructObjectInspector) sf.getFieldObjectInspector();
        if (SerDeUtils.hasAnyNullObject(keyObject, inspector, nullsafes)) {
            endGroup();
            startGroup();
        }
        storage[alias].addRow(nr);
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) List(java.util.List) IOException(java.io.IOException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
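
As context for the getStructFieldRef/getStructFieldData calls above, here is a minimal, self-contained sketch of the same lookup pattern. It is not from the Hive source: the struct layout, field names, and values are invented for illustration.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructFieldLookupSketch {
    public static void main(String[] args) {
        // Inspector for a struct { key:int, value:string } (illustrative layout).
        StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("key", "value"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        // For a standard struct inspector, a row is simply a List of field values.
        List<Object> row = Arrays.asList(42, "hello");

        // Resolve the field reference once, then read it from each row; this is
        // the pattern process() uses with Utilities.ReduceField.KEY.toString().
        StructField keyField = soi.getStructFieldRef("key");
        Object keyObject = soi.getStructFieldData(row, keyField);
        System.out.println("join key = " + keyObject); // prints: join key = 42
    }
}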

Example 92 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class JoinUtil method unflattenObjInspector.

/**
 * Checks the input object inspector to see if it is in the form of a flattened struct
 * like the ones generated by a vectorized reduce sink input:
 *   { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
 * If so, then it creates an "unflattened" struct that contains nested key/value
 * structs:
 *   { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
 *
 * @param oi the object inspector to check
 * @return unflattened object inspector if unflattening is needed,
 *         otherwise the original object inspector
 */
private static ObjectInspector unflattenObjInspector(ObjectInspector oi) {
    if (oi instanceof StructObjectInspector) {
        // Check if all fields start with "key." or "value."
        // If so, then unflatten by adding an additional level of nested key and value structs
        // Example: { "key.reducesinkkey0":int, "key.reducesinkkey1": int, "value._col6":int }
        // Becomes
        // { "key": { "reducesinkkey0":int, "reducesinkkey1":int }, "value": { "_col6":int } }
        ArrayList<StructField> keyFields = new ArrayList<StructField>();
        ArrayList<StructField> valueFields = new ArrayList<StructField>();
        for (StructField field : ((StructObjectInspector) oi).getAllStructFieldRefs()) {
            String fieldNameLower = field.getFieldName().toLowerCase();
            if (fieldNameLower.startsWith(KEY_FIELD_PREFIX)) {
                keyFields.add(field);
            } else if (fieldNameLower.startsWith(VALUE_FIELD_PREFIX)) {
                valueFields.add(field);
            } else {
                // Not a flattened struct, no need to unflatten
                return oi;
            }
        }
        // All field names start with "key." or "value."
        // Create key/value structs and add the respective fields to each one
        ArrayList<ObjectInspector> reduceFieldOIs = new ArrayList<ObjectInspector>();
        reduceFieldOIs.add(createStructFromFields(keyFields, Utilities.ReduceField.KEY.toString()));
        reduceFieldOIs.add(createStructFromFields(valueFields, Utilities.ReduceField.VALUE.toString()));
        // Finally create the outer struct to contain the key, value structs
        return ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, reduceFieldOIs);
    }
    return oi;
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList)
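
createStructFromFields is called above but its body is not shown. As a hedged sketch (an assumption, not the actual JoinUtil helper), such a method plausibly strips the "key."/"value." prefix from each flattened field name and wraps the trimmed fields in a standard struct inspector:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;

public class CreateStructSketch {
    // Assumed behavior, for illustration only. structName (e.g. "key") mirrors
    // the argument passed at the call site above; this sketch does not need it.
    static ObjectInspector structFromFields(List<StructField> fields, String structName) {
        List<String> names = new ArrayList<>();
        List<ObjectInspector> fieldOis = new ArrayList<>();
        for (StructField field : fields) {
            String name = field.getFieldName();
            int dot = name.indexOf('.');
            // "key.reducesinkkey0" becomes "reducesinkkey0"
            names.add(dot >= 0 ? name.substring(dot + 1) : name);
            fieldOis.add(field.getFieldObjectInspector());
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOis);
    }
}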

Example 93 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class VectorDeserializeOrcWriter method create.

// TODO: if more writers are added, separate out an EncodingWriterFactory
public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe, Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf, Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes, boolean[] cacheIncludes, int allocSize) throws IOException {
    // Vector SerDe can be disabled both on client and server side.
    if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED) || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED) || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
    PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
    if (partDesc == null) {
        LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: no partition desc for " + path);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Properties tblProps = partDesc.getTableDesc().getProperties();
    if ("true".equalsIgnoreCase(tblProps.getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
        LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter due to " + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    for (StructField sf : sourceOi.getAllStructFieldRefs()) {
        Category c = sf.getFieldObjectInspector().getCategory();
        if (c != Category.PRIMITIVE) {
            LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: " + c + " is not supported");
            return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
        }
    }
    LlapIoImpl.LOG.info("Creating VertorDeserializeOrcWriter for " + path);
    return new VectorDeserializeOrcWriter(daemonConf, tblProps, sourceOi, sourceIncludes, cacheIncludes, allocSize);
}
Also used : DeserializerOrcWriter(org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter) Path(org.apache.hadoop.fs.Path) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Properties(java.util.Properties)
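
The loop over getAllStructFieldRefs is the gate worth noting: a single non-primitive column disqualifies the vectorized path. A minimal standalone restatement of that check (the method name is illustrative):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class PrimitiveGateSketch {
    // True only if every top-level column is a primitive; one LIST, MAP,
    // STRUCT, or UNION column forces the fallback to DeserializerOrcWriter.
    static boolean allColumnsPrimitive(StructObjectInspector soi) {
        for (StructField sf : soi.getAllStructFieldRefs()) {
            if (sf.getFieldObjectInspector().getCategory() != Category.PRIMITIVE) {
                return false;
            }
        }
        return true;
    }
}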

Example 94 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class HiveMetaStoreUtils method getFieldsFromDeserializer.

/**
 * @param tableName name of the table
 * @param deserializer deserializer to use
 * @return the list of fields
 * @throws SerDeException if the serde throws an exception
 * @throws MetaException if one of the fields or types in the table is invalid
 */
public static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
    ObjectInspector oi = deserializer.getObjectInspector();
    String[] names = tableName.split("\\.");
    String last_name = names[names.length - 1];
    for (int i = 1; i < names.length; i++) {
        if (oi instanceof StructObjectInspector) {
            StructObjectInspector soi = (StructObjectInspector) oi;
            StructField sf = soi.getStructFieldRef(names[i]);
            if (sf == null) {
                throw new MetaException("Invalid Field " + names[i]);
            } else {
                oi = sf.getFieldObjectInspector();
            }
        } else if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
            ListObjectInspector loi = (ListObjectInspector) oi;
            oi = loi.getListElementObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$key$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapKeyObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$value$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapValueObjectInspector();
        } else {
            throw new MetaException("Unknown type for " + names[i]);
        }
    }
    ArrayList<FieldSchema> str_fields = new ArrayList<>();
    // rules on how to recurse the ObjectInspector based on its type
    if (oi.getCategory() != Category.STRUCT) {
        str_fields.add(new FieldSchema(last_name, oi.getTypeName(), FROM_SERIALIZER));
    } else {
        List<? extends StructField> fields = ((StructObjectInspector) oi).getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
            StructField structField = fields.get(i);
            String fieldName = structField.getFieldName();
            String fieldTypeName = structField.getFieldObjectInspector().getTypeName();
            String fieldComment = determineFieldComment(structField.getFieldComment());
            str_fields.add(new FieldSchema(fieldName, fieldTypeName, fieldComment));
        }
    }
    return str_fields;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)
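
The navigation loop condenses to one rule per path component: struct fields by name, "$elem$" for list elements, "$key$" and "$value$" for the two sides of a map. A restatement of a single step as a standalone helper (the method name is illustrative):

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class OiPathSketch {
    // Descend one path component, mirroring the loop in getFieldsFromDeserializer.
    static ObjectInspector descend(ObjectInspector oi, String part) {
        if (oi instanceof StructObjectInspector) {
            StructField sf = ((StructObjectInspector) oi).getStructFieldRef(part);
            if (sf == null) {
                throw new IllegalArgumentException("Invalid field " + part);
            }
            return sf.getFieldObjectInspector();
        } else if (oi instanceof ListObjectInspector && "$elem$".equalsIgnoreCase(part)) {
            return ((ListObjectInspector) oi).getListElementObjectInspector();
        } else if (oi instanceof MapObjectInspector && "$key$".equalsIgnoreCase(part)) {
            return ((MapObjectInspector) oi).getMapKeyObjectInspector();
        } else if (oi instanceof MapObjectInspector && "$value$".equalsIgnoreCase(part)) {
            return ((MapObjectInspector) oi).getMapValueObjectInspector();
        }
        throw new IllegalArgumentException("Unknown type for " + part);
    }
}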

Example 95 with StructField

use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

the class LlapRowRecordReader method convertValue.

static Object convertValue(Object val, ObjectInspector oi) {
    if (val == null) {
        return null;
    }
    Object convertedVal = null;
    ObjectInspector.Category oiCategory = oi.getCategory();
    switch(oiCategory) {
        case PRIMITIVE:
            convertedVal = convertPrimitive(val, (PrimitiveObjectInspector) oi);
            break;
        case LIST:
            ListObjectInspector loi = (ListObjectInspector) oi;
            int listSize = loi.getListLength(val);
            // Per ListObjectInspector.getListLength(), a length of -1 means a null list.
            if (listSize < 0) {
                return null;
            }
            List<Object> convertedList = new ArrayList<Object>(listSize);
            ObjectInspector listElementOI = loi.getListElementObjectInspector();
            for (int idx = 0; idx < listSize; ++idx) {
                convertedList.add(convertValue(loi.getListElement(val, idx), listElementOI));
            }
            convertedVal = convertedList;
            break;
        case MAP:
            MapObjectInspector moi = (MapObjectInspector) oi;
            int mapSize = moi.getMapSize(val);
            // Per MapObjectInspector.getMapSize(), a size of -1 means a null map.
            if (mapSize < 0) {
                return null;
            }
            Map<Object, Object> convertedMap = new LinkedHashMap<Object, Object>(mapSize);
            ObjectInspector mapKeyOI = moi.getMapKeyObjectInspector();
            ObjectInspector mapValOI = moi.getMapValueObjectInspector();
            Map<?, ?> mapCol = moi.getMap(val);
            for (Object mapKey : mapCol.keySet()) {
                Object convertedMapKey = convertValue(mapKey, mapKeyOI);
                Object convertedMapVal = convertValue(mapCol.get(mapKey), mapValOI);
                convertedMap.put(convertedMapKey, convertedMapVal);
            }
            convertedVal = convertedMap;
            break;
        case STRUCT:
            StructObjectInspector soi = (StructObjectInspector) oi;
            List<Object> convertedRow = new ArrayList<Object>();
            for (StructField structField : soi.getAllStructFieldRefs()) {
                Object convertedFieldValue = convertValue(soi.getStructFieldData(val, structField), structField.getFieldObjectInspector());
                convertedRow.add(convertedFieldValue);
            }
            convertedVal = convertedRow;
            break;
        default:
            throw new IllegalArgumentException("Cannot convert type " + oiCategory);
    }
    return convertedVal;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)
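
A usage sketch for convertValue with a hand-built nested inspector. The schema and values are invented, and since convertValue has package visibility, a real caller would live in the same package as LlapRowRecordReader; the call itself is shown as a comment.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ConvertValueSketch {
    public static void main(String[] args) {
        // Inspector for struct { id:int, tags:array<string> }.
        ObjectInspector tagsOi = ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id", "tags"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                        tagsOi));

        // For a standard struct inspector, a row is a List of field values.
        List<Object> row = Arrays.asList(7, Arrays.asList("a", "b"));

        // The STRUCT case recurses into the LIST field, yielding a result of
        // the shape [7, [a, b]] built from plain Java objects:
        // Object converted = LlapRowRecordReader.convertValue(row, soi);
    }
}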

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 147 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 107 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 93 usages
ArrayList (java.util.ArrayList): 75 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 56 usages
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 46 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 42 usages
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 33 usages
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 29 usages
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 28 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 26 usages
List (java.util.List): 25 usages
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 24 usages
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 24 usages
Test (org.junit.Test): 24 usages
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 22 usages
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 22 usages
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 22 usages
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 20 usages
UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector): 19 usages