
Example 6 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.

From the class JsonSerDe, method extractCurrentField:

/**
 * Utility method to extract the current expected field from the given JsonParser.
 *
 * isTokenCurrent indicates whether the JsonParser is already positioned at
 * the token we expect to read next, or needs to be advanced to the next
 * token before we read.
 */
private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException {
    Object val = null;
    JsonToken valueToken;
    if (isTokenCurrent) {
        valueToken = p.getCurrentToken();
    } else {
        valueToken = p.nextToken();
    }
    switch(hcatFieldSchema.getType()) {
        case INT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
            break;
        case TINYINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
            break;
        case SMALLINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
            break;
        case BIGINT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
            break;
        case BOOLEAN:
            String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (bval != null) {
                val = Boolean.valueOf(bval);
            } else {
                val = null;
            }
            break;
        case FLOAT:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
            break;
        case DOUBLE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
            break;
        case STRING:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            break;
        case BINARY:
            String b = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
            if (b != null) {
                try {
                    // Round-trip through Text.decode to validate the bytes as
                    // UTF-8 before returning them as the binary field value.
                    String t = Text.decode(b.getBytes(), 0, b.getBytes().length);
                    return t.getBytes();
                } catch (CharacterCodingException e) {
                    LOG.warn("Error generating json binary type from object.", e);
                    return null;
                }
            } else {
                val = null;
            }
            break;
        case DATE:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText());
            break;
        case TIMESTAMP:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : tsParser.parseTimestamp(p.getText());
            break;
        case DECIMAL:
            val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText());
            break;
        case VARCHAR:
            int vLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveVarchar(p.getText(), vLen);
            break;
        case CHAR:
            int cLen = ((BaseCharTypeInfo) hcatFieldSchema.getTypeInfo()).getLength();
            val = (valueToken == JsonToken.VALUE_NULL) ? null : new HiveChar(p.getText(), cLen);
            break;
        case ARRAY:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_ARRAY) {
                throw new IOException("Start of Array expected");
            }
            List<Object> arr = new ArrayList<Object>();
            while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
                arr.add(extractCurrentField(p, hcatFieldSchema.getArrayElementSchema().get(0), true));
            }
            val = arr;
            break;
        case MAP:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            Map<Object, Object> map = new LinkedHashMap<Object, Object>();
            HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
                Object v = extractCurrentField(p, valueSchema, false);
                map.put(k, v);
            }
            val = map;
            break;
        case STRUCT:
            if (valueToken == JsonToken.VALUE_NULL) {
                val = null;
                break;
            }
            if (valueToken != JsonToken.START_OBJECT) {
                throw new IOException("Start of Object expected");
            }
            HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
            int sz = subSchema.getFieldNames().size();
            List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
            while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
                populateRecord(struct, valueToken, p, subSchema);
            }
            val = struct;
            break;
        default:
            LOG.error("Unknown type found: " + hcatFieldSchema.getType());
            return null;
    }
    return val;
}
Also used: BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), HiveChar (org.apache.hadoop.hive.common.type.HiveChar), ArrayList (java.util.ArrayList), CharacterCodingException (java.nio.charset.CharacterCodingException), HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar), IOException (java.io.IOException), LinkedHashMap (java.util.LinkedHashMap), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), JsonToken (org.codehaus.jackson.JsonToken)
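
The VARCHAR and CHAR branches above read the declared column length from BaseCharTypeInfo and hand it to the HiveVarchar/HiveChar constructors, which enforce it by truncation. A minimal sketch of that interaction, using only the types and calls that already appear in the example (the class name VarcharLengthSketch is illustrative, not part of Hive):

import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class VarcharLengthSketch {
    public static void main(String[] args) {
        // BaseCharTypeInfo carries the declared length of a varchar(5) column.
        BaseCharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(5);
        // As in the VARCHAR case of extractCurrentField, the parsed text is
        // truncated to the declared length by the HiveVarchar constructor.
        HiveVarchar val = new HiveVarchar("hello world", typeInfo.getLength());
        // Prints "hello": the value was cut down to 5 characters.
        System.out.println(val.getValue());
    }
}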

Example 7 with BaseCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo in project hive by apache.

From the class GenericUDFConcat, method initialize:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Loop through all the inputs to determine the appropriate return type/length.
    // Return type:
    // All CHAR inputs: return CHAR
    // All VARCHAR inputs: return VARCHAR
    // All CHAR/VARCHAR inputs: return VARCHAR
    // All BINARY inputs: return BINARY
    // Otherwise return STRING
    argumentOIs = arguments;
    PrimitiveCategory currentCategory;
    PrimitiveObjectInspector poi;
    boolean fixedLengthReturnValue = true;
    // Only for char/varchar return types
    int returnLength = 0;
    for (int idx = 0; idx < arguments.length; ++idx) {
        if (arguments[idx].getCategory() != Category.PRIMITIVE) {
            throw new UDFArgumentException("CONCAT only takes primitive arguments");
        }
        poi = (PrimitiveObjectInspector) arguments[idx];
        currentCategory = poi.getPrimitiveCategory();
        if (idx == 0) {
            returnType = currentCategory;
        }
        switch(currentCategory) {
            case BINARY:
                fixedLengthReturnValue = false;
                if (returnType != currentCategory) {
                    // mix of binary/non-binary args
                    returnType = PrimitiveCategory.STRING;
                }
                break;
            case CHAR:
            case VARCHAR:
                if (!fixedLengthReturnValue) {
                    returnType = PrimitiveCategory.STRING;
                }
                if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
                    returnType = PrimitiveCategory.VARCHAR;
                }
                break;
            default:
                returnType = PrimitiveCategory.STRING;
                fixedLengthReturnValue = false;
                break;
        }
        // Keep a running total of the declared argument lengths. If the total
        // exceeds the max length for the char/varchar, then the return type
        // reverts to string.
        if (fixedLengthReturnValue) {
            returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
            if ((returnType == PrimitiveCategory.VARCHAR && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) || (returnType == PrimitiveCategory.CHAR && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
                returnType = PrimitiveCategory.STRING;
                fixedLengthReturnValue = false;
            }
        }
    }
    if (returnType == PrimitiveCategory.BINARY) {
        bw = new BytesWritable[arguments.length];
        return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    } else {
        // Treat all inputs as string; the return value will be converted to the appropriate type.
        createStringConverters();
        returnHelper = new GenericUDFUtils.StringHelper(returnType);
        BaseCharTypeInfo typeInfo;
        switch(returnType) {
            case STRING:
                return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
            case CHAR:
                typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
                return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            case VARCHAR:
                typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
                return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            default:
                throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
        }
    }
}
Also used: UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException), BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
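
To see the length bookkeeping of initialize() in isolation: each fixed-length argument adds its declared size to returnLength, and once the sum exceeds HiveVarchar.MAX_VARCHAR_LENGTH the return type reverts to STRING. A hedged sketch of that arithmetic for two VARCHAR inputs (ConcatReturnLengthSketch is an illustrative name, not part of Hive):

import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConcatReturnLengthSketch {
    public static void main(String[] args) {
        // Two VARCHAR arguments with declared lengths 20 and 30, accumulated
        // the way initialize() sums them into returnLength.
        int returnLength = 20 + 30;
        if (returnLength <= HiveVarchar.MAX_VARCHAR_LENGTH) {
            // Within bounds: CONCAT can return varchar(50).
            BaseCharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
            // Prints "varchar(50)".
            System.out.println(typeInfo.getTypeName());
        } else {
            // Overflow: the return type reverts to string.
            System.out.println("string");
        }
    }
}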

Aggregations

BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo): 6 usages
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException): 3 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 3 usages
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 3 usages
UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException): 2 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 2 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2 usages
PrimitiveObjectInspectorConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter): 2 usages
StringConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter): 2 usages
IOException (java.io.IOException): 1 usage
CharacterCodingException (java.nio.charset.CharacterCodingException): 1 usage
ArrayList (java.util.ArrayList): 1 usage
LinkedHashMap (java.util.LinkedHashMap): 1 usage
RelDataType (org.apache.calcite.rel.type.RelDataType): 1 usage
SqlIntervalQualifier (org.apache.calcite.sql.SqlIntervalQualifier): 1 usage
SqlParserPos (org.apache.calcite.sql.parser.SqlParserPos): 1 usage
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 1 usage
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 1 usage
VectorUDAFMaxString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString): 1 usage