
Example 26 with TypeInfoUtils.getTypeInfoFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

From the class SpecialCases, method addSpecialCasesParametersToOutputJobProperties.

/**
   * Method to do any file-format specific special casing while
   * instantiating a storage handler to write. We set any parameters
   * we want to be visible to the job in jobProperties, and this will
   * be available to the job via jobconf at run time.
   *
   * This is mostly intended to be used by StorageHandlers that wrap
   * File-based OutputFormats such as FosterStorageHandler that wraps
   * RCFile, ORC, etc.
   *
   * @param jobProperties : map to write to
   * @param jobInfo : information about this output job to read from
   * @param ofclass : the output format in use
   */
public static void addSpecialCasesParametersToOutputJobProperties(Map<String, String> jobProperties, OutputJobInfo jobInfo, Class<? extends OutputFormat> ofclass) {
    if (ofclass == RCFileOutputFormat.class) {
        // RCFile specific parameter
        jobProperties.put(HiveConf.ConfVars.HIVE_RCFILE_COLUMN_NUMBER_CONF.varname, Integer.toOctalString(jobInfo.getOutputSchema().getFields().size()));
    } else if (ofclass == OrcOutputFormat.class) {
        // Special cases for ORC
        // We need to check table properties to see if a couple of parameters,
        // such as compression parameters are defined. If they are, then we copy
        // them to job properties, so that it will be available in jobconf at runtime
        // See HIVE-5504 for details
        Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
        for (OrcConf property : OrcConf.values()) {
            String propName = property.getAttribute();
            if (tableProps.containsKey(propName)) {
                jobProperties.put(propName, tableProps.get(propName));
            }
        }
    } else if (ofclass == AvroContainerOutputFormat.class) {
        // Special cases for Avro. As with ORC, we make table properties that
        // Avro is interested in available in jobconf at runtime
        Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
        for (AvroSerdeUtils.AvroTableProperties property : AvroSerdeUtils.AvroTableProperties.values()) {
            String propName = property.getPropName();
            if (tableProps.containsKey(propName)) {
                String propVal = tableProps.get(propName);
                jobProperties.put(propName, propVal);
            }
        }
        Properties properties = new Properties();
        properties.put("name", jobInfo.getTableName());
        List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
        List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
        for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()) {
            colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
        }
        String schemaLiteralProp = AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName();
        String schemaLiteral = jobProperties.get(schemaLiteralProp);
        if (schemaLiteral == null || schemaLiteral.isEmpty()) {
            jobProperties.put(schemaLiteralProp, AvroSerDe.getSchemaFromCols(properties, colNames, colTypes, null).toString());
        }
    }
}
Also used: OrcConf (org.apache.orc.OrcConf), ArrayList (java.util.ArrayList), Properties (java.util.Properties), OrcOutputFormat (org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), Map (java.util.Map), AvroSerdeUtils (org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils)
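
The Avro branch above leans on getTypeInfoFromTypeString to turn each HCat field's type string into a TypeInfo that AvroSerDe.getSchemaFromCols can consume. Below is a minimal, self-contained sketch of just that conversion step; the column names and type strings are made up for illustration and stand in for jobInfo.getOutputSchema().

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class AvroColTypesSketch {
    public static void main(String[] args) {
        // Hypothetical columns standing in for the HCatSchema in the snippet above
        List<String> colNames = Arrays.asList("id", "name", "tags");
        List<String> colTypeStrings = Arrays.asList("bigint", "string", "array<string>");
        // Same conversion the snippet performs per HCatFieldSchema
        List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
        for (String typeString : colTypeStrings) {
            colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(typeString));
        }
        for (int i = 0; i < colNames.size(); i++) {
            System.out.println(colNames.get(i) + " : " + colTypes.get(i).getTypeName());
        }
    }
}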

Example 27 with TypeInfoUtils.getTypeInfoFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

From the class VectorizationContext, method getVectorExpressionForUdf.

private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, Class<?> udfClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    int numChildren = (childExpr == null) ? 0 : childExpr.size();
    if (numChildren > 2 && genericeUdf != null && mode == VectorExpressionDescriptor.Mode.FILTER && ((genericeUdf instanceof GenericUDFOPOr) || (genericeUdf instanceof GenericUDFOPAnd))) {
        for (int i = 0; i < numChildren; i++) {
            ExprNodeDesc child = childExpr.get(i);
            String childTypeString = child.getTypeString();
            if (childTypeString == null) {
                throw new HiveException("Null child type name string");
            }
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
            Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            if (columnVectorType != ColumnVector.Type.LONG) {
                return null;
            }
            if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
                return null;
            }
        }
        Class<?> vclass;
        if (genericeUdf instanceof GenericUDFOPOr) {
            vclass = FilterExprOrExpr.class;
        } else if (genericeUdf instanceof GenericUDFOPAnd) {
            vclass = FilterExprAndExpr.class;
        } else {
            throw new RuntimeException("Unexpected multi-child UDF");
        }
        VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
        return createVectorExpression(vclass, childExpr, childrenMode, returnType);
    }
    if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
        return null;
    }
    VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
    builder.setNumArguments(numChildren);
    builder.setMode(mode);
    for (int i = 0; i < numChildren; i++) {
        ExprNodeDesc child = childExpr.get(i);
        String childTypeString = child.getTypeString();
        if (childTypeString == null) {
            throw new HiveException("Null child type name string");
        }
        String undecoratedTypeName = getUndecoratedName(childTypeString);
        if (undecoratedTypeName == null) {
            throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method");
        }
        builder.setArgumentType(i, undecoratedTypeName);
        if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)) {
            builder.setInputExpressionType(i, InputExpressionType.COLUMN);
        } else if (child instanceof ExprNodeConstantDesc) {
            builder.setInputExpressionType(i, InputExpressionType.SCALAR);
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE);
        } else {
            throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
        }
    }
    VectorExpressionDescriptor.Descriptor descriptor = builder.build();
    Class<?> vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor);
    if (vclass == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("No vector udf found for " + udfClass.getSimpleName() + ", descriptor: " + descriptor);
        }
        return null;
    }
    VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
    return createVectorExpression(vclass, childExpr, childrenMode, returnType);
}
Also used: ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), ExprNodeDynamicValueDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), VectorUDAFMaxString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString), VectorUDAFMinString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), InputExpressionType (org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType), ArgumentType (org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType), Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
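
The multi-child AND/OR path above only vectorizes when every child's type string parses to a type stored in a LONG column vector (Hive keeps booleans in long vectors). The sketch below shows the shape of that per-child gate; usesLongColumnVector is a simplified, hypothetical stand-in for VectorizationContext.getColumnVectorTypeFromTypeInfo, not Hive's actual mapping.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class LongVectorGateSketch {
    // Simplified, hypothetical stand-in for the real column-vector-type mapping:
    // treat boolean and the integer family as LONG-vector types, everything else as not.
    static boolean usesLongColumnVector(TypeInfo typeInfo) {
        if (typeInfo.getCategory() != Category.PRIMITIVE) {
            return false;
        }
        switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
            case BOOLEAN:
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                return true;
            default:
                return false;
        }
    }

    public static void main(String[] args) {
        // A "string" child would disqualify the whole AND/OR from this vectorized path
        for (String typeString : new String[] { "boolean", "int", "string" }) {
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
            System.out.println(typeString + " -> long vector? " + usesLongColumnVector(typeInfo));
        }
    }
}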

Example 28 with TypeInfoUtils.getTypeInfoFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

From the class VectorAssignRow, method init.

/*
   * Initialize using a StructObjectInspector and a column projection list.
   */
public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns) throws HiveException {
    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
    final int count = fields.size();
    allocateArrays(count);
    for (int i = 0; i < count; i++) {
        int projectionColumnNum = projectedColumns.get(i);
        StructField field = fields.get(i);
        ObjectInspector fieldInspector = field.getFieldObjectInspector();
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());
        initTargetEntry(i, projectionColumnNum, typeInfo);
    }
}
Also used: PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo), CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)
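
init() above goes from an ObjectInspector back to a TypeInfo via the inspector's type name. A small round-trip sketch of that step, assuming only the public TypeInfoUtils and TypeInfoFactory APIs: parametrized types such as varchar(20) keep their parameters through the conversion.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeNameRoundTripSketch {
    public static void main(String[] args) {
        // Start from a known TypeInfo and derive a writable ObjectInspector from it...
        TypeInfo original = TypeInfoFactory.getVarcharTypeInfo(20);
        ObjectInspector inspector =
                TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(original);
        // ...then recover the TypeInfo from the inspector's type name, as init() does
        TypeInfo recovered = TypeInfoUtils.getTypeInfoFromTypeString(inspector.getTypeName());
        // Prints "varchar(20) / varchar(20)": the length parameter survives the round trip
        System.out.println(original.getTypeName() + " / " + recovered.getTypeName());
    }
}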

Example 29 with TypeInfoUtils.getTypeInfoFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

From the class VectorSerializeRow, method init.

public void init(List<String> typeNames) throws HiveException {
    final int size = typeNames.size();
    categories = new Category[size];
    primitiveCategories = new PrimitiveCategory[size];
    outputColumnNums = new int[size];
    TypeInfo typeInfo;
    for (int i = 0; i < size; i++) {
        typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
        categories[i] = typeInfo.getCategory();
        if (categories[i] == Category.PRIMITIVE) {
            primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
        }
        outputColumnNums[i] = i;
    }
}
Also used: TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
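
The loop above records each type's Category, and for primitives the finer-grained PrimitiveCategory, precisely because getTypeInfoFromTypeString returns the general TypeInfo interface. A minimal sketch of that classification, with made-up type names standing in for the typeNames argument:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeCategorySketch {
    public static void main(String[] args) {
        // Hypothetical type names of the kind init() receives
        List<String> typeNames = Arrays.asList("bigint", "array<string>", "timestamp");
        for (String name : typeNames) {
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(name);
            if (typeInfo.getCategory() == Category.PRIMITIVE) {
                // Primitives carry a finer-grained PrimitiveCategory
                System.out.println(name + ": " + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
            } else {
                System.out.println(name + ": " + typeInfo.getCategory());
            }
        }
    }
}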

Example 30 with TypeInfoUtils.getTypeInfoFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.

From the class VectorizationContext, method instantiateExpression.

private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnType, Object... args) throws HiveException {
    VectorExpression ve = null;
    Constructor<?> ctor = getConstructor(vclass);
    int numParams = ctor.getParameterTypes().length;
    int argsLength = (args == null) ? 0 : args.length;
    if (numParams == 0) {
        try {
            ve = (VectorExpression) ctor.newInstance();
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex));
        }
    } else if (numParams == argsLength) {
        try {
            ve = (VectorExpression) ctor.newInstance(args);
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + getStackTraceAsSingleLine(ex));
        }
    } else if (numParams == argsLength + 1) {
        // An additional argument is needed: the output column.
        Object[] newArgs = null;
        try {
            String returnTypeName;
            if (returnType == null) {
                returnTypeName = ((VectorExpression) vclass.newInstance()).getOutputType().toLowerCase();
                if (returnTypeName.equals("long")) {
                    returnTypeName = "bigint";
                }
                returnType = TypeInfoUtils.getTypeInfoFromTypeString(returnTypeName);
            } else {
                returnTypeName = returnType.getTypeName();
            }
            // Special handling for decimal because decimal types need scale and precision parameters.
            // This special handling should be avoided by using returnType uniformly for all cases.
            int outputCol = ocm.allocateOutputColumn(returnType);
            newArgs = Arrays.copyOf(args, numParams);
            newArgs[numParams - 1] = outputCol;
            ve = (VectorExpression) ctor.newInstance(newArgs);
            ve.setOutputType(returnTypeName);
        } catch (Exception ex) {
            throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + getStackTraceAsSingleLine(ex));
        }
    }
    // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
    if (ve instanceof TruncStringOutput) {
        TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
        if (returnType instanceof BaseCharTypeInfo) {
            BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnType;
            truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
        }
    }
    return ve;
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), VectorUDAFMaxString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString), VectorUDAFMinString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
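
The null-returnType branch above has to rename "long" to "bigint" before calling getTypeInfoFromTypeString, since vector expressions report the column-vector name while the parser only accepts Hive type names such as bigint. A sketch of just that normalization; parseVectorOutputType is our hypothetical helper, not a Hive method:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OutputTypeNameSketch {
    // Hypothetical helper mirroring the normalization inside instantiateExpression
    static TypeInfo parseVectorOutputType(String vectorOutputType) {
        String name = vectorOutputType.toLowerCase();
        if (name.equals("long")) {
            // Vectorized code says "long"; the corresponding Hive type name is "bigint"
            name = "bigint";
        }
        return TypeInfoUtils.getTypeInfoFromTypeString(name);
    }

    public static void main(String[] args) {
        System.out.println(parseVectorOutputType("Long").getTypeName());   // bigint
        System.out.println(parseVectorOutputType("double").getTypeName()); // double
    }
}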

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 41
ArrayList (java.util.ArrayList): 22
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 22
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 20
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 14
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 9
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 9
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 9
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 8
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 7
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 7
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 7
HashMap (java.util.HashMap): 6
Properties (java.util.Properties): 5
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 5
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 5
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 5
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 5
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 4
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 4