Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class SpecialCases, method addSpecialCasesParametersToOutputJobProperties.
/**
 * Method to do any file-format-specific special casing while
 * instantiating a storage handler to write. We set any parameters
 * we want to be visible to the job in jobProperties, and these will
 * be available to the job via the jobconf at run time.
 *
 * This is mostly intended to be used by StorageHandlers that wrap
 * file-based OutputFormats, such as FosterStorageHandler, which wraps
 * RCFile, ORC, etc.
 *
 * @param jobProperties : map to write to
 * @param jobInfo : information about this output job to read from
 * @param ofclass : the output format in use
 */
public static void addSpecialCasesParametersToOutputJobProperties(
    Map<String, String> jobProperties, OutputJobInfo jobInfo, Class<? extends OutputFormat> ofclass) {
  if (ofclass == RCFileOutputFormat.class) {
    // RCFile specific parameter: the output column count (note: written in octal form here).
    jobProperties.put(HiveConf.ConfVars.HIVE_RCFILE_COLUMN_NUMBER_CONF.varname,
        Integer.toOctalString(jobInfo.getOutputSchema().getFields().size()));
  } else if (ofclass == OrcOutputFormat.class) {
    // Special cases for ORC.
    // We need to check table properties to see if a couple of parameters,
    // such as compression parameters, are defined. If they are, we copy
    // them to the job properties, so that they will be available in the
    // jobconf at runtime. See HIVE-5504 for details.
    Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
    for (OrcConf property : OrcConf.values()) {
      String propName = property.getAttribute();
      if (tableProps.containsKey(propName)) {
        jobProperties.put(propName, tableProps.get(propName));
      }
    }
  } else if (ofclass == AvroContainerOutputFormat.class) {
    // Special cases for Avro. As with ORC, we make table properties that
    // Avro is interested in available in the jobconf at runtime.
    Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
    for (AvroSerdeUtils.AvroTableProperties property : AvroSerdeUtils.AvroTableProperties.values()) {
      String propName = property.getPropName();
      if (tableProps.containsKey(propName)) {
        jobProperties.put(propName, tableProps.get(propName));
      }
    }
    // If no Avro schema literal was supplied, derive one from the output
    // column names and their Hive type strings.
    Properties properties = new Properties();
    properties.put("name", jobInfo.getTableName());
    List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
    List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
    for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()) {
      colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
    }
    String schemaLiteralProp = AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName();
    String schemaLiteral = jobProperties.get(schemaLiteralProp);
    if (schemaLiteral == null || schemaLiteral.isEmpty()) {
      jobProperties.put(schemaLiteralProp,
          AvroSerDe.getSchemaFromCols(properties, colNames, colTypes, null).toString());
    }
  }
}
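To make the Avro branch above concrete, here is a small standalone sketch, not taken from the Hive sources; the class name and columns are invented for illustration. It parses Hive type strings with TypeInfoUtils.getTypeInfoFromTypeString and passes the resulting TypeInfos to AvroSerDe.getSchemaFromCols, just as the method does with the output schema fields.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class AvroSchemaFromTypeStrings {
  public static void main(String[] args) {
    // Hypothetical output columns; in the method above these come from jobInfo.getOutputSchema().
    List<String> colNames = Arrays.asList("id", "name", "tags");
    List<TypeInfo> colTypes = Arrays.asList(
        TypeInfoUtils.getTypeInfoFromTypeString("bigint"),
        TypeInfoUtils.getTypeInfoFromTypeString("string"),
        TypeInfoUtils.getTypeInfoFromTypeString("array<string>"));
    Properties properties = new Properties();
    properties.put("name", "example_table");
    // Derive the Avro schema exactly as the Avro branch above does.
    Schema schema = AvroSerDe.getSchemaFromCols(properties, colNames, colTypes, null);
    System.out.println(schema.toString(true));
  }
}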
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class VectorizationContext, method getVectorExpressionForUdf.
private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, Class<?> udfClass,
    List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
    throws HiveException {
  int numChildren = (childExpr == null) ? 0 : childExpr.size();
  if (numChildren > 2 && genericUdf != null && mode == VectorExpressionDescriptor.Mode.FILTER
      && ((genericUdf instanceof GenericUDFOPOr) || (genericUdf instanceof GenericUDFOPAnd))) {
    // Multi-child AND/OR filters are only vectorized when every child is a
    // column or function whose type maps to a LONG column vector.
    for (int i = 0; i < numChildren; i++) {
      ExprNodeDesc child = childExpr.get(i);
      String childTypeString = child.getTypeString();
      if (childTypeString == null) {
        throw new HiveException("Null child type name string");
      }
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
      if (columnVectorType != ColumnVector.Type.LONG) {
        return null;
      }
      if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
        return null;
      }
    }
    Class<?> vclass;
    if (genericUdf instanceof GenericUDFOPOr) {
      vclass = FilterExprOrExpr.class;
    } else if (genericUdf instanceof GenericUDFOPAnd) {
      vclass = FilterExprAndExpr.class;
    } else {
      throw new RuntimeException("Unexpected multi-child UDF");
    }
    VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
    return createVectorExpression(vclass, childExpr, childrenMode, returnType);
  }
  if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
    return null;
  }
  // Describe the argument types and input expression kinds, then look up a
  // matching vectorized expression class for this UDF.
  VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
  builder.setNumArguments(numChildren);
  builder.setMode(mode);
  for (int i = 0; i < numChildren; i++) {
    ExprNodeDesc child = childExpr.get(i);
    String childTypeString = child.getTypeString();
    if (childTypeString == null) {
      throw new HiveException("Null child type name string");
    }
    String undecoratedTypeName = getUndecoratedName(childTypeString);
    if (undecoratedTypeName == null) {
      throw new HiveException("No match for type string " + childTypeString
          + " from undecorated type name method");
    }
    builder.setArgumentType(i, undecoratedTypeName);
    if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)) {
      builder.setInputExpressionType(i, InputExpressionType.COLUMN);
    } else if (child instanceof ExprNodeConstantDesc) {
      builder.setInputExpressionType(i, InputExpressionType.SCALAR);
    } else if (child instanceof ExprNodeDynamicValueDesc) {
      builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE);
    } else {
      throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
    }
  }
  VectorExpressionDescriptor.Descriptor descriptor = builder.build();
  Class<?> vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor);
  if (vclass == null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("No vector udf found for " + udfClass.getSimpleName() + ", descriptor: " + descriptor);
    }
    return null;
  }
  VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
  return createVectorExpression(vclass, childExpr, childrenMode, returnType);
}
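The AND/OR branch above rejects any child whose type does not map to a LONG column vector. Below is a minimal standalone sketch of that check; the class name and type strings are invented for illustration, and it only uses the same calls the method above already makes.

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnVectorTypeCheck {
  public static void main(String[] args) throws HiveException {
    // The filter branch above only accepts children whose type maps to a LONG
    // column vector; "boolean" and "bigint" do, "string" and "double" do not.
    for (String typeString : new String[] {"boolean", "bigint", "string", "double"}) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
      ColumnVector.Type vectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
      System.out.println(typeString + " -> " + vectorType);
    }
  }
}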
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class VectorAssignRow, method init.
/*
 * Initialize using a StructObjectInspector and a column projection list.
 */
public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns)
    throws HiveException {
  List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
  final int count = fields.size();
  allocateArrays(count);
  for (int i = 0; i < count; i++) {
    int projectionColumnNum = projectedColumns.get(i);
    StructField field = fields.get(i);
    ObjectInspector fieldInspector = field.getFieldObjectInspector();
    // Recover the TypeInfo from the inspector's type name (e.g. "int", "map<string,int>").
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());
    initTargetEntry(i, projectionColumnNum, typeInfo);
  }
}
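The init above relies on an ObjectInspector's type name parsing back into an equivalent TypeInfo. Here is a small standalone sketch of that round trip; the class name is invented for illustration.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoRoundTrip {
  public static void main(String[] args) {
    // Parse a nested Hive type string into a TypeInfo tree...
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
    // ...materialize a standard ObjectInspector for it...
    ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    // ...and recover an equal TypeInfo from the inspector's type name, as init() above does.
    TypeInfo roundTripped = TypeInfoUtils.getTypeInfoFromTypeString(inspector.getTypeName());
    System.out.println(typeInfo.equals(roundTripped)); // true
  }
}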
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class VectorSerializeRow, method init.
public void init(List<String> typeNames) throws HiveException {
  final int size = typeNames.size();
  categories = new Category[size];
  primitiveCategories = new PrimitiveCategory[size];
  outputColumnNums = new int[size];
  TypeInfo typeInfo;
  for (int i = 0; i < size; i++) {
    // Parse each type name and record its category; primitives additionally
    // record their primitive category.
    typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
    categories[i] = typeInfo.getCategory();
    if (categories[i] == Category.PRIMITIVE) {
      primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
    }
    outputColumnNums[i] = i;
  }
}
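A standalone sketch of the category split performed above: parse each type name, record its Category, and for primitives also look at the PrimitiveCategory. The class name and type list are invented for illustration.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class CategorySplit {
  public static void main(String[] args) {
    List<String> typeNames = Arrays.asList("int", "decimal(10,2)", "struct<a:int,b:string>");
    for (String typeName : typeNames) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
      Category category = typeInfo.getCategory();
      if (category == Category.PRIMITIVE) {
        // Primitive types additionally expose a PrimitiveCategory, e.g. INT or DECIMAL.
        System.out.println(typeName + " -> " + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
      } else {
        System.out.println(typeName + " -> " + category);
      }
    }
  }
}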
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class VectorizationContext, method instantiateExpression.
private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnType, Object... args)
    throws HiveException {
  VectorExpression ve = null;
  Constructor<?> ctor = getConstructor(vclass);
  int numParams = ctor.getParameterTypes().length;
  int argsLength = (args == null) ? 0 : args.length;
  if (numParams == 0) {
    try {
      ve = (VectorExpression) ctor.newInstance();
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName()
          + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex));
    }
  } else if (numParams == argsLength) {
    try {
      ve = (VectorExpression) ctor.newInstance(args);
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with "
          + getNewInstanceArgumentString(args) + ", exception: " + getStackTraceAsSingleLine(ex));
    }
  } else if (numParams == argsLength + 1) {
    // An additional argument is needed, which is the output column.
    Object[] newArgs = null;
    try {
      String returnTypeName;
      if (returnType == null) {
        returnTypeName = ((VectorExpression) vclass.newInstance()).getOutputType().toLowerCase();
        if (returnTypeName.equals("long")) {
          returnTypeName = "bigint";
        }
        returnType = TypeInfoUtils.getTypeInfoFromTypeString(returnTypeName);
      } else {
        returnTypeName = returnType.getTypeName();
      }
      // Special handling for decimal, because decimal types need scale and precision parameters.
      // This special handling should be avoided by using returnType uniformly for all cases.
      int outputCol = ocm.allocateOutputColumn(returnType);
      newArgs = Arrays.copyOf(args, numParams);
      newArgs[numParams - 1] = outputCol;
      ve = (VectorExpression) ctor.newInstance(newArgs);
      ve.setOutputType(returnTypeName);
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments "
          + getNewInstanceArgumentString(newArgs) + ", exception: " + getStackTraceAsSingleLine(ex));
    }
  }
  // Add a maxLength parameter to UDFs that have CHAR or VARCHAR output.
  if (ve instanceof TruncStringOutput) {
    TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
    if (returnType instanceof BaseCharTypeInfo) {
      BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnType;
      truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
    }
  }
  return ve;
}
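The TruncStringOutput handling above works because parameterized type strings carry their parameters through getTypeInfoFromTypeString. A small standalone sketch of that, with an invented class name; it assumes DecimalTypeInfo's getPrecision()/getScale() accessors.

import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ParameterizedTypeParams {
  public static void main(String[] args) {
    // varchar/char type strings carry their maximum length...
    BaseCharTypeInfo varcharInfo =
        (BaseCharTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("varchar(50)");
    System.out.println(varcharInfo.getLength()); // 50, the value setMaxLength() receives above
    // ...and decimal type strings carry precision and scale.
    DecimalTypeInfo decimalInfo =
        (DecimalTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("decimal(10,2)");
    System.out.println(decimalInfo.getPrecision() + "," + decimalInfo.getScale()); // 10,2
  }
}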