Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorAssignRow, method assignRowColumn.
private void assignRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, Object object) {
  if (object == null) {
    VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
    return;
  }
  switch (targetTypeInfo.getCategory()) {
  case PRIMITIVE:
    {
      final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
      switch (targetPrimitiveCategory) {
      case VOID:
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
      case BOOLEAN:
        if (object instanceof Boolean) {
          ((LongColumnVector) columnVector).vector[batchIndex] = (((Boolean) object) ? 1 : 0);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = (((BooleanWritable) object).get() ? 1 : 0);
        }
        break;
      case BYTE:
        if (object instanceof Byte) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Byte) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((ByteWritable) object).get();
        }
        break;
      case SHORT:
        if (object instanceof Short) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Short) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((ShortWritable) object).get();
        }
        break;
      case INT:
        if (object instanceof Integer) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Integer) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((IntWritable) object).get();
        }
        break;
      case LONG:
        if (object instanceof Long) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Long) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((LongWritable) object).get();
        }
        break;
      case TIMESTAMP:
        if (object instanceof Timestamp) {
          ((TimestampColumnVector) columnVector).set(batchIndex, ((Timestamp) object));
        } else {
          ((TimestampColumnVector) columnVector).set(batchIndex, ((TimestampWritable) object).getTimestamp());
        }
        break;
      case DATE:
        if (object instanceof Date) {
          ((LongColumnVector) columnVector).vector[batchIndex] = DateWritable.dateToDays((Date) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((DateWritable) object).getDays();
        }
        break;
      case FLOAT:
        if (object instanceof Float) {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Float) object);
        } else {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((FloatWritable) object).get();
        }
        break;
      case DOUBLE:
        if (object instanceof Double) {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Double) object);
        } else {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((DoubleWritable) object).get();
        }
        break;
      case BINARY:
        {
          if (object instanceof byte[]) {
            byte[] bytes = (byte[]) object;
            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
          } else {
            BytesWritable bw = (BytesWritable) object;
            ((BytesColumnVector) columnVector).setVal(batchIndex, bw.getBytes(), 0, bw.getLength());
          }
        }
        break;
      case STRING:
        {
          if (object instanceof String) {
            String string = (String) object;
            byte[] bytes = string.getBytes();
            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
          } else {
            Text tw = (Text) object;
            ((BytesColumnVector) columnVector).setVal(batchIndex, tw.getBytes(), 0, tw.getLength());
          }
        }
        break;
      case VARCHAR:
        {
          // UNDONE: Performance problem with conversion to String, then bytes...
          // We store VARCHAR type stripped of pads.
          HiveVarchar hiveVarchar;
          if (object instanceof HiveVarchar) {
            hiveVarchar = (HiveVarchar) object;
          } else {
            hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
          }
          // TODO: HIVE-13624 Do we need maxLength checking?
          byte[] bytes = hiveVarchar.getValue().getBytes();
          ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
        }
        break;
      case CHAR:
        {
          // UNDONE: Performance problem with conversion to String, then bytes...
          // We store CHAR type stripped of pads.
          HiveChar hiveChar;
          if (object instanceof HiveChar) {
            hiveChar = (HiveChar) object;
          } else {
            hiveChar = ((HiveCharWritable) object).getHiveChar();
          }
          // TODO: HIVE-13624 Do we need maxLength checking?
          // We store CHAR in vector row batch with padding stripped.
          byte[] bytes = hiveChar.getStrippedValue().getBytes();
          ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
        }
        break;
      case DECIMAL:
        if (object instanceof HiveDecimal) {
          ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimal) object);
        } else {
          ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimalWritable) object);
        }
        break;
      case INTERVAL_YEAR_MONTH:
        if (object instanceof HiveIntervalYearMonth) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonth) object).getTotalMonths();
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths();
        }
        break;
      case INTERVAL_DAY_TIME:
        if (object instanceof HiveIntervalDayTime) {
          ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, (HiveIntervalDayTime) object);
        } else {
          ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime());
        }
        break;
      default:
        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
      }
    }
    break;
  case LIST:
    {
      final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
      final ListTypeInfo listTypeInfo = (ListTypeInfo) targetTypeInfo;
      final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
      final List list = (List) object;
      final int size = list.size();
      final int childCount = listColumnVector.childCount;
      listColumnVector.offsets[batchIndex] = childCount;
      listColumnVector.lengths[batchIndex] = size;
      listColumnVector.childCount = childCount + size;
      listColumnVector.child.ensureSize(childCount + size, true);
      for (int i = 0; i < size; i++) {
        assignRowColumn(listColumnVector.child, childCount + i, elementTypeInfo, list.get(i));
      }
    }
    break;
  case MAP:
    {
      final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
      final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
      final Map<Object, Object> map = (Map<Object, Object>) object;
      final int size = map.size();
      int childCount = mapColumnVector.childCount;
      mapColumnVector.offsets[batchIndex] = childCount;
      mapColumnVector.lengths[batchIndex] = size;
      mapColumnVector.keys.ensureSize(childCount + size, true);
      mapColumnVector.values.ensureSize(childCount + size, true);
      for (Map.Entry<Object, Object> entry : map.entrySet()) {
        assignRowColumn(mapColumnVector.keys, childCount, mapTypeInfo.getMapKeyTypeInfo(), entry.getKey());
        assignRowColumn(mapColumnVector.values, childCount, mapTypeInfo.getMapValueTypeInfo(), entry.getValue());
        childCount++;
      }
      mapColumnVector.childCount = childCount;
    }
    break;
  case STRUCT:
    {
      final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
      final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
      final List<TypeInfo> targetFieldTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
      final int size = targetFieldTypeInfos.size();
      if (object instanceof List) {
        final List struct = (List) object;
        for (int i = 0; i < size; i++) {
          assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), struct.get(i));
        }
      } else {
        final Object[] array = (Object[]) object;
        for (int i = 0; i < size; i++) {
          assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), array[i]);
        }
      }
    }
    break;
  case UNION:
    {
      final StandardUnion union = (StandardUnion) object;
      final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
      final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
      final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
      final byte tag = union.getTag();
      unionColumnVector.tags[batchIndex] = tag;
      assignRowColumn(unionColumnVector.fields[tag], batchIndex, objectTypeInfos.get(tag), union.getObject());
    }
    break;
  default:
    throw new RuntimeException("Category " + targetTypeInfo.getCategory().name() + " not supported");
  }
  /*
   * We always set the null flag to false when there is a value.
   */
  columnVector.isNull[batchIndex] = false;
}
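The following is a minimal, self-contained sketch (not part of Hive's source) of what the PRIMITIVE branches above do to vectorized columns: an INT value is widened into a LongColumnVector's long[] slot, a STRING's bytes are copied into a BytesColumnVector, and the row's null flag is cleared. The class name AssignRowColumnSketch and the literal values are made up for illustration.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class AssignRowColumnSketch {
  public static void main(String[] args) {
    int batchIndex = 0;

    // INT case: the boxed value is unboxed and widened into the long[] backing array.
    LongColumnVector intCol = new LongColumnVector(1024);
    Integer intValue = 42;
    intCol.vector[batchIndex] = intValue;
    intCol.isNull[batchIndex] = false;

    // STRING case: the bytes are copied into the vector's local buffer via setVal.
    BytesColumnVector stringCol = new BytesColumnVector(1024);
    stringCol.initBuffer();
    byte[] bytes = "hello".getBytes();
    stringCol.setVal(batchIndex, bytes, 0, bytes.length);
    stringCol.isNull[batchIndex] = false;

    System.out.println(intCol.vector[batchIndex]); // 42
    System.out.println(new String(stringCol.vector[batchIndex], stringCol.start[batchIndex], stringCol.length[batchIndex])); // hello
  }
}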
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorAssignRow, method initConvertSourceEntry.
/*
 * Initialize one column's source conversion related arrays.
 * Assumes initTargetEntry has already been called.
 */
private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) {
  isConvert[logicalColumnIndex] = true;
  final Category convertSourceCategory = convertSourceTypeInfo.getCategory();
  convertSourceOI[logicalColumnIndex] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(convertSourceTypeInfo);
  if (convertSourceCategory == Category.PRIMITIVE) {
    // These need to be based on the target.
    final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory();
    switch (targetPrimitiveCategory) {
    case DATE:
      convertTargetWritables[logicalColumnIndex] = new DateWritable();
      break;
    case STRING:
      convertTargetWritables[logicalColumnIndex] = new Text();
      break;
    default:
      // No additional data type specific setting.
      break;
    }
  }
}
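As a hedged illustration of why a reusable DateWritable is preallocated for DATE targets: on the convert path a single writable per column can turn each parsed source value into the epoch-day encoding that the DATE branch of assignRowColumn writes into a LongColumnVector, without allocating per row. The class name ConvertDateSketch and the literal date are arbitrary stand-ins.

import java.sql.Date;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class ConvertDateSketch {
  public static void main(String[] args) {
    // One DateWritable per column, reused for every row (as the writable array above suggests).
    DateWritable reusableWritable = new DateWritable();
    Date parsedSourceValue = Date.valueOf("2017-03-15"); // stand-in for a converted source value
    reusableWritable.set(parsedSourceValue);
    // getDays() yields the days-since-epoch value that the DATE branch stores in the vector.
    System.out.println(reusableWritable.getDays());
  }
}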
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class FunctionRegistry, method getCommonClassForComparison.
/**
 * Find a common class that objects of both TypeInfo a and TypeInfo b can
 * convert to. This is used for comparing objects of type a and type b.
 *
 * When we are comparing string and double, we will always convert both of
 * them to double and then compare.
 *
 * @return null if no common class could be found.
 */
public static synchronized TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) {
  // If same return one of them
  if (a.equals(b)) {
    return a;
  }
  if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
    return null;
  }
  PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
  PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
  if (pcA == pcB) {
    // Rely on getTypeInfoForPrimitiveCategory() to sort out the type params.
    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, pcA);
  }
  PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
  PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
  if (pgA == pgB) {
    // grouping is same, but category is not.
    if (pgA == PrimitiveGrouping.DATE_GROUP) {
      Integer ai = TypeInfoUtils.dateTypes.get(pcA);
      Integer bi = TypeInfoUtils.dateTypes.get(pcB);
      return (ai > bi) ? a : b;
    }
  }
  // handle string types properly
  if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) {
    // Compare as strings. Char comparison semantics may be different if/when implemented.
    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, PrimitiveCategory.STRING);
  }
  // timestamp/date is higher precedence than STRING_GROUP
  if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.DATE_GROUP) {
    return b;
  }
  // date/timestamp is higher precedence than STRING_GROUP
  if (pgB == PrimitiveGrouping.STRING_GROUP && pgA == PrimitiveGrouping.DATE_GROUP) {
    return a;
  }
  // Another special case, because timestamp is not implicitly convertible to numeric types.
  if ((pgA == PrimitiveGrouping.NUMERIC_GROUP || pgB == PrimitiveGrouping.NUMERIC_GROUP) && (pcA == PrimitiveCategory.TIMESTAMP || pcB == PrimitiveCategory.TIMESTAMP)) {
    return TypeInfoFactory.doubleTypeInfo;
  }
  for (PrimitiveCategory t : TypeInfoUtils.numericTypeList) {
    if (TypeInfoUtils.implicitConvertible(pcA, t) && TypeInfoUtils.implicitConvertible(pcB, t)) {
      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, t);
    }
  }
  return null;
}
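A short, hedged usage sketch of getCommonClassForComparison. The expected outputs in the comments follow from the branches above (identical types, the explicit timestamp-vs-numeric special case, and the early null return for non-primitive operands); they are illustrative, not a full specification of Hive's coercion rules.

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CommonComparisonTypeSketch {
  public static void main(String[] args) {
    // Identical types: the first argument is returned unchanged.
    TypeInfo same = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    System.out.println(same); // int

    // Timestamp vs a numeric type: the explicit special case yields double.
    TypeInfo tsVsInt = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.intTypeInfo);
    System.out.println(tsVsInt); // double

    // A non-primitive operand (here a list type): no common comparison type.
    TypeInfo listVsInt = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo),
        TypeInfoFactory.intTypeInfo);
    System.out.println(listVsInt); // null
  }
}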
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class FunctionRegistry, method getMethodInternal.
/**
 * Gets the closest matching method corresponding to the argument list from a
 * list of methods.
 *
 * @param mlist
 *          The list of methods to inspect.
 * @param exact
 *          Boolean to indicate whether this is an exact match or not.
 * @param argumentsPassed
 *          The classes for the argument.
 * @return The matching method.
 */
public static Method getMethodInternal(Class<?> udfClass, List<Method> mlist, boolean exact, List<TypeInfo> argumentsPassed) throws UDFArgumentException {
  // result
  List<Method> udfMethods = new ArrayList<Method>();
  // The cost of the result
  int leastConversionCost = Integer.MAX_VALUE;
  for (Method m : mlist) {
    List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
    if (argumentsAccepted == null) {
      // null means the method does not accept the number of arguments passed.
      continue;
    }
    boolean match = (argumentsAccepted.size() == argumentsPassed.size());
    int conversionCost = 0;
    for (int i = 0; i < argumentsPassed.size() && match; i++) {
      int cost = matchCost(argumentsPassed.get(i), argumentsAccepted.get(i), exact);
      if (cost == -1) {
        match = false;
      } else {
        conversionCost += cost;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = " + argumentsPassed + " accepted = " + argumentsAccepted + " method = " + m);
    }
    if (match) {
      // Always choose the function with least implicit conversions.
      if (conversionCost < leastConversionCost) {
        udfMethods.clear();
        udfMethods.add(m);
        leastConversionCost = conversionCost;
        // Found an exact match
        if (leastConversionCost == 0) {
          break;
        }
      } else if (conversionCost == leastConversionCost) {
        // Ambiguous call: two methods with the same number of implicit
        // conversions
        udfMethods.add(m);
        // Don't break! We might find a better match later.
      } else {
        // do nothing if implicitConversions > leastImplicitConversions
      }
    }
  }
  if (udfMethods.size() == 0) {
    // No matching methods found
    throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist);
  }
  if (udfMethods.size() > 1) {
    // First try selecting methods based on the type affinity of the arguments passed
    // to the candidate method arguments.
    filterMethodsByTypeAffinity(udfMethods, argumentsPassed);
  }
  if (udfMethods.size() > 1) {
    // if the only difference is numeric types, pick the method
    // with the smallest overall numeric type.
    int lowestNumericType = Integer.MAX_VALUE;
    boolean multiple = true;
    Method candidate = null;
    List<TypeInfo> referenceArguments = null;
    for (Method m : udfMethods) {
      int maxNumericType = 0;
      List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
      if (referenceArguments == null) {
        // keep the arguments for reference - we want all the non-numeric
        // arguments to be the same
        referenceArguments = argumentsAccepted;
      }
      Iterator<TypeInfo> referenceIterator = referenceArguments.iterator();
      for (TypeInfo accepted : argumentsAccepted) {
        TypeInfo reference = referenceIterator.next();
        boolean acceptedIsPrimitive = false;
        PrimitiveCategory acceptedPrimCat = PrimitiveCategory.UNKNOWN;
        if (accepted.getCategory() == Category.PRIMITIVE) {
          acceptedIsPrimitive = true;
          acceptedPrimCat = ((PrimitiveTypeInfo) accepted).getPrimitiveCategory();
        }
        if (acceptedIsPrimitive && TypeInfoUtils.numericTypes.containsKey(acceptedPrimCat)) {
          // We're looking for the udf with the smallest maximum numeric type.
          int typeValue = TypeInfoUtils.numericTypes.get(acceptedPrimCat);
          maxNumericType = typeValue > maxNumericType ? typeValue : maxNumericType;
        } else if (!accepted.equals(reference)) {
          // There's a non-numeric argument that doesn't match from one UDF to
          // another. We give up at this point.
          throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
        }
      }
      if (lowestNumericType > maxNumericType) {
        multiple = false;
        lowestNumericType = maxNumericType;
        candidate = m;
      } else if (maxNumericType == lowestNumericType) {
        // multiple udfs with the same max type. Unless we find a lower one
        // we'll give up.
        multiple = true;
      }
    }
    if (!multiple) {
      return candidate;
    } else {
      throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
    }
  }
  return udfMethods.get(0);
}
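The sketch below (using a made-up UDFExampleAdd class that is not part of Hive) shows the resolution behaviour described above: with two evaluate overloads that differ only in numeric width, passing int arguments selects the Integer overload because it needs no implicit conversions.

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class MethodResolutionSketch {
  // Hypothetical stand-in UDF with two overloads differing only in numeric width.
  public static class UDFExampleAdd {
    public Integer evaluate(Integer a, Integer b) { return a + b; }
    public Double evaluate(Double a, Double b) { return a + b; }
  }

  public static void main(String[] args) throws Exception {
    // Collect the candidate evaluate() overloads.
    List<Method> candidates = new ArrayList<>();
    for (Method m : UDFExampleAdd.class.getMethods()) {
      if (m.getName().equals("evaluate")) {
        candidates.add(m);
      }
    }
    // Passing (int, int): the Integer overload matches at conversion cost 0,
    // so it should win over the Double overload.
    List<TypeInfo> passed = Arrays.asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    Method resolved = FunctionRegistry.getMethodInternal(UDFExampleAdd.class, candidates, false, passed);
    System.out.println(resolved);
  }
}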
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorizationContext, method getColumnVectorExpression.
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
  int columnNum = getInputColumnIndex(exprDesc.getColumn());
  VectorExpression expr = null;
  switch (mode) {
  case FILTER:
    // Evaluate the column as a boolean, converting if necessary.
    TypeInfo typeInfo = exprDesc.getTypeInfo();
    if (typeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
      expr = new SelectColumnIsTrue(columnNum);
    } else {
      // Ok, we need to convert.
      ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1);
      exprAsList.add(exprDesc);
      // First try our cast method that will handle a few special cases.
      VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
      if (castToBooleanExpr == null) {
        // Ok, try the UDF.
        castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
        if (castToBooleanExpr == null) {
          throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
        }
      }
      final int outputColumnNum = castToBooleanExpr.getOutputColumnNum();
      expr = new SelectColumnIsTrue(outputColumnNum);
      expr.setChildExpressions(new VectorExpression[] { castToBooleanExpr });
      expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo());
      expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
    }
    break;
  case PROJECTION:
    {
      expr = new IdentityExpression(columnNum);
      TypeInfo identityTypeInfo = exprDesc.getTypeInfo();
      DataTypePhysicalVariation identityDataTypePhysicalVariation = getDataTypePhysicalVariation(columnNum);
      expr.setInputTypeInfos(identityTypeInfo);
      expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
      expr.setOutputTypeInfo(identityTypeInfo);
      expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation);
    }
    break;
  }
  return expr;
}
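For FILTER mode, the net effect when the column is already boolean is simply a SelectColumnIsTrue over that column. The following minimal sketch exercises that expression in isolation (constructing a full VectorizationContext is omitted); it keeps only the rows whose LongColumnVector slot holds 1. The class name FilterBooleanColumnSketch and the row values are illustrative.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;

public class FilterBooleanColumnSketch {
  public static void main(String[] args) throws Exception {
    // One boolean column, four rows; booleans are stored as 0/1 longs.
    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
    LongColumnVector boolCol = new LongColumnVector(4);
    batch.cols[0] = boolCol;
    batch.size = 4;
    boolCol.vector[0] = 1; // true
    boolCol.vector[1] = 0; // false
    boolCol.vector[2] = 1; // true
    boolCol.vector[3] = 0; // false

    // The filter expression over column 0, as built in the boolean branch above.
    SelectColumnIsTrue filter = new SelectColumnIsTrue(0);
    filter.evaluate(batch);
    System.out.println(batch.size); // 2 rows survive the filter
  }
}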