Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class FunctionRegistry, method getCommonClassForUnionAll.
/**
 * Find a common type for union-all operator. Only the common types for the same
 * type group will resolve to a common type. No implicit conversion across different
 * type groups will be done.
 */
public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
  if (a.equals(b)) {
    return a;
  }
  if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
    return null;
  }
  PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
  PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
  if (pcA == pcB) {
    // Same primitive category but different qualifiers.
    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, pcA);
  }
  PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
  PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
  // untyped nulls
  if (pgA == PrimitiveGrouping.VOID_GROUP) {
    return b;
  }
  if (pgB == PrimitiveGrouping.VOID_GROUP) {
    return a;
  }
  if (pgA != pgB) {
    return null;
  }
  switch (pgA) {
    case STRING_GROUP:
      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, PrimitiveCategory.STRING);
    case NUMERIC_GROUP:
      return TypeInfoUtils.implicitConvertible(a, b) ? b : a;
    case DATE_GROUP:
      return TypeInfoFactory.timestampTypeInfo;
    default:
      return null;
  }
}
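
For orientation, the following small driver exercises the rules documented above. It is a sketch rather than code from the Hive repository; it assumes the public static visibility shown above and the standard TypeInfoFactory constants.

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionAllTypeSketch {
  public static void main(String[] args) {
    // Same numeric group: resolves via implicit conversion to the wider type (bigint).
    TypeInfo t1 = FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo);
    // VOID (an untyped NULL) adopts the other side's type (string).
    TypeInfo t2 = FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.voidTypeInfo, TypeInfoFactory.stringTypeInfo);
    // Different type groups (numeric vs. string): no implicit conversion, so null.
    TypeInfo t3 = FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    System.out.println(t1 + " / " + t2 + " / " + t3);
  }
}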
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class Vectorizer, method validatePTFOperator.
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = (PTFDesc) op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }
  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }
  boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    if (!windowFrameDef.isStartUnbounded()) {
      setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
      return false;
    }
    switch (windowFrameDef.getWindowType()) {
      case RANGE:
        if (!windowFrameDef.getEnd().isCurrentRow()) {
          setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
          return false;
        }
        break;
      case ROWS:
        if (!windowFrameDef.isEndUnbounded()) {
          setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type");
          return false;
        }
        break;
      default:
        throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
      setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
      return false;
    }
    if (exprNodeDescList != null) {
      ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
      if (containsLeadLag(exprNodeDesc)) {
        setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
        return false;
      }
      if (supportedFunctionType != SupportedFunctionType.COUNT &&
          supportedFunctionType != SupportedFunctionType.DENSE_RANK &&
          supportedFunctionType != SupportedFunctionType.RANK) {
        // COUNT, DENSE_RANK, and RANK do not care about column types. The rest do.
        TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
        Category category = typeInfo.getCategory();
        boolean isSupportedType;
        if (category != Category.PRIMITIVE) {
          isSupportedType = false;
        } else {
          ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
          switch (colVecType) {
            case LONG:
            case DOUBLE:
            case DECIMAL:
              isSupportedType = true;
              break;
            default:
              isSupportedType = false;
              break;
          }
        }
        if (!isSupportedType) {
          setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
          return false;
        }
      }
    }
  }
  return true;
}
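
To isolate the window-frame restrictions checked above, here is a hypothetical helper that repeats only that portion of the validation. The class and method names are invented for illustration and the import path is assumed; the WindowFrameDef calls themselves are exactly the ones used in the snippet.

import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;

public final class PtfFrameCheckSketch {
  // Mirrors the per-evaluator frame checks in validatePTFOperator.
  static boolean isFrameVectorizable(WindowFrameDef windowFrameDef) {
    // Every evaluator must use an UNBOUNDED PRECEDING start frame.
    if (!windowFrameDef.isStartUnbounded()) {
      return false;
    }
    switch (windowFrameDef.getWindowType()) {
      case RANGE:
        // RANGE frames must end at CURRENT ROW.
        return windowFrameDef.getEnd().isCurrentRow();
      case ROWS:
        // ROWS frames must end UNBOUNDED FOLLOWING.
        return windowFrameDef.isEndUnbounded();
      default:
        // The original code throws for unexpected window types.
        return false;
    }
  }
}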
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class Vectorizer, method canSpecializeMapJoin.
private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinDesc vectorDesc) throws HiveException {
  Preconditions.checkState(op instanceof MapJoinOperator);
  VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
  boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
  String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  boolean oneMapJoinCondition = (desc.getConds().length == 1);
  boolean hasNullSafes = onExpressionHasNullSafes(desc);
  byte posBigTable = (byte) desc.getPosBigTable();
  // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
  // information first....
  List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
  boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.size() == 0);
  // For now, we don't support joins on or using DECIMAL_64.
  VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyDesc);
  final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
  // Assume.
  boolean supportsKeyTypes = true;
  HashSet<String> notSupportedKeyTypes = new HashSet<String>();
  // Since a key expression can be a calculation and the key will go into a scratch column,
  // we need the mapping and type information.
  int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
  String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
  TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
  ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] slimmedBigTableKeyExpressions;
  for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
    VectorExpression ve = allBigTableKeyExpressions[i];
    if (!IdentityExpression.isColumnOnly(ve)) {
      bigTableKeyExpressionsList.add(ve);
    }
    bigTableKeyColumnMap[i] = ve.getOutputColumnNum();
    ExprNodeDesc exprNode = keyDesc.get(i);
    bigTableKeyColumnNames[i] = exprNode.toString();
    TypeInfo typeInfo = exprNode.getTypeInfo();
    // Same check used in HashTableLoader.
    if (!MapJoinKey.isSupportedField(typeInfo)) {
      supportsKeyTypes = false;
      Category category = typeInfo.getCategory();
      notSupportedKeyTypes.add(
          (category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
    }
    bigTableKeyTypeInfos[i] = typeInfo;
  }
  if (bigTableKeyExpressionsList.size() == 0) {
    slimmedBigTableKeyExpressions = null;
  } else {
    slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
  }
  List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
  // For now, we don't support joins on or using DECIMAL_64.
  VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressionsUpConvertDecimal64(bigTableExprs);
  boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
  // Especially since LLAP is prone to turn it off in the MapJoinDesc in later
  // physical optimizer stages...
  boolean isHybridHashJoin = desc.isHybridHashJoin();
  /*
   * Populate vectorMapJoinInfo.
   */
  /*
   * Similarly, we need a mapping since a value expression can be a calculation and the value
   * will go into a scratch column.
   */
  int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
  String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
  TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
  ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] slimmedBigTableValueExpressions;
  for (int i = 0; i < bigTableValueColumnMap.length; i++) {
    VectorExpression ve = allBigTableValueExpressions[i];
    if (!IdentityExpression.isColumnOnly(ve)) {
      bigTableValueExpressionsList.add(ve);
    }
    bigTableValueColumnMap[i] = ve.getOutputColumnNum();
    ExprNodeDesc exprNode = bigTableExprs.get(i);
    bigTableValueColumnNames[i] = exprNode.toString();
    bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
  }
  if (bigTableValueExpressionsList.size() == 0) {
    slimmedBigTableValueExpressions = null;
  } else {
    slimmedBigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
  }
  vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
  vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
  vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
  vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
  vectorDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions);
  vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
  vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
  vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
  vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
  vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions);
  /*
   * Small table information.
   */
  VectorColumnOutputMapping bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
  VectorColumnOutputMapping bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping");
  // The order of the fields in the LazyBinary small table value must be used, so
  // we use the source ordering flavor for the mapping.
  VectorColumnSourceMapping smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping");
  Byte[] order = desc.getTagOrder();
  Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
  boolean isOuterJoin = !desc.getNoOuterJoin();
  /*
   * Gather up big and small table output result information from the MapJoinDesc.
   */
  List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
  int bigTableRetainSize = bigTableRetainList.size();
  int[] smallTableIndices;
  int smallTableIndicesSize;
  List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
  if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
    smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
    smallTableIndicesSize = smallTableIndices.length;
  } else {
    smallTableIndices = null;
    smallTableIndicesSize = 0;
  }
  List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
  int smallTableRetainSize = smallTableRetainList.size();
  int smallTableResultSize = 0;
  if (smallTableIndicesSize > 0) {
    smallTableResultSize = smallTableIndicesSize;
  } else if (smallTableRetainSize > 0) {
    smallTableResultSize = smallTableRetainSize;
  }
  /*
   * Determine the big table retained mapping first so we can optimize out (with
   * projection) copying inner join big table keys in the subsequent small table results section.
   */
  // We use a mapping object here so we can build the projection in any order and
  // get the ordered by 0 to n-1 output columns at the end.
  //
  // Also, to avoid copying a big table key into the small table result area for inner joins,
  // we reference it with the projection so there can be duplicate output columns
  // in the projection.
  VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
  int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
  for (int i = 0; i < bigTableRetainSize; i++) {
    // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
    // need to map to the right batch column.
    int retainColumn = bigTableRetainList.get(i);
    int batchColumnIndex = bigTableValueColumnMap[retainColumn];
    TypeInfo typeInfo = bigTableValueTypeInfos[i];
    // With this map we project the big table batch to make it look like an output batch.
    projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
    // Collect columns we copy from the big table batch to the overflow batch.
    if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) {
      // Tolerate repeated use of a big table column.
      bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
    }
    nextOutputColumn++;
  }
  /*
   * Now determine the small table results.
   */
  boolean smallTableExprVectorizes = true;
  int firstSmallTableOutputColumn;
  firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
  int smallTableOutputCount = 0;
  nextOutputColumn = firstSmallTableOutputColumn;
  // Small table indices has more information (i.e. keys) than retain, so use it if it exists...
  String[] bigTableRetainedNames;
  if (smallTableIndicesSize > 0) {
    smallTableOutputCount = smallTableIndicesSize;
    bigTableRetainedNames = new String[smallTableOutputCount];
    for (int i = 0; i < smallTableIndicesSize; i++) {
      if (smallTableIndices[i] >= 0) {
        // Zero and above numbers indicate a big table key is needed for
        // small table result "area".
        int keyIndex = smallTableIndices[i];
        // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
        // need to map the right column.
        int batchKeyColumn = bigTableKeyColumnMap[keyIndex];
        bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex];
        TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
        if (!isOuterJoin) {
          // Optimize inner join keys of small table results.
          // Project the big table key into the small table result "area".
          projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo);
          if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) {
            // If necessary, copy the big table key into the overflow batch's small table
            // result "area".
            bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo);
          }
        } else {
          // For outer joins, since the small table key can be null when there is no match,
          // we must have a physical (scratch) column for those keys. We cannot use the
          // projection optimization used by inner joins above.
          int scratchColumn = vContext.allocateScratchColumn(typeInfo);
          projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
          bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo);
          bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo);
        }
      } else {
        // Negative numbers indicate a column to be (deserialize) read from the small table's
        // LazyBinary value row.
        int smallTableValueIndex = -smallTableIndices[i] - 1;
        ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
        if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
          clearNotVectorizedReason();
          smallTableExprVectorizes = false;
        }
        bigTableRetainedNames[i] = smallTableExprNode.toString();
        TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
        // Make a new big table scratch column for the small table value.
        int scratchColumn = vContext.allocateScratchColumn(typeInfo);
        projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
        smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
      }
      nextOutputColumn++;
    }
  } else if (smallTableRetainSize > 0) {
    smallTableOutputCount = smallTableRetainSize;
    bigTableRetainedNames = new String[smallTableOutputCount];
    for (int i = 0; i < smallTableRetainSize; i++) {
      int smallTableValueIndex = smallTableRetainList.get(i);
      ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
      if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
        clearNotVectorizedReason();
        smallTableExprVectorizes = false;
      }
      bigTableRetainedNames[i] = smallTableExprNode.toString();
      // Make a new big table scratch column for the small table value.
      TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
      int scratchColumn = vContext.allocateScratchColumn(typeInfo);
      projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
      smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
      nextOutputColumn++;
    }
  } else {
    bigTableRetainedNames = new String[0];
  }
  boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
  // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
  vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
  vectorDesc.setUseOptimizedTable(useOptimizedTable);
  vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
  vectorDesc.setEngine(engine);
  vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
  vectorDesc.setHasNullSafes(hasNullSafes);
  vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
  vectorDesc.setOuterJoinHasNoKeys(outerJoinHasNoKeys);
  vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
  vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
  vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
  if (!supportsKeyTypes) {
    vectorDesc.setNotSupportedKeyTypes(new ArrayList(notSupportedKeyTypes));
  }
  // Check common conditions for both Optimized and Fast Hash Tables.
  // Assume.
  boolean result = true;
  if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark ||
      !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes || outerJoinHasNoKeys) {
    result = false;
  }
  if (!isFastHashTableEnabled) {
    // Check optimized-only hash table restrictions.
    if (!supportsKeyTypes) {
      result = false;
    }
  } else {
    if (isHybridHashJoin) {
      result = false;
    }
  }
  // Convert dynamic arrays and maps to simple arrays.
  bigTableRetainedMapping.finalize();
  bigTableOuterKeyMapping.finalize();
  smallTableMapping.finalize();
  vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
  vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
  vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
  projectionMapping.finalize();
  // Verify we added an entry for each output.
  assert projectionMapping.isSourceSequenceGood();
  vectorMapJoinInfo.setProjectionMapping(projectionMapping);
  return result;
}
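
The final decision at the end of the method can be hard to follow among the bookkeeping, so here is a hypothetical summary predicate that restates it. The parameter names mirror the local booleans above; the class and method themselves are not part of Hive.

public final class MapJoinSpecializeSketch {
  static boolean canSpecialize(boolean useOptimizedTable, boolean isVectorizationMapJoinNativeEnabled,
      boolean isTezOrSpark, boolean oneMapJoinCondition, boolean hasNullSafes,
      boolean smallTableExprVectorizes, boolean outerJoinHasNoKeys,
      boolean isFastHashTableEnabled, boolean supportsKeyTypes, boolean isHybridHashJoin) {
    // Conditions common to both the Optimized and Fast hash tables.
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark
        || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes || outerJoinHasNoKeys) {
      return false;
    }
    if (!isFastHashTableEnabled) {
      // Optimized hash table only: every key type must pass MapJoinKey.isSupportedField.
      return supportsKeyTypes;
    }
    // Fast hash table only: hybrid grace hash join is not supported.
    return !isHybridHashJoin;
  }
}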
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class Vectorizer, method validateStructInExpression.
private boolean validateStructInExpression(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode) {
  for (ExprNodeDesc d : desc.getChildren()) {
    TypeInfo typeInfo = d.getTypeInfo();
    if (typeInfo.getCategory() != Category.STRUCT) {
      return false;
    }
    StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
    final int fieldCount = fieldTypeInfos.size();
    for (int f = 0; f < fieldCount; f++) {
      TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
      Category category = fieldTypeInfo.getCategory();
      if (category != Category.PRIMITIVE) {
        setExpressionIssue(expressionTitle, "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName());
        return false;
      }
      PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
      InConstantType inConstantType = VectorizationContext.getInConstantTypeFromPrimitiveCategory(fieldPrimitiveTypeInfo.getPrimitiveCategory());
      // For now, limit the data types we support for Vectorized Struct IN().
      if (inConstantType != InConstantType.INT_FAMILY &&
          inConstantType != InConstantType.FLOAT_FAMILY &&
          inConstantType != InConstantType.STRING_FAMILY) {
        setExpressionIssue(expressionTitle, "Cannot vectorize struct field " + fieldNames.get(f) + " of type " + fieldTypeInfo.getTypeName());
        return false;
      }
    }
  }
  return true;
}
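
As a standalone restatement of the per-field rule, the hypothetical predicate below accepts a struct only when every field is a primitive in the integer, floating-point, or string families. The InConstantType nesting under VectorizationContext and the public visibility of getInConstantTypeFromPrimitiveCategory are assumptions; the rest follows the snippet above.

import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public final class StructInCheckSketch {
  // Hypothetical helper: true when a vectorized Struct IN() could handle every field.
  static boolean allFieldsVectorizableForIn(StructTypeInfo structTypeInfo) {
    List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    for (TypeInfo fieldTypeInfo : fieldTypeInfos) {
      if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
        // Nested complex fields are not vectorized.
        return false;
      }
      InConstantType inConstantType = VectorizationContext.getInConstantTypeFromPrimitiveCategory(
          ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory());
      if (inConstantType != InConstantType.INT_FAMILY
          && inConstantType != InConstantType.FLOAT_FAMILY
          && inConstantType != InConstantType.STRING_FAMILY) {
        // Only the integer, floating-point, and string families are supported for now.
        return false;
      }
    }
    return true;
  }
}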
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.
The class BinarySortableDeserializeRead, method readPrimitive.
private boolean readPrimitive(Field field) throws IOException {
  final int fieldIndex = root.index;
  field.start = inputByteBuffer.tell();
  /*
   * We have a field and are positioned to it. Read it.
   */
  switch (field.primitiveCategory) {
    case BOOLEAN:
      currentBoolean = (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) == 2);
      return true;
    case BYTE:
      currentByte = (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80);
      return true;
    case SHORT:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int v = inputByteBuffer.read(invert) ^ 0x80;
        v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        currentShort = (short) v;
      }
      return true;
    case INT:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int v = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 3; i++) {
          v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        currentInt = v;
      }
      return true;
    case LONG:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        long v = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 7; i++) {
          v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        currentLong = v;
      }
      return true;
    case DATE:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int v = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 3; i++) {
          v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        currentDateWritable.set(v);
      }
      return true;
    case TIMESTAMP:
      {
        if (tempTimestampBytes == null) {
          tempTimestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
        }
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        for (int i = 0; i < tempTimestampBytes.length; i++) {
          tempTimestampBytes[i] = inputByteBuffer.read(invert);
        }
        currentTimestampWritable.setBinarySortable(tempTimestampBytes, 0);
      }
      return true;
    case FLOAT:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int v = 0;
        for (int i = 0; i < 4; i++) {
          v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        if ((v & (1 << 31)) == 0) {
          // negative number, flip all bits
          v = ~v;
        } else {
          // positive number, flip the first bit
          v = v ^ (1 << 31);
        }
        currentFloat = Float.intBitsToFloat(v);
      }
      return true;
    case DOUBLE:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        long v = 0;
        for (int i = 0; i < 8; i++) {
          v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        if ((v & (1L << 63)) == 0) {
          // negative number, flip all bits
          v = ~v;
        } else {
          // positive number, flip the first bit
          v = v ^ (1L << 63);
        }
        currentDouble = Double.longBitsToDouble(v);
      }
      return true;
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      {
        /*
         * This code is a modified version of BinarySortableSerDe.deserializeText that lets us
         * detect if we can return a reference to the bytes directly.
         */
        // Get the actual length first.
        bytesStart = inputByteBuffer.tell();
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int length = 0;
        do {
          byte b = inputByteBuffer.read(invert);
          if (b == 0) {
            // end of string
            break;
          }
          if (b == 1) {
            // the last char is an escape char. read the actual char
            inputByteBuffer.read(invert);
          }
          length++;
        } while (true);
        if (length == 0 || (!invert && length == inputByteBuffer.tell() - bytesStart - 1)) {
          // No inversion or escaping happened, so we can reference the bytes directly.
          currentExternalBufferNeeded = false;
          currentBytes = inputByteBuffer.getData();
          currentBytesStart = bytesStart;
          currentBytesLength = length;
        } else {
          // We are now positioned at the end of this field's bytes.
          if (useExternalBuffer) {
            // If we decided not to reposition and re-read the buffer to copy it with
            // copyToExternalBuffer, we will still be correctly positioned for the next field.
            currentExternalBufferNeeded = true;
            currentExternalBufferNeededLen = length;
          } else {
            // The copyToBuffer will reposition and re-read the input buffer.
            currentExternalBufferNeeded = false;
            if (internalBufferLen < length) {
              internalBufferLen = length;
              internalBuffer = new byte[internalBufferLen];
            }
            copyToBuffer(internalBuffer, 0, length);
            currentBytes = internalBuffer;
            currentBytesStart = 0;
            currentBytesLength = length;
          }
        }
      }
      return true;
    case INTERVAL_YEAR_MONTH:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int v = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 3; i++) {
          v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        currentHiveIntervalYearMonthWritable.set(v);
      }
      return true;
    case INTERVAL_DAY_TIME:
      {
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        long totalSecs = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 7; i++) {
          totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        int nanos = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 3; i++) {
          nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        currentHiveIntervalDayTimeWritable.set(totalSecs, nanos);
      }
      return true;
    case DECIMAL:
      {
        // Since enforcing precision and scale can cause a HiveDecimal to become NULL,
        // we must read it, enforce it here, and either return NULL or buffer the result.
        final boolean invert = columnSortOrderIsDesc[fieldIndex];
        int b = inputByteBuffer.read(invert) - 1;
        if (!(b == 1 || b == -1 || b == 0)) {
          throw new IOException("Unexpected byte value " + (int) b + " in binary sortable format data (invert " + invert + ")");
        }
        final boolean positive = b != -1;
        int factor = inputByteBuffer.read(invert) ^ 0x80;
        for (int i = 0; i < 3; i++) {
          factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff);
        }
        if (!positive) {
          factor = -factor;
        }
        final int decimalStart = inputByteBuffer.tell();
        int length = 0;
        do {
          b = inputByteBuffer.read(positive ? invert : !invert);
          if (b == 1) {
            throw new IOException("Expected -1 and found byte value " + (int) b + " in binary sortable format data (invert " + invert + ")");
          }
          if (b == 0) {
            // end of digits
            break;
          }
          length++;
        } while (true);
        // CONSIDER: Allocate a larger initial size.
        if (tempDecimalBuffer == null || tempDecimalBuffer.length < length) {
          tempDecimalBuffer = new byte[length];
        }
        inputByteBuffer.seek(decimalStart);
        for (int i = 0; i < length; ++i) {
          tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
        }
        // Read the null byte again.
        inputByteBuffer.read(positive ? invert : !invert);
        // Set the value of the writable from the decimal digits that were written with no dot.
        final int scale = length - factor;
        currentHiveDecimalWritable.setFromDigitsOnlyBytesWithScale(!positive, tempDecimalBuffer, 0, length, scale);
        boolean decimalIsNull = !currentHiveDecimalWritable.isSet();
        if (!decimalIsNull) {
          // We have a decimal. After we enforce precision and scale, will it become a NULL?
          final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) field.typeInfo;
          final int enforcePrecision = decimalTypeInfo.getPrecision();
          final int enforceScale = decimalTypeInfo.getScale();
          decimalIsNull = !currentHiveDecimalWritable.mutateEnforcePrecisionScale(enforcePrecision, enforceScale);
        }
        if (decimalIsNull) {
          return false;
        }
      }
      return true;
    default:
      throw new RuntimeException("Unexpected primitive type category " + field.primitiveCategory);
  }
}
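
The fixed-width numeric cases above all follow the same pattern: XOR the first byte with 0x80 to undo the flipped sign bit, then accumulate the remaining bytes big-endian. The self-contained sketch below shows that pattern for a 4-byte int over a plain byte[]; it ignores the descending-order inversion, and encodeInt is only a test harness that mirrors the encoding the decoder implies, not Hive code.

public final class BinarySortableIntSketch {
  // Decode a 4-byte binary-sortable int starting at offset.
  static int decodeInt(byte[] data, int offset) {
    int v = (data[offset] & 0xff) ^ 0x80;          // undo the flipped sign bit
    for (int i = 1; i < 4; i++) {
      v = (v << 8) + (data[offset + i] & 0xff);    // big-endian accumulation
    }
    return v;
  }

  // Test harness: flip the sign bit so unsigned byte order matches signed int order.
  static byte[] encodeInt(int v) {
    int u = v ^ 0x80000000;
    return new byte[] { (byte) (u >>> 24), (byte) (u >>> 16), (byte) (u >>> 8), (byte) u };
  }

  public static void main(String[] args) {
    System.out.println(decodeInt(encodeInt(-42), 0));   // prints -42
    System.out.println(decodeInt(encodeInt(1234), 0));  // prints 1234
  }
}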