Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorExpressionDescriptor, method getVectorExpressionClass.
public Class<?> getVectorExpressionClass(Class<?> udf, Descriptor descriptor,
    boolean useCheckedExpressionIfAvailable) throws HiveException {
  VectorizedExpressions annotation = AnnotationUtils.getAnnotation(udf, VectorizedExpressions.class);
  if (annotation == null || annotation.value() == null) {
    return null;
  }
  Class<? extends VectorExpression>[] list = annotation.value();
  Class<? extends VectorExpression> matchedVe = null;
  for (Class<? extends VectorExpression> ve : list) {
    try {
      VectorExpression candidateVe = ve.newInstance();
      if (candidateVe.getDescriptor().matches(descriptor)) {
        if (!useCheckedExpressionIfAvailable) {
          // no need to look further for a checked variant of this expression
          return ve;
        } else if (candidateVe.supportsCheckedExecution()) {
          return ve;
        } else {
          // vector expression doesn't support checked execution
          // hold on to it in case there is no available checked variant
          matchedVe = ve;
        }
      }
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate VectorExpression class " + ve.getSimpleName(), ex);
    }
  }
  if (matchedVe != null) {
    return matchedVe;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString());
    for (Class<? extends VectorExpression> ve : list) {
      try {
        LOG.debug("getVectorExpressionClass doesn't match " + ve.getSimpleName() + " " + ve.newInstance().getDescriptor().toString());
      } catch (Exception ex) {
        throw new HiveException(ex);
      }
    }
  }
  return null;
}
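The pattern above — read a list of candidate classes from an annotation, instantiate each reflectively, and prefer a "checked" variant when the caller asks for one — can be reduced to a few lines of plain Java. The following is a minimal, Hive-free sketch; the Candidates annotation and Matchable interface are hypothetical stand-ins for VectorizedExpressions and VectorExpression.

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;

// Hypothetical stand-in for @VectorizedExpressions: lists candidate classes on a UDF.
@Retention(RetentionPolicy.RUNTIME)
@interface Candidates {
  Class<? extends Matchable>[] value();
}

// Hypothetical stand-in for VectorExpression's descriptor matching.
interface Matchable {
  boolean matches(String descriptor);
  default boolean supportsChecked() { return false; }
}

class CandidateLookup {
  // Mirrors the control flow above: return the first checked match when one is
  // preferred, else fall back to the first unchecked match, else null.
  static Class<? extends Matchable> lookup(Class<?> udf, String descriptor,
      boolean preferChecked) throws ReflectiveOperationException {
    Candidates ann = udf.getAnnotation(Candidates.class);
    if (ann == null) {
      return null;
    }
    Class<? extends Matchable> fallback = null;
    for (Class<? extends Matchable> c : ann.value()) {
      Matchable candidate = c.getDeclaredConstructor().newInstance();
      if (!candidate.matches(descriptor)) {
        continue;
      }
      if (!preferChecked || candidate.supportsChecked()) {
        return c;       // good enough: stop searching
      }
      fallback = c;     // unchecked match; keep looking for a checked variant
    }
    return fallback;
  }
}

Note the design choice this preserves: an unchecked match is remembered but not returned immediately, so a later checked variant in the annotation list can still win.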
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorTopNKeyOperator, method process.
@Override
public void process(Object data, int tag) throws HiveException {
  VectorizedRowBatch batch = (VectorizedRowBatch) data;
  if (!disabledPartitions.isEmpty() && disabledPartitions.size() == topNKeyFilters.size()) {
    // all filters are disabled due to efficiency check
    vectorForward(batch);
    return;
  }
  incomingBatches++;
  // The selected vector represents selected rows.
  // Clone the selected vector
  System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
  int[] selectedBackup = batch.selected;
  batch.selected = temporarySelected;
  int sizeBackup = batch.size;
  boolean selectedInUseBackup = batch.selectedInUse;
  for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
    keyExpression.evaluate(batch);
  }
  partitionKeyWrapperBatch.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] partitionKeyWrappers = partitionKeyWrapperBatch.getVectorHashKeyWrappers();
  keyWrappersBatch.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();
  // Filter rows with top n keys
  int size = 0;
  int[] selected = new int[batch.selected.length];
  for (int i = 0; i < batch.size; i++) {
    int j;
    if (batch.selectedInUse) {
      j = batch.selected[i];
    } else {
      j = i;
    }
    VectorHashKeyWrapperBase partitionKey = partitionKeyWrappers[i];
    if (disabledPartitions.contains(partitionKey)) {
      // filter for this partition is disabled
      selected[size++] = j;
    } else {
      TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKey);
      if (topNKeyFilter == null && topNKeyFilters.size() < conf.getMaxNumberOfPartitions()) {
        topNKeyFilter = new TopNKeyFilter(conf.getTopN(), keyWrapperComparator);
        topNKeyFilters.put(partitionKey.copyKey(), topNKeyFilter);
      }
      if (topNKeyFilter == null || topNKeyFilter.canForward(keyWrappers[i])) {
        selected[size++] = j;
      }
    }
  }
  // Apply selection to batch
  if (batch.size != size) {
    batch.selectedInUse = true;
    batch.selected = selected;
    batch.size = size;
  }
  // Forward the result
  if (size > 0) {
    vectorForward(batch);
  }
  // Restore the original selected vector
  batch.selected = selectedBackup;
  batch.size = sizeBackup;
  batch.selectedInUse = selectedInUseBackup;
  if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
    checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows());
  }
}
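The core trick in this operator is the save/filter/restore dance on the batch's selection state: the selected array is swapped for a scratch copy so that evaluation and forwarding cannot corrupt the parent operator's array, a narrowed selection is installed for forwarding, and the original state is restored before returning. Here is a minimal, Hive-free sketch of that pattern; MiniBatch, keepRow, and filterAndForward are illustrative names, not Hive API.

import java.util.Arrays;
import java.util.function.Consumer;
import java.util.function.IntPredicate;

// Illustrative stand-in for VectorizedRowBatch's selection state.
class MiniBatch {
  int size;
  int[] selected;
  boolean selectedInUse;
}

class SelectionFilter {
  // Filter the batch's logical rows through keepRow, forward the survivors,
  // then restore the caller's selection state, mirroring the code above.
  static void filterAndForward(MiniBatch batch, IntPredicate keepRow,
      Consumer<MiniBatch> forward) {
    // Back up the caller's view and hand the batch a private copy of
    // 'selected', so downstream mutation cannot corrupt the caller's array.
    int[] selectedBackup = batch.selected;
    int sizeBackup = batch.size;
    boolean selectedInUseBackup = batch.selectedInUse;
    batch.selected = Arrays.copyOf(selectedBackup, selectedBackup.length);

    int size = 0;
    int[] selected = new int[batch.selected.length];
    for (int i = 0; i < batch.size; i++) {
      int row = batch.selectedInUse ? batch.selected[i] : i;
      if (keepRow.test(row)) {
        selected[size++] = row;
      }
    }
    if (size != batch.size) {
      // Narrow the batch to the surviving rows only.
      batch.selectedInUse = true;
      batch.selected = selected;
      batch.size = size;
    }
    if (size > 0) {
      forward.accept(batch);
    }
    // Restore: the parent operator still owns the original selection.
    batch.selected = selectedBackup;
    batch.size = sizeBackup;
    batch.selectedInUse = selectedInUseBackup;
  }
}

The restore step is what lets the operator mutate the batch in place (cheap, no row copying) while remaining invisible to whoever handed it the batch.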
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorizationContext, method getInExpression.
/**
 * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
 */
private VectorExpression getInExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  ExprNodeDesc colExpr = childExpr.get(0);
  List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
  String colType = colExpr.getTypeString();
  colType = VectorizationContext.mapTypeNameSynonyms(colType);
  TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
  Category category = colTypeInfo.getCategory();
  if (category == Category.STRUCT) {
    return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
  } else if (category != Category.PRIMITIVE) {
    return null;
  }
  // prepare arguments for createVectorExpression
  List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(inChildren);
  /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
   * in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
   * If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued
   * logic correctly. E.g. NOT (col IN (null)) should be considered UNKNOWN, so that would
   * become FALSE in the WHERE clause, and cause the row in question to be filtered out.
   * See the discussion in Jira HIVE-5583.
   */
  VectorExpression expr = null;
  // Validate the IN items are only constants.
  for (ExprNodeDesc inListChild : childrenForInList) {
    if (!(inListChild instanceof ExprNodeConstantDesc)) {
      throw new HiveException("Vectorizing IN expression only supported for constant values");
    }
  }
  // determine class
  Class<?> cl;
  // The code below assumes all the arguments to IN have the same type as the column;
  // the non-vectorized path validates that explicitly during UDF init.
  if (isIntFamily(colType)) {
    cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
    long[] inVals = new long[childrenForInList.size()];
    for (int i = 0; i != inVals.length; i++) {
      inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
    }
    expr = createVectorExpression(cl, childExpr.subList(0, 1),
        VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((ILongInExpr) expr).setInListValues(inVals);
  } else if (isTimestampFamily(colType)) {
    cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class);
    Timestamp[] inVals = new Timestamp[childrenForInList.size()];
    for (int i = 0; i != inVals.length; i++) {
      inVals[i] = getTimestampScalar(childrenForInList.get(i));
    }
    expr = createVectorExpression(cl, childExpr.subList(0, 1),
        VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((ITimestampInExpr) expr).setInListValues(inVals);
  } else if (isStringFamily(colType)) {
    cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
    byte[][] inVals = new byte[childrenForInList.size()][];
    for (int i = 0; i != inVals.length; i++) {
      inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
    }
    expr = createVectorExpression(cl, childExpr.subList(0, 1),
        VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((IStringInExpr) expr).setInListValues(inVals);
  } else if (isFloatFamily(colType)) {
    cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
    double[] inValsD = new double[childrenForInList.size()];
    for (int i = 0; i != inValsD.length; i++) {
      inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
    }
    expr = createVectorExpression(cl, childExpr.subList(0, 1),
        VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((IDoubleInExpr) expr).setInListValues(inValsD);
  } else if (isDecimalFamily(colType)) {
    final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr);
    if (tryDecimal64) {
      cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
          FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class);
      final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale();
      expr = createDecimal64VectorExpression(cl, childExpr.subList(0, 1),
          VectorExpressionDescriptor.Mode.PROJECTION,
          /* isDecimal64ScaleEstablished */ true,
          /* decimal64ColumnScale */ scale,
          returnType, DataTypePhysicalVariation.NONE,
          /* dontRescaleArguments */ false,
          new GenericUDFIn());
      if (expr != null) {
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
          ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childrenForInList.get(i);
          HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
          final long decimal64Scalar = new HiveDecimalWritable(hiveDecimal).serialize64(scale);
          inVals[i] = decimal64Scalar;
        }
        ((ILongInExpr) expr).setInListValues(inVals);
      }
    }
    if (expr == null) {
      cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
          FilterDecimalColumnInList.class : DecimalColumnInList.class);
      expr = createVectorExpression(cl, childExpr.subList(0, 1),
          VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
      HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
      for (int i = 0; i != inValsD.length; i++) {
        inValsD[i] = (HiveDecimal) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
      }
      ((IDecimalInExpr) expr).setInListValues(inValsD);
    }
  } else if (isDateFamily(colType)) {
    cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
    long[] inVals = new long[childrenForInList.size()];
    for (int i = 0; i != inVals.length; i++) {
      inVals[i] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
    }
    expr = createVectorExpression(cl, childExpr.subList(0, 1),
        VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((ILongInExpr) expr).setInListValues(inVals);
  }
  // Return the desired VectorExpression if found. Otherwise, return null to cause
  // execution to fall back to row mode.
  return expr;
}
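Of the type-family branches above, the Decimal64 one is the least obvious: when the column's decimal type fits in 64 bits (precision <= 18), each IN-list constant is serialized once into a scaled long via serialize64(scale), so the per-row membership test becomes plain long comparison. The following is a hypothetical sketch of that encoding using java.math.BigDecimal; the Decimal64 class and toScaledLong name here are illustrative, not Hive API.

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Arrays;

// Hypothetical sketch of the Decimal64 idea: a small-precision decimal is
// represented as one long scaled to the column's scale.
final class Decimal64 {
  // e.g. columnScale = 2: 12.34 -> 1234L. Mirrors what serialize64(scale)
  // produces, minus the overflow/precision checks HiveDecimalWritable performs.
  static long toScaledLong(BigDecimal value, int columnScale) {
    return value.setScale(columnScale, RoundingMode.UNNECESSARY)
                .unscaledValue()
                .longValueExact();
  }

  public static void main(String[] args) {
    long[] inVals = {
        toScaledLong(new BigDecimal("12.34"), 2),  // 1234
        toScaledLong(new BigDecimal("0.5"), 2)     // 50
    };
    // A Decimal64 IN filter now only compares longs per row.
    System.out.println(Arrays.toString(inVals));
  }
}

This also explains why the Decimal64 branch hands its values to ILongInExpr rather than IDecimalInExpr: after encoding, the filter is structurally identical to the integer-family one.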
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorizationContext, method getCoalesceExpression.
private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  int[] inputColumns = new int[childExpr.size()];
  VectorExpression[] vectorChildren = getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  final int size = vectorChildren.length;
  TypeInfo[] inputTypeInfos = new TypeInfo[size];
  DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size];
  DataTypePhysicalVariation outputDataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64;
  boolean fixConstants = false;
  for (int i = 0; i < vectorChildren.length; ++i) {
    VectorExpression ve = vectorChildren[i];
    inputColumns[i] = ve.getOutputColumnNum();
    inputTypeInfos[i] = ve.getOutputTypeInfo();
    inputDataTypePhysicalVariations[i] = ve.getOutputDataTypePhysicalVariation();
    if (inputDataTypePhysicalVariations[i] == DataTypePhysicalVariation.NONE ||
        inputDataTypePhysicalVariations[i] == null) {
      if (childExpr.get(i) instanceof ExprNodeConstantDesc &&
          inputTypeInfos[i] instanceof DecimalTypeInfo &&
          ((DecimalTypeInfo) inputTypeInfos[i]).precision() <= 18) {
        fixConstants = true;
      } else {
        outputDataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
      }
    }
  }
  if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64 && fixConstants) {
    for (int i = 0; i < vectorChildren.length; ++i) {
      if ((inputDataTypePhysicalVariations[i] == DataTypePhysicalVariation.NONE ||
          inputDataTypePhysicalVariations[i] == null) &&
          vectorChildren[i] instanceof ConstantVectorExpression) {
        ConstantVectorExpression cve = ((ConstantVectorExpression) vectorChildren[i]);
        HiveDecimal hd = cve.getDecimalValue();
        Long longValue = new HiveDecimalWritable(hd).serialize64(((DecimalTypeInfo) cve.getOutputTypeInfo()).getScale());
        ((ConstantVectorExpression) vectorChildren[i]).setLongValue(longValue);
        vectorChildren[i].setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.DECIMAL_64);
        int scratchColIndex = vectorChildren[i].getOutputColumnNum() - ocm.initialOutputCol;
        ocm.scratchDataTypePhysicalVariations[scratchColIndex] = DataTypePhysicalVariation.DECIMAL_64;
      }
    }
  }
  final int outputColumnNum = ocm.allocateOutputColumn(returnType, outputDataTypePhysicalVariation);
  VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outputColumnNum);
  vectorCoalesce.setChildExpressions(vectorChildren);
  vectorCoalesce.setInputTypeInfos(inputTypeInfos);
  vectorCoalesce.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  vectorCoalesce.setOutputTypeInfo(returnType);
  vectorCoalesce.setOutputDataTypePhysicalVariation(outputDataTypePhysicalVariation);
  freeNonColumns(vectorChildren);
  // Assume this is not a filter until the mode says otherwise.
  boolean isFilter = false;
  if (mode == VectorExpressionDescriptor.Mode.FILTER) {
    // Is output type a BOOLEAN?
    if (returnType.getCategory() == Category.PRIMITIVE &&
        ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
      isFilter = true;
    } else {
      return null;
    }
  }
  if (isFilter) {
    // Wrap the PROJECTION COALESCE expression output with a filter.
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(vectorCoalesce.getOutputColumnNum());
    filterVectorExpr.setChildExpressions(new VectorExpression[] { vectorCoalesce });
    filterVectorExpr.setInputTypeInfos(vectorCoalesce.getOutputTypeInfo());
    filterVectorExpr.setInputDataTypePhysicalVariations(vectorCoalesce.getOutputDataTypePhysicalVariation());
    return filterVectorExpr;
  } else {
    return vectorCoalesce;
  }
}
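The subtle part of this method is the output-representation decision: the COALESCE result can stay in the compact DECIMAL_64 form only if every child either already produces DECIMAL_64 or is a small decimal constant (precision <= 18) that can be re-encoded in place; a single incompatible child demotes the output to the plain representation. Here is a hypothetical reduction of that decision; Variation, Child, and outputVariation are illustrative names, not Hive API.

import java.util.List;

// Illustrative stand-ins for DataTypePhysicalVariation and the child metadata.
enum Variation { NONE, DECIMAL_64 }

class Child {
  Variation variation;
  boolean smallDecimalConstant; // constant with precision <= 18, re-encodable as a scaled long
}

class CoalesceTyping {
  // Start optimistic (DECIMAL_64); demote to NONE unless every plain child is
  // a fixable constant -- mirroring the first loop in the method above.
  static Variation outputVariation(List<Child> children) {
    Variation out = Variation.DECIMAL_64;
    boolean fixConstants = false;
    for (Child c : children) {
      if (c.variation == Variation.DECIMAL_64) {
        continue;
      }
      if (c.smallDecimalConstant) {
        fixConstants = true;   // caller rewrites it to a scaled-long constant later
      } else {
        out = Variation.NONE;  // one incompatible child forces the plain form
      }
    }
    // When out stays DECIMAL_64 and fixConstants is set, the caller performs
    // the constant rewrite, as the second loop in the method above does.
    return out;
  }
}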
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorizationContext, method getIdentityExpression.
/**
 * Used as a fast path for operations that don't modify their input, like unary +
 * and casting boolean to long. IdentityExpression and its children are always
 * projections.
 */
private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) throws HiveException {
  if (childExprList.size() != 1) {
    return null;
  }
  ExprNodeDesc childExpr = childExprList.get(0);
  if (!(childExpr instanceof ExprNodeColumnDesc)) {
    // Not a direct column reference: return the vectorized
    // child expression here instead of IdentityExpression.
    return getVectorExpression(childExpr);
  }
  int identityCol;
  TypeInfo identityTypeInfo;
  DataTypePhysicalVariation identityDataTypePhysicalVariation;
  ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
  identityCol = getInputColumnIndex(colDesc.getColumn());
  identityTypeInfo = colDesc.getTypeInfo();
  identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol);
  VectorExpression ve = new IdentityExpression(identityCol);
  ve.setInputTypeInfos(identityTypeInfo);
  ve.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
  ve.setOutputTypeInfo(identityTypeInfo);
  ve.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation);
  return ve;
}