Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorMapJoinOperator, method initializeOp.
/**
 * Initializes this vectorized map-join operator.
 *
 * <p>Steps, in order: transient-initialize the big-table filter, key and value vector
 * expressions; build the per-column row writers from the big table's input inspector;
 * delegate to the superclass; build the key output writers and key wrapper batch; and
 * finally replace the big table's row-mode value evaluators with evaluators that read
 * directly from the incoming {@link VectorizedRowBatch}.
 *
 * @param hconf configuration handed to the vector expressions, the superclass and the
 *              replacement evaluators
 * @throws HiveException if any initialization step fails
 */
@Override
public void initializeOp(Configuration hconf) throws HiveException {
  VectorExpression.doTransientInit(bigTableFilterExpressions, hconf);
  VectorExpression.doTransientInit(keyExpressions, hconf);
  VectorExpression.doTransientInit(bigTableValueExpressions, hconf);

  // The closure below is parameterized with a final local on purpose: per the original
  // author's note, using a member variable inside the closure will not do the right thing.
  final int bigTablePos = conf.getPosBigTable();
  final StructObjectInspector bigTableInspector =
      (StructObjectInspector) inputObjInspectors[bigTablePos];

  // Code borrowed from VectorReduceSinkOperator.initializeOp
  VectorExpressionWriterFactory.processVectorInspector(
      bigTableInspector,
      new VectorExpressionWriterFactory.SingleOIDClosure() {
        @Override
        public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
          rowWriters = writers;
          inputObjInspectors[bigTablePos] = objectInspector;
        }
      });
  singleRow = new Object[rowWriters.length];

  super.initializeOp(hconf);

  List<ExprNodeDesc> bigTableKeyDescs = conf.getKeys().get(posBigTable);
  keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(bigTableKeyDescs);
  keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);

  Map<Byte, List<ExprNodeDesc>> exprsByTable = conf.getExprs();
  List<ExprNodeDesc> bigTableValueDescs = exprsByTable.get(posBigTable);
  VectorExpressionWriterFactory.processVectorExpressions(
      bigTableValueDescs,
      new VectorExpressionWriterFactory.ListOIDClosure() {
        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
          valueWriters = writers;
          joinValuesObjectInspectors[posBigTable] = oids;
        }
      });

  // We're hijacking the big table evaluators and replacing them with our own custom ones,
  // which return values taken from the input batch's vector expression output columns.
  final int valueCount = bigTableValueDescs.size();
  List<ExprNodeEvaluator> batchEvaluators = new ArrayList<ExprNodeEvaluator>(valueCount);
  for (int i = 0; i < valueCount; ++i) {
    ExprNodeDesc valueDesc = bigTableValueDescs.get(i);
    VectorExpression valueExpr = bigTableValueExpressions[i];
    // Captured by the evaluator below: which batch column to read and which writer to use.
    final int batchColumn = valueExpr.getOutputColumnNum();
    final int writerSlot = i;

    // This is a vectorized aware evaluator
    batchEvaluators.add(new ExprNodeEvaluator<ExprNodeDesc>(valueDesc, hconf) {
      @Override
      public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
        throw new HiveException("should never reach here");
      }

      @Override
      protected Object _evaluate(Object row, int version) throws HiveException {
        VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
        // Translate the operator's current batch index through the selection vector if in use.
        final int rowIndex;
        if (inBatch.selectedInUse) {
          rowIndex = inBatch.selected[batchIndex];
        } else {
          rowIndex = batchIndex;
        }
        return valueWriters[writerSlot].writeValue(inBatch.cols[batchColumn], rowIndex);
      }
    });
  }

  // Now replace the old evaluators with our own
  joinValues[posBigTable] = batchEvaluators;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorSMBMapJoinOperator, method initializeOp.
/**
 * Initializes this vectorized sort-merge-bucket map-join operator.
 *
 * <p>After delegating to the superclass, this transient-initializes the big-table vector
 * expressions, creates the output batch from the operator's output inspector, compiles the
 * key wrapper batch, installs a key evaluator that converts a vectorized key wrapper into a
 * list of writable key values, and replaces the big table's row-mode value evaluators with
 * evaluators that read directly from the incoming {@link VectorizedRowBatch}.
 *
 * @param hconf configuration handed to the superclass, the vector expressions and the
 *              replacement evaluators
 * @throws HiveException if any initialization step fails
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);

  VectorExpression.doTransientInit(bigTableFilterExpressions, hconf);
  VectorExpression.doTransientInit(keyExpressions, hconf);
  VectorExpression.doTransientInit(bigTableValueExpressions, hconf);

  vrbCtx = new VectorizedRowBatchCtx();
  vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
  outputBatch = vrbCtx.createVectorizedRowBatch();

  keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
  outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();

  // This key evaluator translates from the vectorized VectorHashKeyWrapper format
  // into the row-mode MapJoinKey
  // NOTE(review): keyOutputWriters is read below but is not assigned in this method;
  // presumably it is set up elsewhere in the class -- confirm.
  keyEvaluator = new SMBJoinKeyEvaluator() {
    // Reused on every call; pre-filled with one null slot per key expression.
    private final List<Object> key = new ArrayList<Object>();

    {
      for (int slot = 0; slot < keyExpressions.length; ++slot) {
        key.add(null);
      }
    }

    @Override
    public List<Object> evaluate(VectorHashKeyWrapperBase kw) throws HiveException {
      for (int slot = 0; slot < keyExpressions.length; ++slot) {
        Object writableKey = keyWrapperBatch.getWritableKeyValue(kw, slot, keyOutputWriters[slot]);
        key.set(slot, writableKey);
      }
      return key;
    }
  };

  Map<Byte, List<ExprNodeDesc>> exprsByTable = conf.getExprs();
  List<ExprNodeDesc> bigTableValueDescs = exprsByTable.get(posBigTable);
  VectorExpressionWriterFactory.processVectorExpressions(
      bigTableValueDescs,
      new VectorExpressionWriterFactory.ListOIDClosure() {
        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
          valueWriters = writers;
          joinValuesObjectInspectors[posBigTable] = oids;
        }
      });

  // We're hijacking the big table evaluators and replacing them with our own custom ones
  // which are going to return values from the input batch vector expressions
  final int valueCount = bigTableValueDescs.size();
  List<ExprNodeEvaluator> batchEvaluators = new ArrayList<ExprNodeEvaluator>(valueCount);
  for (int i = 0; i < valueCount; ++i) {
    ExprNodeDesc valueDesc = bigTableValueDescs.get(i);
    VectorExpression valueExpr = bigTableValueExpressions[i];
    // Captured by the evaluator below: which batch column to read and which writer to use.
    final int batchColumn = valueExpr.getOutputColumnNum();
    final int writerSlot = i;

    // This is a vectorized aware evaluator
    batchEvaluators.add(new ExprNodeEvaluator<ExprNodeDesc>(valueDesc, hconf) {
      @Override
      public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
        throw new HiveException("should never reach here");
      }

      @Override
      protected Object _evaluate(Object row, int version) throws HiveException {
        VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
        // Translate the operator's current batch index through the selection vector if in use.
        final int rowIndex;
        if (inBatch.selectedInUse) {
          rowIndex = inBatch.selected[batchIndex];
        } else {
          rowIndex = batchIndex;
        }
        return valueWriters[writerSlot].writeValue(inBatch.cols[batchColumn], rowIndex);
      }
    });
  }

  // Now replace the old evaluators with our own
  joinValues[posBigTable] = batchEvaluators;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method wrapWithDecimal64ToDecimalConversion.
/**
 * Wraps an expression in a Decimal64-to-Decimal conversion expression.
 *
 * <p>The conversion is created from the input expression's output column number and output
 * type info. If the input is an {@code IdentityExpression}, the conversion is returned on
 * its own; otherwise the input expression is attached as the conversion's single child.
 *
 * @param inputExpression the expression whose output is to be converted
 * @return the conversion expression, with {@code inputExpression} as its child unless the
 *         input is an identity expression
 * @throws HiveException if the conversion expression cannot be created
 */
public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression) throws HiveException {
  final VectorExpression conversion = createDecimal64ToDecimalConversion(
      inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo());

  if (!(inputExpression instanceof IdentityExpression)) {
    // CONCERN: Leaking scratch column?
    conversion.setChildExpressions(new VectorExpression[] { inputExpression });
  }
  return conversion;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method getStructInExpression.
/**
 * Builds the vector expression for a struct-valued {@code IN (...)} predicate.
 *
 * <p>Each struct in the IN list is serialized field by field with
 * {@link BinarySortableSerializeWrite}; the resulting byte arrays are handed to the created
 * expression as its IN-list values, together with a newly allocated string scratch column
 * and the struct column's child expressions.
 *
 * @param childExpr      not referenced by this method
 * @param colExpr        the struct column expression; its children become the struct field
 *                       expressions of the result
 * @param colTypeInfo    type of the struct column; cast to {@link StructTypeInfo}
 * @param inChildren     the IN-list entries; each is either an {@link ExprNodeConstantDesc}
 *                       or is cast to {@link ExprNodeGenericFuncDesc} and evaluated
 * @param mode           selects {@code FilterStructColumnInList} when {@code FILTER},
 *                       otherwise {@code StructColumnInList}
 * @param returnType     output type passed to the created expression
 * @return the IN expression, or {@code null} if any struct field is not a primitive type
 * @throws HiveException if a constant has an unexpected or unsupported type, or if
 *                       evaluating or serializing a constant fails
 */
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  VectorExpression expr;
  StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
  List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfos.size();
  ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
  InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
  for (int f = 0; f < fieldCount; f++) {
    TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
    // Only primitive fields are supported for now.
    if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
      return null;
    }
    // We are going to serialize using the 4 basic types.
    fieldVectorColumnTypes[f] = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
    // We currently evaluate the IN (..) constants in special ways.
    PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
    fieldInConstantTypes[f] = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
  }

  Output buffer = new Output();
  BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
  final int inChildrenCount = inChildren.size();
  byte[][] serializedInChildren = new byte[inChildrenCount][];
  try {
    for (int i = 0; i < inChildrenCount; i++) {
      final ExprNodeDesc node = inChildren.get(i);
      final Object[] constants;
      if (node instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
        ConstantObjectInspector output = constNode.getWritableObjectInspector();
        constants = ((List<?>) output.getWritableConstantValue()).toArray();
      } else {
        ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
        ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
        // The evaluator must be initialized before evaluate(); the returned inspector is not needed.
        evaluator.initialize(exprNode.getWritableObjectInspector());
        constants = (Object[]) evaluator.evaluate(null);
      }
      // The same buffer is handed to the writer again for every IN-list entry.
      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < fieldCount; f++) {
        Object constant = constants[f];
        if (constant == null) {
          binarySortableSerializeWrite.writeNull();
          continue;
        }
        InConstantType inConstantType = fieldInConstantTypes[f];
        switch (inConstantType) {
        case STRING_FAMILY:
          {
            if (!(constant instanceof Text)) {
              throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
            }
            Text text = (Text) constant;
            // Pass the logical length explicitly rather than relying on the backing array's length.
            binarySortableSerializeWrite.writeString(text.getBytes(), 0, text.getLength());
          }
          break;
        case INT_FAMILY:
          {
            long value;
            if (constant instanceof IntWritable) {
              value = ((IntWritable) constant).get();
            } else if (constant instanceof LongWritable) {
              value = ((LongWritable) constant).get();
            } else {
              throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
            }
            binarySortableSerializeWrite.writeLong(value);
          }
          break;
        case FLOAT_FAMILY:
          {
            if (!(constant instanceof DoubleWritable)) {
              throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
            }
            binarySortableSerializeWrite.writeDouble(((DoubleWritable) constant).get());
          }
          break;
        // UNDONE...
        case DATE:
        case TIMESTAMP:
        case DECIMAL:
        default:
          // Thrown as HiveException directly so the message reaches the caller unwrapped.
          throw new HiveException("Unexpected IN constant type " + inConstantType.name());
        }
      }
      serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    }
  } catch (HiveException e) {
    // Already the declared exception type: rethrow as-is instead of nesting it inside a
    // second HiveException, which would bury the diagnostic message one level down.
    throw e;
  } catch (Exception e) {
    throw new HiveException(e);
  }

  // Create a single child representing the scratch column where we will
  // generate the serialized keys of the batch.
  int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
  Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
  // NOTE(review): PROJECTION is passed here for both classes, regardless of 'mode' -- confirm intended.
  expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
  ((IStringInExpr) expr).setInListValues(serializedInChildren);
  ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
  ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
  return expr;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method instantiateExpression.
/**
 * Reflectively instantiates a vector expression class.
 *
 * <p>The constructor obtained from {@code getConstructor(vclass)} is invoked in one of three
 * ways, depending on its parameter count:
 * <ul>
 *   <li>zero parameters: invoked with no arguments;</li>
 *   <li>as many parameters as {@code args}: invoked with {@code args};</li>
 *   <li>one more parameter than {@code args}: a new output column is allocated for the
 *       return type and appended as the last argument, and the output type information is
 *       set on the created expression.</li>
 * </ul>
 * If the created expression is a {@code TruncStringOutput} and the return type is a
 * {@code BaseCharTypeInfo}, the type's length is applied as the maximum length.
 *
 * @param vclass                          the vector expression class to instantiate
 * @param returnTypeInfo                  the output type; required when an output column
 *                                        has to be allocated
 * @param returnDataTypePhysicalVariation physical variation of the output type
 * @param args                            constructor arguments, excluding any output column
 * @return the new expression, or {@code null} when the constructor's parameter count matches
 *         none of the three cases above
 * @throws HiveException if the constructor invocation fails or, in the output-column case,
 *                       {@code returnTypeInfo} is {@code null}
 */
public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation, Object... args) throws HiveException {
  VectorExpression ve = null;
  Constructor<?> ctor = getConstructor(vclass);
  int numParams = ctor.getParameterTypes().length;
  int argsLength = (args == null) ? 0 : args.length;
  if (numParams == 0) {
    try {
      ve = (VectorExpression) ctor.newInstance();
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex));
    }
  } else if (numParams == argsLength) {
    try {
      ve = (VectorExpression) ctor.newInstance(args);
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + getStackTraceAsSingleLine(ex));
    }
  } else if (numParams == argsLength + 1) {
    // Additional argument is needed, which is the outputcolumn.
    Object[] newArgs = null;
    try {
      if (returnTypeInfo == null) {
        throw new HiveException("Missing output type information");
      }
      final int outputColumnNum = ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation);
      // Copy the caller's arguments and place the output column number in the extra last slot.
      newArgs = Arrays.copyOf(Objects.requireNonNull(args), numParams);
      newArgs[numParams - 1] = outputColumnNum;
      ve = (VectorExpression) ctor.newInstance(newArgs);
      /*
       * Caller is responsible for setting children and input type information.
       */
      ve.setOutputTypeInfo(returnTypeInfo);
      ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation);
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + getStackTraceAsSingleLine(ex));
    }
  }
  // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
  if (ve instanceof TruncStringOutput) {
    TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
    if (returnTypeInfo instanceof BaseCharTypeInfo) {
      BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnTypeInfo;
      truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
    }
  }
  return ve;
}
Aggregations