use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class VectorMapJoinOperator method initializeOp.
@Override
public void initializeOp(Configuration hconf) throws HiveException {
  // Use a final variable to properly parameterize the processVectorInspector closure.
  // Using a member variable in the closure will not do the right thing...
  final int parameterizePosBigTable = conf.getPosBigTable();

  // Code borrowed from VectorReduceSinkOperator.initializeOp
  VectorExpressionWriterFactory.processVectorInspector(
      (StructObjectInspector) inputObjInspectors[parameterizePosBigTable],
      new VectorExpressionWriterFactory.SingleOIDClosure() {
        @Override
        public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
          rowWriters = writers;
          inputObjInspectors[parameterizePosBigTable] = objectInspector;
        }
      });
  singleRow = new Object[rowWriters.length];

  super.initializeOp(hconf);

  List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
  keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
  keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);

  Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
  List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
  VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions,
      new VectorExpressionWriterFactory.ListOIDClosure() {
        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
          valueWriters = writers;
          joinValuesObjectInspectors[posBigTable] = oids;
        }
      });

  // We're hijacking the big table evaluators and replacing them with our own custom ones
  // which are going to return values from the input batch vector expressions
  List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
  for (int i = 0; i < bigTableExpressions.size(); ++i) {
    ExprNodeDesc desc = bigTableExpressions.get(i);
    VectorExpression vectorExpr = bigTableValueExpressions[i];

    // This is a vectorization-aware evaluator
    ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {

      int columnIndex;
      int writerIndex;

      public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
        this.columnIndex = columnIndex;
        this.writerIndex = writerIndex;
        return this;
      }

      @Override
      public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
        throw new HiveException("should never reach here");
      }

      @Override
      protected Object _evaluate(Object row, int version) throws HiveException {
        VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
        int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
        return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
      }
    }.initVectorExpr(vectorExpr.getOutputColumn(), i);
    vectorNodeEvaluators.add(eval);
  }

  // Now replace the old evaluators with our own
  joinValues[posBigTable] = vectorNodeEvaluators;

  // Filtering is handled in the input batch processing
  if (filterMaps != null) {
    filterMaps[posBigTable] = null;
  }
}
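The final-local trick called out in the first two comments is plain Java closure semantics rather than anything Hive-specific: an anonymous class captures the value of a (effectively) final local at creation time, while a member field read inside the closure reflects whatever the field holds when the closure actually runs. A minimal, self-contained sketch of the difference (hypothetical class and names, not Hive code):

public class ClosureCaptureDemo {

  interface Closure {
    void run();
  }

  private int posBigTable = 1;

  void demo() {
    // Snapshot the field into a final local, as initializeOp does with parameterizePosBigTable.
    final int capturedPos = posBigTable;

    Closure viaLocal = new Closure() {
      @Override
      public void run() {
        System.out.println("captured local = " + capturedPos);
      }
    };
    Closure viaField = new Closure() {
      @Override
      public void run() {
        System.out.println("current field  = " + posBigTable);
      }
    };

    // The field changes before the closures actually run.
    posBigTable = 42;

    viaLocal.run();   // prints 1: the value at capture time
    viaField.run();   // prints 42: the value at call time
  }

  public static void main(String[] args) {
    new ClosureCaptureDemo().demo();
  }
}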
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class VectorSMBMapJoinOperator method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);

  vrbCtx = new VectorizedRowBatchCtx();
  vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());

  outputBatch = vrbCtx.createVectorizedRowBatch();

  keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);

  outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();

  // This key evaluator translates from the vectorized VectorHashKeyWrapper format
  // into the row-mode MapJoinKey
  keyEvaluator = new SMBJoinKeyEvaluator() {
    private List<Object> key;

    public SMBJoinKeyEvaluator init() {
      key = new ArrayList<Object>();
      for (int i = 0; i < keyExpressions.length; ++i) {
        key.add(null);
      }
      return this;
    }

    @Override
    public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
      for (int i = 0; i < keyExpressions.length; ++i) {
        key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
      }
      return key;
    }
  }.init();

  Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
  List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);

  // We're hijacking the big table evaluators and replacing them with our own custom ones
  // which are going to return values from the input batch vector expressions
  List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());

  VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions,
      new VectorExpressionWriterFactory.ListOIDClosure() {
        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
          valueWriters = writers;
          joinValuesObjectInspectors[posBigTable] = oids;
        }
      });

  for (int i = 0; i < bigTableExpressions.size(); ++i) {
    ExprNodeDesc desc = bigTableExpressions.get(i);
    VectorExpression vectorExpr = bigTableValueExpressions[i];

    // This is a vectorization-aware evaluator
    ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {

      int columnIndex;
      int writerIndex;

      public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
        this.columnIndex = columnIndex;
        this.writerIndex = writerIndex;
        return this;
      }

      @Override
      public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
        throw new HiveException("should never reach here");
      }

      @Override
      protected Object _evaluate(Object row, int version) throws HiveException {
        VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
        int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
        return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
      }
    }.initVectorExpr(vectorExpr.getOutputColumn(), i);
    vectorNodeEvaluators.add(eval);
  }

  // Now replace the old evaluators with our own
  joinValues[posBigTable] = vectorNodeEvaluators;
}
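The SMBJoinKeyEvaluator above is built with the construct-then-init() idiom so that one pre-sized List<Object> is reused for every key, avoiding per-row allocation. A stripped-down sketch of that idiom using hypothetical names (not the Hive interfaces):

import java.util.ArrayList;
import java.util.List;

public class ReusableKeyEvaluatorDemo {

  // Stand-in for SMBJoinKeyEvaluator; hypothetical, for illustration only.
  interface KeyEvaluator {
    List<Object> evaluate(long[] keyColumns, int row);
  }

  static KeyEvaluator buildEvaluator(final int keyWidth) {
    return new KeyEvaluator() {
      private List<Object> key;

      // Called once, right after construction, exactly like "...}.init();" above.
      KeyEvaluator init() {
        key = new ArrayList<Object>(keyWidth);
        for (int i = 0; i < keyWidth; ++i) {
          key.add(null);
        }
        return this;
      }

      @Override
      public List<Object> evaluate(long[] keyColumns, int row) {
        // Overwrite the slots in place; no new list is allocated per row.
        for (int i = 0; i < keyWidth; ++i) {
          key.set(i, keyColumns[i] + row);
        }
        return key;
      }
    }.init();
  }

  public static void main(String[] args) {
    KeyEvaluator eval = buildEvaluator(2);
    System.out.println(eval.evaluate(new long[] { 10L, 20L }, 3));   // [13, 23]
  }
}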
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class VectorSelectOperator method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);

  // Just forward the row as is
  if (conf.isSelStarNoCompute()) {
    return;
  }

  List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();

  List<ExprNodeDesc> colList = conf.getColList();
  valueWriters = VectorExpressionWriterFactory.getExpressionWriters(colList);
  for (VectorExpressionWriter vew : valueWriters) {
    objectInspectors.add(vew.getObjectInspector());
  }

  List<String> outputFieldNames = conf.getOutputColumnNames();
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors);
}
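For reference, the last two lines pair the output column names with one ObjectInspector per column and hand both lists to ObjectInspectorFactory. A minimal standalone sketch of that same call, assuming only that the hive-serde artifact is on the classpath (the column names and types here are made up):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructOutputOIDemo {
  public static void main(String[] args) {
    // Output column names, as conf.getOutputColumnNames() would supply them.
    List<String> fieldNames = Arrays.asList("_col0", "_col1");

    // One ObjectInspector per output column, standing in for the writers' inspectors.
    List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaLongObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);

    // The same factory method VectorSelectOperator uses to build outputObjInspector.
    StructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);

    System.out.println(rowOI.getTypeName());   // expected: struct<_col0:bigint,_col1:string>
  }
}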
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class VectorizationContext method getAggregatorExpression.
public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) throws HiveException {
  ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
  VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()];

  for (int i = 0; i < paramDescList.size(); ++i) {
    ExprNodeDesc exprDesc = paramDescList.get(i);
    vectorParams[i] = this.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  }

  String aggregateName = desc.getGenericUDAFName();
  VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE;

  if (paramDescList.size() > 0) {
    ExprNodeDesc inputExpr = paramDescList.get(0);
    inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString());
    if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) {
      throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString());
    }
  }

  GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode();
  for (AggregateDefinition aggDef : aggregatesDefinition) {
    if (aggregateName.equalsIgnoreCase(aggDef.getName()) &&
        ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE &&
          inputType == VectorExpressionDescriptor.ArgumentType.NONE) ||
         (aggDef.getType().isSameTypeOrFamily(inputType)))) {

      // A null means all modes are ok.
      GenericUDAFEvaluator.Mode aggDefUdafEvaluatorMode = aggDef.getUdafEvaluatorMode();
      if (aggDefUdafEvaluatorMode != null && aggDefUdafEvaluatorMode != udafEvaluatorMode) {
        continue;
      }

      Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
      try {
        Constructor<? extends VectorAggregateExpression> ctor = aggClass.getConstructor(VectorExpression.class);
        VectorAggregateExpression aggExpr = ctor.newInstance(vectorParams.length > 0 ? vectorParams[0] : null);
        aggExpr.init(desc);
        return aggExpr;
      } catch (Exception e) {
        throw new HiveException("Internal exception for vector aggregate : \"" + aggregateName +
            "\" for type: \"" + inputType + "\"", e);
      }
    }
  }

  throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
      "\" for type: \"" + inputType.name() + "\" (UDAF evaluator mode = " +
      (udafEvaluatorMode == null ? "NULL" : udafEvaluatorMode.name()) + ")");
}
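The lookup loop resolves the registered aggregate class reflectively through a single-argument constructor and passes null when the UDAF takes no parameters (for example count(*)). A self-contained sketch of just that reflection step, using made-up stand-in classes rather than the Hive ones:

import java.lang.reflect.Constructor;

public class ReflectiveAggFactoryDemo {

  // Stand-ins for VectorExpression and VectorAggregateExpression; hypothetical.
  static class VectorExpr { }

  public static class SumAgg {
    final VectorExpr input;
    public SumAgg(VectorExpr input) {
      this.input = input;
    }
  }

  static SumAgg instantiate(Class<? extends SumAgg> aggClass, VectorExpr[] params) throws Exception {
    // Same shape as aggClass.getConstructor(VectorExpression.class) in getAggregatorExpression.
    Constructor<? extends SumAgg> ctor = aggClass.getConstructor(VectorExpr.class);
    // Parameterless aggregates get null, exactly like the ternary in the method above.
    return ctor.newInstance(params.length > 0 ? params[0] : null);
  }

  public static void main(String[] args) throws Exception {
    SumAgg withParam = instantiate(SumAgg.class, new VectorExpr[] { new VectorExpr() });
    SumAgg noParam = instantiate(SumAgg.class, new VectorExpr[0]);
    System.out.println(withParam.input != null);   // true
    System.out.println(noParam.input == null);     // true
  }
}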
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class VectorizationContext method getStructInExpression.
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr,
    TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode,
    TypeInfo returnType) throws HiveException {

  VectorExpression expr = null;

  StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;

  ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfos.size();
  ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
  InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
  for (int f = 0; f < fieldCount; f++) {
    TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
    // Only primitive fields are supported for now.
    if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
      return null;
    }

    // We are going to serialize using the 4 basic types.
    ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
    fieldVectorColumnTypes[f] = fieldVectorColumnType;

    // We currently evaluate the IN (..) constants in special ways.
    PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
    InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
    fieldInConstantTypes[f] = inConstantType;
  }

  Output buffer = new Output();
  BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);

  final int inChildrenCount = inChildren.size();
  byte[][] serializedInChildren = new byte[inChildrenCount][];
  try {
    for (int i = 0; i < inChildrenCount; i++) {
      final ExprNodeDesc node = inChildren.get(i);
      final Object[] constants;

      if (node instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
        ConstantObjectInspector output = constNode.getWritableObjectInspector();
        constants = ((List<?>) output.getWritableConstantValue()).toArray();
      } else {
        ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
        ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
        ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
        constants = (Object[]) evaluator.evaluate(null);
      }

      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < fieldCount; f++) {
        Object constant = constants[f];
        if (constant == null) {
          binarySortableSerializeWrite.writeNull();
        } else {
          InConstantType inConstantType = fieldInConstantTypes[f];
          switch (inConstantType) {
          case STRING_FAMILY:
            {
              byte[] bytes;
              if (constant instanceof Text) {
                Text text = (Text) constant;
                bytes = text.getBytes();
                binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
              } else {
                throw new HiveException("Unexpected constant String type " +
                    constant.getClass().getSimpleName());
              }
            }
            break;
          case INT_FAMILY:
            {
              long value;
              if (constant instanceof IntWritable) {
                value = ((IntWritable) constant).get();
              } else if (constant instanceof LongWritable) {
                value = ((LongWritable) constant).get();
              } else {
                throw new HiveException("Unexpected constant Long type " +
                    constant.getClass().getSimpleName());
              }
              binarySortableSerializeWrite.writeLong(value);
            }
            break;
          case FLOAT_FAMILY:
            {
              double value;
              if (constant instanceof DoubleWritable) {
                value = ((DoubleWritable) constant).get();
              } else {
                throw new HiveException("Unexpected constant Double type " +
                    constant.getClass().getSimpleName());
              }
              binarySortableSerializeWrite.writeDouble(value);
            }
            break;

          // UNDONE...
          case DATE:
          case TIMESTAMP:
          case DECIMAL:
          default:
            throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
          }
        }
      }
      serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }

  // Create a single child representing the scratch column where we will
  // generate the serialized keys of the batch.
  int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);

  Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
      FilterStructColumnInList.class : StructColumnInList.class);

  expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType);

  ((IStringInExpr) expr).setInListValues(serializedInChildren);
  ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
  ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);

  return expr;
}
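For orientation, here is a minimal sketch of the serialization idiom the try block relies on: each IN (...) struct constant is written field by field with BinarySortableSerializeWrite into a reused Output buffer, then copied out as one byte[] entry for the IN list. This assumes the hive-serde artifact is on the classpath and that BinarySortableSerializeWrite lives in the org.apache.hadoop.hive.serde2.binarysortable.fast package; the field values are invented:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

public class StructInListKeyDemo {
  public static void main(String[] args) throws Exception {
    // One reused buffer, one writer sized for a two-field struct constant.
    Output buffer = new Output();
    BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(2);

    // Bind the buffer for this constant, then write the fields in order,
    // mirroring the INT_FAMILY and STRING_FAMILY branches above.
    writer.set(buffer);
    writer.writeLong(42L);
    byte[] name = "acme".getBytes(StandardCharsets.UTF_8);
    writer.writeString(name, 0, name.length);

    // Copy the serialized bytes out as one IN-list entry, as the loop does
    // into serializedInChildren[i].
    byte[] serializedKey = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    System.out.println(serializedKey.length + " bytes");
  }
}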