Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.
The class MapJoinKey, method serializeVector.
/**
* Serializes row to output for vectorized path.
* @param byteStream Output to reuse. Can be null, in which case a new one is created.
*/
public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
Object[] fieldData = new Object[keyOutputWriters.length];
List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
for (int i = 0; i < keyOutputWriters.length; ++i) {
VectorExpressionWriter writer = keyOutputWriters[i];
fieldOis.add(writer.getObjectInspector());
// This is rather convoluted... to simplify for perf, we could call getRawKeyValue
// instead of writable, and serialize based on Java type as opposed to OI.
fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
if (nulls != null) {
nulls[i] = (fieldData[i] == null);
}
}
return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders, nullMarkers, notNullMarkers);
}
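For orientation, a minimal caller-side sketch of how serializeVector might be driven for every key of a batch. It is illustrative only, not Hive source: it assumes keyWrapperBatch and keyOutputWriters are the fields built by the vectorized operator, that VectorHashKeyWrapperBatch exposes evaluateBatch(...) and getVectorHashKeyWrappers() as used elsewhere in the vectorized code path, and it passes null for the optional nulls/sort-order/marker arguments.

// Hypothetical method inside a vectorized map-join operator (illustrative only, not Hive source).
private void serializeBatchKeys(VectorizedRowBatch inBatch) throws HiveException, SerDeException {
  // Evaluate the key expressions once for the whole batch.
  keyWrapperBatch.evaluateBatch(inBatch);
  VectorHashKeyWrapper[] keyWrappers = keyWrapperBatch.getVectorHashKeyWrappers();
  Output reusedOutput = null;
  for (int i = 0; i < inBatch.size; ++i) {
    reusedOutput = MapJoinKey.serializeVector(reusedOutput, keyWrappers[i], keyOutputWriters,
        keyWrapperBatch, null /* nulls */, null /* sortableSortOrders */,
        null /* nullMarkers */, null /* notNullMarkers */);
    // reusedOutput now holds the serialized bytes of row i's join key.
  }
}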
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.
The class VectorMapJoinOperator, method initializeOp.
@Override
public void initializeOp(Configuration hconf) throws HiveException {
// Use a final variable to properly parameterize the processVectorInspector closure.
// Using a member variable in the closure will not do the right thing...
final int parameterizePosBigTable = conf.getPosBigTable();
// Code borrowed from VectorReduceSinkOperator.initializeOp
VectorExpressionWriterFactory.processVectorInspector((StructObjectInspector) inputObjInspectors[parameterizePosBigTable], new VectorExpressionWriterFactory.SingleOIDClosure() {
@Override
public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
rowWriters = writers;
inputObjInspectors[parameterizePosBigTable] = objectInspector;
}
});
singleRow = new Object[rowWriters.length];
super.initializeOp(hconf);
List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {
@Override
public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
valueWriters = writers;
joinValuesObjectInspectors[posBigTable] = oids;
}
});
// We're hijacking the big table evaluators and replacing them with our own custom ones
// which are going to return values from the input batch vector expressions
List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
for (int i = 0; i < bigTableExpressions.size(); ++i) {
ExprNodeDesc desc = bigTableExpressions.get(i);
VectorExpression vectorExpr = bigTableValueExpressions[i];
// This is a vectorization-aware evaluator
ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {
int columnIndex;
int writerIndex;
public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
this.columnIndex = columnIndex;
this.writerIndex = writerIndex;
return this;
}
@Override
public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
throw new HiveException("should never reach here");
}
@Override
protected Object _evaluate(Object row, int version) throws HiveException {
VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
}
}.initVectorExpr(vectorExpr.getOutputColumn(), i);
vectorNodeEvaluators.add(eval);
}
// Now replace the old evaluators with our own
joinValues[posBigTable] = vectorNodeEvaluators;
// Filtering is handled in the input batch processing
if (filterMaps != null) {
filterMaps[posBigTable] = null;
}
}
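The anonymous evaluators created here (and again in VectorSMBMapJoinOperator below) share the same two-line core: map the logical batch position through selected[] when the selection vector is in use, then let the VectorExpressionWriter turn the column value into a row-mode writable. A standalone sketch of that core as a hypothetical helper, using only the APIs visible in the snippet:

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;

/** Hypothetical helper isolating the cell-read logic of the hijacked evaluators (illustrative only). */
final class BatchCellReader {

  private BatchCellReader() {
  }

  static Object readCell(VectorizedRowBatch batch, int batchIndex, int columnIndex,
      VectorExpressionWriter writer) throws HiveException {
    // Honor the selection vector: batchIndex is the logical position, rowIndex the physical row.
    int rowIndex = batch.selectedInUse ? batch.selected[batchIndex] : batchIndex;
    // The writer converts the vectorized column value into its row-mode writable object.
    return writer.writeValue(batch.cols[columnIndex], rowIndex);
  }
}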
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.
The class VectorSMBMapJoinOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
vrbCtx = new VectorizedRowBatchCtx();
vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
outputBatch = vrbCtx.createVectorizedRowBatch();
keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();
// This key evaluator translates from the vectorized VectorHashKeyWrapper format
// into the row-mode MapJoinKey
keyEvaluator = new SMBJoinKeyEvaluator() {
private List<Object> key;
public SMBJoinKeyEvaluator init() {
key = new ArrayList<Object>();
for (int i = 0; i < keyExpressions.length; ++i) {
key.add(null);
}
return this;
}
@Override
public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
for (int i = 0; i < keyExpressions.length; ++i) {
key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
}
return key;
}
}.init();
Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
// We're hijacking the big table evaluators and replacing them with our own custom ones
// which are going to return values from the input batch vector expressions
List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {
@Override
public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
valueWriters = writers;
joinValuesObjectInspectors[posBigTable] = oids;
}
});
for (int i = 0; i < bigTableExpressions.size(); ++i) {
ExprNodeDesc desc = bigTableExpressions.get(i);
VectorExpression vectorExpr = bigTableValueExpressions[i];
// This is a vectorization-aware evaluator
ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {
int columnIndex;
int writerIndex;
public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
this.columnIndex = columnIndex;
this.writerIndex = writerIndex;
return this;
}
@Override
public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
throw new HiveException("should never reach here");
}
@Override
protected Object _evaluate(Object row, int version) throws HiveException {
VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
}
}.initVectorExpr(vectorExpr.getOutputColumn(), i);
vectorNodeEvaluators.add(eval);
}
// Now replace the old evaluators with our own
joinValues[posBigTable] = vectorNodeEvaluators;
}
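Both the SMBJoinKeyEvaluator above and the anonymous ExprNodeEvaluator rely on the same Java idiom: an anonymous class cannot declare a constructor, so its state is injected through an init(...) method that returns this, chained directly onto the instantiation (the trailing .init() / .initVectorExpr(...)). A minimal self-contained illustration with hypothetical names, unrelated to Hive:

class FluentInitDemo {

  interface KeyFormatter {
    String format(int key);
  }

  static KeyFormatter prefixed(final String prefix) {
    // Anonymous classes have no constructors, so capture state via a fluent init method.
    return new KeyFormatter() {
      private String capturedPrefix;

      KeyFormatter init(String p) {
        this.capturedPrefix = p;
        return this;
      }

      @Override
      public String format(int key) {
        return capturedPrefix + key;
      }
    }.init(prefix);
  }

  public static void main(String[] args) {
    // Prints "row-42".
    System.out.println(prefixed("row-").format(42));
  }
}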
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.
The class VectorSelectOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
// Just forward the row as is
if (conf.isSelStarNoCompute()) {
return;
}
List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
List<ExprNodeDesc> colList = conf.getColList();
valueWriters = VectorExpressionWriterFactory.getExpressionWriters(colList);
for (VectorExpressionWriter vew : valueWriters) {
objectInspectors.add(vew.getObjectInspector());
}
List<String> outputFieldNames = conf.getOutputColumnNames();
outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors);
}
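The same three steps (writers from the column expressions, object inspectors from the writers, a standard struct inspector from names plus inspectors) can be read as a small standalone utility. A hedged sketch with a hypothetical class name, using only the factory calls that appear in the snippet:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

/** Hypothetical utility mirroring VectorSelectOperator's output-inspector setup (illustrative only). */
final class SelectOutputInspector {

  private SelectOutputInspector() {
  }

  static StructObjectInspector build(List<ExprNodeDesc> colList, List<String> outputFieldNames)
      throws HiveException {
    // One writer per projected expression; each writer knows the row-mode OI of its output.
    VectorExpressionWriter[] writers = VectorExpressionWriterFactory.getExpressionWriters(colList);
    List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>(writers.length);
    for (VectorExpressionWriter writer : writers) {
      objectInspectors.add(writer.getObjectInspector());
    }
    // The output row shape is just the field names paired with the writers' inspectors.
    return ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors);
  }
}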
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.
The class SparkReduceRecordHandler, method init.
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
super.init(job, output, reporter);
rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
ObjectInspector keyObjectInspector;
ReduceWork gWork = Utilities.getReduceWork(job);
reducer = gWork.getReducer();
vectorized = gWork.getVectorMode();
// clear out any parents as the reducer is the root
reducer.setParentOperators(null);
isTagged = gWork.getNeedsTagging();
try {
keyTableDesc = gWork.getKeyDesc();
inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
keyObjectInspector = inputKeyDeserializer.getObjectInspector();
valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
if (vectorized) {
final int maxTags = gWork.getTagToValueDesc().size();
keyStructInspector = (StructObjectInspector) keyObjectInspector;
batches = new VectorizedRowBatch[maxTags];
valueStructInspectors = new StructObjectInspector[maxTags];
valueStringWriters = new List[maxTags];
keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
buffer = new DataOutputBuffer();
}
for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
// We should initialize the SerDe with the TypeInfo when available.
valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
if (vectorized) {
/* vectorization only works with struct object inspectors */
valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];
final int totalColumns = keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size();
valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors[tag])));
rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors[tag]);
batches[tag] = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
} else {
ois.add(keyObjectInspector);
ois.add(valueObjectInspector[tag]);
//reducer.setGroupKeyObjectInspector(keyObjectInspector);
rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
ExecMapperContext execContext = new ExecMapperContext(job);
localWork = gWork.getMapRedLocalWork();
execContext.setJc(jc);
execContext.setLocalWork(localWork);
reducer.passExecContext(execContext);
reducer.setReporter(rp);
OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
// initialize reduce operator tree
try {
LOG.info(reducer.dump(0));
reducer.initialize(jc, rowObjectInspector);
if (localWork != null) {
for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
dummyOp.setExecContext(execContext);
dummyOp.initialize(jc, null);
}
}
} catch (Throwable e) {
abort = true;
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
throw new RuntimeException("Reduce operator initialization failed", e);
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
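In the vectorized branch, the writers for a tag are the key-struct writers followed by that tag's value-struct writers, so a writer index lines up with a column of the combined reduce row (keysColumnOffset marks the key/value boundary). A hedged sketch of that concatenation as a hypothetical helper (not Hive source; the thrown exception type is an assumption):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

/** Hypothetical helper mirroring the per-tag writer setup above (illustrative only). */
final class ReduceTagWriters {

  private ReduceTagWriters() {
  }

  static List<VectorExpressionWriter> build(StructObjectInspector keyInspector,
      StructObjectInspector valueInspector) throws HiveException {
    List<VectorExpressionWriter> writers = new ArrayList<VectorExpressionWriter>();
    // Key columns come first; their count is the keysColumnOffset used when filling batches.
    writers.addAll(Arrays.asList(
        VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyInspector)));
    // Value columns for this tag follow the key columns.
    writers.addAll(Arrays.asList(
        VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueInspector)));
    return writers;
  }
}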