Use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.
The class VectorSMBMapJoinOperator, method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  VectorExpression.doTransientInit(bigTableFilterExpressions, hconf);
  VectorExpression.doTransientInit(keyExpressions, hconf);
  VectorExpression.doTransientInit(bigTableValueExpressions, hconf);
  vrbCtx = new VectorizedRowBatchCtx();
  vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
  outputBatch = vrbCtx.createVectorizedRowBatch();
  keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
  outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();
  // This key evaluator translates from the vectorized VectorHashKeyWrapper format
  // into the row-mode MapJoinKey.
  keyEvaluator = new SMBJoinKeyEvaluator() {
    private List<Object> key;

    public SMBJoinKeyEvaluator init() {
      key = new ArrayList<Object>();
      for (int i = 0; i < keyExpressions.length; ++i) {
        key.add(null);
      }
      return this;
    }

    @Override
    public List<Object> evaluate(VectorHashKeyWrapperBase kw) throws HiveException {
      for (int i = 0; i < keyExpressions.length; ++i) {
        key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
      }
      return key;
    }
  }.init();
  Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
  List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
  // We're hijacking the big-table evaluators and replacing them with our own custom ones,
  // which return values produced by the input batch's vector expressions.
  List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
  VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {
    @Override
    public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
      valueWriters = writers;
      joinValuesObjectInspectors[posBigTable] = oids;
    }
  });
  for (int i = 0; i < bigTableExpressions.size(); ++i) {
    ExprNodeDesc desc = bigTableExpressions.get(i);
    VectorExpression vectorExpr = bigTableValueExpressions[i];
    // This is a vectorization-aware evaluator.
    ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {
      int columnIndex;
      int writerIndex;

      public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
        this.columnIndex = columnIndex;
        this.writerIndex = writerIndex;
        return this;
      }

      @Override
      public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
        throw new HiveException("should never reach here");
      }

      @Override
      protected Object _evaluate(Object row, int version) throws HiveException {
        VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
        // Resolve the physical row: indirect through the selected vector when it is in use.
        int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
        return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
      }
    }.initVectorExpr(vectorExpr.getOutputColumnNum(), i);
    vectorNodeEvaluators.add(eval);
  }
  // Now replace the old evaluators with our own.
  joinValues[posBigTable] = vectorNodeEvaluators;
}
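The heart of the hijacked evaluator above is the row-index indirection in _evaluate: when batch.selectedInUse is true, logical position i maps through batch.selected to a physical row; otherwise rows are dense. A minimal standalone sketch of that pattern (the class and helper method names are hypothetical, not Hive code):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SelectedIndexSketch {
  // Resolve the physical row index for logical position batchIndex.
  static int physicalRow(VectorizedRowBatch batch, int batchIndex) {
    return batch.selectedInUse ? batch.selected[batchIndex] : batchIndex;
  }

  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    LongColumnVector col = new LongColumnVector();
    batch.cols[0] = col;
    for (int i = 0; i < 5; i++) {
      col.vector[i] = i * 100L;
    }
    // Select only physical rows 1 and 3.
    batch.selected[0] = 1;
    batch.selected[1] = 3;
    batch.selectedInUse = true;
    batch.size = 2;
    for (int i = 0; i < batch.size; i++) {
      System.out.println(col.vector[physicalRow(batch, i)]); // prints 100, then 300
    }
  }
}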
Use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.
The class VectorTopNKeyOperator, method process:
@Override
public void process(Object data, int tag) throws HiveException {
  VectorizedRowBatch batch = (VectorizedRowBatch) data;
  if (!disabledPartitions.isEmpty() && disabledPartitions.size() == topNKeyFilters.size()) {
    // All filters have been disabled by the efficiency check; pass the batch through.
    vectorForward(batch);
    return;
  }
  incomingBatches++;
  // The selected vector represents the selected rows; clone it so it can be restored later.
  System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
  int[] selectedBackup = batch.selected;
  batch.selected = temporarySelected;
  int sizeBackup = batch.size;
  boolean selectedInUseBackup = batch.selectedInUse;
  for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
    keyExpression.evaluate(batch);
  }
  partitionKeyWrapperBatch.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] partitionKeyWrappers = partitionKeyWrapperBatch.getVectorHashKeyWrappers();
  keyWrappersBatch.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();
  // Filter rows with top-n keys.
  int size = 0;
  int[] selected = new int[batch.selected.length];
  for (int i = 0; i < batch.size; i++) {
    int j;
    if (batch.selectedInUse) {
      j = batch.selected[i];
    } else {
      j = i;
    }
    VectorHashKeyWrapperBase partitionKey = partitionKeyWrappers[i];
    if (disabledPartitions.contains(partitionKey)) {
      // The filter for this partition is disabled.
      selected[size++] = j;
    } else {
      TopNKeyFilter topNKeyFilter = topNKeyFilters.get(partitionKey);
      if (topNKeyFilter == null && topNKeyFilters.size() < conf.getMaxNumberOfPartitions()) {
        topNKeyFilter = new TopNKeyFilter(conf.getTopN(), keyWrapperComparator);
        topNKeyFilters.put(partitionKey.copyKey(), topNKeyFilter);
      }
      if (topNKeyFilter == null || topNKeyFilter.canForward(keyWrappers[i])) {
        selected[size++] = j;
      }
    }
  }
  // Apply the selection to the batch.
  if (batch.size != size) {
    batch.selectedInUse = true;
    batch.selected = selected;
    batch.size = size;
  }
  // Forward the result.
  if (size > 0) {
    vectorForward(batch);
  }
  // Restore the original selected vector.
  batch.selected = selectedBackup;
  batch.size = sizeBackup;
  batch.selectedInUse = selectedInUseBackup;
  if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
    checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows());
  }
}
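The backup/restore choreography around batch.selected is the part worth imitating: the operator narrows the batch in place so vectorForward sees only surviving rows, then restores the original selection state so upstream code can reuse the batch. A minimal sketch of that discipline under a generic row predicate (the class, method names, and the forward stand-in are hypothetical, not Hive code):

import java.util.function.IntPredicate;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SelectedBackupSketch {
  private final int[] temporarySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];

  void filterAndForward(VectorizedRowBatch batch, IntPredicate keepRow) {
    // Back up the selection state before mutating it.
    System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
    int[] selectedBackup = batch.selected;
    batch.selected = temporarySelected;
    int sizeBackup = batch.size;
    boolean selectedInUseBackup = batch.selectedInUse;
    // Build a narrowed selection over the surviving physical rows.
    int size = 0;
    int[] selected = new int[batch.selected.length];
    for (int i = 0; i < batch.size; i++) {
      int j = batch.selectedInUse ? batch.selected[i] : i;
      if (keepRow.test(j)) {
        selected[size++] = j;
      }
    }
    if (batch.size != size) {
      batch.selectedInUse = true;
      batch.selected = selected;
      batch.size = size;
    }
    if (size > 0) {
      forward(batch); // stand-in for vectorForward(batch)
    }
    // Restore so the caller sees the batch unchanged.
    batch.selected = selectedBackup;
    batch.size = sizeBackup;
    batch.selectedInUse = selectedInUseBackup;
  }

  void forward(VectorizedRowBatch batch) {
    System.out.println("forwarded " + batch.size + " rows");
  }
}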
Use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.
The class TestVectorHashKeyWrapperBatch, method testVectorHashKeyWrapperBatch:
// Specific test for HIVE-18744 --
// Tests Timestamp assignment.
@Test
public void testVectorHashKeyWrapperBatch() throws HiveException {
  VectorExpression[] keyExpressions = new VectorExpression[] { new IdentityExpression(0) };
  TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.timestampTypeInfo };
  VectorHashKeyWrapperBatch vhkwb = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions, typeInfos);
  VectorizedRowBatch batch = new VectorizedRowBatch(1);
  batch.selectedInUse = false;
  batch.size = 10;
  TimestampColumnVector timestampColVector = new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  batch.cols[0] = timestampColVector;
  timestampColVector.reset();
  // Cause the Timestamp object to be replaced (in the buggy code) with ZERO_TIMESTAMP.
  timestampColVector.noNulls = false;
  timestampColVector.isNull[0] = true;
  Timestamp scratch = new Timestamp(2039);
  Timestamp ts0 = new Timestamp(2039);
  scratch.setTime(ts0.getTime());
  scratch.setNanos(ts0.getNanos());
  timestampColVector.set(1, scratch);
  Timestamp ts1 = new Timestamp(33222);
  scratch.setTime(ts1.getTime());
  scratch.setNanos(ts1.getNanos());
  timestampColVector.set(2, scratch);
  batch.size = 3;
  vhkwb.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] vhkwArray = vhkwb.getVectorHashKeyWrappers();
  VectorHashKeyWrapperBase vhk = vhkwArray[0];
  assertTrue(vhk.isNull(0));
  vhk = vhkwArray[1];
  assertFalse(vhk.isNull(0));
  assertEquals(vhk.getTimestamp(0), ts0);
  vhk = vhkwArray[2];
  assertFalse(vhk.isNull(0));
  assertEquals(vhk.getTimestamp(0), ts1);
}
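The test can reuse a single scratch Timestamp for every write because TimestampColumnVector.set copies the millis and nanos values into the vector's internal arrays rather than retaining a reference to the passed object; the HIVE-18744 bug was on the read side, where key wrappers aliased a shared Timestamp. A small sketch of the write-side behavior (the class name is hypothetical; asScratchTimestamp returns a reused internal object, so each value is printed before the next read):

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampScratchSketch {
  public static void main(String[] args) {
    TimestampColumnVector tcv = new TimestampColumnVector(16);
    Timestamp scratch = new Timestamp(2039);
    // set() copies millis/nanos out of the scratch object...
    tcv.set(0, scratch);
    // ...so mutating and reusing the same scratch object afterwards is safe.
    scratch.setTime(33222);
    tcv.set(1, scratch);
    System.out.println(tcv.asScratchTimestamp(0)); // 2.039s after the epoch
    System.out.println(tcv.asScratchTimestamp(1)); // 33.222s after the epoch
  }
}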
Use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.
The class TestVectorHashKeyWrapperBatch, method testVectorHashKeyWrapperGeneralCopyKey:
// Test for HIVE-24575
@Test
public void testVectorHashKeyWrapperGeneralCopyKey() throws HiveException {
  VectorExpression[] keyExpressions = new VectorExpression[] { new IdentityExpression(0) };
  TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo };
  VectorHashKeyWrapperBatch vhkwb = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions, typeInfos);
  VectorizedRowBatch batch = new VectorizedRowBatch(1);
  batch.selectedInUse = false;
  BytesColumnVector bytesColumnVector = new BytesColumnVector();
  bytesColumnVector.initBuffer(1024);
  batch.cols[0] = bytesColumnVector;
  byte[] contents = "education_reference".getBytes();
  bytesColumnVector.setVal(0, "system_management".getBytes());
  bytesColumnVector.setVal(1, "travel_transportation".getBytes());
  bytesColumnVector.setVal(2, contents);
  bytesColumnVector.setVal(3, "app_management".getBytes());
  batch.size = 4;
  vhkwb.evaluateBatch(batch);
  VectorHashKeyWrapperBase[] vhkwArray = vhkwb.getVectorHashKeyWrappers();
  VectorHashKeyWrapperGeneral hashKey2 = (VectorHashKeyWrapperGeneral) vhkwArray[2];
  VectorHashKeyWrapperGeneral hashKey1 = (VectorHashKeyWrapperGeneral) vhkwArray[1];
  assertTrue(StringExpr.equal(hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0), contents, 0, contents.length));
  assertFalse(StringExpr.equal(hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0), hashKey1.getBytes(0), hashKey1.getByteStart(0), hashKey1.getByteLength(0)));
  hashKey2.copyKey(hashKey1);
  assertTrue(StringExpr.equal(hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0), contents, 0, contents.length));
  assertTrue(StringExpr.equal(hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0), hashKey1.getBytes(0), hashKey1.getByteStart(0), hashKey1.getByteLength(0)));
}
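Taken together with the VectorTopNKeyOperator snippet above (topNKeyFilters.put(partitionKey.copyKey(), ...)), the point of copyKey is that the wrappers returned by getVectorHashKeyWrappers() are scratch objects rewritten on every evaluateBatch call, so a wrapper must be deep-copied before it can live on as a map key. A minimal sketch of that idiom, modeled on the operator (the class and counting logic are hypothetical, not Hive code):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.KeyWrapper;
import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase;

public class CopyKeySketch {
  // Counts rows per key across many batches (hypothetical aggregation state).
  private final Map<KeyWrapper, Long> rowCounts = new HashMap<>();

  void countRow(VectorHashKeyWrapperBase scratchKey) {
    // Probing with the scratch wrapper is fine: equals/hashCode compare contents.
    Long count = rowCounts.get(scratchKey);
    if (count == null) {
      // First sighting: deep-copy before storing, because the scratch wrapper's
      // buffers are rewritten by the next evaluateBatch() call.
      rowCounts.put(scratchKey.copyKey(), 1L);
    } else {
      // The map already holds a copied key; put() replaces only the value.
      rowCounts.put(scratchKey, count + 1L);
    }
  }
}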