Search in sources :

Example 1 with VectorHashKeyWrapperBase

use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.

Class VectorSMBMapJoinOperator, method initializeOp.

/**
 * Sets up the vectorized state of this operator: runs transient initialization on the
 * filter, key and value vector expressions, creates the output batch and the key wrapper
 * batch, and installs a key evaluator plus big-table value evaluators that read from
 * vectorized batches.
 *
 * @param hconf configuration handed to the superclass and to the vector expressions
 * @throws HiveException if the superclass or any initialization step fails
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    // Transient initialization of each group of vector expressions.
    VectorExpression.doTransientInit(bigTableFilterExpressions, hconf);
    VectorExpression.doTransientInit(keyExpressions, hconf);
    VectorExpression.doTransientInit(bigTableValueExpressions, hconf);

    // Output batch is built from the operator's output inspector and scratch column types.
    vrbCtx = new VectorizedRowBatchCtx();
    vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
    outputBatch = vrbCtx.createVectorizedRowBatch();

    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
    outputVectorAssignRowMap = new HashMap<>();

    // This key evaluator translates from the vectorized VectorHashKeyWrapper format
    // into the row-mode MapJoinKey
    keyEvaluator = new SMBJoinKeyEvaluator() {

        // Reused result list; pre-filled with one null slot per key expression.
        private final List<Object> key = new ArrayList<>();

        {
            for (int slot = 0; slot < keyExpressions.length; ++slot) {
                key.add(null);
            }
        }

        @Override
        public List<Object> evaluate(VectorHashKeyWrapperBase kw) throws HiveException {
            // Overwrites every slot in place and hands back the same list instance.
            for (int slot = 0; slot < keyExpressions.length; ++slot) {
                key.set(slot, keyWrapperBatch.getWritableKeyValue(kw, slot, keyOutputWriters[slot]));
            }
            return key;
        }
    };

    Map<Byte, List<ExprNodeDesc>> exprsByTable = conf.getExprs();
    List<ExprNodeDesc> bigTableExpressions = exprsByTable.get(posBigTable);

    // We're hijacking the big table evaluators and replacing them with our own custom ones
    // which are going to return values from the input batch vector expressions
    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
    VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
            // Record the writers and the object inspectors for the big table position.
            valueWriters = writers;
            joinValuesObjectInspectors[posBigTable] = oids;
        }
    });

    for (int i = 0; i < bigTableExpressions.size(); ++i) {
        ExprNodeDesc valueDesc = bigTableExpressions.get(i);
        VectorExpression valueExpr = bigTableValueExpressions[i];
        // Captured by the evaluator below: where to read in the batch, which writer to use.
        final int columnIndex = valueExpr.getOutputColumnNum();
        final int writerIndex = i;

        // This is a vectorized aware evaluator
        ExprNodeEvaluator vectorAwareEvaluator = new ExprNodeEvaluator<ExprNodeDesc>(valueDesc, hconf) {

            @Override
            public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
                throw new HiveException("should never reach here");
            }

            @Override
            protected Object _evaluate(Object row, int version) throws HiveException {
                VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
                // Map the operator's current batchIndex through the selection vector if one is in use.
                int rowIndex;
                if (inBatch.selectedInUse) {
                    rowIndex = inBatch.selected[batchIndex];
                } else {
                    rowIndex = batchIndex;
                }
                return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
            }
        };
        vectorNodeEvaluators.add(vectorAwareEvaluator);
    }

    // Now replace the old evaluators with our own
    joinValues[posBigTable] = vectorNodeEvaluators;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) VectorHashKeyWrapperBase(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) VectorExpressionWriterFactory(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 2 with VectorHashKeyWrapperBase

use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.

Class VectorTopNKeyOperator, method process.

/**
 * Runs one vectorized batch through the per-partition top-N key filters and forwards the
 * rows that remain. The batch's selection state is swapped out while filtering and put
 * back before the method returns normally.
 *
 * @param data the incoming batch; cast to {@code VectorizedRowBatch}
 * @param tag  not used by this method
 * @throws HiveException if expression evaluation or forwarding fails
 */
@Override
public void process(Object data, int tag) throws HiveException {
    final VectorizedRowBatch batch = (VectorizedRowBatch) data;

    // all filters are disabled due to efficiency check
    final boolean everyFilterDisabled =
        !disabledPartitions.isEmpty() && disabledPartitions.size() == topNKeyFilters.size();
    if (everyFilterDisabled) {
        vectorForward(batch);
        return;
    }
    incomingBatches++;

    // The selected vector represents selected rows.
    // Work on a clone of it and remember the original state for the restore at the end.
    System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
    final int[] selectedBackup = batch.selected;
    final int sizeBackup = batch.size;
    final boolean selectedInUseBackup = batch.selectedInUse;
    batch.selected = temporarySelected;

    for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
        keyExpression.evaluate(batch);
    }

    partitionKeyWrapperBatch.evaluateBatch(batch);
    final VectorHashKeyWrapperBase[] partitionKeyWrappers = partitionKeyWrapperBatch.getVectorHashKeyWrappers();
    keyWrappersBatch.evaluateBatch(batch);
    final VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();

    // Filter rows with top n keys
    final int[] survivors = new int[batch.selected.length];
    int survivorCount = 0;
    for (int pos = 0; pos < batch.size; pos++) {
        // Row index inside the batch: through the selection vector when it is in use.
        final int batchRow = batch.selectedInUse ? batch.selected[pos] : pos;
        final VectorHashKeyWrapperBase partitionKey = partitionKeyWrappers[pos];

        final boolean forward;
        if (disabledPartitions.contains(partitionKey)) {
            // filter for this partition is disabled
            forward = true;
        } else {
            TopNKeyFilter filter = topNKeyFilters.get(partitionKey);
            if (filter == null && topNKeyFilters.size() < conf.getMaxNumberOfPartitions()) {
                // First sighting of this partition and still under the partition limit:
                // register a new filter under a copy of the key.
                filter = new TopNKeyFilter(conf.getTopN(), keyWrapperComparator);
                topNKeyFilters.put(partitionKey.copyKey(), filter);
            }
            // No filter (limit reached) means the row passes unfiltered.
            forward = filter == null || filter.canForward(keyWrappers[pos]);
        }

        if (forward) {
            survivors[survivorCount++] = batchRow;
        }
    }

    // Apply selection to batch
    if (batch.size != survivorCount) {
        batch.selectedInUse = true;
        batch.selected = survivors;
        batch.size = survivorCount;
    }

    // Forward the result
    if (survivorCount > 0) {
        vectorForward(batch);
    }

    // Restore the original selected vector
    batch.selected = selectedBackup;
    batch.size = sizeBackup;
    batch.selectedInUse = selectedInUseBackup;

    // Every configured number of batches, run the filter efficiency check.
    if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
        checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows());
    }
}
Also used : VectorHashKeyWrapperBase(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase) TopNKeyFilter(org.apache.hadoop.hive.ql.exec.TopNKeyFilter) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 3 with VectorHashKeyWrapperBase

use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.

Class TestVectorHashKeyWrapperBatch, method testVectorHashKeyWrapperBatch.

/**
 * Specific test for HIVE-18744 -- tests Timestamp assignment.
 *
 * <p>Builds a three-row batch over a single timestamp key column (row 0 null, rows 1 and 2
 * populated from one reused scratch {@code Timestamp}), evaluates the key wrapper batch and
 * checks that each wrapper reports the expected null flag and timestamp value.
 *
 * @throws HiveException if compiling or evaluating the key wrapper batch fails
 */
@Test
public void testVectorHashKeyWrapperBatch() throws HiveException {
    VectorExpression[] keyExpressions = new VectorExpression[] { new IdentityExpression(0) };
    TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.timestampTypeInfo };
    VectorHashKeyWrapperBatch vhkwb = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions, typeInfos);

    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    batch.selectedInUse = false;

    // DEFAULT_SIZE is referenced through the class, not through the batch instance.
    TimestampColumnVector timestampColVector = new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    batch.cols[0] = timestampColVector;
    timestampColVector.reset();

    // Cause Timestamp object to be replaced (in buggy code) with ZERO_TIMESTAMP.
    timestampColVector.noNulls = false;
    timestampColVector.isNull[0] = true;

    // The same scratch object is mutated and stored for rows 1 and 2.
    Timestamp scratch = new Timestamp(2039);
    Timestamp ts0 = new Timestamp(2039);
    scratch.setTime(ts0.getTime());
    scratch.setNanos(ts0.getNanos());
    timestampColVector.set(1, scratch);

    Timestamp ts1 = new Timestamp(33222);
    scratch.setTime(ts1.getTime());
    scratch.setNanos(ts1.getNanos());
    timestampColVector.set(2, scratch);

    batch.size = 3;
    vhkwb.evaluateBatch(batch);
    VectorHashKeyWrapperBase[] vhkwArray = vhkwb.getVectorHashKeyWrappers();

    // Row 0 was marked null.
    VectorHashKeyWrapperBase vhk = vhkwArray[0];
    assertTrue(vhk.isNull(0));

    // assertEquals takes (expected, actual), so the known timestamp goes first.
    vhk = vhkwArray[1];
    assertFalse(vhk.isNull(0));
    assertEquals(ts0, vhk.getTimestamp(0));

    vhk = vhkwArray[2];
    assertFalse(vhk.isNull(0));
    assertEquals(ts1, vhk.getTimestamp(0));
}
Also used : VectorHashKeyWrapperBase(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) IdentityExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorHashKeyWrapperBatch(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch) Timestamp(java.sql.Timestamp) Test(org.junit.Test)

Example 4 with VectorHashKeyWrapperBase

use of org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase in project hive by apache.

Class TestVectorHashKeyWrapperBatch, method testVectorHashKeyWrapperGeneralCopyKey.

/**
 * Test for HIVE-24575.
 *
 * <p>Fills a single string key column with four values, evaluates the key wrapper batch,
 * then calls {@code hashKey2.copyKey(hashKey1)} and asserts that afterwards wrapper 2 still
 * holds its original bytes and that wrappers 1 and 2 compare equal.
 *
 * @throws HiveException if compiling or evaluating the key wrapper batch fails
 */
@Test
public void testVectorHashKeyWrapperGeneralCopyKey() throws HiveException {
    // Explicit charset so the encoded bytes do not depend on the platform default.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    VectorExpression[] keyExpressions = new VectorExpression[] { new IdentityExpression(0) };
    TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo };
    VectorHashKeyWrapperBatch vhkwb = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions, typeInfos);

    VectorizedRowBatch batch = new VectorizedRowBatch(1);
    batch.selectedInUse = false;
    BytesColumnVector bytesColumnVector = new BytesColumnVector();
    bytesColumnVector.initBuffer(1024);
    batch.cols[0] = bytesColumnVector;

    byte[] contents = "education_reference".getBytes(utf8);
    bytesColumnVector.setVal(0, "system_management".getBytes(utf8));
    bytesColumnVector.setVal(1, "travel_transportation".getBytes(utf8));
    bytesColumnVector.setVal(2, contents);
    bytesColumnVector.setVal(3, "app_management".getBytes(utf8));
    batch.size = 4;

    vhkwb.evaluateBatch(batch);
    VectorHashKeyWrapperBase[] vhkwArray = vhkwb.getVectorHashKeyWrappers();
    VectorHashKeyWrapperGeneral hashKey2 = (VectorHashKeyWrapperGeneral) vhkwArray[2];
    VectorHashKeyWrapperGeneral hashKey1 = (VectorHashKeyWrapperGeneral) vhkwArray[1];

    // Before the copy: wrapper 2 matches the row-2 bytes and differs from wrapper 1.
    assertTrue(StringExpr.equal(
        hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0),
        contents, 0, contents.length));
    assertFalse(StringExpr.equal(
        hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0),
        hashKey1.getBytes(0), hashKey1.getByteStart(0), hashKey1.getByteLength(0)));

    hashKey2.copyKey(hashKey1);

    // After the copy: wrapper 2 is unchanged and wrapper 1 now compares equal to it.
    assertTrue(StringExpr.equal(
        hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0),
        contents, 0, contents.length));
    assertTrue(StringExpr.equal(
        hashKey2.getBytes(0), hashKey2.getByteStart(0), hashKey2.getByteLength(0),
        hashKey1.getBytes(0), hashKey1.getByteStart(0), hashKey1.getByteLength(0)));
}
Also used : VectorHashKeyWrapperBase(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase) VectorHashKeyWrapperGeneral(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperGeneral) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) IdentityExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorHashKeyWrapperBatch(org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch) Test(org.junit.Test)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 VectorHashKeyWrapperBase (org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase)4 IdentityExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression)2 VectorHashKeyWrapperBatch (org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 Test (org.junit.Test)2 Timestamp (java.sql.Timestamp)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)1 TopNKeyFilter (org.apache.hadoop.hive.ql.exec.TopNKeyFilter)1 VectorExpressionWriter (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)1 VectorExpressionWriterFactory (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory)1 VectorHashKeyWrapperGeneral (org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperGeneral)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1