Search in sources :

Example 46 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

the class VectorMapJoinOuterGenerateResultOperator method generateOuterNulls.

/**
 * Generate the non matching outer join output results for one vectorized row batch.
 *
 * For each non matching row specified by parameter, generate nulls for the small table results.
 *
 * @param batch
 *          The big table batch with any matching and any non matching rows both as
 *          selected in use.
 * @param noMatchs
 *          A subset of the rows of the batch that are non matches.
 * @param noMatchSize
 *          Number of non matches in noMatchs.
 */
protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, int noMatchSize) throws IOException, HiveException {
    for (int i = 0; i < noMatchSize; i++) {
        int batchIndex = noMatchs[i];
        // key as null, too.
        for (int column : bigTableOuterKeyOutputVectorColumns) {
            ColumnVector colVector = batch.cols[column];
            colVector.noNulls = false;
            colVector.isNull[batchIndex] = true;
        }
        // Small table values are set to null.
        for (int column : smallTableOutputVectorColumns) {
            ColumnVector colVector = batch.cols[column];
            colVector.noNulls = false;
            colVector.isNull[batchIndex] = true;
        }
    }
}
Also used : ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)

Example 47 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

the class VectorMapJoinOuterGenerateResultOperator method generateOuterNullsRepeatedAll.

/**
 * Generate the non-match outer join output results for the whole repeating vectorized
 * row batch.
 *
 * Each row will get nulls for all small table values.
 *
 * @param batch
 *          The big table batch.
 */
protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {
    for (int column : smallTableOutputVectorColumns) {
        ColumnVector colVector = batch.cols[column];
        colVector.noNulls = false;
        colVector.isNull[0] = true;
        colVector.isRepeating = true;
    }
    // as null, too.
    for (int column : bigTableOuterKeyOutputVectorColumns) {
        ColumnVector colVector = batch.cols[column];
        colVector.noNulls = false;
        colVector.isNull[0] = true;
        colVector.isRepeating = true;
    }
}
Also used : ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)

Example 48 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

the class VectorMapJoinOuterMultiKeyOperator method process.

// ---------------------------------------------------------------------------
// Process Multi-Key Outer Join on a vectorized row batch.
// 
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
         * Initialize Multi-Key members for this specialized class.
         */
            keyVectorSerializeWrite = new VectorSerializeRow(new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
            keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
            currentKeyOutput = new Output();
            saveKeyOutput = new Output();
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
         * Get our Multi-Key hash map information for this specialized class.
         */
            hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
            needHashTableSetup = false;
        }
        batchCounter++;
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Do the per-batch setup for an outer join.
        outerPerBatchSetup(batch);
        // For outer join, remember our input rows before ON expression filtering or before
        // hash table matching so we can generate results for all rows (matching and non matching)
        // later.
        boolean inputSelectedInUse = batch.selectedInUse;
        if (inputSelectedInUse) {
            // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
            // throw new HiveException("batch.selected is not in sort order and unique");
            // }
            System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
        }
        // Filtering for outer join just removes rows available for hash table matching.
        boolean someRowsFilteredOut = false;
        if (bigTableFilterExpressions.length > 0) {
            // Since the input
            for (VectorExpression ve : bigTableFilterExpressions) {
                ve.evaluate(batch);
            }
            someRowsFilteredOut = (batch.size != inputLogicalSize);
            if (LOG.isDebugEnabled()) {
                if (batch.selectedInUse) {
                    if (inputSelectedInUse) {
                        LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    } else {
                        LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    }
                }
            }
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Multi-Key specific declarations.
       */
        // None.
        /*
       * Multi-Key Long check for repeating.
       */
        // If all BigTable input columns to key expressions are isRepeating, then
        // calculate key once; lookup once.
        // Also determine if any nulls are present since for a join that means no match.
        boolean allKeyInputColumnsRepeating;
        // Only valid if allKeyInputColumnsRepeating is true.
        boolean someKeyInputColumnIsNull = false;
        if (bigTableKeyColumnMap.length == 0) {
            allKeyInputColumnsRepeating = false;
        } else {
            allKeyInputColumnsRepeating = true;
            for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
                ColumnVector colVector = batch.cols[bigTableKeyColumnMap[i]];
                if (!colVector.isRepeating) {
                    allKeyInputColumnsRepeating = false;
                    break;
                }
                if (!colVector.noNulls && colVector.isNull[0]) {
                    someKeyInputColumnIsNull = true;
                }
            }
        }
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Multi-Key specific repeated lookup.
         */
            JoinUtil.JoinResult joinResult;
            if (batch.size == 0) {
                // Whole repeated key batch was filtered out.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else if (someKeyInputColumnIsNull) {
                // Any (repeated) null key column is no match for whole batch.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                // All key input columns are repeating.  Generate key once.  Lookup once.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, 0);
                byte[] keyBytes = currentKeyOutput.getData();
                int keyLength = currentKeyOutput.getLength();
                joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            boolean atLeastOneNonMatch = someRowsFilteredOut;
            /*
         * Multi-Key specific variables.
         */
            Output temp;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < batch.size; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
                /*
           * Multi-Key outer null detection.
           */
                // Generate binary sortable key for current row in vectorized row batch.
                keyVectorSerializeWrite.setOutput(currentKeyOutput);
                keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
                if (keyVectorSerializeWrite.getHasAnyNulls()) {
                    // Have that the NULL does not interfere with the current equal key series, if there
                    // is one. We do not set saveJoinResult.
                    // 
                    // Let a current MATCH equal key series keep going, or
                    // Let a current SPILL equal key series keep going, or
                    // Let a current NOMATCH keep not matching.
                    atLeastOneNonMatch = true;
                // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
                } else {
                    if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
                        if (haveSaveKey) {
                            // Move on with our counts.
                            switch(saveJoinResult) {
                                case MATCH:
                                    hashMapResultCount++;
                                    equalKeySeriesCount++;
                                    break;
                                case SPILL:
                                    hashMapResultCount++;
                                    break;
                                case NOMATCH:
                                    break;
                            }
                        }
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Multi-Key specific save key.
               */
                        temp = saveKeyOutput;
                        saveKeyOutput = currentKeyOutput;
                        currentKeyOutput = temp;
                        /*
               * Multi-Key specific lookup key.
               */
                        byte[] keyBytes = saveKeyOutput.getData();
                        int keyLength = saveKeyOutput.getLength();
                        saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                atLeastOneNonMatch = true;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                                break;
                        }
                    } else {
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                                break;
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                                spillCount++;
                                break;
                            case NOMATCH:
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                                break;
                        }
                    }
                // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
                // throw new HiveException("allMatchs is not in sort order and unique");
                // }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        hashMapResultCount++;
                        equalKeySeriesCount++;
                        break;
                    case SPILL:
                        hashMapResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            }
            // We will generate results for all matching and non-matching rows.
            finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) VectorSerializeRow(org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 49 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

the class SparkReduceRecordHandler method init.

@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;
    ReduceWork gWork = Utilities.getReduceWork(job);
    reducer = gWork.getReducer();
    vectorized = gWork.getVectorMode();
    // clear out any parents as reducer is the
    reducer.setParentOperators(null);
    // root
    isTagged = gWork.getNeedsTagging();
    try {
        keyTableDesc = gWork.getKeyDesc();
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
        if (vectorized) {
            final int maxTags = gWork.getTagToValueDesc().size();
            // CONSIDER: Cleaning up this code and eliminating the arrays.  Vectorization only handles
            // one operator tree.
            Preconditions.checkState(maxTags == 1);
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
            buffer = new DataOutputBuffer();
        }
        for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
            // We should initialize the SerDe with the TypeInfo when available.
            valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
            inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
            valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
            if (vectorized) {
                /* vectorization only works with struct object inspectors */
                valueStructInspector = (StructObjectInspector) valueObjectInspector[tag];
                final int totalColumns = firstValueColumnOffset + valueStructInspector.getAllStructFieldRefs().size();
                rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspector);
                batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
                // Setup vectorized deserialization for the key and value.
                BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
                keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(new BinarySortableDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector), /* useExternalBuffer */
                true, binarySortableSerDe.getSortOrders(), binarySortableSerDe.getNullMarkers(), binarySortableSerDe.getNotNullMarkers()));
                keyBinarySortableDeserializeToRow.init(0);
                final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
                if (valuesSize > 0) {
                    valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(new LazyBinaryDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspector), /* useExternalBuffer */
                    true));
                    valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                    // Create data buffers for value bytes column vectors.
                    for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                        ColumnVector colVector = batch.cols[i];
                        if (colVector instanceof BytesColumnVector) {
                            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                            bytesColumnVector.initBuffer();
                        }
                    }
                }
            } else {
                ois.add(keyObjectInspector);
                ois.add(valueObjectInspector[tag]);
                // reducer.setGroupKeyObjectInspector(keyObjectInspector);
                rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    ExecMapperContext execContext = new ExecMapperContext(job);
    localWork = gWork.getMapRedLocalWork();
    execContext.setJc(jc);
    execContext.setLocalWork(localWork);
    reducer.passExecContext(execContext);
    reducer.setReporter(rp);
    OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
    // initialize reduce operator tree
    try {
        LOG.info(reducer.dump(0));
        reducer.initialize(jc, rowObjectInspector);
        if (localWork != null) {
            for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
                dummyOp.setExecContext(execContext);
                dummyOp.initialize(jc, null);
            }
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
Also used : ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) ArrayList(java.util.ArrayList) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Example 50 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

the class VectorInBloomFilterColDynamicValue method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    if (!initialized) {
        initValue();
    }
    ColumnVector inputColVector = batch.cols[colNum];
    int[] sel = batch.selected;
    boolean[] nullPos = inputColVector.isNull;
    int n = batch.size;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // In case the dynamic value resolves to a null value
    if (bloomFilter == null) {
        batch.size = 0;
    }
    if (inputColVector.noNulls) {
        if (inputColVector.isRepeating) {
            // All must be selected otherwise size would be zero. Repeating property will not change.
            if (!(bfCheck.checkValue(inputColVector, 0))) {
                // Entire batch is filtered out.
                batch.size = 0;
            }
        } else if (batch.selectedInUse) {
            int newSize = 0;
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (bfCheck.checkValue(inputColVector, i)) {
                    sel[newSize++] = i;
                }
            }
            batch.size = newSize;
        } else {
            int newSize = 0;
            for (int i = 0; i != n; i++) {
                if (bfCheck.checkValue(inputColVector, i)) {
                    sel[newSize++] = i;
                }
            }
            if (newSize < n) {
                batch.size = newSize;
                batch.selectedInUse = true;
            }
        }
    } else {
        if (inputColVector.isRepeating) {
            // All must be selected otherwise size would be zero. Repeating property will not change.
            if (!nullPos[0]) {
                if (!(bfCheck.checkValue(inputColVector, 0))) {
                    // Entire batch is filtered out.
                    batch.size = 0;
                }
            } else {
                batch.size = 0;
            }
        } else if (batch.selectedInUse) {
            int newSize = 0;
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!nullPos[i]) {
                    if (bfCheck.checkValue(inputColVector, i)) {
                        sel[newSize++] = i;
                    }
                }
            }
            // Change the selected vector
            batch.size = newSize;
        } else {
            int newSize = 0;
            for (int i = 0; i != n; i++) {
                if (!nullPos[i]) {
                    if (bfCheck.checkValue(inputColVector, i)) {
                        sel[newSize++] = i;
                    }
                }
            }
            if (newSize < n) {
                batch.size = newSize;
                batch.selectedInUse = true;
            }
        }
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Aggregations

ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)72 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)41 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)30 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)20 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)19 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)14 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)11 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)6 IOException (java.io.IOException)5 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)5 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)5 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)4 Timestamp (java.sql.Timestamp)3 ArrayList (java.util.ArrayList)3 MapColumnVector (org.apache.hadoop.hive.ql.exec.vector.MapColumnVector)3 BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe)3 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)3 LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)3