
Example 66 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorUDFTimestampFieldTimestamp, the method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    Preconditions.checkState(((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP);
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
    ColumnVector inputColVec = batch.cols[this.colNum];
    /* every line below this is identical for evaluateLong & evaluateString */
    final int n = inputColVec.isRepeating ? 1 : batch.size;
    int[] sel = batch.selected;
    final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
    if (batch.size == 0) {
        /* n != batch.size when isRepeating */
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outV.isRepeating = false;
    TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec;
    if (inputColVec.isRepeating) {
        if (inputColVec.noNulls || !inputColVec.isNull[0]) {
            outV.isNull[0] = false;
            outV.vector[0] = getTimestampField(timestampColVector, 0);
        } else {
            outV.isNull[0] = true;
            outV.noNulls = false;
        }
        outV.isRepeating = true;
        return;
    }
    if (inputColVec.noNulls) {
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.isNull[i] = false;
                outV.vector[i] = getTimestampField(timestampColVector, i);
            }
        } else {
            Arrays.fill(outV.isNull, 0, n, false);
            for (int i = 0; i < n; i++) {
                outV.vector[i] = getTimestampField(timestampColVector, i);
            }
        }
    } else {
        // There are nulls in the input column vector; handle them carefully.
        outV.noNulls = false;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outV.vector[i] = getTimestampField(timestampColVector, i);
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                outV.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outV.vector[i] = getTimestampField(timestampColVector, i);
                }
            }
        }
    }
}
Also used: ColumnVector, LongColumnVector, TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector); PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo).
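
The repeating / noNulls / selected handling above is the general ColumnVector contract, not something timestamp-specific. The standalone sketch below mirrors that contract for a simpler operation, extracting epoch seconds from a TimestampColumnVector. The class and method names (TimestampSecondsSketch, extractSeconds) are illustrative, not Hive API, and TimestampColumnVector.getTime(int) is assumed to return epoch milliseconds as in current Hive releases.

import java.sql.Timestamp;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch only: same null/repeating/selected contract as evaluate() above.
public final class TimestampSecondsSketch {

    static void extractSeconds(VectorizedRowBatch batch, int inCol, int outCol) {
        TimestampColumnVector in = (TimestampColumnVector) batch.cols[inCol];
        LongColumnVector out = (LongColumnVector) batch.cols[outCol];
        if (batch.size == 0) {
            return;
        }
        out.isRepeating = false;
        if (in.isRepeating) {
            // One logical value covers the whole batch: compute entry 0 only.
            if (in.noNulls || !in.isNull[0]) {
                out.isNull[0] = false;
                out.vector[0] = in.getTime(0) / 1000L;
            } else {
                out.isNull[0] = true;
                out.noNulls = false;
            }
            out.isRepeating = true;
            return;
        }
        for (int j = 0; j < batch.size; j++) {
            // The selected array maps logical positions to physical rows.
            int i = batch.selectedInUse ? batch.selected[j] : j;
            boolean isNull = !in.noNulls && in.isNull[i];
            out.isNull[i] = isNull;
            if (isNull) {
                out.noNulls = false;
            } else {
                out.vector[i] = in.getTime(i) / 1000L;
            }
        }
    }

    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        TimestampColumnVector in = new TimestampColumnVector();
        batch.cols[0] = in;
        batch.cols[1] = new LongColumnVector();
        in.set(0, Timestamp.valueOf("2024-01-01 00:00:00"));
        in.set(1, Timestamp.valueOf("2024-06-15 12:30:00"));
        batch.size = 2;
        extractSeconds(batch, 0, 1);
        LongColumnVector out = (LongColumnVector) batch.cols[1];
        System.out.println(out.vector[0] + " " + out.vector[1]);
    }
}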

Example 67 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorUDAFCountMerge, the method aggregateInputSelection:

@Override
public void aggregateInputSelection(VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex, VectorizedRowBatch batch) throws HiveException {
    int batchSize = batch.size;
    if (batchSize == 0) {
        return;
    }
    inputExpression.evaluate(batch);
    LongColumnVector inputVector = (LongColumnVector) batch.cols[this.inputExpression.getOutputColumnNum()];
    long[] vector = inputVector.vector;
    if (inputVector.noNulls) {
        if (inputVector.isRepeating) {
            iterateNoNullsRepeatingWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector[0], batchSize);
        } else {
            if (batch.selectedInUse) {
                iterateNoNullsSelectionWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector, batch.selected, batchSize);
            } else {
                iterateNoNullsWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector, batchSize);
            }
        }
    } else {
        if (inputVector.isRepeating) {
            if (batch.selectedInUse) {
                iterateHasNullsRepeatingSelectionWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector[0], batchSize, batch.selected, inputVector.isNull);
            } else {
                iterateHasNullsRepeatingWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector[0], batchSize, inputVector.isNull);
            }
        } else {
            if (batch.selectedInUse) {
                iterateHasNullsSelectionWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected, inputVector.isNull);
            } else {
                iterateHasNullsWithAggregationSelection(aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
            }
        }
    }
}
Also used: LongColumnVector (org.apache.hadoop.hive.ql.exec.vector).
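
aggregateInputSelection is pure dispatch: the noNulls, isRepeating, and selectedInUse flags pick one of five specialized loops up front so no flag is re-tested per row. Below is a minimal sketch of the same dispatch idea, counting non-null values in a LongColumnVector; countNonNulls is an illustrative name, not Hive API, and the real method additionally fans each row out to a per-group aggregation buffer via aggregateIndex, which the sketch omits.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch only: same flag dispatch as aggregateInputSelection above.
final class CountDispatchSketch {

    static long countNonNulls(VectorizedRowBatch batch, int col) {
        LongColumnVector v = (LongColumnVector) batch.cols[col];
        int n = batch.size;
        if (n == 0) {
            return 0;
        }
        if (v.noNulls) {
            // No nulls anywhere: repeating or not, every logical row counts.
            return n;
        }
        if (v.isRepeating) {
            // One value for all rows: null-ness of entry 0 decides everything.
            return v.isNull[0] ? 0 : n;
        }
        long count = 0;
        if (batch.selectedInUse) {
            for (int j = 0; j < n; j++) {
                if (!v.isNull[batch.selected[j]]) {
                    count++;
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                if (!v.isNull[i]) {
                    count++;
                }
            }
        }
        return count;
    }
}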

Example 68 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorUDAFCountMerge, the method assignRowColumn:

@Override
public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum, AggregationBuffer agg) throws HiveException {
    LongColumnVector outputColVector = (LongColumnVector) batch.cols[columnNum];
    Aggregation myagg = (Aggregation) agg;
    outputColVector.isNull[batchIndex] = false;
    outputColVector.vector[batchIndex] = myagg.value;
}
Also used: LongColumnVector (org.apache.hadoop.hive.ql.exec.vector).

Example 69 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorUDAFCountStar, the method assignRowColumn:

@Override
public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum, AggregationBuffer agg) throws HiveException {
    LongColumnVector outputColVector = (LongColumnVector) batch.cols[columnNum];
    Aggregation myagg = (Aggregation) agg;
    outputColVector.isNull[batchIndex] = false;
    outputColVector.vector[batchIndex] = myagg.count;
}
Also used: LongColumnVector (org.apache.hadoop.hive.ql.exec.vector).
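
Examples 68 and 69 are the same pattern with different field names: the finished aggregate is written into one row of the output column. A hedged sketch of that pattern follows; AssignRowSketch and writeValue are illustrative names. One caveat the snippets rely on: the output column must not be in repeating mode when distinct rows are written, which a freshly reset VectorizedRowBatch guarantees.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch only: mirrors the two assignRowColumn methods above.
final class AssignRowSketch {

    static void writeValue(VectorizedRowBatch batch, int batchIndex, int columnNum, long value) {
        LongColumnVector out = (LongColumnVector) batch.cols[columnNum];
        // Defensive: per-row writes are only meaningful when not repeating.
        out.isRepeating = false;
        out.isNull[batchIndex] = false;
        out.vector[batchIndex] = value;
    }
}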

Example 70 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorMapJoinInnerBigOnlyLongOperator, the method process:

// ---------------------------------------------------------------------------
// Process Single-Column Long Inner Big-Only Join on a vectorized row batch.
// 
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
             * Initialize Single-Column Long members for this specialized class.
             */
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
             * Get our Single-Column Long hash multi-set information for this specialized class.
             */
            hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable;
            useMinMax = hashMultiSet.useMinMax();
            if (useMinMax) {
                min = hashMultiSet.min();
                max = hashMultiSet.max();
            }
            needHashTableSetup = false;
        }
        batchCounter++;
        // For inner joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
         * Single-Column Long specific declarations.
         */
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
        /*
         * Single-Column Long check for repeating.
         */
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            /*
             * Repeating.
             */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
             * Single-Column Long specific repeated lookup.
             */
            JoinUtil.JoinResult joinResult;
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                long key = vector[0];
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashMultiSet.contains(key, hashMultiSetResults[0]);
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in allMatchs / allMatchCount.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for an outer join.
            // Only deferred (spilled) rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMultiSetResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            /*
             * Single-Column Long specific variables.
             */
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
                 * Single-Column Long get key.
                 */
                long currentKey;
                boolean isNull;
                if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
                    currentKey = 0;
                    isNull = true;
                } else {
                    currentKey = vector[batchIndex];
                    isNull = false;
                }
                if (isNull || !haveSaveKey || currentKey != saveKey) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the count from the hash multi-set result, so we don't keep it.
                                equalKeySeriesCount++;
                                break;
                            case SPILL:
                                // We keep the hash multi-set result for its spill information.
                                hashMultiSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
                         * Single-Column Long specific save key.
                         */
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashMultiSet.contains(currentKey, hashMultiSetResults[hashMultiSetResultCount]);
                        }
                    }
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesValueCounts[equalKeySeriesCount] = hashMultiSetResults[hashMultiSetResultCount].count();
                            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the count from the hash multi-set result, so we don't keep it.
                        equalKeySeriesCount++;
                        break;
                    case SPILL:
                        // We keep the hash multi-set result for its spill information.
                        hashMultiSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesValueCounts " + longArrayToRangesString(equalKeySeriesValueCounts, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesDuplicateCounts " + intArrayToRangesString(equalKeySeriesDuplicateCounts, equalKeySeriesCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
            }
            finishInnerBigOnly(batch, allMatchCount, equalKeySeriesCount, spillCount, (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (Exception e) {
        // Covers IOException and any other failure from the join machinery.
        throw new HiveException(e);
    }
}
Also used: JoinUtil (org.apache.hadoop.hive.ql.exec); VectorizedRowBatch, LongColumnVector (org.apache.hadoop.hive.ql.exec.vector); VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions); VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable); HiveException (org.apache.hadoop.hive.ql.metadata); IOException (java.io).
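
The heart of the non-repeating path above is the equal-key-series optimization: the hash multi-set is probed only when the current key differs from the previous row's key, so a run of n equal keys costs one lookup instead of n. The standalone sketch below isolates that idea; Set&lt;Long&gt; stands in for the Hive hash multi-set, and all names (EqualKeySeriesSketch, countMatches) are illustrative.

import java.util.Set;

// Sketch only: probes the set once per run of equal keys, mirroring the
// saveKey / haveSaveKey / saveJoinResult bookkeeping in process() above.
final class EqualKeySeriesSketch {

    static int countMatches(long[] keys, boolean[] isNull, Set<Long> table) {
        int matches = 0;
        long saveKey = 0;
        boolean haveSaveKey = false;
        boolean saveMatch = false;
        for (int i = 0; i < keys.length; i++) {
            if (isNull[i]) {
                // A null key never matches and breaks any running series.
                haveSaveKey = false;
                continue;
            }
            if (!haveSaveKey || keys[i] != saveKey) {
                // Key changed: pay for one probe and remember the result.
                saveKey = keys[i];
                haveSaveKey = true;
                saveMatch = table.contains(saveKey);
            }
            // Every row in the series reuses the saved probe result.
            if (saveMatch) {
                matches++;
            }
        }
        return matches;
    }
}

The optimization only pays off when equal keys arrive in runs, for example after a sorted shuffle; with fully shuffled keys each row still costs one probe, the same as the naive loop.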
