
Example 96 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class StructColumnInList, the evaluate method:

@Override
public void evaluate(VectorizedRowBatch batch) {
    final int logicalSize = batch.size;
    if (logicalSize == 0) {
        return;
    }
    if (buffer == null) {
        buffer = new Output();
        binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
    }
    for (VectorExpression ve : structExpressions) {
        ve.evaluate(batch);
    }
    BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
    try {
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        for (int logical = 0; logical < logicalSize; logical++) {
            int batchIndex = (selectedInUse ? selected[logical] : logical);
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < structColumnMap.length; f++) {
                int fieldColumn = structColumnMap[f];
                ColumnVector colVec = batch.cols[fieldColumn];
                int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
                if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
                    switch(fieldVectorColumnTypes[f]) {
                        case BYTES:
                            {
                                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                                byte[] bytes = bytesColVec.vector[adjustedIndex];
                                int start = bytesColVec.start[adjustedIndex];
                                int length = bytesColVec.length[adjustedIndex];
                                binarySortableSerializeWrite.writeString(bytes, start, length);
                            }
                            break;
                        case LONG:
                            binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DOUBLE:
                            binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DECIMAL:
                            DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
                            binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
                            break;
                        default:
                            throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
                    }
                } else {
                    binarySortableSerializeWrite.writeNull();
                }
            }
            scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
        }
        // Now, take the serialized keys we just wrote into our scratch column and look them
        // up in the IN list.
        super.evaluate(batch);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector), Output (org.apache.hadoop.hive.serde2.ByteStream.Output)
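
The evaluate() method above relies on the standard VectorizedRowBatch conventions: the logical-to-physical row mapping through selectedInUse/selected, the isRepeating shortcut (entry 0 holds the single value), and the noNulls/isNull null handling. The following is a minimal, standalone sketch of just that mapping, assuming only the Hive storage-api vector classes; the class name BatchIndexSketch and the sample values are made up for illustration and are not part of StructColumnInList.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BatchIndexSketch {
    public static void main(String[] args) {
        // Batch with a single long column and three logical rows.
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        LongColumnVector col = new LongColumnVector();
        batch.cols[0] = col;
        col.vector[0] = 10L;
        col.vector[5] = 50L;
        col.vector[7] = 70L;

        // Pretend a prior filter selected physical rows 0, 5 and 7.
        batch.selectedInUse = true;
        batch.selected[0] = 0;
        batch.selected[1] = 5;
        batch.selected[2] = 7;
        batch.size = 3;

        // Same logical-to-physical mapping used in StructColumnInList.evaluate().
        for (int logical = 0; logical < batch.size; logical++) {
            int batchIndex = batch.selectedInUse ? batch.selected[logical] : logical;
            // A repeating column stores its single value at entry 0.
            int adjustedIndex = col.isRepeating ? 0 : batchIndex;
            if (col.noNulls || !col.isNull[adjustedIndex]) {
                System.out.println("row " + batchIndex + " -> " + col.vector[adjustedIndex]);
            } else {
                System.out.println("row " + batchIndex + " -> NULL");
            }
        }
    }
}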

Example 97 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorElt, the evaluate method:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    int[] sel = batch.selected;
    int n = batch.size;
    BytesColumnVector outputVector = (BytesColumnVector) batch.cols[outputColumn];
    if (n <= 0) {
        return;
    }
    outputVector.init();
    outputVector.noNulls = false;
    outputVector.isRepeating = false;
    LongColumnVector inputIndexVector = (LongColumnVector) batch.cols[inputColumns[0]];
    long[] indexVector = inputIndexVector.vector;
    if (inputIndexVector.isRepeating) {
        int index = (int) indexVector[0];
        if (index > 0 && index < inputColumns.length) {
            BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
            if (cv.isRepeating) {
                outputVector.setElement(0, 0, cv);
                outputVector.isRepeating = true;
            } else if (batch.selectedInUse) {
                for (int j = 0; j != n; j++) {
                    int i = sel[j];
                    outputVector.setVal(i, cv.vector[0], cv.start[0], cv.length[0]);
                }
            } else {
                for (int i = 0; i != n; i++) {
                    outputVector.setVal(i, cv.vector[0], cv.start[0], cv.length[0]);
                }
            }
        } else {
            outputVector.isNull[0] = true;
            outputVector.isRepeating = true;
        }
    } else if (batch.selectedInUse) {
        for (int j = 0; j != n; j++) {
            int i = sel[j];
            int index = (int) indexVector[i];
            if (index > 0 && index < inputColumns.length) {
                BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
                int cvi = cv.isRepeating ? 0 : i;
                outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]);
            } else {
                outputVector.isNull[i] = true;
            }
        }
    } else {
        for (int i = 0; i != n; i++) {
            int index = (int) indexVector[i];
            if (index > 0 && index < inputColumns.length) {
                BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
                int cvi = cv.isRepeating ? 0 : i;
                outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]);
            } else {
                outputVector.isNull[i] = true;
            }
        }
    }
}
Also used: BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
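
VectorElt picks, per row, one of the string input columns based on a 1-based index column (inputColumns[0] is the index itself, so index < inputColumns.length is the upper bound) and writes NULL when the index is out of range. The standalone sketch below mirrors that dispatch logic outside the expression framework; the class name EltDispatchSketch, the column layout, and the sample values are assumptions for illustration, not the VectorElt API.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class EltDispatchSketch {
    public static void main(String[] args) {
        // Column 0: 1-based index, columns 1..2: candidate strings (mirrors ELT(index, s1, s2)).
        VectorizedRowBatch batch = new VectorizedRowBatch(3);
        LongColumnVector idx = new LongColumnVector();
        BytesColumnVector s1 = new BytesColumnVector();
        BytesColumnVector s2 = new BytesColumnVector();
        s1.initBuffer();
        s2.initBuffer();
        batch.cols[0] = idx;
        batch.cols[1] = s1;
        batch.cols[2] = s2;
        batch.size = 2;

        idx.vector[0] = 1;  // row 0 picks column 1
        idx.vector[1] = 3;  // row 1 is out of range -> NULL
        byte[] first = "first".getBytes(StandardCharsets.UTF_8);
        byte[] second = "second".getBytes(StandardCharsets.UTF_8);
        s1.setVal(0, first, 0, first.length);
        s1.setVal(1, first, 0, first.length);
        s2.setVal(0, second, 0, second.length);
        s2.setVal(1, second, 0, second.length);

        int[] inputColumns = {0, 1, 2};
        for (int i = 0; i < batch.size; i++) {
            int index = (int) idx.vector[i];
            if (index > 0 && index < inputColumns.length) {
                BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
                int cvi = cv.isRepeating ? 0 : i;
                System.out.println("row " + i + " -> "
                        + new String(cv.vector[cvi], cv.start[cvi], cv.length[cvi], StandardCharsets.UTF_8));
            } else {
                System.out.println("row " + i + " -> NULL (index out of range)");
            }
        }
    }
}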

Example 98 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorMapJoinInnerBigOnlyLongOperator, the process method:

//---------------------------------------------------------------------------
// Process Single-Column Long Inner Big-Only Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
            commonSetup(batch);
            /*
         * Initialize Single-Column Long members for this specialized class.
         */
            singleJoinColumn = bigTableKeyColumnMap[0];
            needCommonSetup = false;
        }
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
            /*
         * Get our Single-Column Long hash multi-set information for this specialized class.
         */
            hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable;
            useMinMax = hashMultiSet.useMinMax();
            if (useMinMax) {
                min = hashMultiSet.min();
                max = hashMultiSet.max();
            }
            needHashTableSetup = false;
        }
        batchCounter++;
        // For inner joins, we may apply the filter(s) now.
        for (VectorExpression ve : bigTableFilterExpressions) {
            ve.evaluate(batch);
        }
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        /*
       * Single-Column Long specific declarations.
       */
        // The one join column for this specialized class.
        LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
        long[] vector = joinColVector.vector;
        /*
       * Single-Column Long check for repeating.
       */
        // Check single column for repeating.
        boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
        if (allKeyInputColumnsRepeating) {
            /*
         * Repeating.
         */
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
            /*
         * Single-Column Long specific repeated lookup.
         */
            JoinUtil.JoinResult joinResult;
            if (!joinColVector.noNulls && joinColVector.isNull[0]) {
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                long key = vector[0];
                if (useMinMax && (key < min || key > max)) {
                    // Out of range for whole batch.
                    joinResult = JoinUtil.JoinResult.NOMATCH;
                } else {
                    joinResult = hashMultiSet.contains(key, hashMultiSetResults[0]);
                }
            }
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
            }
            finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
        } else {
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            }
            // We remember any matching rows in allMatchs / allMatchCount.  At the end of the loop,
            // selected / batch.size will represent both matching and non-matching rows for outer join.
            // Only deferred rows will have been removed from selected.
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMultiSetResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            /*
         * Single-Column Long specific variables.
         */
            long saveKey = 0;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < inputLogicalSize; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                /*
           * Single-Column Long get key.
           */
                long currentKey;
                boolean isNull;
                if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
                    currentKey = 0;
                    isNull = true;
                } else {
                    currentKey = vector[batchIndex];
                    isNull = false;
                }
                if (isNull || !haveSaveKey || currentKey != saveKey) {
                    if (haveSaveKey) {
                        // Move on with our counts.
                        switch(saveJoinResult) {
                            case MATCH:
                                // We have extracted the count from the hash multi-set result, so we don't keep it.
                                equalKeySeriesCount++;
                                break;
                            case SPILL:
                                // We keep the hash multi-set result for its spill information.
                                hashMultiSetResultCount++;
                                break;
                            case NOMATCH:
                                break;
                        }
                    }
                    if (isNull) {
                        saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        haveSaveKey = false;
                    } else {
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
                        /*
               * Single-Column Long specific save key.
               */
                        saveKey = currentKey;
                        if (useMinMax && (currentKey < min || currentKey > max)) {
                            // Key out of range for whole hash table.
                            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
                        } else {
                            saveJoinResult = hashMultiSet.contains(currentKey, hashMultiSetResults[hashMultiSetResultCount]);
                        }
                    }
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesValueCounts[equalKeySeriesCount] = hashMultiSetResults[hashMultiSetResultCount].count();
                            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                            break;
                    }
                } else {
                    switch(saveJoinResult) {
                        case MATCH:
                            equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
                            allMatchs[allMatchCount++] = batchIndex;
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            break;
                        case SPILL:
                            spills[spillCount] = batchIndex;
                            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
                            spillCount++;
                            break;
                        case NOMATCH:
                            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                            break;
                    }
                }
            }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                        // We have extracted the count from the hash multi-set result, so we don't keep it.
                        equalKeySeriesCount++;
                        break;
                    case SPILL:
                        // We keep the hash multi-set result for its spill information.
                        hashMultiSetResultCount++;
                        break;
                    case NOMATCH:
                        break;
                }
            }
            if (isLogDebugEnabled) {
                LOG.debug(CLASS_NAME + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesValueCounts " + longArrayToRangesString(equalKeySeriesValueCounts, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesDuplicateCounts " + intArrayToRangesString(equalKeySeriesDuplicateCounts, equalKeySeriesCount) + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
            }
            finishInnerBigOnly(batch, allMatchCount, equalKeySeriesCount, spillCount, (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
        }
        if (batch.size > 0) {
            // Forward any remaining selected rows.
            forwardBigTableBatch(batch);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult), JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
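
The core optimization in the operator above is the "equal key series" trick: because equal join keys often arrive in runs, only the first key of each run is probed against the hash multi-set and the result is reused for the duplicates (the saveKey/saveJoinResult logic). Below is a stripped-down sketch of that idea in plain Java, with a java.util.HashSet standing in for VectorMapJoinLongHashMultiSet and with null, spill, and min/max handling omitted; the class name EqualKeySeriesSketch and the sample data are illustrative only.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class EqualKeySeriesSketch {
    public static void main(String[] args) {
        long[] keys = {5, 5, 5, 9, 9, 2};                    // big-table key column (already evaluated)
        Set<Long> smallTable = new HashSet<>(Arrays.asList(5L, 2L));

        boolean haveSaveKey = false;
        long saveKey = 0;
        boolean saveMatch = false;
        int lookups = 0;

        for (int batchIndex = 0; batchIndex < keys.length; batchIndex++) {
            long currentKey = keys[batchIndex];
            if (!haveSaveKey || currentKey != saveKey) {
                // New key series: probe once and reuse the result for the duplicates.
                haveSaveKey = true;
                saveKey = currentKey;
                saveMatch = smallTable.contains(currentKey);
                lookups++;
            }
            System.out.println("row " + batchIndex + " key " + currentKey
                    + (saveMatch ? " MATCH" : " NOMATCH"));
        }
        System.out.println("hash table lookups: " + lookups + " for " + keys.length + " rows");
    }
}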

Example 99 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorUDFTimestampFieldDate, the evaluate method:

@Override
public void evaluate(VectorizedRowBatch batch) {
    Preconditions.checkState(inputTypes[0] == VectorExpression.Type.DATE);
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
    ColumnVector inputColVec = batch.cols[this.colNum];
    /* every line below this is identical for evaluateLong & evaluateString */
    final int n = inputColVec.isRepeating ? 1 : batch.size;
    int[] sel = batch.selected;
    final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse;
    if (batch.size == 0) {
        /* n != batch.size when isRepeating */
        return;
    }
    /* true for all algebraic UDFs with no state */
    outV.isRepeating = inputColVec.isRepeating;
    LongColumnVector longColVector = (LongColumnVector) inputColVec;
    if (inputColVec.noNulls) {
        outV.noNulls = true;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.vector[i] = getDateField(longColVector.vector[i]);
            }
        } else {
            for (int i = 0; i < n; i++) {
                outV.vector[i] = getDateField(longColVector.vector[i]);
            }
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null, to save time,
        // because calling the function can be expensive.
        outV.noNulls = false;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outV.vector[i] = getDateField(longColVector.vector[i]);
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                outV.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outV.vector[i] = getDateField(longColVector.vector[i]);
                }
            }
        }
    }
}
Also used: LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
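
VectorUDFTimestampFieldDate applies a subclass-supplied getDateField() over the long vector, collapsing the loop to a single entry when the input repeats and copying the null bitmap otherwise. The self-contained sketch below reproduces that loop shape; the trivial getDateField() stand-in (value % 7) and the class name DateFieldLoopSketch are illustrative assumptions, not the real date arithmetic.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class DateFieldLoopSketch {
    // Stand-in for the subclass-supplied getDateField(); the real UDFs extract
    // year/month/day etc. from the epoch-day value stored in the long vector.
    static long getDateField(long epochDay) {
        return epochDay % 7;
    }

    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        LongColumnVector in = new LongColumnVector();
        LongColumnVector out = new LongColumnVector();
        batch.cols[0] = in;
        batch.cols[1] = out;
        batch.size = 4;
        in.vector[0] = 18000;
        in.vector[1] = 18001;
        in.noNulls = false;
        in.isNull[2] = true;        // row 2 is NULL
        in.vector[3] = 18003;

        // Same shape as VectorUDFTimestampFieldDate.evaluate(): when the input
        // repeats, only entry 0 is meaningful, so n collapses to 1.
        final int n = in.isRepeating ? 1 : batch.size;
        out.isRepeating = in.isRepeating;
        if (in.noNulls) {
            out.noNulls = true;
            for (int i = 0; i < n; i++) {
                out.vector[i] = getDateField(in.vector[i]);
            }
        } else {
            out.noNulls = false;
            for (int i = 0; i < n; i++) {
                out.isNull[i] = in.isNull[i];
                if (!in.isNull[i]) {
                    out.vector[i] = getDateField(in.vector[i]);
                }
            }
        }
        for (int i = 0; i < n; i++) {
            String shown = (!out.noNulls && out.isNull[i]) ? "NULL" : String.valueOf(out.vector[i]);
            System.out.println("row " + i + " -> " + shown);
        }
    }
}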

Example 100 with LongColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

From the class VectorUDFTimestampFieldString, the evaluate method:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
    BytesColumnVector inputCol = (BytesColumnVector) batch.cols[this.colNum];
    final int n = inputCol.isRepeating ? 1 : batch.size;
    int[] sel = batch.selected;
    final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse;
    if (batch.size == 0) {
        // n != batch.size when isRepeating
        return;
    }
    // true for all algebraic UDFs with no state
    outV.isRepeating = inputCol.isRepeating;
    if (inputCol.noNulls) {
        outV.noNulls = true;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                try {
                    outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
                    outV.isNull[i] = false;
                } catch (ParseException e) {
                    outV.noNulls = false;
                    outV.isNull[i] = true;
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                try {
                    outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
                    outV.isNull[i] = false;
                } catch (ParseException e) {
                    outV.noNulls = false;
                    outV.isNull[i] = true;
                }
            }
        }
    } else {
        // Handle case with nulls. Don't do function if the value is null, to save time,
        // because calling the function can be expensive.
        outV.noNulls = false;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.isNull[i] = inputCol.isNull[i];
                if (!inputCol.isNull[i]) {
                    try {
                        outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
                    } catch (ParseException e) {
                        outV.isNull[i] = true;
                    }
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                outV.isNull[i] = inputCol.isNull[i];
                if (!inputCol.isNull[i]) {
                    try {
                        outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
                    } catch (ParseException e) {
                        outV.isNull[i] = true;
                    }
                }
            }
        }
    }
}
Also used: BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), ParseException (java.text.ParseException), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
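
VectorUDFTimestampFieldString does the same field extraction over string input and additionally turns any ParseException thrown by getField() into a NULL in the output column instead of failing the batch. The sketch below reproduces that error-to-null handling with a stand-in getField() that simply parses a long; the class name ParseToNullSketch, the stand-in parser, and the sample values are assumptions for illustration.

import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class ParseToNullSketch {
    // Stand-in for the subclass-supplied getField(): parse the bytes as a long,
    // throwing ParseException for anything non-numeric.
    static long getField(byte[] bytes, int start, int length) throws ParseException {
        String s = new String(bytes, start, length, StandardCharsets.UTF_8);
        try {
            return Long.parseLong(s);
        } catch (NumberFormatException e) {
            throw new ParseException("not a number: " + s, 0);
        }
    }

    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        BytesColumnVector in = new BytesColumnVector();
        LongColumnVector out = new LongColumnVector();
        in.initBuffer();
        batch.cols[0] = in;
        batch.cols[1] = out;
        batch.size = 3;

        byte[] good = "2023".getBytes(StandardCharsets.UTF_8);
        byte[] bad = "oops".getBytes(StandardCharsets.UTF_8);
        in.setVal(0, good, 0, good.length);
        in.setVal(1, bad, 0, bad.length);
        in.setVal(2, good, 0, good.length);

        // Same error handling as VectorUDFTimestampFieldString.evaluate():
        // a ParseException becomes a NULL in the output column.
        out.noNulls = true;
        for (int i = 0; i < batch.size; i++) {
            try {
                out.vector[i] = getField(in.vector[i], in.start[i], in.length[i]);
                out.isNull[i] = false;
            } catch (ParseException e) {
                out.noNulls = false;
                out.isNull[i] = true;
            }
        }
        for (int i = 0; i < batch.size; i++) {
            System.out.println("row " + i + " -> " + (out.isNull[i] ? "NULL" : String.valueOf(out.vector[i])));
        }
    }
}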

Aggregations

LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 223 usages
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 116 usages
Test (org.junit.Test): 67 usages
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 50 usages
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 36 usages
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 25 usages
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 24 usages
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 17 usages
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 14 usages
Random (java.util.Random): 12 usages
Configuration (org.apache.hadoop.conf.Configuration): 8 usages
VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader): 7 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 7 usages
StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector): 6 usages
Path (org.apache.hadoop.fs.Path): 5 usages
LongWritable (org.apache.hadoop.io.LongWritable): 5 usages
IOException (java.io.IOException): 4 usages
Timestamp (java.sql.Timestamp): 4 usages
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 4 usages
IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector): 4 usages