Search in sources :

Example 16 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

The following shows the `evaluate` method of the class VectorUDFMapIndexBaseCol:

// Vectorized map-index lookup: for each row, fetch the map value whose key equals the
// index expression's value, writing results (or NULL for missing keys) to the output column.
@Override
public void evaluate(VectorizedRowBatch batch) {
    // Evaluate children first so the map column and the index (key) column are populated.
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    ColumnVector outV = batch.cols[outputColumnNum];
    MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
    // indexColumnVector holds the lookup keys to search for in the map column.
    indexColumnVector = batch.cols[indexColumnNum];
    /*
     * Do careful maintenance of the outputColVector.noNulls flag: noNulls is only cleared
     * (set false) on the paths that actually write a NULL entry.
     */
    int[] mapValueIndex;
    if (mapV.isRepeating) {
        if (mapV.isNull[0]) {
            // The single repeated map is NULL, so every lookup result is NULL.
            outV.isNull[0] = true;
            outV.noNulls = false;
            outV.isRepeating = true;
        } else {
            // getMapValueIndex resolves, per row, the offset of the matching key within the
            // map's entries (-1 when the key is absent) — semantics inferred from use below;
            // NOTE(review): confirm against the helper's definition.
            mapValueIndex = getMapValueIndex(mapV, batch);
            if (indexColumnVector.isRepeating) {
                // Repeating map + repeating key: a single logical lookup covers the batch.
                // The key is not found in the map, so the output is a repeating NULL.
                if (mapValueIndex[0] == -1) {
                    outV.isNull[0] = true;
                    outV.noNulls = false;
                } else {
                    // The key is found: copy the matching value from the map's values vector.
                    outV.isNull[0] = false;
                    outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
                }
                outV.isRepeating = true;
            } else {
                // Repeating map but varying keys: produce a per-row (non-repeating) result.
                setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
            }
        }
    } else {
        // Non-repeating map: always a per-row lookup.
        mapValueIndex = getMapValueIndex(mapV, batch);
        setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
    }
}
Also used : MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector)

Example 17 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

The following shows the `evaluate` method of the class VectorUDFTimestampFieldDate:

// Vectorized extraction of a date field (via getDateField) from a DATE column into a
// LongColumnVector, carefully maintaining the output's isNull/noNulls/isRepeating flags.
@Override
public void evaluate(VectorizedRowBatch batch) {
    Preconditions.checkState(((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.DATE);
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
    ColumnVector inputColVec = batch.cols[this.colNum];
    /* every line below this is identical for evaluateLong & evaluateString */
    final int n = inputColVec.isRepeating ? 1 : batch.size;
    int[] sel = batch.selected;
    // A repeating input collapses to one logical row, so the selection vector only
    // applies when the input is not repeating.
    final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
    boolean[] outputIsNull = outputColVector.isNull;
    if (batch.size == 0) {
        /* n != batch.size when isRepeating */
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    LongColumnVector longColVector = (LongColumnVector) inputColVec;
    if (inputColVec.isRepeating) {
        // Single repeated value: compute once and mark the output repeating.
        if (inputColVec.noNulls || !inputColVec.isNull[0]) {
            outputColVector.isNull[0] = false;
            outputColVector.vector[0] = getDateField(longColVector.vector[0]);
        } else {
            outputColVector.isNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVec.noNulls) {
        // Consistency fix: use the precomputed local selectedInUse instead of re-reading
        // batch.selectedInUse. Equivalent here (the isRepeating case returned above), and
        // it matches the nulls branch below and the sibling TIMESTAMP variant.
        if (selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before call in case it changes it mind.
                    outputIsNull[i] = false;
                    outputColVector.vector[i] = getDateField(longColVector.vector[i]);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    outputColVector.vector[i] = getDateField(longColVector.vector[i]);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                outputColVector.vector[i] = getDateField(longColVector.vector[i]);
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs: propagate the input's null mask row-by-row and only
        // compute the field for non-null rows.
        outputColVector.noNulls = false;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outputColVector.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outputColVector.vector[i] = getDateField(longColVector.vector[i]);
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                outputColVector.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outputColVector.vector[i] = getDateField(longColVector.vector[i]);
                }
            }
        }
    }
}
Also used : PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 18 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

The following shows the `evaluate` method of the class VectorUDFTimestampFieldTimestamp:

// Vectorized extraction of a timestamp field (via getTimestampField) from a TIMESTAMP
// column into a LongColumnVector, maintaining the output's null/repeating flags.
@Override
public void evaluate(VectorizedRowBatch batch) {
    Preconditions.checkState(((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP);
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
    ColumnVector inputColVec = batch.cols[this.colNum];
    /* every line below this is identical for evaluateLong & evaluateString */
    final int n = inputColVec.isRepeating ? 1 : batch.size;
    int[] sel = batch.selected;
    // Selection only applies when the input is not repeating (a repeating input is one logical row).
    final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse;
    if (batch.size == 0) {
        /* n != batch.size when isRepeating */
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outV.isRepeating = false;
    TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec;
    if (inputColVec.isRepeating) {
        // Single repeated value: compute once and mark the output repeating.
        if (inputColVec.noNulls || !inputColVec.isNull[0]) {
            outV.isNull[0] = false;
            outV.vector[0] = getTimestampField(timestampColVector, 0);
        } else {
            outV.isNull[0] = true;
            outV.noNulls = false;
        }
        outV.isRepeating = true;
        return;
    }
    if (inputColVec.noNulls) {
        // NOTE(review): unlike the DATE variant, this branch clears isNull entries but never
        // resets outV.noNulls to true — confirm whether downstream relies on that difference.
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.isNull[i] = false;
                outV.vector[i] = getTimestampField(timestampColVector, i);
            }
        } else {
            // Clear the first n null flags in bulk, then fill values.
            Arrays.fill(outV.isNull, 0, n, false);
            for (int i = 0; i < n; i++) {
                outV.vector[i] = getTimestampField(timestampColVector, i);
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs: propagate the input's null mask and only compute the
        // field for non-null rows.
        outV.noNulls = false;
        if (selectedInUse) {
            for (int j = 0; j < n; j++) {
                int i = sel[j];
                outV.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outV.vector[i] = getTimestampField(timestampColVector, i);
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                outV.isNull[i] = inputColVec.isNull[i];
                if (!inputColVec.isNull[i]) {
                    outV.vector[i] = getTimestampField(timestampColVector, i);
                }
            }
        }
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 19 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

The following shows the `aggregateInput` method of the class VectorUDAFCount:

// Accumulate COUNT over one batch: add the number of non-null values of the input
// expression to the aggregation buffer's count.
@Override
public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) throws HiveException {
    inputExpression.evaluate(batch);
    final int rowCount = batch.size;
    if (rowCount == 0) {
        // Empty batch: nothing to count.
        return;
    }
    final ColumnVector values = batch.cols[this.inputExpression.getOutputColumnNum()];
    final Aggregation countBuffer = (Aggregation) agg;
    if (values.isRepeating) {
        // One logical value repeated rowCount times: count every row unless it is null.
        if (values.noNulls || !values.isNull[0]) {
            countBuffer.count += rowCount;
        }
    } else if (values.noNulls) {
        // No nulls anywhere: every row counts.
        countBuffer.count += rowCount;
    } else if (batch.selectedInUse) {
        // Nulls present and a selection vector is active: walk the selected indices.
        iterateSelectionHasNulls(countBuffer, rowCount, values.isNull, batch.selected);
    } else {
        // Nulls present, no selection: walk the first rowCount entries.
        iterateNoSelectionHasNulls(countBuffer, rowCount, values.isNull);
    }
}
Also used : ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 20 with ColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.

The following shows the `generateHashMapResultLargeMultiValue` method of the class VectorMapJoinGenerateResultOperator:

/**
 * Generate optimized results for a large N x M cross product using repeated vectorized row
 * batch optimization: fill the overflow batch with small-table values once, then forward it
 * once per matching big-table row with that row's columns marked repeating.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map results for the matching key.
 * @param allMatchs
 *          The all match selected array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          The index of the match key.
 * @param duplicateCount
 *          Number of equal key rows.
 */
private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex, int duplicateCount) throws HiveException, IOException {
    // Kick out previous overflow batch results so we start from a clean batch.
    if (overflowBatch.size > 0) {
        forwardOverflow();
    }
    ByteSegmentRef byteSegmentRef = hashMapResult.first();
    // Outer loop: one iteration per overflow-batch-sized chunk of small table values.
    while (byteSegmentRef != null) {
        // Fill up as much of the overflow batch as possible with small table values.
        while (byteSegmentRef != null) {
            if (smallTableVectorDeserializeRow != null) {
                doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult);
            }
            overflowBatch.size++;
            // NOTE(review): DEFAULT_SIZE is referenced via the instance — presumably a
            // constant batch capacity on VectorizedRowBatch; confirm.
            if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) {
                break;
            }
            byteSegmentRef = hashMapResult.next();
        }
        // Forward the same chunk of small table values once per duplicate big-table row,
        // substituting that row's retained columns each time.
        for (int i = 0; i < duplicateCount; i++) {
            int batchIndex = allMatchs[allMatchesIndex + i];
            if (bigTableRetainedVectorCopy != null) {
                // The one big table row's values repeat across the whole overflow batch.
                bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0);
                for (int column : bigTableRetainedMapping.getOutputColumns()) {
                    overflowBatch.cols[column].isRepeating = true;
                }
            }
            // Crucial here that we don't reset the overflow batch, or we will lose the small table
            // values we put in above.
            forwardOverflowNoReset();
            // Hand reset the big table columns only (the small table columns must survive).
            for (int column : bigTableRetainedMapping.getOutputColumns()) {
                ColumnVector colVector = overflowBatch.cols[column];
                colVector.reset();
            }
        }
        byteSegmentRef = hashMapResult.next();
        if (byteSegmentRef == null) {
            break;
        }
        // Get ready for another round of small table values.
        overflowBatch.reset();
    }
    // Clear away any residue from our optimizations.
    overflowBatch.reset();
}
Also used : ByteSegmentRef(org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)

Aggregations

ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)72 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)41 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)30 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)20 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)19 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)14 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)11 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)6 IOException (java.io.IOException)5 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)5 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)5 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)4 Timestamp (java.sql.Timestamp)3 ArrayList (java.util.ArrayList)3 MapColumnVector (org.apache.hadoop.hive.ql.exec.vector.MapColumnVector)3 BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe)3 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)3 LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)3