
Example 41 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class FilterStructColumnInList, method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    final int logicalSize = batch.size;
    if (logicalSize == 0) {
        return;
    }
    // Lazily create the serialization buffer and writer on first use.
    if (buffer == null) {
        buffer = new Output();
        binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
    }
    // Evaluate the child expressions that materialize the struct's field columns.
    for (VectorExpression ve : structExpressions) {
        ve.evaluate(batch);
    }
    BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
    try {
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        // Serialize the struct key of each selected row into the scratch column.
        for (int logical = 0; logical < logicalSize; logical++) {
            int batchIndex = (selectedInUse ? selected[logical] : logical);
            // Reset the writer so this row's key starts at offset 0 of the buffer.
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < structColumnMap.length; f++) {
                int fieldColumn = structColumnMap[f];
                ColumnVector colVec = batch.cols[fieldColumn];
                // A repeating vector keeps its single value at index 0.
                int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
                if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
                    switch(fieldVectorColumnTypes[f]) {
                        case BYTES:
                            {
                                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                                byte[] bytes = bytesColVec.vector[adjustedIndex];
                                int start = bytesColVec.start[adjustedIndex];
                                int length = bytesColVec.length[adjustedIndex];
                                binarySortableSerializeWrite.writeString(bytes, start, length);
                            }
                            break;
                        case LONG:
                            binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DOUBLE:
                            binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DECIMAL:
                            DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
                            binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
                            break;
                        default:
                            throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
                    }
                } else {
                    binarySortableSerializeWrite.writeNull();
                }
            }
            scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
        }
        // Now, take the serialized keys we just wrote into our scratch column and look them
        // up in the IN list.
        super.evaluate(batch);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)
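
At its core, the snippet above serializes each multi-column struct key into a single byte string with BinarySortableSerializeWrite, so the IN lookup can treat it like an ordinary string key. A minimal standalone sketch of that serialization step, assuming a hypothetical two-field (string, long) key; the class name StructKeySketch is illustrative only:

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

public class StructKeySketch {
    public static void main(String[] args) throws Exception {
        Output buffer = new Output();
        // Two fields per key, matching the hypothetical (string, long) struct.
        BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(2);

        writer.set(buffer); // reset the buffer; the key is written from offset 0
        byte[] name = "alice".getBytes("UTF-8");
        writer.writeString(name, 0, name.length); // field 0: string
        writer.writeLong(42L);                    // field 1: long

        // buffer.getData()[0 .. buffer.getLength()) now holds one order-preserving
        // byte-string key, comparable against other keys serialized the same way.
        System.out.println("key length = " + buffer.getLength());
    }
}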

Example 42 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class IfExprStringGroupColumnStringGroupColumn, method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
    BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column];
    BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column];
    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    boolean[] outputIsNull = outputColVector.isNull;
    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
    // may be overridden below
    outputColVector.isRepeating = false;
    int n = batch.size;
    long[] vector1 = arg1ColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    outputColVector.initBuffer();
    /* All the code paths below propagate nulls even if neither arg2 nor arg3
     * have nulls. This is to reduce the number of code paths and shorten the
     * code, at the expense of maybe doing unnecessary work if neither input
     * has nulls. This could be improved in the future by expanding the number
     * of code paths.
     */
    if (arg1ColVector.isRepeating) {
        if (vector1[0] == 1) {
            arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        } else {
            arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        }
        return;
    }
    // extend any repeating values and noNulls indicator in the inputs
    arg2ColVector.flatten(batch.selectedInUse, sel, n);
    arg3ColVector.flatten(batch.selectedInUse, sel, n);
    if (arg1ColVector.noNulls) {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    if (!arg3ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]);
                    }
                }
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    if (!arg3ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]);
                    }
                }
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        }
    } else { /* there are nulls */
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    if (!arg3ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]);
                    }
                }
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    if (!arg3ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]);
                    }
                }
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        }
    }
    // restore the repeating and noNulls state of the inputs
    arg2ColVector.unFlatten();
    arg3ColVector.unFlatten();
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
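
The expression above is, in essence, a vectorized ternary: per row, copy the bytes of arg2 or arg3 into the output column with setVal. A minimal sketch of that inner copy on a hand-built batch, with a hypothetical column layout (condition in column 0, branches in columns 1 and 2, output in column 3); it exercises only the noNulls, no-selection path:

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class IfExprSketch {
    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(4);
        LongColumnVector cond = new LongColumnVector();      // column 0: 0/1 condition
        BytesColumnVector thenCol = new BytesColumnVector(); // column 1
        BytesColumnVector elseCol = new BytesColumnVector(); // column 2
        BytesColumnVector out = new BytesColumnVector();     // column 3
        batch.cols[0] = cond;
        batch.cols[1] = thenCol;
        batch.cols[2] = elseCol;
        batch.cols[3] = out;
        batch.size = 2;

        thenCol.initBuffer();
        elseCol.initBuffer();
        out.initBuffer();
        byte[] yes = "yes".getBytes();
        byte[] no = "no".getBytes();
        for (int i = 0; i < batch.size; i++) {
            cond.vector[i] = i;                   // row 0: false, row 1: true
            thenCol.setVal(i, yes, 0, yes.length);
            elseCol.setVal(i, no, 0, no.length);
        }

        // Hand-rolled equivalent of the noNulls / !selectedInUse loop above.
        for (int i = 0; i < batch.size; i++) {
            BytesColumnVector src = (cond.vector[i] == 1) ? thenCol : elseCol;
            out.setVal(i, src.vector[i], src.start[i], src.length[i]);
        }
        System.out.println(new String(out.vector[0], out.start[0], out.length[0])); // no
        System.out.println(new String(out.vector[1], out.start[1], out.length[1])); // yes
    }
}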

Example 43 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class IfExprStringGroupColumnStringScalar, method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
    BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column];
    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    boolean[] outputIsNull = outputColVector.isNull;
    outputColVector.noNulls = arg2ColVector.noNulls;
    // may be overridden below
    outputColVector.isRepeating = false;
    int n = batch.size;
    long[] vector1 = arg1ColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    outputColVector.initBuffer();
    /* All the code paths below propagate nulls even if arg2 has no nulls.
     * This is to reduce the number of code paths and shorten the code,
     * at the expense of maybe doing unnecessary work when arg2 has no
     * nulls. This could be improved in the future by expanding the
     * number of code paths.
     */
    if (arg1ColVector.isRepeating) {
        if (vector1[0] == 1) {
            arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        } else {
            outputColVector.fill(arg3Scalar);
        }
        return;
    }
    // extend any repeating values and noNulls indicator in the inputs
    arg2ColVector.flatten(batch.selectedInUse, sel, n);
    if (arg1ColVector.noNulls) {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
                }
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false);
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
                }
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false);
            }
        }
    } else { /* there are nulls */
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
                }
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : false);
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
                    if (!arg2ColVector.isNull[i]) {
                        outputColVector.setVal(i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
                    }
                } else {
                    outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
                }
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : false);
            }
        }
    }
    // restore the repeating and noNulls state of the input
    arg2ColVector.unFlatten();
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
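
A detail worth noting in this scalar variant: column values are copied with setVal (the bytes go into the output vector's own buffer), while the scalar branch uses setRef (the row simply points at the caller's array, which is safe here because the scalar outlives the batch). A small sketch of the difference; the class name is illustrative:

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class SetValVsSetRef {
    public static void main(String[] args) {
        BytesColumnVector col = new BytesColumnVector();
        col.initBuffer();

        byte[] src = "abc".getBytes();
        col.setVal(0, src, 0, src.length); // row 0: private copy in the shared buffer
        col.setRef(1, src, 0, src.length); // row 1: alias of the caller's array

        src[0] = 'X'; // mutate the source after the fact

        System.out.println(new String(col.vector[0], col.start[0], col.length[0])); // abc
        System.out.println(new String(col.vector[1], col.start[1], col.length[1])); // Xbc
    }
}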

Example 44 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class StructColumnInList, method evaluate. The body mirrors the filter variant in Example 41; the difference lies in the superclass, whose evaluate projects the IN-list membership result as an output column rather than filtering the batch:

@Override
public void evaluate(VectorizedRowBatch batch) {
    final int logicalSize = batch.size;
    if (logicalSize == 0) {
        return;
    }
    if (buffer == null) {
        buffer = new Output();
        binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
    }
    for (VectorExpression ve : structExpressions) {
        ve.evaluate(batch);
    }
    BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
    try {
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        for (int logical = 0; logical < logicalSize; logical++) {
            int batchIndex = (selectedInUse ? selected[logical] : logical);
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < structColumnMap.length; f++) {
                int fieldColumn = structColumnMap[f];
                ColumnVector colVec = batch.cols[fieldColumn];
                int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
                if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
                    switch(fieldVectorColumnTypes[f]) {
                        case BYTES:
                            {
                                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                                byte[] bytes = bytesColVec.vector[adjustedIndex];
                                int start = bytesColVec.start[adjustedIndex];
                                int length = bytesColVec.length[adjustedIndex];
                                binarySortableSerializeWrite.writeString(bytes, start, length);
                            }
                            break;
                        case LONG:
                            binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DOUBLE:
                            binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
                            break;
                        case DECIMAL:
                            DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
                            binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
                            break;
                        default:
                            throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
                    }
                } else {
                    binarySortableSerializeWrite.writeNull();
                }
            }
            scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
        }
        // Now, take the serialized keys we just wrote into our scratch column and look them
        // up in the IN list.
        super.evaluate(batch);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)

Example 45 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class VectorElt, method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    int[] sel = batch.selected;
    int n = batch.size;
    BytesColumnVector outputVector = (BytesColumnVector) batch.cols[outputColumn];
    if (n <= 0) {
        return;
    }
    outputVector.init();
    outputVector.noNulls = false;
    outputVector.isRepeating = false;
    LongColumnVector inputIndexVector = (LongColumnVector) batch.cols[inputColumns[0]];
    long[] indexVector = inputIndexVector.vector;
    if (inputIndexVector.isRepeating) {
        int index = (int) indexVector[0];
        if (index > 0 && index < inputColumns.length) {
            BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
            if (cv.isRepeating) {
                outputVector.setElement(0, 0, cv);
                outputVector.isRepeating = true;
            } else if (batch.selectedInUse) {
                for (int j = 0; j != n; j++) {
                    int i = sel[j];
                    // The index is repeating but the chosen column is not, so read per-row values.
                    outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]);
                    outputVector.isNull[i] = false;
                }
            } else {
                for (int i = 0; i != n; i++) {
                    outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]);
                    outputVector.isNull[i] = false;
                }
            }
        } else {
            outputVector.isNull[0] = true;
            outputVector.isRepeating = true;
        }
    } else if (batch.selectedInUse) {
        for (int j = 0; j != n; j++) {
            int i = sel[j];
            int index = (int) indexVector[i];
            if (index > 0 && index < inputColumns.length) {
                BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
                int cvi = cv.isRepeating ? 0 : i;
                outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]);
                // noNulls is false, so clear any stale null flag for this row.
                outputVector.isNull[i] = false;
            } else {
                outputVector.isNull[i] = true;
            }
        }
    } else {
        for (int i = 0; i != n; i++) {
            int index = (int) indexVector[i];
            if (index > 0 && index < inputColumns.length) {
                BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
                int cvi = cv.isRepeating ? 0 : i;
                outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]);
                // noNulls is false, so clear any stale null flag for this row.
                outputVector.isNull[i] = false;
            } else {
                outputVector.isNull[i] = true;
            }
        }
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
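
VectorElt vectorizes ELT(n, str1, str2, ...): a 1-based index selects among the string arguments, and anything out of range yields NULL, which is what the index > 0 && index < inputColumns.length guard implements (inputColumns[0] holds the index column itself). A scalar sketch of the same semantics, purely illustrative:

public class EltSketch {
    // 1-based selection; out-of-range indexes return null, matching the
    // outputVector.isNull[i] = true branches in the vectorized code.
    static String elt(int index, String... strings) {
        if (index < 1 || index > strings.length) {
            return null;
        }
        return strings[index - 1];
    }

    public static void main(String[] args) {
        System.out.println(elt(2, "a", "b", "c")); // b
        System.out.println(elt(4, "a", "b", "c")); // null
    }
}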

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 124
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 66
Test (org.junit.Test) 50
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) 44
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) 12
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) 10
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) 8
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) 8
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 8
Text (org.apache.hadoop.io.Text) 8
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 6
IOException (java.io.IOException) 4
ArrayList (java.util.ArrayList) 4
Path (org.apache.hadoop.fs.Path) 4
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil) 4
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) 4
TypeDescription (org.apache.orc.TypeDescription) 4
UnsupportedEncodingException (java.io.UnsupportedEncodingException) 3
ParseException (java.text.ParseException) 3
Random (java.util.Random) 3