Search in sources :

Example 86 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class TestVectorGenericDateExpressions method validateDateDiff.

/**
 * Evaluates a scalar-vs-column date-diff vector expression on {@code batch} and asserts the result.
 *
 * <p>A {@code VectorUDFDateDiffScalarCol} is built with the arguments {@code (scalar, 0, 1)}, where the
 * scalar is {@code scalar1} itself for DATE, {@code toTimestamp(scalar1)} for TIMESTAMP and
 * {@code toString(scalar1)} for STRING. After evaluation the result is read from {@code batch.cols[1]}
 * and every entry {@code i} is expected to equal {@code scalar1 - date2.vector[i]}.
 *
 * @param batch       batch the expression is evaluated against; column 1 is read back as the output
 * @param scalar1     scalar operand as a long (presumably an epoch-day value; confirm against callers)
 * @param scalarType1 category used to pick the scalar representation; must be DATE, TIMESTAMP or STRING
 * @param colType2    category of the column operand, used only to build the input type infos
 * @param date2       column whose {@code vector} holds the values the expected differences are computed from
 * @throws IllegalArgumentException if {@code scalarType1} is not one of the supported categories
 * @throws HiveException declared by the signature; presumably raised by the expression setup or evaluation
 */
private void validateDateDiff(VectorizedRowBatch batch, long scalar1, PrimitiveCategory scalarType1, PrimitiveCategory colType2, LongColumnVector date2) throws HiveException {
    final VectorExpression udf;
    switch(scalarType1) {
        case DATE:
            udf = new VectorUDFDateDiffScalarCol(scalar1, 0, 1);
            break;
        case TIMESTAMP:
            udf = new VectorUDFDateDiffScalarCol(toTimestamp(scalar1), 0, 1);
            break;
        case STRING:
            udf = new VectorUDFDateDiffScalarCol(toString(scalar1), 0, 1);
            break;
        default:
            // Fail with a clear message instead of a NullPointerException on the first use of udf.
            throw new IllegalArgumentException("Unsupported scalar type for date diff: " + scalarType1);
    }
    udf.setInputTypeInfos(new TypeInfo[] { primitiveCategoryToTypeInfo(scalarType1), primitiveCategoryToTypeInfo(colType2) });
    udf.transientInit();
    udf.evaluate(batch);
    LongColumnVector output = (LongColumnVector) batch.cols[1];
    for (int i = 0; i < date2.vector.length; i++) {
        Assert.assertEquals(scalar1 - date2.vector[i], output.vector[i]);
    }
}
Also used : LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 87 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class FuncStringToLong method evaluate.

/**
 * Applies {@code func} to each active, non-null row of the bytes input column, writing into the
 * long output column of the same batch.
 *
 * <p>Child expressions (if any) are evaluated first. An empty batch is left untouched. A repeating
 * input produces a repeating output that is either the converted row 0 or NULL. Otherwise the rows
 * named by {@code batch.selected} (when {@code batch.selectedInUse}) or rows {@code 0..size-1} are
 * processed, and input null flags are carried over to the output.
 *
 * @param batch the row batch holding both the input and the output column
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    final BytesColumnVector source = (BytesColumnVector) batch.cols[inputCol];
    final LongColumnVector target = (LongColumnVector) batch.cols[outputCol];
    final int[] selection = batch.selected;
    final int rowCount = batch.size;
    final boolean[] sourceNulls = source.isNull;
    final boolean[] targetNulls = target.isNull;
    if (rowCount == 0) {
        // Empty batch: leave the output column exactly as it is.
        return;
    }

    // The output is updated field by field below, so no full column reset is performed.
    target.isRepeating = false;

    if (source.isRepeating) {
        final boolean repeatedValueIsNull = !source.noNulls && sourceNulls[0];
        if (repeatedValueIsNull) {
            targetNulls[0] = true;
            target.noNulls = false;
        } else {
            // Clear the flag before the call; func may apparently still flag the row itself.
            targetNulls[0] = false;
            func(target, source, 0);
        }
        target.isRepeating = true;
        return;
    }

    if (source.noNulls) {
        if (batch.selectedInUse) {
            // Decided once up front: only clear per-row flags when the output may hold stale ones.
            final boolean clearStaleFlags = !target.noNulls;
            for (int k = 0; k < rowCount; k++) {
                final int row = selection[k];
                if (clearStaleFlags) {
                    // Clear the flag before the call; func may apparently still flag the row itself.
                    targetNulls[row] = false;
                }
                func(target, source, row);
            }
        } else {
            if (!target.noNulls) {
                // Wipe every flag in one go so that noNulls can safely be switched back on.
                Arrays.fill(targetNulls, false);
                target.noNulls = true;
            }
            for (int row = 0; row < rowCount; row++) {
                func(target, source, row);
            }
        }
        return;
    }

    // The input may contain NULLs: mirror its null flags and convert only the non-null rows.
    target.noNulls = false;
    if (batch.selectedInUse) {
        for (int k = 0; k < rowCount; k++) {
            final int row = selection[k];
            final boolean rowIsNull = source.isNull[row];
            target.isNull[row] = rowIsNull;
            if (!rowIsNull) {
                func(target, source, row);
            }
        }
    } else {
        System.arraycopy(source.isNull, 0, target.isNull, 0, rowCount);
        for (int row = 0; row < rowCount; row++) {
            if (!source.isNull[row]) {
                func(target, source, row);
            }
        }
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 88 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class IfExprColumnNull method evaluate.

/**
 * Evaluates {@code IF(cond, thenColumn, NULL)} over the batch.
 *
 * <p>For each active row the output receives the THEN column's element when the condition column
 * is non-null and equal to 1; otherwise the output row is marked NULL. A repeating condition is
 * handled once for the whole batch: either the THEN column is copied through
 * {@code copySelected}, or the output becomes a repeating NULL.
 *
 * @param batch the row batch holding the condition, THEN and output columns
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    final LongColumnVector condVector = (LongColumnVector) batch.cols[arg1Column];
    final ColumnVector thenVector = batch.cols[arg2Column];
    final ColumnVector resultVector = batch.cols[outputColumnNum];
    final int[] selection = batch.selected;
    final int rowCount = batch.size;
    final boolean[] condIsNull = condVector.isNull;
    final long[] condValues = condVector.vector;
    final boolean[] resultIsNull = resultVector.isNull;
    if (rowCount == 0) {
        return;
    }

    // The output is updated field by field below, so no full column reset is performed.
    resultVector.isRepeating = false;

    // One condition value for the whole batch: resolve it once.
    if (condVector.isRepeating) {
        final boolean condHolds = (condVector.noNulls || !condIsNull[0]) && condValues[0] == 1;
        if (!condHolds) {
            resultVector.isRepeating = true;
            resultVector.noNulls = false;
            resultIsNull[0] = true;
        } else {
            thenVector.copySelected(batch.selectedInUse, selection, rowCount, resultVector);
        }
        return;
    }

    // Loop-invariant facts, read once before any output row is written.
    final boolean condHasNulls = !condVector.noNulls;
    // A repeating THEN column is always read at index 0; otherwise at the row's own index.
    final boolean thenRepeats = thenVector.isRepeating;

    if (batch.selectedInUse) {
        for (int k = 0; k < rowCount; k++) {
            final int row = selection[k];
            if ((condHasNulls && condIsNull[row]) || condValues[row] != 1) {
                resultIsNull[row] = true;
                resultVector.noNulls = false;
            } else {
                resultIsNull[row] = false;
                resultVector.setElement(row, thenRepeats ? 0 : row, thenVector);
            }
        }
    } else {
        for (int row = 0; row < rowCount; row++) {
            if ((condHasNulls && condIsNull[row]) || condValues[row] != 1) {
                resultIsNull[row] = true;
                resultVector.noNulls = false;
            } else {
                resultIsNull[row] = false;
                resultVector.setElement(row, thenRepeats ? 0 : row, thenVector);
            }
        }
    }
}
Also used : LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)

Example 89 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class IfExprCondExprBase method evaluate.

/**
 * Evaluates only the IF condition (child expression 0) and partitions the active rows into
 * {@code thenSelected} and {@code elseSelected}.
 *
 * <p>A row goes to the THEN list when its condition value is non-null and equal to 1; every other
 * row goes to the ELSE list. When the whole batch resolves to a single side (repeating condition,
 * or one of the lists ends up empty) {@code isIfStatementResultRepeated} is set and
 * {@code isIfStatementResultThen} records which side won.
 *
 * @param batch the row batch to evaluate the condition against
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
    // NOTE: evaluation is conditional here, so super.evaluateChildren(batch) is deliberately not called.
    thenSelectedCount = 0;
    elseSelectedCount = 0;
    isIfStatementResultRepeated = false;
    // Start from a defined value; it is only meaningful once a result is known to repeat.
    isIfStatementResultThen = false;

    final int rowCount = batch.size;
    if (rowCount <= 0) {
        // Empty batch: nothing to partition.
        return;
    }

    // Child #1 is the IF boolean expression.
    childExpressions[0].evaluate(batch);
    final LongColumnVector condVector = (LongColumnVector) batch.cols[arg1Column];

    if (condVector.isRepeating) {
        isIfStatementResultRepeated = true;
        isIfStatementResultThen = (condVector.noNulls || !condVector.isNull[0]) && condVector.vector[0] == 1;
        return;
    }

    if (thenSelected == null || rowCount > thenSelected.length) {
        // (Re)allocate, rounding the capacity up to a multiple of DEFAULT_SIZE.
        final int chunk = VectorizedRowBatch.DEFAULT_SIZE;
        final int capacity = ((rowCount + chunk - 1) / chunk) * chunk;
        thenSelected = new int[capacity];
        elseSelected = new int[capacity];
    }

    final int[] selection = batch.selected;
    final long[] condValues = condVector.vector;
    // When the condition column has no nulls its isNull entries are never consulted.
    final boolean condHasNulls = !condVector.noNulls;
    final boolean[] condIsNull = condVector.isNull;

    if (batch.selectedInUse) {
        for (int k = 0; k < rowCount; k++) {
            final int row = selection[k];
            if ((condHasNulls && condIsNull[row]) || condValues[row] != 1) {
                elseSelected[elseSelectedCount++] = row;
            } else {
                thenSelected[thenSelectedCount++] = row;
            }
        }
    } else {
        for (int row = 0; row < rowCount; row++) {
            if ((condHasNulls && condIsNull[row]) || condValues[row] != 1) {
                elseSelected[elseSelectedCount++] = row;
            } else {
                thenSelected[thenSelectedCount++] = row;
            }
        }
    }

    // If every row landed on one side, report the outcome as repeated for the whole batch.
    if (thenSelectedCount == 0 || elseSelectedCount == 0) {
        isIfStatementResultRepeated = true;
        isIfStatementResultThen = (thenSelectedCount != 0);
    }
}
Also used : LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 90 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class IfExprIntervalDayTimeColumnColumn method evaluate.

/**
 * Evaluates {@code IF(cond, thenColumn, elseColumn)} for interval-day-time columns.
 *
 * <p>For each active row the output takes both the value and the null flag of the THEN column
 * when the condition is non-null and equal to 1, and of the ELSE column otherwise. A repeating
 * condition is resolved once by copying the chosen column with {@code copySelected}. In the
 * row-by-row path both branch columns are flattened for the duration of the loop and unflattened
 * afterwards.
 *
 * @param batch the row batch holding the condition, THEN, ELSE and output columns
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    final LongColumnVector condVector = (LongColumnVector) batch.cols[arg1Column];
    final IntervalDayTimeColumnVector thenVector = (IntervalDayTimeColumnVector) batch.cols[arg2Column];
    final IntervalDayTimeColumnVector elseVector = (IntervalDayTimeColumnVector) batch.cols[arg3Column];
    final IntervalDayTimeColumnVector resultVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
    final int[] selection = batch.selected;
    final boolean[] resultIsNull = resultVector.isNull;
    final int rowCount = batch.size;
    final long[] condValues = condVector.vector;
    if (rowCount == 0) {
        // Empty batch: nothing to do.
        return;
    }

    // The output is updated field by field below, so no full column reset is performed.
    resultVector.isRepeating = false;

    // One condition value for the whole batch: copy the winning branch wholesale.
    if (condVector.isRepeating) {
        final boolean condHolds = (condVector.noNulls || !condVector.isNull[0]) && condValues[0] == 1;
        final IntervalDayTimeColumnVector winner = condHolds ? thenVector : elseVector;
        winner.copySelected(batch.selectedInUse, selection, rowCount, resultVector);
        return;
    }

    // Expand repeating values / noNulls in both branches so they can be indexed per row.
    thenVector.flatten(batch.selectedInUse, selection, rowCount);
    elseVector.flatten(batch.selectedInUse, selection, rowCount);

    final boolean condHasNulls = !condVector.noNulls;
    // Null flags are always propagated from the chosen branch, even if neither branch has nulls,
    // so the output is conservatively marked as possibly containing nulls.
    resultVector.noNulls = false;

    if (batch.selectedInUse) {
        for (int k = 0; k < rowCount; k++) {
            final int row = selection[k];
            final boolean takeThen = (!condHasNulls || !condVector.isNull[row]) && condValues[row] == 1;
            final IntervalDayTimeColumnVector chosen = takeThen ? thenVector : elseVector;
            resultIsNull[row] = chosen.isNull[row];
            resultVector.set(row, chosen.asScratchIntervalDayTime(row));
        }
    } else {
        for (int row = 0; row < rowCount; row++) {
            final boolean takeThen = (!condHasNulls || !condVector.isNull[row]) && condValues[row] == 1;
            final IntervalDayTimeColumnVector chosen = takeThen ? thenVector : elseVector;
            resultIsNull[row] = chosen.isNull[row];
            resultVector.set(row, chosen.asScratchIntervalDayTime(row));
        }
    }

    // Put the branch columns back into their original repeating / noNulls state.
    thenVector.unFlatten();
    elseVector.unFlatten();
}
Also used : IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Aggregations

LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 277 · VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 133 · Test (org.junit.Test): 73 · BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 64 · TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 45 · TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 34 · DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 33 · ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 28 · DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 20 · PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 15 · Random (java.util.Random): 13 · HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 · StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector): 7 · LongColAddLongScalar (org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar): 7 · VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader): 7 · PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 7 · Timestamp (java.sql.Timestamp): 6 · IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector): 6 · IOException (java.io.IOException): 5 · Configuration (org.apache.hadoop.conf.Configuration): 5