Search in sources :

Example 11 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

the class TestVectorTypeCasts method testCastTimestampToDouble.

@Test
public void testCastTimestampToDouble() {
    double[] doubleValues = new double[500];
    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchTimestampInDoubleOut(doubleValues);
    TimestampColumnVector inV = (TimestampColumnVector) b.cols[0];
    DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1];
    b.cols[0].noNulls = true;
    VectorExpression expr = new CastTimestampToDouble(0, 1);
    expr.evaluate(b);
    for (int i = 0; i < doubleValues.length; i++) {
        double actual = resultV.vector[i];
        double doubleValue = TimestampUtils.getDouble(inV.asScratchTimestamp(i));
        assertEquals(actual, doubleValue, 0.000000001F);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Test(org.junit.Test)

Example 12 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

the class TestVectorizedORCReader method checkVectorizedReader.

private void checkVectorizedReader() throws Exception {
    Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    OrcStruct row = null;
    // Check Vectorized ORC reader against ORC row reader
    while (vrr.nextBatch(batch)) {
        for (int i = 0; i < batch.size; i++) {
            row = (OrcStruct) rr.next(row);
            for (int j = 0; j < batch.cols.length; j++) {
                Object a = (row.getFieldValue(j));
                ColumnVector cv = batch.cols[j];
                // if the value is repeating, use row 0
                int rowId = cv.isRepeating ? 0 : i;
                // make sure the null flag agrees
                if (a == null) {
                    Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
                } else if (a instanceof BooleanWritable) {
                    // Boolean values are stores a 1's and 0's, so convert and compare
                    Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(temp.toString(), Long.toString(b));
                } else if (a instanceof TimestampWritable) {
                    // Timestamps are stored as long, so convert and compare
                    TimestampWritable t = ((TimestampWritable) a);
                    TimestampColumnVector tcv = ((TimestampColumnVector) cv);
                    Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId));
                } else if (a instanceof DateWritable) {
                    // Dates are stored as long, so convert and compare
                    DateWritable adt = (DateWritable) a;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) b));
                } else if (a instanceof HiveDecimalWritable) {
                    // Decimals are stored as BigInteger, so convert and compare
                    HiveDecimalWritable dec = (HiveDecimalWritable) a;
                    HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
                    Assert.assertEquals(dec, b);
                } else if (a instanceof DoubleWritable) {
                    double b = ((DoubleColumnVector) cv).vector[rowId];
                    assertEquals(a.toString(), Double.toString(b));
                } else if (a instanceof Text) {
                    BytesColumnVector bcv = (BytesColumnVector) cv;
                    Text b = new Text();
                    b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                    assertEquals(a, b);
                } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
                    assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
                } else {
                    assertEquals("huh", a.getClass().getName());
                }
            }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.nextBatch(batch));
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable)

Example 13 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

the class CastDoubleToTimestamp method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        this.evaluateChildren(batch);
    }
    DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    outputColVector.noNulls = inputColVector.noNulls;
    int n = batch.size;
    double[] vector = inputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    if (inputColVector.isRepeating) {
        //All must be selected otherwise size would be zero
        //Repeating property will not change.
        setDouble(outputColVector, vector, 0);
        // Even if there are no nulls, we always copy over entry 0. Simplifies code.
        outputIsNull[0] = inputIsNull[0];
        outputColVector.isRepeating = true;
    } else if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                setDouble(outputColVector, vector, i);
            }
        } else {
            for (int i = 0; i != n; i++) {
                setDouble(outputColVector, vector, i);
            }
        }
        outputColVector.isRepeating = false;
    } else /* there are nulls */
    {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                setDouble(outputColVector, vector, i);
                outputIsNull[i] = inputIsNull[i];
            }
        } else {
            for (int i = 0; i != n; i++) {
                setDouble(outputColVector, vector, i);
            }
            System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
        }
        outputColVector.isRepeating = false;
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 14 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

the class VectorizedPrimitiveColumnReader method decodeDictionaryIds.

/**
   * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
   */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
    System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
    if (column.noNulls) {
        column.noNulls = dictionaryIds.noNulls;
    }
    column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
    switch(descriptor.getType()) {
        case INT32:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
            }
            break;
        case INT64:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
            }
            break;
        case FLOAT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
            }
            break;
        case DOUBLE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
            }
            break;
        case INT96:
            final Calendar calendar;
            if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
                // Local time should be used if no timezone is specified
                calendar = Calendar.getInstance();
            } else {
                calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
            }
            for (int i = rowId; i < rowId + num; ++i) {
                ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
                buf.order(ByteOrder.LITTLE_ENDIAN);
                long timeOfDayNanos = buf.getLong();
                int julianDay = buf.getInt();
                NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
                Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
                ((TimestampColumnVector) column).set(i, ts);
            }
            break;
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY:
            if (column instanceof BytesColumnVector) {
                for (int i = rowId; i < rowId + num; ++i) {
                    ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
                }
            } else {
                DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
                decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
                decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
                for (int i = rowId; i < rowId + num; ++i) {
                    decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
                }
            }
            break;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
    }
}
Also used : NanoTime(org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) Calendar(java.util.Calendar) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ByteBuffer(java.nio.ByteBuffer) Timestamp(java.sql.Timestamp) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 15 with TimestampColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in project hive by apache.

the class IfExprTimestampColumnColumnBase method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
    TimestampColumnVector arg2ColVector = (TimestampColumnVector) batch.cols[arg2Column];
    TimestampColumnVector arg3ColVector = (TimestampColumnVector) batch.cols[arg3Column];
    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    boolean[] outputIsNull = outputColVector.isNull;
    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
    // may override later
    outputColVector.isRepeating = false;
    int n = batch.size;
    long[] vector1 = arg1ColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    /* All the code paths below propagate nulls even if neither arg2 nor arg3
     * have nulls. This is to reduce the number of code paths and shorten the
     * code, at the expense of maybe doing unnecessary work if neither input
     * has nulls. This could be improved in the future by expanding the number
     * of code paths.
     */
    if (arg1ColVector.isRepeating) {
        if (vector1[0] == 1) {
            arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        } else {
            arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        }
        return;
    }
    // extend any repeating values and noNulls indicator in the inputs
    arg2ColVector.flatten(batch.selectedInUse, sel, n);
    arg3ColVector.flatten(batch.selectedInUse, sel, n);
    if (arg1ColVector.noNulls) {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        } else {
            for (int i = 0; i != n; i++) {
                outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        }
    } else /* there are nulls */
    {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        } else {
            for (int i = 0; i != n; i++) {
                outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3ColVector.asScratchTimestamp(i));
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        }
    }
    // restore repeating and no nulls indicators
    arg2ColVector.unFlatten();
    arg3ColVector.unFlatten();
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Aggregations

TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)54 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)19 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)17 Timestamp (java.sql.Timestamp)13 Test (org.junit.Test)10 Random (java.util.Random)8 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)8 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)7 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)7 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)6 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)3 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)3 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)3 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)3 BooleanWritable (org.apache.hadoop.io.BooleanWritable)3 IntWritable (org.apache.hadoop.io.IntWritable)3 LongWritable (org.apache.hadoop.io.LongWritable)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)2