Search in sources :

Example 11 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorDateExpressions method testVectorUDFUnixTimeStamp.

/**
 * Runs verifyUDFUnixTimeStamp over several batch configurations: a batch built from the
 * single value 0, a batch built from getAllBoundaries(), a one-row repeating batch, and a
 * batch from getVectorizedRandomRowBatch. Each configuration is re-verified after nulls
 * are introduced into column 0 and, for two of them, into column 1 as well.
 */
@Test
public void testVectorUDFUnixTimeStamp() {
    // Batch built from the single input value 0 at the default batch size.
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] { 0 }, VectorizedRowBatch.DEFAULT_SIZE);
    // Sanity check on the fresh batch: column 1 starts with no nulls and is not repeating.
    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
    verifyUDFUnixTimeStamp(batch);
    // Same batch again after random nulls are injected into column 0.
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFUnixTimeStamp(batch);
    // Batch built from the values returned by getAllBoundaries(), sized to that array's length.
    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFUnixTimeStamp(batch);
    // NOTE(review): column 1 is presumably the output column -- confirm against
    // verifyUDFUnixTimeStamp; nulls are injected there too before the next verification.
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFUnixTimeStamp(batch);
    // One-row batch with column 0 flagged as repeating.
    batch = getVectorizedRowBatch(new int[] { 0 }, 1);
    batch.cols[0].isRepeating = true;
    verifyUDFUnixTimeStamp(batch);
    // Turn the repeating entry of column 0 into a null.
    batch.cols[0].noNulls = false;
    batch.cols[0].isNull[0] = true;
    verifyUDFUnixTimeStamp(batch);
    // NOTE(review): the meaning of the first argument (200) is not visible here --
    // presumably a seed; confirm against getVectorizedRandomRowBatch.
    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFUnixTimeStamp(batch);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) Test(org.junit.Test)

Example 12 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorArithmeticExpressions method testDecimalScalarModuloDecimalColumn.

// Spot check decimal scalar-column modulo: evaluates (scalar 2.00) % (column 0) into
// column 2, first on the batch as built by getVectorizedRowBatch3DecimalCols() and then
// again after overwriting the first three entries of column 0.
@Test
public void testDecimalScalarModuloDecimalColumn() {
    VectorizedRowBatch b = getVectorizedRowBatch3DecimalCols();
    HiveDecimal d = HiveDecimal.create("2.00");
    VectorExpression expr = new DecimalScalarModuloDecimalColumn(d, 0, 2);
    // test without nulls
    expr.evaluate(b);
    DecimalColumnVector r = (DecimalColumnVector) b.cols[2];
    // assertEquals (rather than assertTrue(x.equals(y))) reports the expected and actual
    // values on failure and fails cleanly instead of throwing if the actual value is null.
    assertEquals(HiveDecimal.create("0.80"), r.vector[0].getHiveDecimal());
    assertEquals(HiveDecimal.create("2.00"), r.vector[1].getHiveDecimal());
    // entry 2 will be null due to zero-divide
    assertFalse(r.noNulls);
    assertTrue(r.isNull[2]);
    // try again with some different data values
    DecimalColumnVector in = (DecimalColumnVector) b.cols[0];
    // a fresh expression instance is used for the second pass, as in the original test
    expr = new DecimalScalarModuloDecimalColumn(d, 0, 2);
    in.vector[0].set(HiveDecimal.create("0.50"));
    in.vector[1].set(HiveDecimal.create("0.80"));
    in.vector[2].set(HiveDecimal.create("0.70"));
    expr.evaluate(b);
    assertEquals(HiveDecimal.create("0.00"), r.vector[0].getHiveDecimal());
    assertEquals(HiveDecimal.create("0.40"), r.vector[1].getHiveDecimal());
    assertEquals(HiveDecimal.create("0.60"), r.vector[2].getHiveDecimal());
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) DecimalScalarModuloDecimalColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarModuloDecimalColumn) Test(org.junit.Test)

Example 13 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorArithmeticExpressions method testDecimalColMultiplyDecimalColumn.

// Spot check decimal column-column multiply: evaluates (column 0) * (column 1) into
// column 2, then checks that a product too large for the output yields NULL.
@Test
public void testDecimalColMultiplyDecimalColumn() {
    VectorizedRowBatch b = getVectorizedRowBatch3DecimalCols();
    VectorExpression expr = new DecimalColMultiplyDecimalColumn(0, 1, 2);
    DecimalColumnVector r = (DecimalColumnVector) b.cols[2];
    // test without nulls
    expr.evaluate(b);
    // assertEquals (rather than assertTrue(x.equals(y))) reports the expected and actual
    // values on failure and fails cleanly instead of throwing if the actual value is null.
    assertEquals(HiveDecimal.create("1.20"), r.vector[0].getHiveDecimal());
    assertEquals(HiveDecimal.create("-3.30"), r.vector[1].getHiveDecimal());
    assertEquals(HiveDecimal.create("0.00"), r.vector[2].getHiveDecimal());
    // test that overflow produces NULL
    b = getVectorizedRowBatch3DecimalCols();
    DecimalColumnVector c0 = (DecimalColumnVector) b.cols[0];
    // set to max possible value
    c0.vector[0].set(HiveDecimal.create("9999999999999999.99"));
    DecimalColumnVector c1 = (DecimalColumnVector) b.cols[1];
    c1.vector[0].set(HiveDecimal.create("2.00"));
    r = (DecimalColumnVector) b.cols[2];
    // will cause overflow for result at position 0, must yield NULL
    expr.evaluate(b);
    // checked separately so a failure shows which of the two conditions did not hold
    assertFalse(r.noNulls);
    assertTrue(r.isNull[0]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DecimalColMultiplyDecimalColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColMultiplyDecimalColumn) Test(org.junit.Test)

Example 14 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorArithmeticExpressions method testLongColAddLongColumn.

/**
 * Exercises LongColAddLongColumn on a six-column batch from VectorizedRowGroupGenUtil:
 * columns 0 + 1 into 2, and columns 3 + 4 into 5. Checks the computed sums, then how the
 * output's isNull / noNulls / isRepeating flags follow nullable and repeating inputs, and
 * finally that stale output flags are overwritten when neither input has nulls.
 */
@Test
public void testLongColAddLongColumn() {
    int seed = 17;
    VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(VectorizedRowBatch.DEFAULT_SIZE, 6, seed);
    LongColumnVector lcv0 = (LongColumnVector) vrg.cols[0];
    LongColumnVector lcv1 = (LongColumnVector) vrg.cols[1];
    LongColumnVector lcv2 = (LongColumnVector) vrg.cols[2];
    LongColumnVector lcv3 = (LongColumnVector) vrg.cols[3];
    LongColumnVector lcv4 = (LongColumnVector) vrg.cols[4];
    LongColumnVector lcv5 = (LongColumnVector) vrg.cols[5];
    LongColAddLongColumn expr = new LongColAddLongColumn(0, 1, 2);
    expr.evaluate(vrg);
    // NOTE(review): the expected sum (i + 1) * seed * 3 presumably reflects generated data
    // of the form col[c][i] = (i + 1) * seed * (c + 1) -- confirm against
    // VectorizedRowGroupGenUtil.
    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
        assertEquals((i + 1) * seed * 3, lcv2.vector[i]);
    }
    assertTrue(lcv2.noNulls);
    // Now set one column nullable
    lcv1.noNulls = false;
    lcv1.isNull[1] = true;
    // set output isRepeating to true to make sure it gets over-written
    lcv2.isRepeating = true;
    // similarly with noNulls
    lcv2.noNulls = true;
    expr.evaluate(vrg);
    assertTrue(lcv2.isNull[1]);
    assertFalse(lcv2.noNulls);
    assertFalse(lcv2.isRepeating);
    verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size);
    // Now set other column nullable too
    lcv0.noNulls = false;
    lcv0.isNull[1] = true;
    lcv0.isNull[3] = true;
    expr.evaluate(vrg);
    assertTrue(lcv2.isNull[1]);
    assertTrue(lcv2.isNull[3]);
    assertFalse(lcv2.noNulls);
    verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size);
    // Now test with repeating flag
    lcv3.isRepeating = true;
    LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5);
    expr2.evaluate(vrg);
    // NOTE(review): the expected value seed * (4 + 5 * (i + 1)) presumably is the repeated
    // first entry of column 3 plus entry i of column 4 -- confirm against the generator.
    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
        assertEquals(seed * (4 + 5 * (i + 1)), lcv5.vector[i]);
    }
    // Repeating with other as nullable
    lcv4.noNulls = false;
    lcv4.isNull[0] = true;
    expr2.evaluate(vrg);
    assertTrue(lcv5.isNull[0]);
    assertFalse(lcv5.noNulls);
    verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size);
    // Repeating null value
    lcv3.isRepeating = true;
    lcv3.noNulls = false;
    lcv3.isNull[0] = true;
    expr2.evaluate(vrg);
    assertFalse(lcv5.noNulls);
    assertTrue(lcv5.isRepeating);
    assertTrue(lcv5.isNull[0]);
    verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size);
    // Neither input has nulls. Verify that this propagates to output.
    vrg.selectedInUse = false;
    lcv0.noNulls = true;
    lcv1.noNulls = true;
    lcv0.isRepeating = false;
    lcv1.isRepeating = false;
    // set output noNulls to false to make sure it gets over-written
    lcv2.noNulls = false;
    // similarly with isRepeating
    lcv2.isRepeating = true;
    expr.evaluate(vrg);
    assertTrue(lcv2.noNulls);
    assertFalse(lcv2.isRepeating);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) LongColAddLongColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 15 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorArithmeticExpressions method testDecimalColMultiplyDecimalScalar.

/* Spot check correctness of decimal column multiply decimal scalar. The case for
   * addition checks all the cases for the template, so don't do that redundantly here.
   * Evaluates (column 0) * (scalar 2) into column 2, then checks that a product too
   * large for the output yields NULL.
   */
@Test
public void testDecimalColMultiplyDecimalScalar() {
    VectorizedRowBatch b = getVectorizedRowBatch3DecimalCols();
    HiveDecimal d = HiveDecimal.create(2);
    VectorExpression expr = new DecimalColMultiplyDecimalScalar(0, d, 2);
    // test without nulls
    expr.evaluate(b);
    DecimalColumnVector r = (DecimalColumnVector) b.cols[2];
    // assertEquals (rather than assertTrue(x.equals(y))) reports the expected and actual
    // values on failure and fails cleanly instead of throwing if the actual value is null.
    assertEquals(HiveDecimal.create("2.40"), r.vector[0].getHiveDecimal());
    assertEquals(HiveDecimal.create("-6.60"), r.vector[1].getHiveDecimal());
    assertEquals(HiveDecimal.create("0"), r.vector[2].getHiveDecimal());
    // test that overflow produces null
    b = getVectorizedRowBatch3DecimalCols();
    DecimalColumnVector in = (DecimalColumnVector) b.cols[0];
    // set to max possible value
    in.vector[0].set(HiveDecimal.create("9999999999999999.99"));
    expr.evaluate(b);
    r = (DecimalColumnVector) b.cols[2];
    assertFalse(r.noNulls);
    assertTrue(r.isNull[0]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DecimalColMultiplyDecimalScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColMultiplyDecimalScalar) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Test(org.junit.Test)

Aggregations

VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)280 Test (org.junit.Test)182 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)118 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)69 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)68 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)56 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)39 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)22 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)20 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)18 IOException (java.io.IOException)16 Timestamp (java.sql.Timestamp)16 Configuration (org.apache.hadoop.conf.Configuration)13 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)13 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)12 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)11 Random (java.util.Random)9 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)8 Path (org.apache.hadoop.fs.Path)6 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)6