Search in sources :

Example 81 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorFilterExpressions method testFilterTimestampNotBetween.

@Test
public void testFilterTimestampNotBetween() throws HiveException {
    VectorizedRowBatch vrb = new VectorizedRowBatch(1);
    vrb.cols[0] = new TimestampColumnVector();
    TimestampColumnVector lcv0 = (TimestampColumnVector) vrb.cols[0];
    Timestamp startTS = Timestamp.valueOf("2013-11-05 00:00:00.000000000");
    Timestamp endTS = Timestamp.valueOf("2013-11-05 00:00:00.000000010");
    Timestamp ts0 = Timestamp.valueOf("2013-11-04 00:00:00.000000000");
    lcv0.set(0, ts0);
    Timestamp ts1 = Timestamp.valueOf("2013-11-05 00:00:00.000000002");
    lcv0.set(1, ts1);
    Timestamp ts2 = Timestamp.valueOf("2099-11-06 00:00:00.000");
    lcv0.set(2, ts2);
    vrb.size = 3;
    VectorExpression expr1 = new FilterTimestampColumnNotBetween(0, startTS, endTS);
    expr1.evaluate(vrb);
    assertEquals(2, vrb.size);
    assertEquals(true, vrb.selectedInUse);
    assertEquals(0, vrb.selected[0]);
    assertEquals(2, vrb.selected[1]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) FilterTimestampColumnNotBetween(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween) Timestamp(java.sql.Timestamp) Test(org.junit.Test)

Example 82 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorFilterExpressions method testFilterLongIn.

/**
 * Test the IN filter VectorExpression classes.
 */
@Test
public void testFilterLongIn() throws HiveException {
    int seed = 17;
    VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(5, 2, seed);
    LongColumnVector lcv0 = (LongColumnVector) vrb.cols[0];
    long[] inList = { 5, 20 };
    FilterLongColumnInList f = new FilterLongColumnInList(0);
    f.setInListValues(inList);
    f.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.longTypeInfo });
    f.transientInit(new HiveConf());
    VectorExpression expr1 = f;
    // Basic case
    lcv0.vector[0] = 5;
    lcv0.vector[1] = 20;
    lcv0.vector[2] = 17;
    lcv0.vector[3] = 15;
    lcv0.vector[4] = 10;
    expr1.evaluate(vrb);
    assertEquals(2, vrb.size);
    assertTrue(vrb.selectedInUse);
    assertEquals(0, vrb.selected[0]);
    assertEquals(1, vrb.selected[1]);
    // With nulls
    VectorizedRowBatch vrb1 = VectorizedRowGroupGenUtil.getVectorizedRowBatch(5, 2, seed);
    lcv0 = (LongColumnVector) vrb1.cols[0];
    lcv0.vector[0] = 5;
    lcv0.vector[1] = 20;
    lcv0.vector[2] = 17;
    lcv0.vector[3] = 15;
    lcv0.vector[4] = 10;
    lcv0.noNulls = false;
    lcv0.isNull[0] = true;
    lcv0.isNull[2] = true;
    expr1.evaluate(vrb1);
    assertEquals(1, vrb1.size);
    assertTrue(vrb1.selectedInUse);
    assertEquals(1, vrb1.selected[0]);
    // With nulls and selected
    VectorizedRowBatch vrb2 = VectorizedRowGroupGenUtil.getVectorizedRowBatch(7, 2, seed);
    vrb2.selectedInUse = true;
    vrb2.selected[0] = 1;
    vrb2.selected[1] = 2;
    vrb2.selected[2] = 4;
    vrb2.size = 3;
    lcv0 = (LongColumnVector) vrb2.cols[0];
    lcv0.vector[0] = 5;
    lcv0.vector[1] = 20;
    lcv0.vector[2] = 17;
    lcv0.vector[3] = 15;
    lcv0.vector[4] = 10;
    lcv0.vector[5] = 19;
    lcv0.vector[6] = 21;
    lcv0.noNulls = false;
    lcv0.isNull[0] = true;
    lcv0.isNull[2] = true;
    lcv0.isNull[5] = true;
    expr1.evaluate(vrb2);
    assertEquals(1, vrb2.size);
    assertEquals(1, vrb2.selected[0]);
    // Repeating non null
    VectorizedRowBatch vrb3 = VectorizedRowGroupGenUtil.getVectorizedRowBatch(7, 2, seed);
    lcv0 = (LongColumnVector) vrb3.cols[0];
    lcv0.isRepeating = true;
    lcv0.vector[0] = 5;
    lcv0.vector[1] = 20;
    lcv0.vector[2] = 17;
    lcv0.vector[3] = 15;
    lcv0.vector[4] = 10;
    expr1.evaluate(vrb3);
    assertEquals(7, vrb3.size);
    assertFalse(vrb3.selectedInUse);
    assertTrue(lcv0.isRepeating);
    // Repeating null
    lcv0.noNulls = false;
    lcv0.vector[0] = 5;
    lcv0.isNull[0] = true;
    expr1.evaluate(vrb3);
    assertEquals(0, vrb3.size);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveConf(org.apache.hadoop.hive.conf.HiveConf) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 83 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorFilterExpressions method testFilterDecimalColumnEqualDecimalColumn.

/**
 * This tests the template for Decimal Column-Column comparison filters,
 * called FilterDecimalColumnCompareColumn.txt. Only equal is tested for multiple
 * cases because the logic is the same for <, >, <=, >=, == and !=.
 */
@Test
public void testFilterDecimalColumnEqualDecimalColumn() throws HiveException {
    VectorizedRowBatch b = getVectorizedRowBatch2DecimalCol();
    VectorExpression expr = new FilterDecimalColEqualDecimalColumn(0, 1);
    expr.evaluate(b);
    // check that right row(s) are selected
    assertTrue(b.selectedInUse);
    assertEquals(1, b.selected[0]);
    assertEquals(1, b.size);
    // try again with a null value
    b = getVectorizedRowBatch2DecimalCol();
    b.cols[0].noNulls = false;
    b.cols[0].isNull[1] = true;
    expr.evaluate(b);
    // verify that no rows were selected
    assertEquals(0, b.size);
    // try the repeating case
    b = getVectorizedRowBatch2DecimalCol();
    b.cols[0].isRepeating = true;
    expr.evaluate(b);
    // verify that no rows were selected
    assertEquals(0, b.size);
    // try the repeating null case
    b = getVectorizedRowBatch2DecimalCol();
    b.cols[0].isRepeating = true;
    b.cols[0].noNulls = false;
    b.cols[0].isNull[0] = true;
    expr.evaluate(b);
    // verify that no rows were selected
    assertEquals(0, b.size);
    // try nulls on both sides
    b = getVectorizedRowBatch2DecimalCol();
    b.cols[0].noNulls = false;
    b.cols[0].isNull[0] = true;
    b.cols[1].noNulls = false;
    b.cols[1].isNull[2] = true;
    expr.evaluate(b);
    // second of three was selected
    assertEquals(1, b.size);
    // try repeating on both sides
    b = getVectorizedRowBatch2DecimalCol();
    b.cols[0].isRepeating = true;
    b.cols[1].isRepeating = true;
    expr.evaluate(b);
    // verify that no rows were selected
    assertEquals(0, b.size);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) FilterDecimalColEqualDecimalColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColEqualDecimalColumn) Test(org.junit.Test)

Example 84 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorFilterExpressions method testColOpScalarNumericFilterNullAndRepeatingLogic.

@Test
public void testColOpScalarNumericFilterNullAndRepeatingLogic() throws HiveException {
    // No nulls, not repeating
    FilterLongColGreaterLongScalar f = new FilterLongColGreaterLongScalar(0, 1);
    VectorizedRowBatch batch = this.getSimpleLongBatch();
    batch.cols[0].noNulls = true;
    batch.cols[0].isRepeating = false;
    f.evaluate(batch);
    // only last 2 rows qualify
    Assert.assertEquals(2, batch.size);
    // show that their positions are recorded
    Assert.assertTrue(batch.selectedInUse);
    Assert.assertEquals(2, batch.selected[0]);
    Assert.assertEquals(3, batch.selected[1]);
    // make everything qualify and ensure selected is not in use
    // col > -1
    f = new FilterLongColGreaterLongScalar(0, -1);
    batch = getSimpleLongBatch();
    f.evaluate(batch);
    Assert.assertFalse(batch.selectedInUse);
    Assert.assertEquals(4, batch.size);
    // has nulls, not repeating
    batch = getSimpleLongBatch();
    // col > 1
    f = new FilterLongColGreaterLongScalar(0, 1);
    batch.cols[0].noNulls = false;
    batch.cols[0].isRepeating = false;
    batch.cols[0].isNull[3] = true;
    f.evaluate(batch);
    Assert.assertTrue(batch.selectedInUse);
    Assert.assertEquals(1, batch.size);
    Assert.assertEquals(2, batch.selected[0]);
    // no nulls, is repeating
    batch = getSimpleLongBatch();
    // col > -1
    f = new FilterLongColGreaterLongScalar(0, -1);
    batch.cols[0].noNulls = true;
    batch.cols[0].isRepeating = true;
    f.evaluate(batch);
    Assert.assertFalse(batch.selectedInUse);
    // everything qualifies (4 rows, all with value -1)
    Assert.assertEquals(4, batch.size);
    // has nulls, is repeating
    batch = getSimpleLongBatch();
    batch.cols[0].noNulls = false;
    batch.cols[0].isRepeating = true;
    batch.cols[0].isNull[0] = true;
    f.evaluate(batch);
    // all values are null so none qualify
    Assert.assertEquals(0, batch.size);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) FilterLongColGreaterLongScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar) Test(org.junit.Test)

Example 85 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorFilterExpressions method getVectorizedRowBatch2DecimalCol.

private VectorizedRowBatch getVectorizedRowBatch2DecimalCol() {
    VectorizedRowBatch b = new VectorizedRowBatch(2);
    DecimalColumnVector v0, v1;
    b.cols[0] = v0 = new DecimalColumnVector(18, 2);
    v0.vector[0].set(HiveDecimal.create("1.20"));
    v0.vector[1].set(HiveDecimal.create("-3.30"));
    v0.vector[2].set(HiveDecimal.create("0"));
    b.cols[1] = v1 = new DecimalColumnVector(18, 2);
    v1.vector[0].set(HiveDecimal.create("-1.00"));
    v1.vector[1].set(HiveDecimal.create("-3.30"));
    v1.vector[2].set(HiveDecimal.create("10.00"));
    b.size = 3;
    return b;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)

Aggregations

VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)401 Test (org.junit.Test)214 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)157 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)98 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)83 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)64 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)40 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)32 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)30 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)28 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)26 Configuration (org.apache.hadoop.conf.Configuration)23 IOException (java.io.IOException)20 HiveConf (org.apache.hadoop.hive.conf.HiveConf)20 VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)19 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)18 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)18 Timestamp (java.sql.Timestamp)17 VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)16 VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)15