Search in sources :

Example 66 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorDateExpressions method getVectorizedRandomRowBatch.

/**
 * Builds a two-column batch of {@code size} rows for randomized tests.
 *
 * <p>Column 0 is filled with values drawn from a {@link Random} seeded with {@code seed},
 * so the same seed always yields the same data. Column 1 is a freshly allocated
 * {@link LongColumnVector} of the same length that this method does not write to.
 *
 * @param seed seed for the pseudo-random generator
 * @param size number of rows to generate; also stored as the batch size
 * @return the populated batch
 */
private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) {
    final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
    final LongColumnVector randomColumn = new LongColumnVector(size);
    final Random generator = new Random(seed);

    // One nextInt() draw per row, in row order, so the sequence is reproducible per seed.
    int row = 0;
    while (row < size) {
        randomColumn.vector[row] = generator.nextInt();
        row++;
    }

    result.size = size;
    result.cols[0] = randomColumn;
    result.cols[1] = new LongColumnVector(size);
    return result;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) Random(java.util.Random) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 67 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorConditionalExpressions method testIfExprStringColumnStringColumn.

/**
 * Checks {@code IfExprStringGroupColumnStringGroupColumn} over a batch produced by
 * {@code getBatch1Long3BytesVectors()}: columns 0, 1 and 2 are the first, second and third
 * IF arguments and the result is read from column 3.
 *
 * <p>The scenarios covered, each on a fresh batch, are: the plain case, each of the three
 * arguments marked as repeating, a null in the second argument, a null in the third
 * argument, and a repeating null second argument.
 *
 * <p>String results are compared with {@code assertEquals} so that a failure reports the
 * actual value, and null-flag checks are asserted one condition at a time so that a failure
 * identifies which flag was wrong.
 *
 * @throws HiveException if evaluating the expression fails
 */
@Test
public void testIfExprStringColumnStringColumn() throws HiveException {
    VectorizedRowBatch batch = getBatch1Long3BytesVectors();
    VectorExpression expr = new IfExprStringGroupColumnStringGroupColumn(0, 1, 2, 3);
    BytesColumnVector r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertEquals("arg3_0", getString(r, 0));
    assertEquals("arg3_1", getString(r, 1));
    assertEquals("arg2_2", getString(r, 2));
    assertEquals("arg2_3", getString(r, 3));

    // test first IF argument repeating
    batch = getBatch1Long3BytesVectors();
    batch.cols[0].isRepeating = true;
    r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertEquals("arg3_0", getString(r, 0));
    assertEquals("arg3_1", getString(r, 1));
    assertEquals("arg3_2", getString(r, 2));
    assertEquals("arg3_3", getString(r, 3));

    // test second IF argument repeating
    batch = getBatch1Long3BytesVectors();
    batch.cols[1].isRepeating = true;
    r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertEquals("arg3_0", getString(r, 0));
    assertEquals("arg3_1", getString(r, 1));
    assertEquals("arg2_0", getString(r, 2));
    assertEquals("arg2_0", getString(r, 3));

    // test third IF argument repeating
    batch = getBatch1Long3BytesVectors();
    batch.cols[2].isRepeating = true;
    r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertEquals("arg3_0", getString(r, 0));
    assertEquals("arg3_0", getString(r, 1));
    assertEquals("arg2_2", getString(r, 2));
    assertEquals("arg2_3", getString(r, 3));

    // test second IF argument with nulls
    batch = getBatch1Long3BytesVectors();
    batch.cols[1].noNulls = false;
    batch.cols[1].isNull[2] = true;
    // set vector[2] to a null reference so the expression must rely on isNull, not the data
    ((BytesColumnVector) batch.cols[1]).vector[2] = null;
    r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertEquals("arg3_0", getString(r, 0));
    assertEquals("arg3_1", getString(r, 1));
    assertFalse(r.noNulls);
    assertTrue(r.isNull[2]);
    assertEquals("arg2_3", getString(r, 3));
    assertFalse(r.isNull[0]);
    assertFalse(r.isNull[1]);
    assertFalse(r.isNull[3]);

    // test third IF argument with nulls
    batch = getBatch1Long3BytesVectors();
    batch.cols[2].noNulls = false;
    batch.cols[2].isNull[0] = true;
    // set vector[0] to a null reference so the expression must rely on isNull, not the data
    ((BytesColumnVector) batch.cols[2]).vector[0] = null;
    r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertFalse(r.noNulls);
    assertTrue(r.isNull[0]);
    assertEquals("arg3_1", getString(r, 1));
    assertEquals("arg2_2", getString(r, 2));
    assertEquals("arg2_3", getString(r, 3));
    assertFalse(r.isNull[1]);
    assertFalse(r.isNull[2]);
    assertFalse(r.isNull[3]);

    // test second IF argument with nulls and repeating
    batch = getBatch1Long3BytesVectors();
    batch.cols[1].noNulls = false;
    batch.cols[1].isNull[0] = true;
    batch.cols[1].isRepeating = true;
    r = (BytesColumnVector) batch.cols[3];
    expr.evaluate(batch);
    assertEquals("arg3_0", getString(r, 0));
    assertEquals("arg3_1", getString(r, 1));
    assertFalse(r.noNulls);
    assertTrue(r.isNull[2]);
    assertTrue(r.isNull[3]);
    assertFalse(r.isNull[0]);
    assertFalse(r.isNull[1]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) IfExprStringGroupColumnStringGroupColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn) Test(org.junit.Test)

Example 68 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorConditionalExpressions method testDoubleColumnScalarIfExpr.

/**
 * Checks {@code IfExprDoubleColumnDoubleScalar} on a batch produced by
 * {@code getBatch1Long3DoubleVectors()}, reading the result from column 3.
 *
 * <p>The expected output is the scalar {@code 200} for rows 0 and 1, and {@code -3} and
 * {@code -4} for rows 2 and 3 (presumably taken from column 1 of the fixture; confirm
 * against {@code getBatch1Long3DoubleVectors()}).
 *
 * @throws HiveException if evaluating the expression fails
 */
@Test
public void testDoubleColumnScalarIfExpr() throws HiveException {
    VectorizedRowBatch batch = getBatch1Long3DoubleVectors();
    VectorExpression expr = new IfExprDoubleColumnDoubleScalar(0, 1, 200d, 3);
    DoubleColumnVector r = (DoubleColumnVector) batch.cols[3];
    expr.evaluate(batch);
    // A delta of 0 keeps the comparison exact while letting a failure report the actual value.
    assertEquals(200d, r.vector[0], 0d);
    assertEquals(200d, r.vector[1], 0d);
    assertEquals(-3d, r.vector[2], 0d);
    assertEquals(-4d, r.vector[3], 0d);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) IfExprDoubleColumnDoubleScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar) Test(org.junit.Test)

Example 69 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorConditionalExpressions method testDoubleScalarColumnIfExpr.

/**
 * Checks {@code IfExprDoubleScalarDoubleColumn} on a batch produced by
 * {@code getBatch1Long3DoubleVectors()}, reading the result from column 3.
 *
 * <p>The expected output is {@code 1} and {@code 2} for rows 0 and 1 (presumably taken from
 * column 2 of the fixture; confirm against {@code getBatch1Long3DoubleVectors()}), and the
 * scalar {@code 100} for rows 2 and 3.
 *
 * @throws HiveException if evaluating the expression fails
 */
@Test
public void testDoubleScalarColumnIfExpr() throws HiveException {
    VectorizedRowBatch batch = getBatch1Long3DoubleVectors();
    VectorExpression expr = new IfExprDoubleScalarDoubleColumn(0, 100.0d, 2, 3);
    DoubleColumnVector r = (DoubleColumnVector) batch.cols[3];
    expr.evaluate(batch);
    // A delta of 0 keeps the comparison exact while letting a failure report the actual value.
    assertEquals(1d, r.vector[0], 0d);
    assertEquals(2d, r.vector[1], 0d);
    assertEquals(100d, r.vector[2], 0d);
    assertEquals(100d, r.vector[3], 0d);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) IfExprDoubleScalarDoubleColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn) Test(org.junit.Test)

Example 70 with VectorizedRowBatch

use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch in project hive by apache.

the class TestVectorDateDiff method doVectorDateAddSubTest.

/**
 * Compiles {@code exprDesc} into a vector expression, runs it over every batch supplied by
 * {@code batchSource}, and hands each evaluated batch to {@code extractResultObjects} so the
 * results are collected into {@code resultObjects}.
 *
 * <p>When {@code dateDiffTestMode} is {@code ADAPTOR}, the
 * {@code HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE} flag is switched on before the expression is
 * built. When the mode is {@code VECTOR_EXPRESSION} but the resulting expression is a
 * {@link VectorUDFAdaptor}, a diagnostic line is printed to standard output.
 *
 * @param dateTimeStringTypeInfo1 type of the first operand; used only in the diagnostic line
 * @param dateTimeStringTypeInfo2 type of the second operand; used only in the diagnostic line
 * @param columns column names given to the vectorization context; their count is also the
 *     index of the column the result is extracted from
 * @param typeInfos column types given to the vectorization context
 * @param children not read by this method
 * @param exprDesc expression to vectorize and evaluate
 * @param dateDiffTestMode selects adaptor override vs. native-expression diagnostics
 * @param columnScalarMode used only in the diagnostic line
 * @param batchSource supplier of input batches; its iteration is reset before use
 * @param batchContext factory for the working batch
 * @param resultObjects destination passed through to {@code extractResultObjects}
 * @throws Exception if building or evaluating the expression fails
 */
private void doVectorDateAddSubTest(TypeInfo dateTimeStringTypeInfo1, TypeInfo dateTimeStringTypeInfo2, List<String> columns, TypeInfo[] typeInfos, List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc, DateDiffTestMode dateDiffTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, Object[] resultObjects) throws Exception {
    final HiveConf conf = new HiveConf();
    if (DateDiffTestMode.ADAPTOR == dateDiffTestMode) {
        conf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
    }

    // Two entries, both NONE.
    final DataTypePhysicalVariation[] physicalVariations = new DataTypePhysicalVariation[] {
        DataTypePhysicalVariation.NONE,
        DataTypePhysicalVariation.NONE
    };
    final VectorizationContext context = new VectorizationContext(
        "name",
        columns,
        Arrays.asList(typeInfos),
        Arrays.asList(physicalVariations),
        conf);

    final VectorExpression compiled = context.getVectorExpression(exprDesc);
    compiled.transientInit(conf);

    final boolean fellBackToAdaptor =
        dateDiffTestMode == DateDiffTestMode.VECTOR_EXPRESSION && compiled instanceof VectorUDFAdaptor;
    if (fellBackToAdaptor) {
        System.out.println(
            "*NO NATIVE VECTOR EXPRESSION* dateTimeStringTypeInfo1 " + dateTimeStringTypeInfo1.toString()
                + " dateTimeStringTypeInfo2 " + dateTimeStringTypeInfo2.toString()
                + " dateDiffTestMode " + dateDiffTestMode
                + " columnScalarMode " + columnScalarMode
                + " vectorExpression " + compiled.toString());
    }

    final VectorizedRowBatch workBatch = batchContext.createVectorizedRowBatch();

    // The result is extracted as an int from the column at index columns.size().
    final VectorExtractRow resultExtractor = new VectorExtractRow();
    resultExtractor.init(new TypeInfo[] { TypeInfoFactory.intTypeInfo }, new int[] { columns.size() });
    final Object[] scratchRow = new Object[1];

    batchSource.resetBatchIteration();
    // rowOffset is advanced by the size of each batch after its results are extracted.
    for (int rowOffset = 0; batchSource.fillNextBatch(workBatch); rowOffset += workBatch.size) {
        compiled.evaluate(workBatch);
        extractResultObjects(workBatch, rowOffset, resultExtractor, scratchRow, resultObjects);
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)

Aggregations

VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 401; Test (org.junit.Test): 214; LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 157; BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 98; TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 83; DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 64; DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 40; TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 32; VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 30; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 28; VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader): 26; Configuration (org.apache.hadoop.conf.Configuration): 23; IOException (java.io.IOException): 20; HiveConf (org.apache.hadoop.hive.conf.HiveConf): 20; VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 19; HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 18; VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 18; Timestamp (java.sql.Timestamp): 17; VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 16; VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 15