Example 16 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestVectorMathFunctions, method testVectorBin.

@Test
public void testVectorBin() {
    // test conversion of long->string
    VectorizedRowBatch b = getBatchForStringMath();
    BytesColumnVector resultV = (BytesColumnVector) b.cols[2];
    b.cols[0].noNulls = true;
    VectorExpression expr = new FuncBin(1, 2);
    expr.evaluate(b);
    String s = new String(resultV.vector[1], resultV.start[1], resultV.length[1]);
    Assert.assertEquals("11111111", s);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)
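
getBatchForStringMath() is a helper in TestVectorMathFunctions that is not shown above. A minimal sketch of how a comparable input batch could be assembled by hand follows; the column layout and the type of column 0 are assumptions for illustration, not the actual helper:

// Assumed layout: col 0 unused by FuncBin, col 1 = long input, col 2 = string output.
VectorizedRowBatch b = new VectorizedRowBatch(3, VectorizedRowBatch.DEFAULT_SIZE);
b.cols[0] = new DoubleColumnVector();
b.cols[1] = new LongColumnVector();
b.cols[2] = new BytesColumnVector();
LongColumnVector inV = (LongColumnVector) b.cols[1];
BytesColumnVector outV = (BytesColumnVector) b.cols[2];
inV.vector[0] = 0;
inV.vector[1] = 255;   // bin(255) == "11111111", matching the assertion above
outV.initBuffer();     // allocate the output byte buffer (the expression may also reinitialize it)
b.size = 2;

With this layout, new FuncBin(1, 2) reads the long values in column 1 and writes their binary-string form into column 2, which is what the assertion above checks at row 1.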

Example 17 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestVectorGenericDateExpressions, method testDateAddColScalar.

@Test
public void testDateAddColScalar() {
    for (VectorExpression.Type colType1 : dateTimestampStringTypes) testDateAddColScalar(colType1, true);
    VectorExpression udf = new VectorUDFDateAddColScalar(0, 0, 1);
    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
    batch.cols[0] = new BytesColumnVector(1);
    batch.cols[1] = new LongColumnVector(1);
    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
    byte[] bytes = "error".getBytes(utf8);
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[1].isNull[0], true);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)
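
Because "error" is not a parseable date string, VectorUDFDateAddColScalar marks the output row as null, which is exactly what the final assertion checks. The utf8 value used here and in the next example is a charset field defined elsewhere in TestVectorGenericDateExpressions; presumably a declaration along these lines (an assumed sketch):

// Assumed field near the top of the test class (java.nio.charset.Charset / StandardCharsets).
private final Charset utf8 = StandardCharsets.UTF_8;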

Example 18 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestVectorGenericDateExpressions, method testDateDiffScalarCol.

@Test
public void testDateDiffScalarCol() {
    for (VectorExpression.Type scalarType1 : dateTimestampStringTypes) {
        for (VectorExpression.Type colType2 : dateTimestampStringTypes) {
            LongColumnVector date2 = newRandomLongColumnVector(10000, size);
            LongColumnVector output = new LongColumnVector(size);
            ColumnVector col2 = castTo(date2, colType2);
            VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
            batch.cols[0] = col2;
            batch.cols[1] = output;
            long scalar1 = newRandom(1000);
            validateDateDiff(batch, scalar1, scalarType1, colType2, date2);
            TestVectorizedRowBatch.addRandomNulls(date2);
            batch.cols[0] = castTo(date2, colType2);
            validateDateDiff(batch, scalar1, scalarType1, colType2, date2);
        }
    }
    VectorExpression udf;
    byte[] bytes = "error".getBytes(utf8);
    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
    udf = new VectorUDFDateDiffScalarCol(new Timestamp(0), 0, 1);
    udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
    batch.cols[0] = new BytesColumnVector(1);
    batch.cols[1] = new LongColumnVector(1);
    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[1].isNull[0], true);
    udf = new VectorUDFDateDiffScalarCol(bytes, 0, 1);
    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
    batch.cols[0] = new LongColumnVector(1);
    batch.cols[1] = new LongColumnVector(1);
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[1].isNull[0], true);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Timestamp(java.sql.Timestamp) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) Test(org.junit.Test)
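
newRandomLongColumnVector(range, size), newRandom(range), castTo(vector, type) and validateDateDiff(...) are helpers defined elsewhere in TestVectorGenericDateExpressions. As one point of reference, a plausible sketch of the first helper is shown below (an assumption for illustration, not necessarily the exact Hive code; it relies on java.util.Random):

// Build a LongColumnVector of `size` random epoch-day values in [0, range).
private static LongColumnVector newRandomLongColumnVector(int range, int size) {
    LongColumnVector vector = new LongColumnVector(size);
    Random random = new Random();
    for (int i = 0; i < size; i++) {
        vector.vector[i] = random.nextInt(range);
    }
    return vector;
}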

Example 19 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestVectorStringExpressions, method testVarCharScalarConcatCol.

@Test
public void testVarCharScalarConcatCol() {
    // has nulls, not repeating
    VectorizedRowBatch batch = makeStringBatch();
    VarCharScalarConcatStringGroupCol expr = new VarCharScalarConcatStringGroupCol(new HiveVarchar(new String(red), 14), 0, 1);
    expr.evaluate(batch);
    BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
    int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    Assert.assertTrue(outCol.isNull[2]);
    int cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1], outCol.start[1], outCol.length[1]);
    Assert.assertEquals(0, cmp2);
    Assert.assertFalse(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    // no nulls, not repeating
    batch = makeStringBatch();
    batch.cols[0].noNulls = true;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1], outCol.start[1], outCol.length[1]);
    Assert.assertEquals(0, cmp2);
    int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2], outCol.start[2], outCol.length[2]);
    Assert.assertEquals(0, cmp3);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    // has nulls, is repeating
    batch = makeStringBatch();
    batch.cols[0].isRepeating = true;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    Assert.assertTrue(outCol.isRepeating);
    Assert.assertFalse(outCol.noNulls);
    // no nulls, is repeating
    batch = makeStringBatch();
    batch.cols[0].isRepeating = true;
    batch.cols[0].noNulls = true;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], outCol.start[0], outCol.length[0]);
    Assert.assertEquals(0, cmp);
    Assert.assertTrue(outCol.isRepeating);
    Assert.assertTrue(outCol.noNulls);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Test(org.junit.Test)
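
makeStringBatch() and the byte arrays red, green, redred and redgreen are fixtures defined elsewhere in TestVectorStringExpressions. A hypothetical sketch of what they would need to contain for the assertions above to hold (an illustration, not the actual helper): rows 0 and 1 hold "red" and "green", and row 2 is a null row backed by the empty string, so that when noNulls is forced to true the concatenation at row 2 yields plain "red".

byte[] red = "red".getBytes(StandardCharsets.UTF_8);
byte[] green = "green".getBytes(StandardCharsets.UTF_8);
byte[] redred = "redred".getBytes(StandardCharsets.UTF_8);      // "red" + "red"
byte[] redgreen = "redgreen".getBytes(StandardCharsets.UTF_8);  // "red" + "green"

VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE);
BytesColumnVector inV = new BytesColumnVector();
inV.initBuffer();
inV.setVal(0, red, 0, red.length);
inV.setVal(1, green, 0, green.length);
inV.setVal(2, new byte[0], 0, 0);   // null row backed by an empty string
inV.isNull[2] = true;
inV.noNulls = false;
batch.cols[0] = inV;
batch.cols[1] = new BytesColumnVector();  // output column written by the concat expression
batch.size = 3;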

Example 20 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestVectorStringExpressions, method testStringLikeRandomized.

@Test
public void testStringLikeRandomized() throws HiveException, UnsupportedEncodingException {
    final String[] patterns = new String[] { "ABC%", "%ABC", "%ABC%", "ABC%DEF", "ABC%DEF%", "%ABC%DEF", "%ABC%DEF%", "ABC%DEF%EFG", "%ABC%DEF%EFG", "%ABC%DEF%EFG%H" };
    long positive = 0;
    long negative = 0;
    Random control = new Random(1234);
    UDFLike udf = new UDFLike();
    for (String pattern : patterns) {
        VectorExpression expr = new FilterStringColLikeStringScalar(0, pattern.getBytes("utf-8"));
        VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1, 1, 1);
        batch.cols[0] = new BytesColumnVector(1);
        BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
        Text pText = new Text(pattern);
        for (int i = 0; i < 1024; i++) {
            String input = generateCandidate(control, pattern);
            BooleanWritable like = udf.evaluate(new Text(input), pText);
            batch.reset();
            bcv.initBuffer();
            byte[] utf8 = input.getBytes("utf-8");
            bcv.setVal(0, utf8, 0, utf8.length);
            bcv.noNulls = true;
            batch.size = 1;
            expr.evaluate(batch);
            if (like.get()) {
                positive++;
            } else {
                negative++;
            }
            assertEquals(String.format("Checking '%s' against '%s'", input, pattern), like.get(), (batch.size != 0));
        }
    }
    LOG.info(String.format("Randomized testing: ran %d positive tests and %d negative tests", positive, negative));
}
Also used : Text(org.apache.hadoop.io.Text) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) UDFLike(org.apache.hadoop.hive.ql.udf.UDFLike) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)
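
generateCandidate(Random, String) is a helper in TestVectorStringExpressions that is not shown above; it produces input strings that sometimes match and sometimes miss the LIKE pattern, so both the positive and negative counters get exercised. A hypothetical stand-in (purely illustrative, not the Hive implementation) could look like this:

// Hypothetical candidate generator: half the time expand the pattern's '%'
// wildcards into random text (likely a match), otherwise emit an unrelated
// random lowercase string (likely a miss).
private static String generateCandidate(Random r, String pattern) {
    StringBuilder sb = new StringBuilder();
    if (r.nextBoolean()) {
        for (char c : pattern.toCharArray()) {
            if (c == '%') {
                int n = r.nextInt(4);
                for (int i = 0; i < n; i++) {
                    sb.append((char) ('a' + r.nextInt(26)));
                }
            } else {
                sb.append(c);
            }
        }
    } else {
        int n = r.nextInt(10);
        for (int i = 0; i < n; i++) {
            sb.append((char) ('a' + r.nextInt(26)));
        }
    }
    return sb.toString();
}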

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 124
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 66
Test (org.junit.Test): 50
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 44
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 12
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 10
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 8
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 8
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
Text (org.apache.hadoop.io.Text): 8
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 6
IOException (java.io.IOException): 4
ArrayList (java.util.ArrayList): 4
Path (org.apache.hadoop.fs.Path): 4
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 4
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 4
TypeDescription (org.apache.orc.TypeDescription): 4
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 3
ParseException (java.text.ParseException): 3
Random (java.util.Random): 3