Search in sources :

Example 76 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorStringExpressions method testColLower.

/**
 * Runs {@code StringLower} (reading column 0, writing column 1) over batches
 * from {@code makeStringBatchMixedCase()} for each combination of the input
 * column's {@code noNulls} and {@code isRepeating} flags, and checks the
 * lower-cased bytes together with the flags propagated to the output column.
 */
@Test
public void testColLower() {
    // Scenario 1: input has nulls and is not repeating.
    VectorizedRowBatch rows = makeStringBatchMixedCase();
    StringLower lower = new StringLower(0, 1);
    lower.evaluate(rows);
    BytesColumnVector result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(mixedUpLower, 0, mixedUpLower.length,
            result.vector[0], result.start[0], result.length[0]));
    // The third row is expected to come out null.
    Assert.assertTrue(result.isNull[2]);
    Assert.assertEquals(0,
        StringExpr.compare(green, 0, green.length,
            result.vector[1], result.start[1], result.length[1]));

    // Scenario 2: input flagged as having no nulls, not repeating.
    rows = makeStringBatchMixedCase();
    rows.cols[0].noNulls = true;
    lower.evaluate(rows);
    result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(mixedUpLower, 0, mixedUpLower.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertTrue(result.noNulls);

    // Scenario 3: input has nulls and is repeating.
    rows = makeStringBatchMixedCase();
    rows.cols[0].isRepeating = true;
    lower.evaluate(rows);
    result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(mixedUpLower, 0, mixedUpLower.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertTrue(result.isRepeating);
    Assert.assertFalse(result.noNulls);

    // Scenario 4: input flagged as having no nulls and is repeating.
    rows = makeStringBatchMixedCase();
    rows.cols[0].isRepeating = true;
    rows.cols[0].noNulls = true;
    lower.evaluate(rows);
    result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(mixedUpLower, 0, mixedUpLower.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertTrue(result.isRepeating);
    Assert.assertTrue(result.noNulls);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Example 77 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorStringExpressions method testColUpper.

/**
 * Runs {@code StringUpper} (reading column 0, writing column 1) for the
 * "no nulls, not repeating" case only.
 *
 * <p>The remaining flag combinations are deliberately not repeated here:
 * (at least currently) the logic is inherited and is the same as what
 * {@code testColLower} exercises, and that test checks all the cases.
 */
@Test
public void testColUpper() {
    VectorizedRowBatch rows = makeStringBatchMixedCase();
    StringUpper upper = new StringUpper(0, 1);
    // Declare the input column null-free before evaluating.
    rows.cols[0].noNulls = true;
    upper.evaluate(rows);

    BytesColumnVector result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(mixedUpUpper, 0, mixedUpUpper.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertTrue(result.noNulls);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Example 78 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorStringExpressions method testStringScalarConcatCol.

/**
 * Runs {@code StringScalarConcatStringGroupCol} with the scalar {@code red}
 * prepended to column 0 (output in column 1) over batches from
 * {@code makeStringBatch()}, for each combination of the input column's
 * {@code noNulls} and {@code isRepeating} flags.
 */
@Test
public void testStringScalarConcatCol() {
    // Scenario 1: input has nulls and is not repeating.
    VectorizedRowBatch rows = makeStringBatch();
    StringScalarConcatStringGroupCol concat = new StringScalarConcatStringGroupCol(red, 0, 1);
    concat.evaluate(rows);
    BytesColumnVector result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(redred, 0, redred.length,
            result.vector[0], result.start[0], result.length[0]));
    // The third row is expected to come out null.
    Assert.assertTrue(result.isNull[2]);
    Assert.assertEquals(0,
        StringExpr.compare(redgreen, 0, redgreen.length,
            result.vector[1], result.start[1], result.length[1]));
    Assert.assertFalse(result.noNulls);
    Assert.assertFalse(result.isRepeating);

    // Scenario 2: input flagged as having no nulls, not repeating.
    rows = makeStringBatch();
    rows.cols[0].noNulls = true;
    concat.evaluate(rows);
    result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(redred, 0, redred.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertEquals(0,
        StringExpr.compare(redgreen, 0, redgreen.length,
            result.vector[1], result.start[1], result.length[1]));
    // With the null flag ignored, the third row is expected to equal the bare scalar.
    Assert.assertEquals(0,
        StringExpr.compare(red, 0, red.length,
            result.vector[2], result.start[2], result.length[2]));
    Assert.assertTrue(result.noNulls);
    Assert.assertFalse(result.isRepeating);

    // Scenario 3: input has nulls and is repeating.
    rows = makeStringBatch();
    rows.cols[0].isRepeating = true;
    concat.evaluate(rows);
    result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(redred, 0, redred.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertTrue(result.isRepeating);
    Assert.assertFalse(result.noNulls);

    // Scenario 4: input flagged as having no nulls and is repeating.
    rows = makeStringBatch();
    rows.cols[0].isRepeating = true;
    rows.cols[0].noNulls = true;
    concat.evaluate(rows);
    result = (BytesColumnVector) rows.cols[1];
    Assert.assertEquals(0,
        StringExpr.compare(redred, 0, redred.length,
            result.vector[0], result.start[0], result.length[0]));
    Assert.assertTrue(result.isRepeating);
    Assert.assertTrue(result.noNulls);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Example 79 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorStringExpressions method testSubstrStart.

/**
 * Exercises {@code StringSubstrColStart} reading column 0 and writing column 1
 * with start indexes 6, -6, 1, 3 and 2, covering: no nulls / not repeating,
 * a negative start index, a null input row, a repeating input, and multi-byte
 * input (both from the start of the backing array and from an offset into it).
 *
 * <p>Between evaluations the test overwrites the output vector's
 * {@code noNulls}/{@code isRepeating} flags with values opposite to what the
 * next assertions expect, so those assertions only pass if {@code evaluate}
 * rewrites the flags.
 *
 * @throws UnsupportedEncodingException declared because the fixtures are built
 *         with {@code String.getBytes("UTF-8")}
 */
@Test
public void testSubstrStart() throws UnsupportedEncodingException {
    // Testing no nulls and no repeating
    // Two-column batch: column 0 is the input (v), column 1 the output (outV).
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    BytesColumnVector v = new BytesColumnVector();
    batch.cols[0] = v;
    BytesColumnVector outV = new BytesColumnVector();
    batch.cols[1] = outV;
    byte[] data1 = "abcd string".getBytes("UTF-8");
    byte[] data2 = "efgh string".getBytes("UTF-8");
    byte[] data3 = "efgh".getBytes("UTF-8");
    batch.size = 3;
    v.noNulls = true;
    v.setRef(0, data1, 0, data1.length);
    v.isNull[0] = false;
    v.setRef(1, data2, 0, data2.length);
    v.isNull[1] = false;
    v.setRef(2, data3, 0, data3.length);
    v.isNull[2] = false;
    StringSubstrColStart expr = new StringSubstrColStart(0, 6, 1);
    expr.evaluate(batch);
    BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    byte[] expected = "string".getBytes("UTF-8");
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    // This yields empty because starting idx is out of bounds.
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // Set the output flags to the opposite of what is asserted after the next evaluate.
    outCol.noNulls = false;
    outCol.isRepeating = true;
    // Testing negative substring index.
    // Start index -6 should yield the last 6 characters of the string
    expr = new StringSubstrColStart(0, -6, 1);
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    // Row 2 ("efgh") is only 4 characters long; the result is expected to equal emptyString.
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // Set the output flags to the opposite of what is asserted after the next evaluate.
    outCol.noNulls = false;
    outCol.isRepeating = true;
    // Testing substring starting from index 1
    // Every row is expected to come back unchanged (compared against the full input bytes).
    expr = new StringSubstrColStart(0, 1, 1);
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    Assert.assertEquals(0, StringExpr.compare(data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    Assert.assertEquals(0, StringExpr.compare(data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // outV and outCol refer to the same vector here (batch.cols[1]).
    outV.noNulls = false;
    outV.isRepeating = true;
    // Testing with nulls
    // Mark row 0 of the input as null; rows 1 and 2 keep their data.
    expr = new StringSubstrColStart(0, 6, 1);
    v.noNulls = false;
    v.isNull[0] = true;
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    Assert.assertFalse(outV.noNulls);
    Assert.assertTrue(outV.isNull[0]);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // NOTE(review): outCol still refers to the output vector that is replaced just
    // below, so these two writes do not influence any later assertion.
    outCol.noNulls = false;
    outCol.isRepeating = false;
    // Testing with repeating and no nulls
    // Fresh input/output vectors and a fresh batch; only row 0 of the input is populated.
    outV = new BytesColumnVector();
    v = new BytesColumnVector();
    v.isRepeating = true;
    v.noNulls = true;
    v.setRef(0, data1, 0, data1.length);
    // NOTE(review): batch.size is not set on this new batch; it keeps whatever the
    // VectorizedRowBatch constructor assigns - confirm that is intended.
    batch = new VectorizedRowBatch(2);
    batch.cols[0] = v;
    batch.cols[1] = outV;
    // expr is still StringSubstrColStart(0, 6, 1) from the previous section.
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    expected = "string".getBytes("UTF-8");
    Assert.assertTrue(outV.isRepeating);
    Assert.assertTrue(outV.noNulls);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    // Testing multiByte string substring
    // Input row 0 references the first 10 bytes of multiByte.
    v = new BytesColumnVector();
    v.isRepeating = false;
    v.noNulls = true;
    v.setRef(0, multiByte, 0, 10);
    batch.cols[0] = v;
    batch.cols[1] = outV;
    // Set the output flags to the opposite of what is asserted after evaluate.
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStart(0, 3, 1);
    batch.size = 1;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertFalse(outV.isRepeating);
    Assert.assertTrue(outV.noNulls);
    Assert.assertEquals(0, StringExpr.compare(// 3rd char starts at byte index 3; expected length is 7 bytes because the reference is capped at 10 bytes
    multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]));
    // Testing multiByte string with reference starting mid array
    v = new BytesColumnVector();
    v.isRepeating = false;
    v.noNulls = true;
    // string is 2 chars long (a 3 byte and a 4 byte char)
    v.setRef(0, multiByte, 3, 7);
    batch.cols[0] = v;
    batch.cols[1] = outV;
    // Set the output flags to the opposite of what is asserted after evaluate.
    outV.isRepeating = true;
    outV.noNulls = false;
    outCol = (BytesColumnVector) batch.cols[1];
    expr = new StringSubstrColStart(0, 2, 1);
    expr.evaluate(batch);
    Assert.assertFalse(outV.isRepeating);
    Assert.assertTrue(outV.noNulls);
    Assert.assertEquals(0, StringExpr.compare(// the result is the last 1 character, which occupies 4 bytes
    multiByte, 6, 4, outCol.vector[0], outCol.start[0], outCol.length[0]));
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Example 80 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorStringExpressions method testRegex.

/**
 * Test vectorized regex expression.
 *
 * <p>Filters column 0 of a five-row batch with the pattern {@code "a.*"}.
 * Rows 0, 2 and 3 hold strings containing an {@code 'a'}; row 1 is null and
 * row 4 holds a string without any {@code 'a'}. After evaluation the batch is
 * expected to select exactly rows 0, 2 and 3.
 *
 * <p>All strings are encoded explicitly as UTF-8 rather than with the
 * platform default charset, so the bytes fed to the expression do not depend
 * on the JVM's locale settings.
 *
 * @throws HiveException declared by the original test; propagated from the
 *         expression under test
 */
@Test
public void testRegex() throws HiveException {
    // Fully qualified because this block cannot add a file-level import.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    VectorizedRowBatch b = makeStringBatch();
    FilterStringColRegExpStringScalar expr = new FilterStringColRegExpStringScalar(0, "a.*".getBytes(utf8));
    b.size = 5;
    b.selectedInUse = false;
    BytesColumnVector v = (BytesColumnVector) b.cols[0];
    v.isRepeating = false;
    v.noNulls = false;

    // Encode each input once and reuse the array for both the reference and its length.
    byte[] s1 = "4kMasVoB7lX1wc5i64bNk".getBytes(utf8);
    byte[] s2 = "a27V63IL7jK3o".getBytes(utf8);
    byte[] s3 = "27V63IL7jK3oa".getBytes(utf8);
    byte[] s4 = "27V63IL7jK3o".getBytes(utf8);

    v.isNull[0] = false;
    v.setRef(0, s1, 0, s1.length);
    // Row 1 is a null entry with no backing bytes.
    v.isNull[1] = true;
    v.vector[1] = null;
    v.isNull[2] = false;
    v.setRef(2, s2, 0, s2.length);
    v.isNull[3] = false;
    v.setRef(3, s3, 0, s3.length);
    v.isNull[4] = false;
    v.setRef(4, s4, 0, s4.length);

    expr.evaluate(b);

    // The filter must switch the batch to selection mode and keep rows 0, 2 and 3.
    Assert.assertTrue(b.selectedInUse);
    Assert.assertEquals(3, b.size);
    Assert.assertEquals(0, b.selected[0]);
    Assert.assertEquals(2, b.selected[1]);
    Assert.assertEquals(3, b.selected[2]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)124 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)66 Test (org.junit.Test)50 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)44 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)12 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)10 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)8 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)8 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)8 Text (org.apache.hadoop.io.Text)8 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)6 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)4 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 TypeDescription (org.apache.orc.TypeDescription)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 ParseException (java.text.ParseException)3 Random (java.util.Random)3