use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class TestVectorStringExpressions method testColLower.
/**
 * Evaluates {@code StringLower(0, 1)} on the mixed-case string batch under the four
 * combinations of the input column's {@code noNulls} / {@code isRepeating} flags.
 * Each pass compares output column 1 against the {@code mixedUpLower} fixture and
 * checks the flags (or null markers) found on the output column.
 */
@Test
public void testColLower() {
  // Pass 1: has nulls, not repeating.
  VectorizedRowBatch rows = makeStringBatchMixedCase();
  StringLower lowerExpr = new StringLower(0, 1);
  lowerExpr.evaluate(rows);
  BytesColumnVector result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      mixedUpLower, 0, mixedUpLower.length,
      result.vector[0], result.start[0], result.length[0]));
  // Row 2 is expected to come out null in this pass.
  Assert.assertTrue(result.isNull[2]);
  Assert.assertEquals(0, StringExpr.compare(
      green, 0, green.length,
      result.vector[1], result.start[1], result.length[1]));

  // Pass 2: no nulls, not repeating.
  rows = makeStringBatchMixedCase();
  rows.cols[0].noNulls = true;
  lowerExpr.evaluate(rows);
  result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      mixedUpLower, 0, mixedUpLower.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertTrue(result.noNulls);

  // Pass 3: has nulls, is repeating.
  rows = makeStringBatchMixedCase();
  rows.cols[0].isRepeating = true;
  lowerExpr.evaluate(rows);
  result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      mixedUpLower, 0, mixedUpLower.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertTrue(result.isRepeating);
  Assert.assertFalse(result.noNulls);

  // Pass 4: no nulls, is repeating.
  rows = makeStringBatchMixedCase();
  rows.cols[0].isRepeating = true;
  rows.cols[0].noNulls = true;
  lowerExpr.evaluate(rows);
  result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      mixedUpLower, 0, mixedUpLower.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertTrue(result.isRepeating);
  Assert.assertTrue(result.noNulls);
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class TestVectorStringExpressions method testColUpper.
/**
 * Evaluates {@code StringUpper(0, 1)} on the mixed-case string batch with the input
 * column marked as having no nulls, then compares row 0 of output column 1 against
 * the {@code mixedUpUpper} fixture.
 */
@Test
public void testColUpper() {
  // Only the "no nulls, not repeating" case is covered here: (at least currently)
  // the logic is inherited to be the same as testColLower, which checks all the cases.
  VectorizedRowBatch rows = makeStringBatchMixedCase();
  StringUpper upperExpr = new StringUpper(0, 1);
  rows.cols[0].noNulls = true;
  upperExpr.evaluate(rows);

  BytesColumnVector result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      mixedUpUpper, 0, mixedUpUpper.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertTrue(result.noNulls);
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class TestVectorStringExpressions method testStringScalarConcatCol.
/**
 * Evaluates {@code StringScalarConcatStringGroupCol(red, 0, 1)} on the string batch
 * under the four combinations of the input column's {@code noNulls} /
 * {@code isRepeating} flags, comparing output column 1 against the {@code redred},
 * {@code redgreen} and {@code red} fixtures and checking the output flags.
 */
@Test
public void testStringScalarConcatCol() {
  // Pass 1: has nulls, not repeating.
  VectorizedRowBatch rows = makeStringBatch();
  StringScalarConcatStringGroupCol concatExpr = new StringScalarConcatStringGroupCol(red, 0, 1);
  concatExpr.evaluate(rows);
  BytesColumnVector result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      redred, 0, redred.length,
      result.vector[0], result.start[0], result.length[0]));
  // Row 2 is expected to come out null in this pass.
  Assert.assertTrue(result.isNull[2]);
  Assert.assertEquals(0, StringExpr.compare(
      redgreen, 0, redgreen.length,
      result.vector[1], result.start[1], result.length[1]));
  Assert.assertFalse(result.noNulls);
  Assert.assertFalse(result.isRepeating);

  // Pass 2: no nulls, not repeating.
  rows = makeStringBatch();
  rows.cols[0].noNulls = true;
  concatExpr.evaluate(rows);
  result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      redred, 0, redred.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertEquals(0, StringExpr.compare(
      redgreen, 0, redgreen.length,
      result.vector[1], result.start[1], result.length[1]));
  // With the input flagged as null-free, row 2 is expected to equal the bare scalar.
  Assert.assertEquals(0, StringExpr.compare(
      red, 0, red.length,
      result.vector[2], result.start[2], result.length[2]));
  Assert.assertTrue(result.noNulls);
  Assert.assertFalse(result.isRepeating);

  // Pass 3: has nulls, is repeating.
  rows = makeStringBatch();
  rows.cols[0].isRepeating = true;
  concatExpr.evaluate(rows);
  result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      redred, 0, redred.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertTrue(result.isRepeating);
  Assert.assertFalse(result.noNulls);

  // Pass 4: no nulls, is repeating.
  rows = makeStringBatch();
  rows.cols[0].isRepeating = true;
  rows.cols[0].noNulls = true;
  concatExpr.evaluate(rows);
  result = (BytesColumnVector) rows.cols[1];
  Assert.assertEquals(0, StringExpr.compare(
      redred, 0, redred.length,
      result.vector[0], result.start[0], result.length[0]));
  Assert.assertTrue(result.isRepeating);
  Assert.assertTrue(result.noNulls);
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class TestVectorStringExpressions method testSubstrStart.
/**
 * Exercises {@code StringSubstrColStart} (column 0 in, column 1 out, as used below)
 * through a sequence of scenarios that share and mutate the same vectors:
 * positive start index, negative start index, start index 1, a null input row,
 * a repeating input, and two multi-byte inputs.
 *
 * <p>The scenarios are order dependent: before most evaluations the output flags are
 * deliberately set to the opposite of the expected result, and the assertions that
 * follow check that {@code evaluate} overwrote them.
 *
 * @throws UnsupportedEncodingException declared because of the {@code getBytes("UTF-8")} calls
 */
@Test
public void testSubstrStart() throws UnsupportedEncodingException {
// Testing no nulls and no repeating
VectorizedRowBatch batch = new VectorizedRowBatch(2);
BytesColumnVector v = new BytesColumnVector();
batch.cols[0] = v;
BytesColumnVector outV = new BytesColumnVector();
batch.cols[1] = outV;
byte[] data1 = "abcd string".getBytes("UTF-8");
byte[] data2 = "efgh string".getBytes("UTF-8");
byte[] data3 = "efgh".getBytes("UTF-8");
// Three input rows: two 11-character strings and one 4-character string.
batch.size = 3;
v.noNulls = true;
v.setRef(0, data1, 0, data1.length);
v.isNull[0] = false;
v.setRef(1, data2, 0, data2.length);
v.isNull[1] = false;
v.setRef(2, data3, 0, data3.length);
v.isNull[2] = false;
// Start index 6: rows 0 and 1 are expected to yield "string".
StringSubstrColStart expr = new StringSubstrColStart(0, 6, 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
byte[] expected = "string".getBytes("UTF-8");
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
// This yields empty because starting idx is out of bounds.
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Poison the output flags; the assertions after the next evaluate() expect them reset.
outCol.noNulls = false;
outCol.isRepeating = true;
// Testing negative substring index.
// Start index -6 should yield the last 6 characters of the string
expr = new StringSubstrColStart(0, -6, 1);
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
// Row 2 ("efgh") has only 4 characters; the expected result for -6 is the empty string.
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Poison the output flags again before the next scenario.
outCol.noNulls = false;
outCol.isRepeating = true;
// Testing substring starting from index 1
expr = new StringSubstrColStart(0, 1, 1);
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
// Start index 1 is expected to return each input string unchanged.
Assert.assertEquals(0, StringExpr.compare(data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Poison the output flags (this time through outV) before the null scenario.
outV.noNulls = false;
outV.isRepeating = true;
// Testing with nulls
expr = new StringSubstrColStart(0, 6, 1);
v.noNulls = false;
v.isNull[0] = true;
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertFalse(outV.noNulls);
Assert.assertTrue(outV.isNull[0]);
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// These two writes are not read again: outV, v and batch are all replaced just below
// and outCol is reassigned after the next evaluate().
outCol.noNulls = false;
outCol.isRepeating = false;
// Testing with repeating and no nulls
outV = new BytesColumnVector();
v = new BytesColumnVector();
v.isRepeating = true;
v.noNulls = true;
v.setRef(0, data1, 0, data1.length);
// Fresh batch; batch.size is not assigned here, so it keeps the constructor's value.
batch = new VectorizedRowBatch(2);
batch.cols[0] = v;
batch.cols[1] = outV;
// expr is still the start-index-6 expression created for the null scenario.
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
expected = "string".getBytes("UTF-8");
Assert.assertTrue(outV.isRepeating);
Assert.assertTrue(outV.noNulls);
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
// Testing multiByte string substring
v = new BytesColumnVector();
v.isRepeating = false;
v.noNulls = true;
// Row 0 references the first 10 bytes of the multiByte fixture.
v.setRef(0, multiByte, 0, 10);
batch.cols[0] = v;
batch.cols[1] = outV;
// Poison the output flags; evaluate() is expected to flip both.
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStart(0, 3, 1);
batch.size = 1;
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertFalse(outV.isRepeating);
Assert.assertTrue(outV.noNulls);
Assert.assertEquals(0, StringExpr.compare(// 3rd char starts from index 3 and total length should be 7 bytes as max is 10
multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]));
// Testing multiByte string with reference starting mid array
v = new BytesColumnVector();
v.isRepeating = false;
v.noNulls = true;
// string is 2 chars long (a 3 byte and a 4 byte char)
v.setRef(0, multiByte, 3, 7);
batch.cols[0] = v;
batch.cols[1] = outV;
// Poison the output flags once more; batch.size is still 1 from the previous scenario.
outV.isRepeating = true;
outV.noNulls = false;
outCol = (BytesColumnVector) batch.cols[1];
expr = new StringSubstrColStart(0, 2, 1);
expr.evaluate(batch);
Assert.assertFalse(outV.isRepeating);
Assert.assertTrue(outV.noNulls);
Assert.assertEquals(0, StringExpr.compare(// the result is the last 1 character, which occupies 4 bytes
multiByte, 6, 4, outCol.vector[0], outCol.start[0], outCol.length[0]));
}
use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
the class TestVectorStringExpressions method testRegex.
/**
 * Test vectorized regex expression.
 *
 * <p>Fills column 0 of a five-row batch with four strings and one null (row 1), runs
 * the filter {@code FilterStringColRegExpStringScalar} with the pattern {@code "a.*"},
 * and checks that the batch ends up with a selection vector holding rows 0, 2 and 3.
 * The expected selection shows the match is unanchored: row 0 contains an 'a' in the
 * middle and row 3 only ends with one, yet both are kept; row 4 has no 'a' at all.
 *
 * <p>All strings are encoded explicitly as UTF-8 so the bytes handed to the expression
 * do not depend on the JVM's platform-default charset.
 *
 * @throws HiveException declared by the expression's constructor/evaluation
 */
@Test
public void testRegex() throws HiveException {
  // Fully qualified so that no additional import is required in this file.
  final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

  VectorizedRowBatch b = makeStringBatch();
  FilterStringColRegExpStringScalar expr =
      new FilterStringColRegExpStringScalar(0, "a.*".getBytes(utf8));
  b.size = 5;
  b.selectedInUse = false;
  BytesColumnVector v = (BytesColumnVector) b.cols[0];
  v.isRepeating = false;
  v.noNulls = false;

  // Encode each value once instead of calling getBytes() twice per row.
  byte[] s1 = "4kMasVoB7lX1wc5i64bNk".getBytes(utf8); // 'a' in the middle
  byte[] s2 = "a27V63IL7jK3o".getBytes(utf8);         // 'a' at the start
  byte[] s3 = "27V63IL7jK3oa".getBytes(utf8);         // 'a' at the end
  byte[] s4 = "27V63IL7jK3o".getBytes(utf8);          // no 'a'

  v.isNull[0] = false;
  v.setRef(0, s1, 0, s1.length);
  // Row 1 is a null entry with no backing bytes.
  v.isNull[1] = true;
  v.vector[1] = null;
  v.isNull[2] = false;
  v.setRef(2, s2, 0, s2.length);
  v.isNull[3] = false;
  v.setRef(3, s3, 0, s3.length);
  v.isNull[4] = false;
  v.setRef(4, s4, 0, s4.length);

  expr.evaluate(b);

  // The filter must switch the batch to a selection vector containing rows 0, 2, 3.
  Assert.assertTrue(b.selectedInUse);
  Assert.assertEquals(3, b.size);
  Assert.assertEquals(0, b.selected[0]);
  Assert.assertEquals(2, b.selected[1]);
  Assert.assertEquals(3, b.selected[2]);
}
Aggregations