Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in the Apache Hive project.
From the class TestVectorStringExpressions, method testSubstrStartLen.
/**
 * Exercises {@code StringSubstrColStartLen} against a {@code BytesColumnVector} input placed in
 * column 0 of the batch, reading the results back from column 1.
 *
 * <p>Scenarios, in order: positive start, negative start, zero length, start 0 with full length,
 * length running past the end of the value, a null input row, a repeating input, and two
 * multi-byte inputs. Before most evaluations the output vector's {@code isRepeating} and
 * {@code noNulls} flags are set to the opposite of what is asserted afterwards, so the
 * assertions also check that stale output flags get overwritten.
 *
 * <p>{@code emptyString} and {@code multiByte} are not declared in this method; they are
 * referenced from outside it.
 *
 * @throws UnsupportedEncodingException declared for the {@code getBytes("UTF-8")} calls
 */
@Test
public void testSubstrStartLen() throws UnsupportedEncodingException {
// Case 1: input has no nulls and is not repeating.
VectorizedRowBatch batch = new VectorizedRowBatch(2);
BytesColumnVector v = new BytesColumnVector();
batch.cols[0] = v;
BytesColumnVector outV = new BytesColumnVector();
batch.cols[1] = outV;
byte[] data1 = "abcd string".getBytes("UTF-8");
byte[] data2 = "efgh string".getBytes("UTF-8");
// Row 2 is only 4 bytes long, so it is shorter than the start index 6 used below.
byte[] data3 = "efgh".getBytes("UTF-8");
batch.size = 3;
v.noNulls = true;
v.setRef(0, data1, 0, data1.length);
v.isNull[0] = false;
v.setRef(1, data2, 0, data2.length);
v.isNull[1] = false;
v.setRef(2, data3, 0, data3.length);
v.isNull[2] = false;
// Stale output flags: the assertions after evaluate() expect the opposite values.
outV.isRepeating = true;
outV.noNulls = false;
// NOTE(review): constructor arguments appear to be (input column, start, length, output column)
// -- confirm against StringSubstrColStartLen.
StringSubstrColStartLen expr = new StringSubstrColStartLen(0, 6, 6, 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
byte[] expected = "string".getBytes("UTF-8");
// Rows 0 and 1 must yield "string"; row 2 must compare equal to emptyString.
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Case 2: negative substring index (start = -6, length = 6).
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, -6, 6, 1);
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(3, batch.size);
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
// This yields empty because starting index is out of bounds
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Case 3: substring index starting with 1 and zero length; every row is compared against a
// zero-length slice of its own input.
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 1, 0, 1);
outCol = (BytesColumnVector) batch.cols[1];
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0, StringExpr.compare(data1, 1, 0, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(data2, 1, 0, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(data3, 1, 0, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Case 4: substring index starting with 0 and length equal to array length (11); every row is
// expected to come back as its complete input, including the 4-byte row 2.
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 0, 11, 1);
outCol = (BytesColumnVector) batch.cols[1];
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0, StringExpr.compare(data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Case 5: setting length larger than array length, which should cap to the length itself
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 6, 10, 1);
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Stale output flags for the null case: noNulls is set to true here and asserted false below.
outV.isRepeating = true;
outV.noNulls = true;
// Case 6: input row 0 is null; the (0, 6, 10, 1) expression from case 5 is reused.
v.noNulls = false;
v.isNull[0] = true;
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertFalse(outV.noNulls);
Assert.assertTrue(outV.isNull[0]);
Assert.assertFalse(outCol.isRepeating);
// Only the non-null rows 1 and 2 have their values checked.
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
// Case 7: repeating input on fresh vectors and a fresh batch; the expression from case 5 is
// still in use.
// NOTE(review): this case was labelled "repeating and no nulls", yet v.noNulls is set to false
// and no isNull entry is assigned on the new vector in this method -- confirm intent.
// NOTE(review): batch.size is not assigned on the new batch here; the case relies on whatever
// size the VectorizedRowBatch constructor provides -- confirm.
outV = new BytesColumnVector();
v = new BytesColumnVector();
outV.isRepeating = false;
outV.noNulls = true;
v.isRepeating = true;
v.noNulls = false;
v.setRef(0, data1, 0, data1.length);
batch = new VectorizedRowBatch(2);
batch.cols[0] = v;
batch.cols[1] = outV;
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertTrue(outCol.noNulls);
Assert.assertTrue(outCol.isRepeating);
Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
// Case 8: multi-byte string; the input references the first 10 bytes of multiByte.
// outCol is not reassigned in this case and still refers to the outV created in case 7.
v = new BytesColumnVector();
v.isRepeating = false;
v.noNulls = true;
batch.size = 1;
v.setRef(0, multiByte, 0, 10);
batch.cols[0] = v;
batch.cols[1] = outV;
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 3, 2, 1);
expr.evaluate(batch);
Assert.assertEquals(1, batch.size);
Assert.assertFalse(outV.isRepeating);
Assert.assertTrue(outV.noNulls);
Assert.assertEquals(0, StringExpr.compare(// 3rd char starts at index 3, and with length 2 it is covering the rest of the array.
multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]));
// Case 9: multi-byte string with the reference set to mid array (offset 3, length 7), using a
// fresh output vector.
v = new BytesColumnVector();
v.isRepeating = false;
v.noNulls = true;
outV = new BytesColumnVector();
batch.size = 1;
v.setRef(0, multiByte, 3, 7);
batch.cols[0] = v;
batch.cols[1] = outV;
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 2, 2, 1);
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(1, batch.size);
Assert.assertFalse(outV.isRepeating);
Assert.assertTrue(outV.noNulls);
Assert.assertEquals(0, StringExpr.compare(// 2nd substring index refers to the 6th index (last char in the array)
multiByte, 6, 10 - 6, outCol.vector[0], outCol.start[0], outCol.length[0]));
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in the Apache Hive project.
From the class TestVectorGenericDateExpressions, method testDateAddColCol.
/**
 * Runs the parameterized {@code testDateAddColCol(type, true)} check for every entry of
 * {@code dateTimestampStringTypes}, then evaluates {@code VectorUDFDateAddColCol} on a single
 * row whose first input is the string {@code "error"} and asserts that the output row in
 * column 2 is flagged null.
 */
@Test
public void testDateAddColCol() {
  for (VectorExpression.Type colType1 : dateTimestampStringTypes) {
    testDateAddColCol(colType1, true);
  }

  VectorExpression udf = new VectorUDFDateAddColCol(0, 1, 2);
  udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);

  // One-row batch: column 0 holds the string input, columns 1 and 2 are long vectors.
  VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
  BytesColumnVector bcv = new BytesColumnVector(1);
  batch.cols[0] = bcv;
  batch.cols[1] = new LongColumnVector(1);
  batch.cols[2] = new LongColumnVector(1);

  byte[] bytes = "error".getBytes(utf8);
  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;

  udf.evaluate(batch);

  // assertTrue instead of assertEquals(actual, true): the original passed the arguments in
  // (actual, expected) order, which produces a misleading failure message.
  Assert.assertTrue(batch.cols[2].isNull[0]);
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in the Apache Hive project.
From the class AbstractExpression, method getBytesColumnVector.
/**
 * Builds a {@code BytesColumnVector} with {@code VectorizedRowBatch.DEFAULT_SIZE} rows, each
 * holding 16 bytes chosen at random from the letters 'a' through 'c'.
 *
 * @return the populated column vector; every row has start 0 and length 16
 */
protected BytesColumnVector getBytesColumnVector() {
  final BytesColumnVector result = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  final Random rng = new Random();
  final int valueLength = 16;
  // Number of distinct letters in the inclusive range 'a'..'c'.
  final int alphabetSize = 'c' - 'a' + 1;
  for (int row = 0; row < VectorizedRowBatch.DEFAULT_SIZE; row++) {
    final byte[] value = new byte[valueLength];
    for (int pos = 0; pos < valueLength; pos++) {
      value[pos] = (byte) (rng.nextInt(alphabetSize) + 'a');
    }
    result.vector[row] = value;
    result.start[row] = 0;
    result.length[row] = valueLength;
  }
  return result;
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in the Apache Hive project.
From the class BatchToRowReader, method nextString.
/**
 * Reads the value at {@code row} of a bytes column into a {@code Text}.
 *
 * <p>When the vector is flagged repeating, index 0 is read regardless of {@code row}.
 *
 * @param vector the column to read; cast to {@code BytesColumnVector} for non-null rows
 * @param row the row index to read
 * @param previous an object to reuse; reused only if its class is exactly {@code Text}
 * @return the populated {@code Text}, or {@code null} if the row is null
 */
public static Text nextString(ColumnVector vector, int row, Object previous) {
  final int index = vector.isRepeating ? 0 : row;
  if (!vector.noNulls && vector.isNull[index]) {
    return null;
  }
  // Exact class match (not instanceof): subclasses of Text are not reused.
  final boolean reusable = previous != null && previous.getClass() == Text.class;
  final Text text = reusable ? (Text) previous : new Text();
  final BytesColumnVector source = (BytesColumnVector) vector;
  text.set(source.vector[index], source.start[index], source.length[index]);
  return text;
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in the Apache Hive project.
From the class BatchToRowReader, method nextChar.
/**
 * Reads the value at {@code row} of a bytes column into a {@code HiveCharWritable}.
 *
 * <p>When the vector is flagged repeating, index 0 is read regardless of {@code row}.
 *
 * @param vector the column to read; cast to {@code BytesColumnVector} for non-null rows
 * @param row the row index to read
 * @param size passed through as the second argument of {@code HiveCharWritable.set}
 * @param previous an object to reuse; reused only if its class is exactly
 *     {@code HiveCharWritable}
 * @return the populated writable, or {@code null} if the row is null
 */
public static HiveCharWritable nextChar(ColumnVector vector, int row, int size, Object previous) {
  final int index = vector.isRepeating ? 0 : row;
  if (!vector.noNulls && vector.isNull[index]) {
    return null;
  }
  // Exact class match (not instanceof): subclasses of HiveCharWritable are not reused.
  final boolean reusable = previous != null && previous.getClass() == HiveCharWritable.class;
  final HiveCharWritable writable = reusable ? (HiveCharWritable) previous : new HiveCharWritable();
  final BytesColumnVector source = (BytesColumnVector) vector;
  writable.set(source.toString(index), size);
  return writable;
}
Aggregations