Search in sources :

Example 86 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The method testSubstrStartLen of the class TestVectorStringExpressions.

/**
 * Exercises {@code StringSubstrColStartLen} over a two-column batch: column 0 is the
 * input {@link BytesColumnVector} and column 1 receives the output.
 *
 * <p>The scenarios run in sequence and share state (batch, vectors and {@code expr}),
 * so their order matters: positive start, negative start, zero length, start 0 with
 * full length, length larger than the remaining bytes, a null input row, a repeating
 * input, and two multi-byte inputs.
 *
 * <p>Before most evaluations the output flags are set to the opposite of the values
 * asserted afterwards ({@code isRepeating = true}, {@code noNulls = false}), so those
 * assertions can only pass if {@code evaluate} overwrites the flags.
 *
 * <p>{@code emptyString} and {@code multiByte} are not declared in this method; they
 * are presumably fixtures of the enclosing test class - TODO confirm their contents.
 *
 * @throws UnsupportedEncodingException declared because of {@code String.getBytes("UTF-8")}
 */
@Test
public void testSubstrStartLen() throws UnsupportedEncodingException {
    // Testing no nulls and no repeating
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    BytesColumnVector v = new BytesColumnVector();
    batch.cols[0] = v;
    BytesColumnVector outV = new BytesColumnVector();
    batch.cols[1] = outV;
    // Two 11-byte rows ending in "string" and one 4-byte row that is shorter than the start index used below.
    byte[] data1 = "abcd string".getBytes("UTF-8");
    byte[] data2 = "efgh string".getBytes("UTF-8");
    byte[] data3 = "efgh".getBytes("UTF-8");
    batch.size = 3;
    v.noNulls = true;
    v.setRef(0, data1, 0, data1.length);
    v.isNull[0] = false;
    v.setRef(1, data2, 0, data2.length);
    v.isNull[1] = false;
    v.setRef(2, data3, 0, data3.length);
    v.isNull[2] = false;
    // Deliberately wrong output flags; the assertions below expect evaluate() to reset them.
    outV.isRepeating = true;
    outV.noNulls = false;
    // Arguments are presumably (input column, start index, length, output column) - TODO confirm
    // against the StringSubstrColStartLen constructor. Start 6 yields "string" from the 11-byte
    // rows, i.e. the assertions treat the start index as 1-based.
    StringSubstrColStartLen expr = new StringSubstrColStartLen(0, 6, 6, 1);
    expr.evaluate(batch);
    BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    byte[] expected = "string".getBytes("UTF-8");
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    // Row 2 ("efgh") is shorter than the start index, so an empty result is expected.
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // Testing negative substring index
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStartLen(0, -6, 6, 1);
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    Assert.assertEquals(3, batch.size);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    // This yields empty because starting index is out of bounds
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    //Testing substring index starting with 1 and zero length
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStartLen(0, 1, 0, 1);
    // outCol is fetched before evaluate() here; it refers to the same outV instance either way.
    outCol = (BytesColumnVector) batch.cols[1];
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    // The expected side of each comparison is a zero-length slice, so every output must be empty.
    Assert.assertEquals(0, StringExpr.compare(data1, 1, 0, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(data2, 1, 0, outCol.vector[1], outCol.start[1], outCol.length[1]));
    Assert.assertEquals(0, StringExpr.compare(data3, 1, 0, outCol.vector[2], outCol.start[2], outCol.length[2]));
    //Testing substring index starting with 0 and length equal to array length
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStartLen(0, 0, 11, 1);
    outCol = (BytesColumnVector) batch.cols[1];
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    // Each row is expected back in full, including the 4-byte row that is shorter than the requested length.
    Assert.assertEquals(0, StringExpr.compare(data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    Assert.assertEquals(0, StringExpr.compare(data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // Testing setting length larger than array length, which should cap to the length itself
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStartLen(0, 6, 10, 1);
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertEquals(3, batch.size);
    Assert.assertTrue(outCol.noNulls);
    Assert.assertFalse(outCol.isRepeating);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // Here the output is pre-marked as having no nulls, the opposite of what is asserted below.
    outV.isRepeating = true;
    outV.noNulls = true;
    // Testing with nulls
    // Row 0 becomes null; the expression from the previous section (start 6, length 10) is reused.
    v.noNulls = false;
    v.isNull[0] = true;
    expr.evaluate(batch);
    Assert.assertEquals(3, batch.size);
    Assert.assertFalse(outV.noNulls);
    Assert.assertTrue(outV.isNull[0]);
    Assert.assertFalse(outCol.isRepeating);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]));
    Assert.assertEquals(0, StringExpr.compare(emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]));
    // Testing with repeating and no nulls
    // NOTE(review): the header says "no nulls", yet v.noNulls is set to false below and
    // v.isNull[0] is never assigned on the new vector - confirm whether noNulls = true was intended.
    // NOTE(review): batch.size is not set on the new batch; this relies on whatever
    // VectorizedRowBatch(int) initialises it to - TODO confirm.
    outV = new BytesColumnVector();
    v = new BytesColumnVector();
    outV.isRepeating = false;
    outV.noNulls = true;
    v.isRepeating = true;
    v.noNulls = false;
    v.setRef(0, data1, 0, data1.length);
    batch = new VectorizedRowBatch(2);
    batch.cols[0] = v;
    batch.cols[1] = outV;
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertTrue(outCol.noNulls);
    Assert.assertTrue(outCol.isRepeating);
    Assert.assertEquals(0, StringExpr.compare(expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]));
    // Testing with multiByte String
    // outV (and therefore outCol) is still the vector created in the previous section.
    v = new BytesColumnVector();
    v.isRepeating = false;
    v.noNulls = true;
    batch.size = 1;
    v.setRef(0, multiByte, 0, 10);
    batch.cols[0] = v;
    batch.cols[1] = outV;
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStartLen(0, 3, 2, 1);
    expr.evaluate(batch);
    Assert.assertEquals(1, batch.size);
    Assert.assertFalse(outV.isRepeating);
    Assert.assertTrue(outV.noNulls);
    Assert.assertEquals(0, StringExpr.compare(// 3rd char starts at index 3, and with length 2 it is covering the rest of the array.
    multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]));
    // Testing multiByte string with reference set to mid array
    v = new BytesColumnVector();
    v.isRepeating = false;
    v.noNulls = true;
    outV = new BytesColumnVector();
    batch.size = 1;
    // The input row references bytes 3..9 of multiByte (offset 3, length 7).
    v.setRef(0, multiByte, 3, 7);
    batch.cols[0] = v;
    batch.cols[1] = outV;
    outV.isRepeating = true;
    outV.noNulls = false;
    expr = new StringSubstrColStartLen(0, 2, 2, 1);
    expr.evaluate(batch);
    outCol = (BytesColumnVector) batch.cols[1];
    Assert.assertEquals(1, batch.size);
    Assert.assertFalse(outV.isRepeating);
    Assert.assertTrue(outV.noNulls);
    Assert.assertEquals(0, StringExpr.compare(// 2nd substring index refers to the 6th index (last char in the array)
    multiByte, 6, 10 - 6, outCol.vector[0], outCol.start[0], outCol.length[0]));
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Example 87 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The method testDateAddColCol of the class TestVectorGenericDateExpressions.

/**
 * Verifies {@code VectorUDFDateAddColCol} in two steps.
 *
 * <p>First it runs the parameterised {@code testDateAddColCol(type, true)} helper for every
 * entry of {@code dateTimestampStringTypes}. Then it feeds a single STRING row containing
 * the text {@code "error"} through the UDF and checks that the output row is flagged null.
 */
@Test
public void testDateAddColCol() {
    for (VectorExpression.Type colType1 : dateTimestampStringTypes) {
        testDateAddColCol(colType1, true);
    }

    // Columns 0 and 1 are the inputs and column 2 is the output, matching the UDF arguments.
    VectorExpression udf = new VectorUDFDateAddColCol(0, 1, 2);
    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
    byte[] bytes = "error".getBytes(utf8);
    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);

    BytesColumnVector bcv = new BytesColumnVector(1);
    batch.cols[0] = bcv;
    batch.cols[1] = new LongColumnVector(1);
    batch.cols[2] = new LongColumnVector(1);

    // Row 0 of the string column holds the text "error" instead of a date.
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;

    udf.evaluate(batch);

    // assertTrue avoids the reversed (actual, expected) argument order of the previous
    // assertEquals call, which would have produced a misleading failure message.
    Assert.assertTrue("output row for the \"error\" input should be null", batch.cols[2].isNull[0]);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 88 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The method getBytesColumnVector of the class AbstractExpression.

/**
 * Builds a {@link BytesColumnVector} with {@code VectorizedRowBatch.DEFAULT_SIZE} rows.
 *
 * <p>Every row gets its own freshly allocated 16-byte array, referenced from offset 0 with
 * length 16, whose bytes are drawn at random from the characters {@code 'a'} to {@code 'c'}.
 * The {@link Random} is unseeded, so the contents differ from call to call.
 *
 * @return the populated column vector
 */
protected BytesColumnVector getBytesColumnVector() {
    final int rowCount = VectorizedRowBatch.DEFAULT_SIZE;
    final BytesColumnVector result = new BytesColumnVector(rowCount);
    final Random rng = new Random();
    final int valueLength = 16;
    // Number of distinct byte values that can be generated: 'a', 'b' and 'c'.
    final int alphabetSize = 'c' - 'a' + 1;
    for (int row = 0; row < rowCount; row++) {
        final byte[] value = new byte[valueLength];
        for (int pos = 0; pos < valueLength; pos++) {
            value[pos] = (byte) (rng.nextInt(alphabetSize) + 'a');
        }
        result.vector[row] = value;
        result.start[row] = 0;
        result.length[row] = valueLength;
    }
    return result;
}
Also used : Random(java.util.Random) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)

Example 89 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The method nextString of the class BatchToRowReader.

/**
 * Reads one string value out of a column vector into a {@link Text}.
 *
 * @param vector   the column to read; it is cast to {@link BytesColumnVector} when the
 *                 entry is not null
 * @param row      the row to read; ignored in favour of row 0 when the vector is repeating
 * @param previous an object that may be reused as the result; it is reused only when its
 *                 class is exactly {@code Text}, otherwise a new {@code Text} is created
 * @return the {@code Text} holding the row's bytes, or {@code null} when the vector has
 *         nulls and the entry is flagged null
 */
public static Text nextString(ColumnVector vector, int row, Object previous) {
    // A repeating vector keeps its single value at index 0.
    final int index = vector.isRepeating ? 0 : row;
    if (!vector.noNulls && vector.isNull[index]) {
        return null;
    }
    final boolean reusable = previous != null && previous.getClass() == Text.class;
    final Text result = reusable ? (Text) previous : new Text();
    final BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.vector[index], bytes.start[index], bytes.length[index]);
    return result;
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Text(org.apache.hadoop.io.Text)

Example 90 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

The method nextChar of the class BatchToRowReader.

/**
 * Reads one char value out of a column vector into a {@link HiveCharWritable}.
 *
 * @param vector   the column to read; it is cast to {@link BytesColumnVector} when the
 *                 entry is not null
 * @param row      the row to read; ignored in favour of row 0 when the vector is repeating
 * @param size     passed as the second argument to {@code HiveCharWritable.set}
 *                 (presumably the maximum char length - TODO confirm)
 * @param previous an object that may be reused as the result; it is reused only when its
 *                 class is exactly {@code HiveCharWritable}, otherwise a new one is created
 * @return the writable holding the row's value, or {@code null} when the vector has nulls
 *         and the entry is flagged null
 */
public static HiveCharWritable nextChar(ColumnVector vector, int row, int size, Object previous) {
    // A repeating vector keeps its single value at index 0.
    final int index = vector.isRepeating ? 0 : row;
    if (!vector.noNulls && vector.isNull[index]) {
        return null;
    }
    final boolean reusable = previous != null && previous.getClass() == HiveCharWritable.class;
    final HiveCharWritable result = reusable ? (HiveCharWritable) previous : new HiveCharWritable();
    final BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.toString(index), size);
    return result;
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable)

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)124 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)66 Test (org.junit.Test)50 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)44 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)12 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)10 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)8 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)8 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)8 Text (org.apache.hadoop.io.Text)8 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)6 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)4 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 TypeDescription (org.apache.orc.TypeDescription)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 ParseException (java.text.ParseException)3 Random (java.util.Random)3