Search in sources :

Example 66 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class RecordReaderImpl method nextString.

static Text nextString(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        Text result;
        if (previous == null || previous.getClass() != Text.class) {
            result = new Text();
        } else {
            result = (Text) previous;
        }
        BytesColumnVector bytes = (BytesColumnVector) vector;
        result.set(bytes.vector[row], bytes.start[row], bytes.length[row]);
        return result;
    } else {
        return null;
    }
}
Also used : BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Text(org.apache.hadoop.io.Text)

Example 67 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorTimestampExpressions method getVectorizedRowBatchStringLong.

/*
   * Input array is used to fill the entire size of the vector row batch
   */
private VectorizedRowBatch getVectorizedRowBatchStringLong(Timestamp[] inputs, int size) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    BytesColumnVector bcv = new BytesColumnVector(size);
    for (int i = 0; i < size; i++) {
        byte[] encoded = encodeTime(inputs[i % inputs.length]);
        bcv.vector[i] = encoded;
        bcv.start[i] = 0;
        bcv.length[i] = encoded.length;
    }
    batch.cols[0] = bcv;
    batch.cols[1] = new LongColumnVector(size);
    batch.size = size;
    return batch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 68 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorTimestampExpressions method getVectorizedRandomRowBatchStringLong.

private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int size) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    BytesColumnVector bcv = new BytesColumnVector(size);
    Random rand = new Random(seed);
    for (int i = 0; i < size; i++) {
        /* all 32 bit numbers qualify & multiply up to get nano-seconds */
        byte[] encoded = encodeTime(RandomTypeUtil.getRandTimestamp(rand));
        bcv.vector[i] = encoded;
        bcv.start[i] = 0;
        bcv.length[i] = encoded.length;
    }
    batch.cols[0] = bcv;
    batch.cols[1] = new LongColumnVector(size);
    batch.size = size;
    return batch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) Random(java.util.Random) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 69 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorMathFunctions method getBatchForStringMath.

public static VectorizedRowBatch getBatchForStringMath() {
    VectorizedRowBatch batch = new VectorizedRowBatch(3);
    LongColumnVector inL;
    BytesColumnVector inS, outS;
    inL = new LongColumnVector();
    inS = new BytesColumnVector();
    outS = new BytesColumnVector();
    inL.vector[0] = 0;
    inL.vector[1] = 255;
    inL.vector[2] = 0;
    inS.initBuffer();
    try {
        inS.setVal(0, "00".getBytes("UTF-8"), 0, 2);
        inS.setVal(1, "3232".getBytes("UTF-8"), 0, 4);
        byte[] bad = "bad data".getBytes("UTF-8");
        inS.setVal(2, bad, 0, bad.length);
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        Assert.assertTrue(false);
    }
    batch.cols[0] = inS;
    batch.cols[1] = inL;
    batch.cols[2] = outS;
    batch.size = 3;
    return batch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) UnsupportedEncodingException(java.io.UnsupportedEncodingException) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 70 with BytesColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

the class TestVectorGenericDateExpressions method testDateDiffColCol.

@Test
public void testDateDiffColCol() {
    for (VectorExpression.Type colType1 : dateTimestampStringTypes) {
        for (VectorExpression.Type colType2 : dateTimestampStringTypes) {
            LongColumnVector date1 = newRandomLongColumnVector(10000, size);
            LongColumnVector date2 = newRandomLongColumnVector(10000, size);
            LongColumnVector output = new LongColumnVector(size);
            VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
            batch.cols[0] = castTo(date1, colType1);
            batch.cols[1] = castTo(date2, colType2);
            batch.cols[2] = output;
            validateDateDiff(batch, date1, date2, colType1, colType2);
            TestVectorizedRowBatch.addRandomNulls(date1);
            batch.cols[0] = castTo(date1, colType1);
            validateDateDiff(batch, date1, date2, colType1, colType2);
            TestVectorizedRowBatch.addRandomNulls(date2);
            batch.cols[1] = castTo(date2, colType2);
            validateDateDiff(batch, date1, date2, colType1, colType2);
        }
    }
    VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
    BytesColumnVector bcv;
    byte[] bytes = "error".getBytes(utf8);
    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
    batch.cols[0] = new BytesColumnVector(1);
    batch.cols[1] = new TimestampColumnVector(1);
    batch.cols[2] = new LongColumnVector(1);
    bcv = (BytesColumnVector) batch.cols[0];
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[2].isNull[0], true);
    udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
    batch.cols[0] = new TimestampColumnVector(1);
    batch.cols[1] = new BytesColumnVector(1);
    batch.cols[2] = new LongColumnVector(1);
    bcv = (BytesColumnVector) batch.cols[1];
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[2].isNull[0], true);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)124 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)66 Test (org.junit.Test)50 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)44 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)12 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)10 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)8 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)8 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)8 Text (org.apache.hadoop.io.Text)8 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)6 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)4 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 TypeDescription (org.apache.orc.TypeDescription)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 ParseException (java.text.ParseException)3 Random (java.util.Random)3