Search in sources :

Example 51 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class TestVectorUDFAdaptor method getBatchStrDblLongWithStrOut.

private VectorizedRowBatch getBatchStrDblLongWithStrOut() {
    VectorizedRowBatch b = new VectorizedRowBatch(4);
    BytesColumnVector strCol = new BytesColumnVector();
    LongColumnVector longCol = new LongColumnVector();
    DoubleColumnVector dblCol = new DoubleColumnVector();
    BytesColumnVector outCol = new BytesColumnVector();
    b.cols[0] = strCol;
    b.cols[1] = longCol;
    b.cols[2] = dblCol;
    b.cols[3] = outCol;
    strCol.initBuffer();
    strCol.setVal(0, blue, 0, blue.length);
    strCol.setVal(1, red, 0, red.length);
    longCol.vector[0] = 0;
    longCol.vector[1] = 1;
    dblCol.vector[0] = 0.0;
    dblCol.vector[1] = 1.0;
    // set one null value for possible later use
    longCol.isNull[1] = true;
    // but have no nulls initially
    longCol.noNulls = true;
    strCol.noNulls = true;
    dblCol.noNulls = true;
    outCol.initBuffer();
    b.size = 2;
    return b;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 52 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class TestVectorUDFAdaptor method testLongUDF.

@Test
public void testLongUDF() {
    // create a syntax tree for a simple function call "longudf(col0)"
    ExprNodeGenericFuncDesc funcDesc;
    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false, LongUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
    children.add(colDesc);
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
    argDescs[0] = new VectorUDFArgDesc();
    argDescs[0].setVariable(0);
    funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge, genericUDFBridge.getUdfName(), children);
    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf = null;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
    } catch (HiveException e) {
        // We should never get here.
        assertTrue(false);
    }
    VectorizedRowBatch b = getBatchLongInLongOut();
    vudf.evaluate(b);
    // verify output
    LongColumnVector out = (LongColumnVector) b.cols[1];
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertEquals(1002, out.vector[2]);
    assertTrue(out.noNulls);
    assertFalse(out.isRepeating);
    // with nulls
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].noNulls = false;
    vudf.evaluate(b);
    assertFalse(out.noNulls);
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertTrue(out.isNull[2]);
    assertFalse(out.isRepeating);
    // with repeating
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].isRepeating = true;
    vudf.evaluate(b);
    // The implementation may or may not set output it isRepeting.
    // That is implementation-defined.
    assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000 || !b.cols[1].isRepeating && out.vector[2] == 1000);
    assertEquals(3, b.size);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LongUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 53 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class TestVectorUDFAdaptor method getBatchLongInLongOut.

private VectorizedRowBatch getBatchLongInLongOut() {
    VectorizedRowBatch b = new VectorizedRowBatch(2);
    LongColumnVector in = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    b.cols[0] = in;
    b.cols[1] = out;
    in.vector[0] = 0;
    in.vector[1] = 1;
    in.vector[2] = 2;
    in.isNull[2] = true;
    in.noNulls = true;
    b.size = 3;
    return b;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 54 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class TestVectorGenericDateExpressions method validateDateAdd.

private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnVector date2, VectorExpression.Type colType1, boolean isPositive) {
    VectorExpression udf = null;
    if (isPositive) {
        switch(colType1) {
            case DATE:
                udf = new VectorUDFDateAddScalarCol(scalar1, 0, 1);
                break;
            case TIMESTAMP:
                udf = new VectorUDFDateAddScalarCol(toTimestamp(scalar1), 0, 1);
                break;
            case STRING:
            case CHAR:
            case VARCHAR:
                udf = new VectorUDFDateAddScalarCol(toString(scalar1), 0, 1);
                break;
            default:
                throw new Error("Invalid input type: " + colType1.name());
        }
    } else {
        switch(colType1) {
            case DATE:
                udf = new VectorUDFDateSubScalarCol(scalar1, 0, 1);
                break;
            case TIMESTAMP:
                udf = new VectorUDFDateSubScalarCol(toTimestamp(scalar1), 0, 1);
                break;
            case STRING:
            case CHAR:
            case VARCHAR:
                udf = new VectorUDFDateSubScalarCol(toString(scalar1), 0, 1);
                break;
            default:
                throw new Error("Invalid input type: " + colType1.name());
        }
    }
    udf.setInputTypes(colType1, VectorExpression.Type.OTHER);
    udf.evaluate(batch);
    LongColumnVector output = (LongColumnVector) batch.cols[1];
    try {
        for (int i = 0; i < date2.vector.length; i++) {
            String expected;
            if (isPositive) {
                expected = new String(toString(scalar1 + date2.vector[i]), utf8);
            } else {
                expected = new String(toString(scalar1 - date2.vector[i]), utf8);
            }
            if (date2.isNull[i]) {
                Assert.assertTrue(output.isNull[i]);
            } else {
                Assert.assertEquals(expected, new String(toString(output.vector[i])));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 55 with LongColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.

the class ColumnVectorGenUtil method generateLongColumnVector.

public static LongColumnVector generateLongColumnVector(boolean nulls, boolean repeating, int size, Random rand) {
    LongColumnVector lcv = new LongColumnVector(size);
    lcv.noNulls = !nulls;
    lcv.isRepeating = repeating;
    long repeatingValue;
    do {
        repeatingValue = rand.nextLong();
    } while (repeatingValue == 0);
    int nullFrequency = generateNullFrequency(rand);
    for (int i = 0; i < size; i++) {
        if (nulls && (repeating || i % nullFrequency == 0)) {
            lcv.isNull[i] = true;
            lcv.vector[i] = LONG_VECTOR_NULL_VALUE;
        } else {
            lcv.isNull[i] = false;
            lcv.vector[i] = repeating ? repeatingValue : rand.nextLong();
            if (lcv.vector[i] == 0) {
                i--;
            }
        }
    }
    return lcv;
}
Also used : LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Aggregations

LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)277 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)133 Test (org.junit.Test)73 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)64 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)45 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)34 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)33 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)28 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)20 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)15 Random (java.util.Random)13 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)9 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)7 LongColAddLongScalar (org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar)7 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)7 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)7 Timestamp (java.sql.Timestamp)6 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)6 IOException (java.io.IOException)5 Configuration (org.apache.hadoop.conf.Configuration)5