Search in sources :

Example 16 with BooleanWritable

use of org.apache.hadoop.io.BooleanWritable in project hive by apache.

the class TestVectorExpressionWriters method testStructLong.

/**
 * Pushes two long column vectors through the expression writers obtained for
 * the struct inspector and checks, row by row, that field 0 comes back as an
 * {@link IntWritable} and field 1 as a {@link BooleanWritable} matching the
 * vector contents, or as {@code null} where the vector is flagged null.
 *
 * @param type not referenced by this method's body
 * @throws HiveException propagated from the expression writers
 */
private void testStructLong(TypeInfo type) throws HiveException {
    // Both vectors are generated from identically seeded Randoms; they differ
    // only in which extra row is forced to null below.
    LongColumnVector intColumn = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, vectorSize, new Random(10));
    intColumn.isNull[3] = true;
    LongColumnVector boolColumn = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, vectorSize, new Random(10));
    boolColumn.isNull[2] = true;

    ArrayList<Object>[] rows = (ArrayList<Object>[]) new ArrayList[this.vectorSize];
    StructObjectInspector structInspector = genStructOI();
    VectorExpressionWriter[] writers = VectorExpressionWriterFactory.getExpressionWriters(structInspector);

    for (int rowIdx = 0; rowIdx < vectorSize; rowIdx++) {
        // Each row starts out as two null fields; the writers fill them in.
        ArrayList<Object> row = new ArrayList<Object>(2);
        rows[rowIdx] = row;
        row.add(null);
        row.add(null);
        writers[0].setValue(row, intColumn, rowIdx);
        writers[1].setValue(row, boolColumn, rowIdx);

        Object intField = row.get(0);
        if (intField != null) {
            IntWritable written = (IntWritable) intField;
            Assert.assertEquals((int) intColumn.vector[rowIdx], written.get());
        } else {
            // A null field is only acceptable when the source row was null.
            Assert.assertTrue(intColumn.isNull[rowIdx]);
        }

        Object boolField = row.get(1);
        if (boolField != null) {
            BooleanWritable written = (BooleanWritable) boolField;
            // Any non-zero long is expected to surface as true.
            Assert.assertEquals(boolColumn.vector[rowIdx] != 0, written.get());
        } else {
            Assert.assertTrue(boolColumn.isNull[rowIdx]);
        }
    }
}
Also used : Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ArrayList(java.util.ArrayList) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 17 with BooleanWritable

use of org.apache.hadoop.io.BooleanWritable in project hive by apache.

the class TestVectorStringExpressions method testStringLikeRandomized.

/**
 * Cross-checks {@code FilterStringColLikeStringScalar} against {@code UDFLike}:
 * for every pattern, 1024 candidate strings are generated and the vectorized
 * filter must keep the single-row batch exactly when the UDF reports a match.
 *
 * @throws HiveException propagated from the filter expression
 * @throws UnsupportedEncodingException declared for the "utf-8" byte conversions
 */
@Test
public void testStringLikeRandomized() throws HiveException, UnsupportedEncodingException {
    final String[] patterns = new String[] { "ABC%", "%ABC", "%ABC%", "ABC%DEF", "ABC%DEF%", "%ABC%DEF", "%ABC%DEF%", "ABC%DEF%EFG", "%ABC%DEF%EFG", "%ABC%DEF%EFG%H" };
    long matched = 0;
    long unmatched = 0;
    // Fixed seed keeps the generated candidates reproducible between runs.
    Random rng = new Random(1234);
    UDFLike referenceLike = new UDFLike();
    for (String pattern : patterns) {
        VectorExpression filter = new FilterStringColLikeStringScalar(0, pattern.getBytes("utf-8"));
        VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1, 1, 1);
        batch.cols[0] = new BytesColumnVector(1);
        BytesColumnVector column = (BytesColumnVector) batch.cols[0];
        Text patternText = new Text(pattern);
        int remaining = 1024;
        while (remaining-- > 0) {
            String candidate = generateCandidate(rng, pattern);
            BooleanWritable like = referenceLike.evaluate(new Text(candidate), patternText);

            // Rebuild a one-row batch holding just this candidate.
            batch.reset();
            column.initBuffer();
            byte[] candidateBytes = candidate.getBytes("utf-8");
            column.setVal(0, candidateBytes, 0, candidateBytes.length);
            column.noNulls = true;
            batch.size = 1;
            filter.evaluate(batch);

            boolean expected = like.get();
            if (expected) {
                matched++;
            } else {
                unmatched++;
            }
            // The filter signals a match by leaving the row in the batch.
            boolean rowSurvived = batch.size != 0;
            assertEquals(String.format("Checking '%s' against '%s'", candidate, pattern), expected, rowSurvived);
        }
    }
    LOG.info(String.format("Randomized testing: ran %d positive tests and %d negative tests", matched, unmatched));
}
Also used : Text(org.apache.hadoop.io.Text) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) UDFLike(org.apache.hadoop.hive.ql.udf.UDFLike) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) Test(org.junit.Test)

Example 18 with BooleanWritable

use of org.apache.hadoop.io.BooleanWritable in project hive by apache.

the class TestVectorizedORCReader method checkVectorizedReader.

/**
 * Reads the test ORC file twice, once batch-at-a-time and once row-at-a-time,
 * and asserts that every field of every row agrees between the two readers.
 * After each batch it also asserts the expected {@code isRepeating} and
 * {@code noNulls} flags for columns 0 through 9.
 *
 * @throws Exception propagated from the ORC readers
 */
private void checkVectorizedReader() throws Exception {
    Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    OrcStruct row = null;
    // Check Vectorized ORC reader against ORC row reader
    while (vrr.nextBatch(batch)) {
        for (int i = 0; i < batch.size; i++) {
            row = (OrcStruct) rr.next(row);
            for (int j = 0; j < batch.cols.length; j++) {
                Object a = (row.getFieldValue(j));
                ColumnVector cv = batch.cols[j];
                // if the value is repeating, use row 0
                int rowId = cv.isRepeating ? 0 : i;
                // make sure the null flag agrees
                if (a == null) {
                    Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
                } else if (a instanceof BooleanWritable) {
                    // Boolean values are stored as 1's and 0's, so convert and compare
                    long expected = ((BooleanWritable) a).get() ? 1 : 0;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(expected, b);
                } else if (a instanceof TimestampWritable) {
                    // Compare against the vector's scratch timestamp for this row
                    TimestampWritable t = ((TimestampWritable) a);
                    TimestampColumnVector tcv = ((TimestampColumnVector) cv);
                    Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId));
                } else if (a instanceof DateWritable) {
                    // Dates are stored as long, so convert and compare
                    DateWritable adt = (DateWritable) a;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) b));
                } else if (a instanceof HiveDecimalWritable) {
                    // Decimal vector entries are HiveDecimalWritable, so compare directly.
                    // Index with rowId (not i) like every other branch, so a repeating
                    // column is read from entry 0.
                    HiveDecimalWritable dec = (HiveDecimalWritable) a;
                    HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[rowId];
                    Assert.assertEquals(dec, b);
                } else if (a instanceof DoubleWritable) {
                    double b = ((DoubleColumnVector) cv).vector[rowId];
                    assertEquals(a.toString(), Double.toString(b));
                } else if (a instanceof Text) {
                    BytesColumnVector bcv = (BytesColumnVector) cv;
                    Text b = new Text();
                    b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                    assertEquals(a, b);
                } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
                    assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
                } else {
                    // Unhandled writable type: force a failure that names the class
                    assertEquals("huh", a.getClass().getName());
                }
            }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    // The row reader must be exhausted once the vectorized reader is
    Assert.assertEquals(false, rr.nextBatch(batch));
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable)

Example 19 with BooleanWritable

use of org.apache.hadoop.io.BooleanWritable in project hive by apache.

the class TestGenericUDFMonthsBetween method testMonthsBetweenForTimestamp.

/**
 * Runs the timestamp checks twice: once on a UDF initialized with two
 * timestamp arguments, and once on a UDF initialized with an additional
 * constant boolean {@code false} third argument.
 *
 * @throws HiveException propagated from UDF initialization or evaluation
 */
public void testMonthsBetweenForTimestamp() throws HiveException {
    // Default run: two writable-timestamp arguments
    GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
    ObjectInspector[] arguments = {
        PrimitiveObjectInspectorFactory.writableTimestampObjectInspector,
        PrimitiveObjectInspectorFactory.writableTimestampObjectInspector
    };
    udf.initialize(arguments);
    testMonthsBetweenForTimestamp(udf);

    // Run without round-off: same inputs plus a constant `false` third argument
    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
    ObjectInspector roundOffFlag = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo, new BooleanWritable(false));
    ObjectInspector[] args = {
        PrimitiveObjectInspectorFactory.writableTimestampObjectInspector,
        PrimitiveObjectInspectorFactory.writableTimestampObjectInspector,
        roundOffFlag
    };
    udfWithoutRoundOff.initialize(args);
    testMonthsBetweenForTimestamp(udfWithoutRoundOff);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BooleanWritable(org.apache.hadoop.io.BooleanWritable)

Example 20 with BooleanWritable

use of org.apache.hadoop.io.BooleanWritable in project hive by apache.

the class TestGenericUDFMonthsBetween method testMonthsBetweenForString.

/**
 * Runs the string-input checks twice: once on a UDF initialized with two
 * string arguments, and once on a UDF initialized with an additional
 * constant boolean {@code false} third argument.
 *
 * @throws HiveException propagated from UDF initialization or evaluation
 */
public void testMonthsBetweenForString() throws HiveException {
    // Default run
    GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    ObjectInspector[] arguments = { valueOI1, valueOI2 };
    udf.initialize(arguments);
    testMonthsBetweenForString(udf);
    // Run without round-off
    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo, new BooleanWritable(false));
    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
    udfWithoutRoundOff.initialize(args);
    // Exercise the three-argument instance; passing `udf` here would repeat the
    // default run and leave this configuration untested.
    testMonthsBetweenForString(udfWithoutRoundOff);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BooleanWritable(org.apache.hadoop.io.BooleanWritable)

Aggregations

BooleanWritable (org.apache.hadoop.io.BooleanWritable)63 IntWritable (org.apache.hadoop.io.IntWritable)41 LongWritable (org.apache.hadoop.io.LongWritable)40 FloatWritable (org.apache.hadoop.io.FloatWritable)37 Text (org.apache.hadoop.io.Text)31 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)27 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)26 BytesWritable (org.apache.hadoop.io.BytesWritable)26 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)25 Writable (org.apache.hadoop.io.Writable)17 Test (org.junit.Test)17 ArrayList (java.util.ArrayList)15 Configuration (org.apache.hadoop.conf.Configuration)12 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)12 Random (java.util.Random)11 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)10 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)9 KeyValue (org.apache.hadoop.hbase.KeyValue)7 Result (org.apache.hadoop.hbase.client.Result)7 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)7