Search in sources :

Example 26 with HiveDecimalWritable

use of org.apache.hadoop.hive.serde2.io.HiveDecimalWritable in project hive by apache.

the class TestVectorizedORCReader method checkVectorizedReader.

private void checkVectorizedReader() throws Exception {
    Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    OrcStruct row = null;
    // Check Vectorized ORC reader against ORC row reader
    while (vrr.nextBatch(batch)) {
        for (int i = 0; i < batch.size; i++) {
            row = (OrcStruct) rr.next(row);
            for (int j = 0; j < batch.cols.length; j++) {
                Object a = (row.getFieldValue(j));
                ColumnVector cv = batch.cols[j];
                // if the value is repeating, use row 0
                int rowId = cv.isRepeating ? 0 : i;
                // make sure the null flag agrees
                if (a == null) {
                    Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
                } else if (a instanceof BooleanWritable) {
                    // Boolean values are stores a 1's and 0's, so convert and compare
                    Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(temp.toString(), Long.toString(b));
                } else if (a instanceof TimestampWritable) {
                    // Timestamps are stored as long, so convert and compare
                    TimestampWritable t = ((TimestampWritable) a);
                    TimestampColumnVector tcv = ((TimestampColumnVector) cv);
                    Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId));
                } else if (a instanceof DateWritable) {
                    // Dates are stored as long, so convert and compare
                    DateWritable adt = (DateWritable) a;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) b));
                } else if (a instanceof HiveDecimalWritable) {
                    // Decimals are stored as BigInteger, so convert and compare
                    HiveDecimalWritable dec = (HiveDecimalWritable) a;
                    HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
                    Assert.assertEquals(dec, b);
                } else if (a instanceof DoubleWritable) {
                    double b = ((DoubleColumnVector) cv).vector[rowId];
                    assertEquals(a.toString(), Double.toString(b));
                } else if (a instanceof Text) {
                    BytesColumnVector bcv = (BytesColumnVector) cv;
                    Text b = new Text();
                    b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                    assertEquals(a, b);
                } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
                    assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
                } else {
                    assertEquals("huh", a.getClass().getName());
                }
            }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.nextBatch(batch));
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable)

Example 27 with HiveDecimalWritable

use of org.apache.hadoop.hive.serde2.io.HiveDecimalWritable in project hive by apache.

the class VectorizedColumnReaderTestBase method writeData.

protected static void writeData(ParquetWriter<Group> writer, boolean isDictionaryEncoding) throws IOException {
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    for (int i = 0; i < nElements; i++) {
        boolean isNull = isNull(i);
        int intVal = getIntValue(isDictionaryEncoding, i);
        long longVal = getLongValue(isDictionaryEncoding, i);
        Binary timeStamp = getTimestamp(isDictionaryEncoding, i);
        HiveDecimal decimalVal = getDecimal(isDictionaryEncoding, i).setScale(2);
        double doubleVal = getDoubleValue(isDictionaryEncoding, i);
        float floatVal = getFloatValue(isDictionaryEncoding, i);
        boolean booleanVal = getBooleanValue(i);
        Binary binary = getBinaryValue(isDictionaryEncoding, i);
        Group group = f.newGroup().append("int32_field", intVal).append("int64_field", longVal).append("int96_field", timeStamp).append("double_field", doubleVal).append("float_field", floatVal).append("boolean_field", booleanVal).append("flba_field", "abc");
        if (!isNull) {
            group.append("some_null_field", "x");
        }
        group.append("binary_field", binary);
        if (!isNull) {
            group.append("binary_field_some_null", binary);
        }
        HiveDecimalWritable w = new HiveDecimalWritable(decimalVal);
        group.append("value", Binary.fromConstantByteArray(w.getInternalStorage()));
        group.addGroup("struct_field").append("a", intVal).append("b", doubleVal);
        Group g = group.addGroup("nested_struct_field");
        g.addGroup("nsf").append("c", intVal).append("d", intVal);
        g.append("e", doubleVal);
        Group some_null_g = group.addGroup("struct_field_some_null");
        if (i % 2 != 0) {
            some_null_g.append("f", intVal);
        }
        if (i % 3 != 0) {
            some_null_g.append("g", doubleVal);
        }
        Group mapGroup = group.addGroup("map_field");
        if (i % 13 != 1) {
            mapGroup.addGroup("map").append("key", binary).append("value", "abc");
        } else {
            mapGroup.addGroup("map").append("key", binary);
        }
        Group arrayGroup = group.addGroup("array_list");
        for (int j = 0; j < i % 4; j++) {
            arrayGroup.addGroup("bag").append("array_element", intVal);
        }
        writer.write(group);
    }
    writer.close();
}
Also used : Group(org.apache.parquet.example.data.Group) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) Binary(org.apache.parquet.io.api.Binary)

Example 28 with HiveDecimalWritable

use of org.apache.hadoop.hive.serde2.io.HiveDecimalWritable in project hive by apache.

the class TestOrcFile method testHiveDecimalAllNulls.

@Test
public void testHiveDecimalAllNulls() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(DecimalStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
    // this is an invalid decimal value, getting HiveDecimal from it will return null
    writer.addRow(new DecimalStruct(new HiveDecimalWritable("1.463040009E9".getBytes(), 8)));
    writer.addRow(new DecimalStruct(null));
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector.getStructFieldRef("dec").getFieldObjectInspector();
    RecordReader rows = reader.rows();
    while (rows.hasNext()) {
        Object row = rows.next(null);
        assertEquals(null, doi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    }
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(2, stats[0].getNumberOfValues());
    assertEquals(0, stats[1].getNumberOfValues());
    assertEquals(true, stats[1].hasNull());
}
Also used : DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 29 with HiveDecimalWritable

use of org.apache.hadoop.hive.serde2.io.HiveDecimalWritable in project hive by apache.

the class TestGenericUDFOPDivide method testDouleDivideDecimal.

@Test
public void testDouleDivideDecimal() throws HiveException {
    GenericUDFOPDivide udf = new GenericUDFOPDivide();
    DoubleWritable left = new DoubleWritable(74.52);
    HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97"));
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableDoubleObjectInspector, PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getDecimalTypeInfo(5, 2)) };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.doubleTypeInfo, oi.getTypeInfo());
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    Assert.assertEquals(new Double(74.52 / 234.97), new Double(res.get()));
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Test(org.junit.Test)

Example 30 with HiveDecimalWritable

use of org.apache.hadoop.hive.serde2.io.HiveDecimalWritable in project hive by apache.

the class TestGenericUDFCeil method testDecimal.

@Test
public void testDecimal() throws HiveException {
    GenericUDFCeil udf = new GenericUDFCeil();
    HiveDecimalWritable input = new HiveDecimalWritable(HiveDecimal.create("32300.004747"));
    DecimalTypeInfo inputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(11, 6);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(inputTypeInfo) };
    DeferredObject[] args = { new DeferredJavaObject(input) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(6, 0), oi.getTypeInfo());
    HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args);
    Assert.assertEquals(HiveDecimal.create("32301"), res.getHiveDecimal());
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Test(org.junit.Test)

Aggregations

HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)116 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)33 Test (org.junit.Test)33 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)27 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)26 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)26 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)24 LongWritable (org.apache.hadoop.io.LongWritable)24 IntWritable (org.apache.hadoop.io.IntWritable)21 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)19 Text (org.apache.hadoop.io.Text)19 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)18 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)18 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)17 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)16 FloatWritable (org.apache.hadoop.io.FloatWritable)16 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)15 BytesWritable (org.apache.hadoop.io.BytesWritable)15 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)14 BooleanWritable (org.apache.hadoop.io.BooleanWritable)13