Search in sources :

Example 41 with DoubleWritable

use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

the class TestLazySimpleSerDe method testLazySimpleSerDeMissingColumns.

/**
   * Test the LazySimpleSerDe class with missing columns.
   */
public void testLazySimpleSerDeMissingColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\t");
        String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text(""), null, null };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Properties(java.util.Properties) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Example 42 with DoubleWritable

use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

the class TestLazySimpleSerDe method testLazySimpleSerDeExtraColumns.

/**
   * Test the LazySimpleSerDe class with extra columns.
   */
public void testLazySimpleSerDeExtraColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\ta";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), new IntWritable(1), new Text("a") };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Properties(java.util.Properties) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Example 43 with DoubleWritable

use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

the class TestVectorizedORCReader method checkVectorizedReader.

private void checkVectorizedReader() throws Exception {
    Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    OrcStruct row = null;
    // Check Vectorized ORC reader against ORC row reader
    while (vrr.nextBatch(batch)) {
        for (int i = 0; i < batch.size; i++) {
            row = (OrcStruct) rr.next(row);
            for (int j = 0; j < batch.cols.length; j++) {
                Object a = (row.getFieldValue(j));
                ColumnVector cv = batch.cols[j];
                // if the value is repeating, use row 0
                int rowId = cv.isRepeating ? 0 : i;
                // make sure the null flag agrees
                if (a == null) {
                    Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
                } else if (a instanceof BooleanWritable) {
                    // Boolean values are stores a 1's and 0's, so convert and compare
                    Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(temp.toString(), Long.toString(b));
                } else if (a instanceof TimestampWritable) {
                    // Timestamps are stored as long, so convert and compare
                    TimestampWritable t = ((TimestampWritable) a);
                    TimestampColumnVector tcv = ((TimestampColumnVector) cv);
                    Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId));
                } else if (a instanceof DateWritable) {
                    // Dates are stored as long, so convert and compare
                    DateWritable adt = (DateWritable) a;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) b));
                } else if (a instanceof HiveDecimalWritable) {
                    // Decimals are stored as BigInteger, so convert and compare
                    HiveDecimalWritable dec = (HiveDecimalWritable) a;
                    HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
                    Assert.assertEquals(dec, b);
                } else if (a instanceof DoubleWritable) {
                    double b = ((DoubleColumnVector) cv).vector[rowId];
                    assertEquals(a.toString(), Double.toString(b));
                } else if (a instanceof Text) {
                    BytesColumnVector bcv = (BytesColumnVector) cv;
                    Text b = new Text();
                    b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                    assertEquals(a, b);
                } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
                    assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
                } else {
                    assertEquals("huh", a.getClass().getName());
                }
            }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.nextBatch(batch));
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable)

Example 44 with DoubleWritable

use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

the class TestArrayCompatibility method testNewOptionalGroupInList.

@Test
public void testNewOptionalGroupInList() throws Exception {
    Path test = writeDirect("NewOptionalGroupInList", Types.buildMessage().optionalGroup().as(LIST).repeatedGroup().optionalGroup().required(DOUBLE).named("latitude").required(DOUBLE).named("longitude").named("element").named("list").named("locations").named("NewOptionalGroupInList"), new DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("locations", 0);
            rc.startGroup();
            // start writing array contents
            rc.startField("list", 0);
            // write a non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(0.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // write a null element (element field is omitted)
            // array level
            rc.startGroup();
            // array level
            rc.endGroup();
            // write a second non-null element
            // array level
            rc.startGroup();
            rc.startField("element", 0);
            rc.startGroup();
            rc.startField("latitude", 0);
            rc.addDouble(0.0);
            rc.endField("latitude", 0);
            rc.startField("longitude", 1);
            rc.addDouble(180.0);
            rc.endField("longitude", 1);
            rc.endGroup();
            rc.endField("element", 0);
            // array level
            rc.endGroup();
            // finished writing array contents
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("locations", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(0.0), new DoubleWritable(0.0)), null, record(new DoubleWritable(0.0), new DoubleWritable(180.0)));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) Test(org.junit.Test)

Example 45 with DoubleWritable

use of org.apache.hadoop.hive.serde2.io.DoubleWritable in project hive by apache.

the class TestMapStructures method testDoubleMapWithStructValue.

@Test
public void testDoubleMapWithStructValue() throws Exception {
    Path test = writeDirect("DoubleMapWithStructValue", Types.buildMessage().optionalGroup().as(MAP).repeatedGroup().optional(DOUBLE).named("key").optionalGroup().required(INT32).named("x").required(INT32).named("y").named("value").named("key_value").named("approx").named("DoubleMapWithStructValue"), new TestArrayCompatibility.DirectWriter() {

        @Override
        public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("approx", 0);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            rc.startField("key", 0);
            rc.addDouble(3.14);
            rc.endField("key", 0);
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("x", 0);
            rc.addInteger(7);
            rc.endField("x", 0);
            rc.startField("y", 1);
            rc.addInteger(22);
            rc.endField("y", 1);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("approx", 0);
            rc.endMessage();
        }
    });
    ArrayWritable expected = list(record(new DoubleWritable(3.14), record(new IntWritable(7), new IntWritable(22))));
    List<ArrayWritable> records = read(test);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0), Arrays.asList("approx"), Arrays.asList("map<bigint,struct<x:int,y:int>>"));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayWritable(org.apache.hadoop.io.ArrayWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Aggregations

DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)113 Test (org.junit.Test)61 IntWritable (org.apache.hadoop.io.IntWritable)50 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)49 LongWritable (org.apache.hadoop.io.LongWritable)48 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)42 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)42 Text (org.apache.hadoop.io.Text)42 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)40 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)38 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)37 FloatWritable (org.apache.hadoop.io.FloatWritable)34 BooleanWritable (org.apache.hadoop.io.BooleanWritable)28 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)24 BytesWritable (org.apache.hadoop.io.BytesWritable)21 ArrayList (java.util.ArrayList)19 HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)16 TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)15 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)12 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)12