Search in sources :

Example 81 with TimestampWritable

use of org.apache.hadoop.hive.serde2.io.TimestampWritable in project hive by apache.

the class TestOrcFile method createOrcDateFile.

/**
 * Generate an ORC file with a range of dates and times.
 */
public void createOrcDateFile(Path file, int minYear, int maxYear) throws IOException {
    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).addFieldNames("time").addFieldNames("date").addSubtypes(1).addSubtypes(2).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DATE).build());
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = OrcStruct.createObjectInspector(0, types);
    }
    Writer writer = OrcFile.createWriter(file, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000).blockPadding(false));
    OrcStruct row = new OrcStruct(2);
    for (int year = minYear; year < maxYear; ++year) {
        for (int ms = 1000; ms < 2000; ++ms) {
            row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf(year + "-05-05 12:34:56." + ms)));
            row.setFieldValue(1, new DateWritable(new Date(year - 1900, 11, 25)));
            writer.addRow(row);
        }
    }
    writer.close();
    Reader reader = OrcFile.createReader(file, OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();
    for (int year = minYear; year < maxYear; ++year) {
        for (int ms = 1000; ms < 2000; ++ms) {
            row = (OrcStruct) rows.next(row);
            assertEquals(new TimestampWritable(Timestamp.valueOf(year + "-05-05 12:34:56." + ms)), row.getFieldValue(0));
            assertEquals(new DateWritable(new Date(year - 1900, 11, 25)), row.getFieldValue(1));
        }
    }
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) OrcProto(org.apache.orc.OrcProto) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Date(java.sql.Date)

Example 82 with TimestampWritable

use of org.apache.hadoop.hive.serde2.io.TimestampWritable in project hive by apache.

the class TestOrcFile method testUnionAndTimestamp.

/**
 * We test union, timestamp, and decimal separately since we need to make the
 * object inspector manually. (The Hive reflection-based doesn't handle
 * them properly.)
 */
@Test
public void testUnionAndTimestamp() throws Exception {
    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).addFieldNames("time").addFieldNames("union").addFieldNames("decimal").addSubtypes(1).addSubtypes(2).addSubtypes(5).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).addSubtypes(3).addSubtypes(4).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build());
    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DECIMAL).build());
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = OrcStruct.createObjectInspector(0, types);
    }
    HiveDecimal maxValue = HiveDecimal.create("10000000000000000000");
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(1000).compress(CompressionKind.NONE).batchSize(1000).bufferSize(100).blockPadding(false));
    OrcStruct row = new OrcStruct(3);
    OrcUnion union = new OrcUnion();
    row.setFieldValue(1, union);
    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("2000-03-12 15:00:00")));
    HiveDecimal value = HiveDecimal.create("12345678.6547456");
    row.setFieldValue(2, new HiveDecimalWritable(value));
    union.set((byte) 0, new IntWritable(42));
    writer.addRow(row);
    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")));
    union.set((byte) 1, new Text("hello"));
    value = HiveDecimal.create("-5643.234");
    row.setFieldValue(2, new HiveDecimalWritable(value));
    writer.addRow(row);
    row.setFieldValue(0, null);
    row.setFieldValue(1, null);
    row.setFieldValue(2, null);
    writer.addRow(row);
    row.setFieldValue(1, union);
    union.set((byte) 0, null);
    writer.addRow(row);
    union.set((byte) 1, null);
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(200000));
    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("1970-01-01 00:00:00")));
    value = HiveDecimal.create("10000000000000000000");
    row.setFieldValue(2, new HiveDecimalWritable(value));
    writer.addRow(row);
    Random rand = new Random(42);
    for (int i = 1970; i < 2038; ++i) {
        row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)));
        if ((i & 1) == 0) {
            union.set((byte) 0, new IntWritable(i * i));
        } else {
            union.set((byte) 1, new Text(Integer.toString(i * i)));
        }
        value = HiveDecimal.create(new BigInteger(64, rand), rand.nextInt(18));
        row.setFieldValue(2, new HiveDecimalWritable(value));
        if (maxValue.compareTo(value) < 0) {
            maxValue = value;
        }
        writer.addRow(row);
    }
    // let's add a lot of constant rows to test the rle
    row.setFieldValue(0, null);
    union.set((byte) 0, new IntWritable(1732050807));
    row.setFieldValue(2, null);
    for (int i = 0; i < 5000; ++i) {
        writer.addRow(row);
    }
    union.set((byte) 0, new IntWritable(0));
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(10));
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(138));
    writer.addRow(row);
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    TypeDescription schema = writer.getSchema();
    assertEquals(5, schema.getMaximumId());
    boolean[] expected = new boolean[] { false, false, false, false, false, false };
    boolean[] included = OrcUtils.includeColumns("", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, true, false, false, false, true };
    included = OrcUtils.includeColumns("time,decimal", schema);
    assertEquals(true, Arrays.equals(expected, included));
    expected = new boolean[] { false, false, true, true, true, false };
    included = OrcUtils.includeColumns("union", schema);
    assertEquals(true, Arrays.equals(expected, included));
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    assertEquals(5077, reader.getNumberOfRows());
    DecimalColumnStatistics stats = (DecimalColumnStatistics) reader.getStatistics()[5];
    assertEquals(71, stats.getNumberOfValues());
    assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
    assertEquals(maxValue, stats.getMaximum());
    // TODO: fix this
    // assertEquals(null,stats.getSum());
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    for (StripeInformation stripe : reader.getStripes()) {
        stripeCount += 1;
        rowCount += stripe.getNumberOfRows();
        if (currentOffset < 0) {
            currentOffset = stripe.getOffset() + stripe.getLength();
        } else {
            assertEquals(currentOffset, stripe.getOffset());
            currentOffset += stripe.getLength();
        }
    }
    assertEquals(reader.getNumberOfRows(), rowCount);
    assertEquals(2, stripeCount);
    assertEquals(reader.getContentLength(), currentOffset);
    RecordReader rows = reader.rows();
    assertEquals(0, rows.getRowNumber());
    assertEquals(0.0, rows.getProgress(), 0.000001);
    assertEquals(true, rows.hasNext());
    row = (OrcStruct) rows.next(null);
    assertEquals(1, rows.getRowNumber());
    inspector = reader.getObjectInspector();
    assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>", inspector.getTypeName());
    assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-12 15:00:00")), row.getFieldValue(0));
    union = (OrcUnion) row.getFieldValue(1);
    assertEquals(0, union.getTag());
    assertEquals(new IntWritable(42), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")), row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(2, rows.getRowNumber());
    assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")), row.getFieldValue(0));
    assertEquals(1, union.getTag());
    assertEquals(new Text("hello"), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(null, row.getFieldValue(0));
    assertEquals(null, row.getFieldValue(1));
    assertEquals(null, row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(null, row.getFieldValue(0));
    union = (OrcUnion) row.getFieldValue(1);
    assertEquals(0, union.getTag());
    assertEquals(null, union.getObject());
    assertEquals(null, row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(null, row.getFieldValue(0));
    assertEquals(1, union.getTag());
    assertEquals(null, union.getObject());
    assertEquals(null, row.getFieldValue(2));
    row = (OrcStruct) rows.next(row);
    assertEquals(new TimestampWritable(Timestamp.valueOf("1970-01-01 00:00:00")), row.getFieldValue(0));
    assertEquals(new IntWritable(200000), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("10000000000000000000")), row.getFieldValue(2));
    rand = new Random(42);
    for (int i = 1970; i < 2038; ++i) {
        row = (OrcStruct) rows.next(row);
        assertEquals(new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)), row.getFieldValue(0));
        if ((i & 1) == 0) {
            assertEquals(0, union.getTag());
            assertEquals(new IntWritable(i * i), union.getObject());
        } else {
            assertEquals(1, union.getTag());
            assertEquals(new Text(Integer.toString(i * i)), union.getObject());
        }
        assertEquals(new HiveDecimalWritable(HiveDecimal.create(new BigInteger(64, rand), rand.nextInt(18))), row.getFieldValue(2));
    }
    for (int i = 0; i < 5000; ++i) {
        row = (OrcStruct) rows.next(row);
        assertEquals(new IntWritable(1732050807), union.getObject());
    }
    row = (OrcStruct) rows.next(row);
    assertEquals(new IntWritable(0), union.getObject());
    row = (OrcStruct) rows.next(row);
    assertEquals(new IntWritable(10), union.getObject());
    row = (OrcStruct) rows.next(row);
    assertEquals(new IntWritable(138), union.getObject());
    assertEquals(false, rows.hasNext());
    assertEquals(1.0, rows.getProgress(), 0.00001);
    assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
    rows.seekToRow(1);
    row = (OrcStruct) rows.next(row);
    assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")), row.getFieldValue(0));
    assertEquals(1, union.getTag());
    assertEquals(new Text("hello"), union.getObject());
    assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), row.getFieldValue(2));
    rows.close();
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) OrcProto(org.apache.orc.OrcProto) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Text(org.apache.hadoop.io.Text) Random(java.util.Random) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) BigInteger(java.math.BigInteger) TypeDescription(org.apache.orc.TypeDescription) IntWritable(org.apache.hadoop.io.IntWritable) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)

Example 83 with TimestampWritable

use of org.apache.hadoop.hive.serde2.io.TimestampWritable in project hive by apache.

the class BinarySortableSerDe method deserialize.

static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, byte nullMarker, byte notNullMarker, Object reuse) throws IOException {
    // Is this field a null?
    byte isNull = buffer.read(invert);
    if (isNull == nullMarker) {
        return null;
    }
    assert (isNull == notNullMarker);
    switch(type.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
                switch(ptype.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return null;
                        }
                    case BOOLEAN:
                        {
                            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                            byte b = buffer.read(invert);
                            assert (b == 1 || b == 2);
                            r.set(b == 2);
                            return r;
                        }
                    case BYTE:
                        {
                            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                            r.set((byte) (buffer.read(invert) ^ 0x80));
                            return r;
                        }
                    case SHORT:
                        {
                            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                            int v = buffer.read(invert) ^ 0x80;
                            v = (v << 8) + (buffer.read(invert) & 0xff);
                            r.set((short) v);
                            return r;
                        }
                    case INT:
                        {
                            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                            r.set(deserializeInt(buffer, invert));
                            return r;
                        }
                    case LONG:
                        {
                            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                            r.set(deserializeLong(buffer, invert));
                            return r;
                        }
                    case FLOAT:
                        {
                            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                            int v = 0;
                            for (int i = 0; i < 4; i++) {
                                v = (v << 8) + (buffer.read(invert) & 0xff);
                            }
                            if ((v & (1 << 31)) == 0) {
                                // negative number, flip all bits
                                v = ~v;
                            } else {
                                // positive number, flip the first bit
                                v = v ^ (1 << 31);
                            }
                            r.set(Float.intBitsToFloat(v));
                            return r;
                        }
                    case DOUBLE:
                        {
                            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                            long v = 0;
                            for (int i = 0; i < 8; i++) {
                                v = (v << 8) + (buffer.read(invert) & 0xff);
                            }
                            if ((v & (1L << 63)) == 0) {
                                // negative number, flip all bits
                                v = ~v;
                            } else {
                                // positive number, flip the first bit
                                v = v ^ (1L << 63);
                            }
                            r.set(Double.longBitsToDouble(v));
                            return r;
                        }
                    case STRING:
                        {
                            Text r = reuse == null ? new Text() : (Text) reuse;
                            return deserializeText(buffer, invert, r);
                        }
                    case CHAR:
                        {
                            HiveCharWritable r = reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse;
                            // Use internal text member to read value
                            deserializeText(buffer, invert, r.getTextValue());
                            r.enforceMaxLength(getCharacterMaxLength(type));
                            return r;
                        }
                    case VARCHAR:
                        {
                            HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
                            // Use HiveVarchar's internal Text member to read the value.
                            deserializeText(buffer, invert, r.getTextValue());
                            // If we cache helper data for deserialization we could avoid having
                            // to call getVarcharMaxLength() on every deserialize call.
                            r.enforceMaxLength(getCharacterMaxLength(type));
                            return r;
                        }
                    case BINARY:
                        {
                            BytesWritable bw = new BytesWritable();
                            // Get the actual length first
                            int start = buffer.tell();
                            int length = 0;
                            do {
                                byte b = buffer.read(invert);
                                if (b == 0) {
                                    // end of string
                                    break;
                                }
                                if (b == 1) {
                                    // the last char is an escape char. read the actual char
                                    buffer.read(invert);
                                }
                                length++;
                            } while (true);
                            if (length == buffer.tell() - start) {
                                // No escaping happened, so we are already done.
                                bw.set(buffer.getData(), start, length);
                            } else {
                                // Escaping happened, we need to copy byte-by-byte.
                                // 1. Set the length first.
                                bw.set(buffer.getData(), start, length);
                                // 2. Reset the pointer.
                                buffer.seek(start);
                                // 3. Copy the data.
                                byte[] rdata = bw.getBytes();
                                for (int i = 0; i < length; i++) {
                                    byte b = buffer.read(invert);
                                    if (b == 1) {
                                        // The last char is an escape char, read the actual char.
                                        // The serialization format escape \0 to \1, and \1 to \2,
                                        // to make sure the string is null-terminated.
                                        b = (byte) (buffer.read(invert) - 1);
                                    }
                                    rdata[i] = b;
                                }
                                // 4. Read the null terminator.
                                byte b = buffer.read(invert);
                                assert (b == 0);
                            }
                            return bw;
                        }
                    case DATE:
                        {
                            DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
                            d.set(deserializeInt(buffer, invert));
                            return d;
                        }
                    case TIMESTAMP:
                        TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
                        byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
                        for (int i = 0; i < bytes.length; i++) {
                            bytes[i] = buffer.read(invert);
                        }
                        t.setBinarySortable(bytes, 0);
                        return t;
                    case TIMESTAMPLOCALTZ:
                        TimestampLocalTZWritable tstz = (reuse == null ? new TimestampLocalTZWritable() : (TimestampLocalTZWritable) reuse);
                        byte[] data = new byte[TimestampLocalTZWritable.BINARY_SORTABLE_LENGTH];
                        for (int i = 0; i < data.length; i++) {
                            data[i] = buffer.read(invert);
                        }
                        // Across MR process boundary tz is normalized and stored in type
                        // and is not carried in data for each row.
                        tstz.fromBinarySortable(data, 0, ((TimestampLocalTZTypeInfo) type).timeZone());
                        return tstz;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonthWritable i = reuse == null ? new HiveIntervalYearMonthWritable() : (HiveIntervalYearMonthWritable) reuse;
                            i.set(deserializeInt(buffer, invert));
                            return i;
                        }
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTimeWritable i = reuse == null ? new HiveIntervalDayTimeWritable() : (HiveIntervalDayTimeWritable) reuse;
                            long totalSecs = deserializeLong(buffer, invert);
                            int nanos = deserializeInt(buffer, invert);
                            i.set(totalSecs, nanos);
                            return i;
                        }
                    case DECIMAL:
                        {
                            // See serialization of decimal for explanation (below)
                            HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
                            int b = buffer.read(invert) - 1;
                            assert (b == 1 || b == -1 || b == 0);
                            boolean positive = b != -1;
                            int factor = buffer.read(invert) ^ 0x80;
                            for (int i = 0; i < 3; i++) {
                                factor = (factor << 8) + (buffer.read(invert) & 0xff);
                            }
                            if (!positive) {
                                factor = -factor;
                            }
                            int start = buffer.tell();
                            int length = 0;
                            do {
                                b = buffer.read(positive ? invert : !invert);
                                assert (b != 1);
                                if (b == 0) {
                                    // end of digits
                                    break;
                                }
                                length++;
                            } while (true);
                            final byte[] decimalBuffer = new byte[length];
                            buffer.seek(start);
                            for (int i = 0; i < length; ++i) {
                                decimalBuffer[i] = buffer.read(positive ? invert : !invert);
                            }
                            // read the null byte again
                            buffer.read(positive ? invert : !invert);
                            String digits = new String(decimalBuffer, 0, length, decimalCharSet);
                            BigInteger bi = new BigInteger(digits);
                            HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor - length);
                            if (!positive) {
                                bd = bd.negate();
                            }
                            bdw.set(bd);
                            return bdw;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                ListTypeInfo ltype = (ListTypeInfo) type;
                TypeInfo etype = ltype.getListElementTypeInfo();
                // Create the list if needed
                ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
                // Read the list
                int size = 0;
                while (true) {
                    int more = buffer.read(invert);
                    if (more == 0) {
                        // \0 to terminate
                        break;
                    }
                    // \1 followed by each element
                    assert (more == 1);
                    if (size == r.size()) {
                        r.add(null);
                    }
                    r.set(size, deserialize(buffer, etype, invert, nullMarker, notNullMarker, r.get(size)));
                    size++;
                }
                // Remove additional elements if the list is reused
                while (r.size() > size) {
                    r.remove(r.size() - 1);
                }
                return r;
            }
        case MAP:
            {
                MapTypeInfo mtype = (MapTypeInfo) type;
                TypeInfo ktype = mtype.getMapKeyTypeInfo();
                TypeInfo vtype = mtype.getMapValueTypeInfo();
                // Create the map if needed
                Map<Object, Object> r;
                if (reuse == null) {
                    r = new HashMap<Object, Object>();
                } else {
                    r = (HashMap<Object, Object>) reuse;
                    r.clear();
                }
                while (true) {
                    int more = buffer.read(invert);
                    if (more == 0) {
                        // \0 to terminate
                        break;
                    }
                    // \1 followed by each key and then each value
                    assert (more == 1);
                    Object k = deserialize(buffer, ktype, invert, nullMarker, notNullMarker, null);
                    Object v = deserialize(buffer, vtype, invert, nullMarker, notNullMarker, null);
                    r.put(k, v);
                }
                return r;
            }
        case STRUCT:
            {
                StructTypeInfo stype = (StructTypeInfo) type;
                List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
                int size = fieldTypes.size();
                // Create the struct if needed
                ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
                assert (r.size() <= size);
                // Set the size of the struct
                while (r.size() < size) {
                    r.add(null);
                }
                // Read one field by one field
                for (int eid = 0; eid < size; eid++) {
                    r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, nullMarker, notNullMarker, r.get(eid)));
                }
                return r;
            }
        case UNION:
            {
                UnionTypeInfo utype = (UnionTypeInfo) type;
                StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
                // Read the tag
                byte tag = buffer.read(invert);
                r.setTag(tag);
                r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, nullMarker, notNullMarker, null));
                return r;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + type.getCategory());
            }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) List(java.util.List) ArrayList(java.util.ArrayList) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StandardUnion(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion) BigInteger(java.math.BigInteger) Map(java.util.Map) HashMap(java.util.HashMap) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 84 with TimestampWritable

use of org.apache.hadoop.hive.serde2.io.TimestampWritable in project hive by apache.

the class BinarySortableSerDe method serialize.

static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, boolean invert, byte nullMarker, byte notNullMarker) throws SerDeException {
    // Is this field a null?
    if (o == null) {
        writeByte(buffer, nullMarker, invert);
        return;
    }
    // This field is not a null.
    writeByte(buffer, notNullMarker, invert);
    switch(oi.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
                switch(poi.getPrimitiveCategory()) {
                    case VOID:
                        {
                            return;
                        }
                    case BOOLEAN:
                        {
                            boolean v = ((BooleanObjectInspector) poi).get(o);
                            writeByte(buffer, (byte) (v ? 2 : 1), invert);
                            return;
                        }
                    case BYTE:
                        {
                            ByteObjectInspector boi = (ByteObjectInspector) poi;
                            byte v = boi.get(o);
                            writeByte(buffer, (byte) (v ^ 0x80), invert);
                            return;
                        }
                    case SHORT:
                        {
                            ShortObjectInspector spoi = (ShortObjectInspector) poi;
                            short v = spoi.get(o);
                            serializeShort(buffer, v, invert);
                            return;
                        }
                    case INT:
                        {
                            IntObjectInspector ioi = (IntObjectInspector) poi;
                            int v = ioi.get(o);
                            serializeInt(buffer, v, invert);
                            return;
                        }
                    case LONG:
                        {
                            LongObjectInspector loi = (LongObjectInspector) poi;
                            long v = loi.get(o);
                            serializeLong(buffer, v, invert);
                            return;
                        }
                    case FLOAT:
                        {
                            FloatObjectInspector foi = (FloatObjectInspector) poi;
                            serializeFloat(buffer, foi.get(o), invert);
                            return;
                        }
                    case DOUBLE:
                        {
                            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                            serializeDouble(buffer, doi.get(o), invert);
                            return;
                        }
                    case STRING:
                        {
                            StringObjectInspector soi = (StringObjectInspector) poi;
                            Text t = soi.getPrimitiveWritableObject(o);
                            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                            return;
                        }
                    case CHAR:
                        {
                            HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
                            HiveCharWritable hc = hcoi.getPrimitiveWritableObject(o);
                            // Trailing space should ignored for char comparisons.
                            // So write stripped values for this SerDe.
                            Text t = hc.getStrippedValue();
                            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                            return;
                        }
                    case VARCHAR:
                        {
                            HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
                            HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
                            // use varchar's text field directly
                            Text t = hc.getTextValue();
                            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                            return;
                        }
                    case BINARY:
                        {
                            BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                            BytesWritable ba = baoi.getPrimitiveWritableObject(o);
                            byte[] toSer = new byte[ba.getLength()];
                            System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
                            serializeBytes(buffer, toSer, ba.getLength(), invert);
                            return;
                        }
                    case DATE:
                        {
                            DateObjectInspector doi = (DateObjectInspector) poi;
                            int v = doi.getPrimitiveWritableObject(o).getDays();
                            serializeInt(buffer, v, invert);
                            return;
                        }
                    case TIMESTAMP:
                        {
                            TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                            TimestampWritable t = toi.getPrimitiveWritableObject(o);
                            serializeTimestampWritable(buffer, t, invert);
                            return;
                        }
                    case TIMESTAMPLOCALTZ:
                        {
                            TimestampLocalTZObjectInspector toi = (TimestampLocalTZObjectInspector) poi;
                            TimestampLocalTZWritable t = toi.getPrimitiveWritableObject(o);
                            serializeTimestampTZWritable(buffer, t, invert);
                            return;
                        }
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonthObjectInspector ioi = (HiveIntervalYearMonthObjectInspector) poi;
                            HiveIntervalYearMonth intervalYearMonth = ioi.getPrimitiveJavaObject(o);
                            serializeHiveIntervalYearMonth(buffer, intervalYearMonth, invert);
                            return;
                        }
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTimeObjectInspector ioi = (HiveIntervalDayTimeObjectInspector) poi;
                            HiveIntervalDayTime intervalDayTime = ioi.getPrimitiveJavaObject(o);
                            serializeHiveIntervalDayTime(buffer, intervalDayTime, invert);
                            return;
                        }
                    case DECIMAL:
                        {
                            HiveDecimalObjectInspector boi = (HiveDecimalObjectInspector) poi;
                            HiveDecimal dec = boi.getPrimitiveJavaObject(o);
                            serializeHiveDecimal(buffer, dec, invert);
                            return;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
            {
                ListObjectInspector loi = (ListObjectInspector) oi;
                ObjectInspector eoi = loi.getListElementObjectInspector();
                // \1 followed by each element
                int size = loi.getListLength(o);
                for (int eid = 0; eid < size; eid++) {
                    writeByte(buffer, (byte) 1, invert);
                    serialize(buffer, loi.getListElement(o, eid), eoi, invert, nullMarker, notNullMarker);
                }
                // and \0 to terminate
                writeByte(buffer, (byte) 0, invert);
                return;
            }
        case MAP:
            {
                MapObjectInspector moi = (MapObjectInspector) oi;
                ObjectInspector koi = moi.getMapKeyObjectInspector();
                ObjectInspector voi = moi.getMapValueObjectInspector();
                // \1 followed by each key and then each value
                Map<?, ?> map = moi.getMap(o);
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    writeByte(buffer, (byte) 1, invert);
                    serialize(buffer, entry.getKey(), koi, invert, nullMarker, notNullMarker);
                    serialize(buffer, entry.getValue(), voi, invert, nullMarker, notNullMarker);
                }
                // and \0 to terminate
                writeByte(buffer, (byte) 0, invert);
                return;
            }
        case STRUCT:
            {
                StructObjectInspector soi = (StructObjectInspector) oi;
                List<? extends StructField> fields = soi.getAllStructFieldRefs();
                for (int i = 0; i < fields.size(); i++) {
                    serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(), invert, nullMarker, notNullMarker);
                }
                return;
            }
        case UNION:
            {
                UnionObjectInspector uoi = (UnionObjectInspector) oi;
                byte tag = uoi.getTag(o);
                writeByte(buffer, tag, invert);
                serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert, nullMarker, notNullMarker);
                return;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + oi.getCategory());
            }
    }
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) List(java.util.List) ArrayList(java.util.ArrayList) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) TimestampLocalTZWritable(org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) Map(java.util.Map) HashMap(java.util.HashMap) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 85 with TimestampWritable

use of org.apache.hadoop.hive.serde2.io.TimestampWritable in project hive by apache.

the class WritableHiveVarcharObjectInspector method getPrimitiveWritableObject.

@Override
public HiveVarcharWritable getPrimitiveWritableObject(Object o) {
    // then output new writable with correct params.
    if (o == null) {
        return null;
    }
    if ((o instanceof Text) || (o instanceof TimestampWritable) || (o instanceof HiveDecimalWritable) || (o instanceof DoubleWritable) || (o instanceof FloatWritable) || (o instanceof LongWritable) || (o instanceof IntWritable) || (o instanceof BooleanWritable)) {
        String str = o.toString();
        HiveVarcharWritable hcw = new HiveVarcharWritable();
        hcw.set(str, ((VarcharTypeInfo) typeInfo).getLength());
        return hcw;
    }
    HiveVarcharWritable writable = ((HiveVarcharWritable) o);
    if (doesWritableMatchTypeParams((HiveVarcharWritable) o)) {
        return writable;
    }
    return getWritableWithParams(writable);
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Aggregations

TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable)87 IntWritable (org.apache.hadoop.io.IntWritable)41 Text (org.apache.hadoop.io.Text)34 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)33 Timestamp (java.sql.Timestamp)30 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)29 LongWritable (org.apache.hadoop.io.LongWritable)28 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)25 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)25 BooleanWritable (org.apache.hadoop.io.BooleanWritable)25 FloatWritable (org.apache.hadoop.io.FloatWritable)25 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)24 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)24 HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable)23 BytesWritable (org.apache.hadoop.io.BytesWritable)23 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)20 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)20 HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)20 HiveIntervalYearMonthWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable)19 Test (org.junit.Test)19