Search in sources :

Example 16 with StringObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project haivvreo by jghoman.

In the class TestAvroDeserializer, the method verifyNullableType:

/**
 * Validates {@code record} against {@code s}, round-trips it through
 * {@code Utils.serializeAndDeserializeRecord} and {@link AvroDeserializer}, and checks the
 * single resulting column (looked up as "nullablestring") against {@code expected}.
 *
 * @param record   the Avro record to round-trip; must validate against {@code s}
 * @param s        the Avro schema used for validation, inspector generation and deserialization
 * @param expected the string the deserialized column must equal, or {@code null} when the
 *                 column is expected to deserialize to null
 * @throws SerDeException propagated from the inspector generator or deserializer
 * @throws IOException    propagated from the serialization round trip
 */
private void verifyNullableType(GenericData.Record record, Schema s, String expected) throws SerDeException, IOException {
    assertTrue(GENERIC_DATA.validate(s, record));
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    AvroDeserializer de = new AvroDeserializer();
    ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    Object rowElement = row.get(0);
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
    assertEquals(1, fieldsDataAsList.size());
    StructField fieldRef = oi.getStructFieldRef("nullablestring");
    ObjectInspector fieldObjectInspector = fieldRef.getFieldObjectInspector();
    StringObjectInspector soi = (StringObjectInspector) fieldObjectInspector;
    if (expected == null) {
        assertNull(soi.getPrimitiveJavaObject(rowElement));
    } else {
        // Compare against the caller-supplied value rather than a hard-coded literal,
        // so the helper actually verifies what the caller asked for.
        assertEquals(expected, soi.getPrimitiveJavaObject(rowElement));
    }
}
Also used : VoidObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ArrayList(java.util.ArrayList) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)

Example 17 with StringObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project haivvreo by jghoman.

In the class TestAvroDeserializer, the method canDeserializeEnums:

// Enums are one of the two types that get special treatment for Hive:
// an Avro enum goes in, and a plain String comes back out.
@Test
public void canDeserializeEnums() throws SerDeException, IOException {
    // Build a record holding a single enum-typed field and make sure it is valid Avro.
    Schema enumSchema = Schema.parse(TestAvroObjectInspectorGenerator.ENUM_SCHEMA);
    GenericData.Record enumRecord = new GenericData.Record(enumSchema);
    enumRecord.put("baddies", "DALEKS");
    assertTrue(GENERIC_DATA.validate(enumSchema, enumRecord));

    // Round-trip the record and deserialize it into a Hive row.
    AvroGenericRecordWritable writable = Utils.serializeAndDeserializeRecord(enumRecord);
    AvroObjectInspectorGenerator inspectorGenerator = new AvroObjectInspectorGenerator(enumSchema);
    AvroDeserializer deserializer = new AvroDeserializer();
    ArrayList<Object> deserializedRow = (ArrayList<Object>) deserializer.deserialize(
            inspectorGenerator.getColumnNames(), inspectorGenerator.getColumnTypes(), writable, enumSchema);
    assertEquals(1, deserializedRow.size());

    // The struct inspector must expose exactly one field, named after the enum column.
    StandardStructObjectInspector structInspector =
            (StandardStructObjectInspector) inspectorGenerator.getObjectInspector();
    List<? extends StructField> allFields = structInspector.getAllStructFieldRefs();
    assertEquals(1, allFields.size());
    StructField baddiesField = allFields.get(0);
    assertEquals("baddies", baddiesField.getFieldName());

    // The field must be inspected as a string and carry the enum symbol's name.
    Object fieldData = structInspector.getStructFieldData(deserializedRow, baddiesField);
    assertTrue(baddiesField.getFieldObjectInspector() instanceof StringObjectInspector);
    StringObjectInspector stringInspector = (StringObjectInspector) baddiesField.getFieldObjectInspector();
    assertEquals("DALEKS", stringInspector.getPrimitiveJavaObject(fieldData));
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) Test(org.junit.Test)

Example 18 with StringObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.

In the class TestOrcSerDeStats, the method testStringAndBinaryStatistics:

/**
 * Writes four {@code SimpleStruct} rows (one with a null string, one with a null binary value)
 * to an ORC file, then verifies the row count, the raw data sizes, the per-column statistics,
 * the reader's object inspectors, and the contents of every row read back.
 */
@Test
public void testStringAndBinaryStatistics() throws Exception {
    ObjectInspector inspector;
    // NOTE(review): the reflection inspector is created under a class-level lock; presumably
    // to guard shared inspector state across concurrently running tests — confirm.
    synchronized (TestOrcSerDeStats.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
    // Four rows: the third has a null string, the fourth has a null binary value.
    writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
    writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
    writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
    writer.addRow(new SimpleStruct(null, "hi"));
    writer.close();
    // The writer-side totals are checked after close().
    assertEquals(4, writer.getNumberOfRows());
    assertEquals(273, writer.getRawDataSize());
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    // The reader must report the same totals as the writer.
    assertEquals(4, reader.getNumberOfRows());
    assertEquals(273, reader.getRawDataSize());
    // Per-column raw sizes: 15 (bytes1) + 258 (string1) = 273 (both columns).
    assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
    assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
    assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    // Index 0 counts all four rows with no nulls (presumably the top-level struct — confirm).
    assertEquals(4, stats[0].getNumberOfValues());
    assertEquals("count: 4 hasNull: false", stats[0].toString());
    // Index 1 is the binary column: three non-null values; the expected sum 15 matches
    // 5 + 4 + 6, the argument counts passed to bytes(...) above.
    assertEquals(3, stats[1].getNumberOfValues());
    assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
    // Index 2 is the string column: three non-null values; the expected sum 8 matches the
    // combined length of "foo", "bar" and "hi".
    assertEquals(3, stats[2].getNumberOfValues());
    assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
    assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
    assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8", stats[2].toString());
    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
    assertEquals("struct<bytes1:binary,string1:string>", readerInspector.getTypeName());
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
    RecordReader rows = reader.rows();
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row
    assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals("foo", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // check the contents of the second row (the previous row object is passed back to next())
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertEquals(bytes(0, 1, 2, 3), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals("bar", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // check the contents of the third row: the string column must be null
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertEquals(bytes(0, 1, 2, 3, 4, 5), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertNull(st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // check the contents of the fourth row: the binary column must be null
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertNull(bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // handle the close up: no rows may remain after the fourth
    assertEquals(false, rows.hasNext());
    rows.close();
}
Also used : DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 19 with StringObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.

In the class RegexSerDe, the method serialize:

/**
 * Serializes one row by reading every field as a Java String through its ObjectInspector and
 * substituting the values into the configured output format string.
 *
 * @param obj          the row object; only ever accessed through {@code objInspector}
 * @param objInspector inspector for {@code obj}; cast to {@link StructObjectInspector}
 * @return the shared {@code outputRowText} instance, set to the formatted row
 * @throws SerDeException if no output format string is configured, if the number of fields
 *                        differs from the table's column count, or if the format string asks
 *                        for more arguments than there are columns
 */
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (outputFormatString == null) {
        throw new SerDeException("Cannot write data into table because \"output.format.string\"" + " is not specified in serde properties of the table.");
    }

    // The row may be an ArrayList, a plain Java object, a byte[] or anything else, so its
    // fields are reachable only via the ObjectInspector, never by inspecting obj directly.
    StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
    List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
    if (fieldRefs.size() != numColumns) {
        throw new SerDeException("Cannot serialize the object because there are " + fieldRefs.size() + " fields but the table has " + numColumns + " columns.");
    }

    // Pull each column out as a java.lang.String; the underlying value may be stored as a
    // String, a Text, or some other representation, and the inspector hides that difference.
    for (int index = 0; index < numColumns; index++) {
        StructField ref = fieldRefs.get(index);
        Object rawValue = rowInspector.getStructFieldData(obj, ref);
        // Every column is required to be string-typed.
        StringObjectInspector stringInspector = (StringObjectInspector) ref.getFieldObjectInspector();
        outputFields[index] = stringInspector.getPrimitiveJavaObject(rawValue);
    }

    // Render the row with the user-supplied format string.
    final String formattedRow;
    try {
        formattedRow = String.format(outputFormatString, outputFields);
    } catch (MissingFormatArgumentException e) {
        throw new SerDeException("The table contains " + numColumns + " columns, but the outputFormatString is asking for more.", e);
    }
    outputRowText.set(formattedRow);
    return outputRowText;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) MissingFormatArgumentException(java.util.MissingFormatArgumentException) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 20 with StringObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.

In the class TypedBytesSerDe, the method serializeField:

/**
 * Writes a single field value to {@code tbOut}.
 *
 * <p>Primitive values are copied into a Writable of the matching type and written; VOID
 * writes nothing. LIST, MAP and STRUCT values are rendered to a JSON string via
 * {@code SerDeUtils.getJSONString} and written as {@link Text}.
 *
 * @param o     the field value, read through {@code oi}
 * @param oi    the inspector describing {@code o}
 * @param reuse an optional Writable to reuse instead of allocating a new one; when non-null
 *              it is cast to the Writable type matching the field (not used for STRING,
 *              where the inspector supplies the Text directly)
 * @throws IOException      if writing to the underlying output fails
 * @throws RuntimeException if the category or primitive category is not handled here
 */
private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    switch(oi.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
                // Every branch of this inner switch returns or throws, so control never
                // falls through into the complex-type cases below.
                switch(poi.getPrimitiveCategory()) {
                    case VOID:
                        {
                            // Nothing is written for VOID.
                            return;
                        }
                    case BOOLEAN:
                        {
                            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
                            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                            r.set(boi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case BYTE:
                        {
                            ByteObjectInspector boi = (ByteObjectInspector) poi;
                            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                            r.set(boi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case SHORT:
                        {
                            ShortObjectInspector spoi = (ShortObjectInspector) poi;
                            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                            r.set(spoi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case INT:
                        {
                            IntObjectInspector ioi = (IntObjectInspector) poi;
                            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                            r.set(ioi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case LONG:
                        {
                            LongObjectInspector loi = (LongObjectInspector) poi;
                            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                            r.set(loi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case FLOAT:
                        {
                            FloatObjectInspector foi = (FloatObjectInspector) poi;
                            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                            r.set(foi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case DOUBLE:
                        {
                            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                            r.set(doi.get(o));
                            tbOut.write(r);
                            return;
                        }
                    case STRING:
                        {
                            StringObjectInspector soi = (StringObjectInspector) poi;
                            Text t = soi.getPrimitiveWritableObject(o);
                            tbOut.write(t);
                            return;
                        }
                    default:
                        {
                            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
                        }
                }
            }
        case LIST:
        case MAP:
        case STRUCT:
            {
                // For complex object, serialize to JSON format
                String s = SerDeUtils.getJSONString(o, oi);
                Text t = reuse == null ? new Text() : (Text) reuse;
                // convert to Text and write it
                t.set(s);
                tbOut.write(t);
                // Return explicitly: without this, control fell through into the default
                // case and threw "Unrecognized type" after the value had been written.
                return;
            }
        default:
            {
                throw new RuntimeException("Unrecognized type: " + oi.getCategory());
            }
    }
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) FloatWritable(org.apache.hadoop.io.FloatWritable) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) BooleanWritable(org.apache.hadoop.io.BooleanWritable) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongWritable(org.apache.hadoop.io.LongWritable) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Aggregations

StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)31 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)16 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)15 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)15 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)12 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)12 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)12 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)11 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)11 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)11 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)11 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)10 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)10 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)10 Text (org.apache.hadoop.io.Text)10 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)8 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)8 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)7 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)7 HiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector)7