use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project haivvreo by jghoman.
the class TestAvroDeserializer method verifyNullableType.
private void verifyNullableType(GenericData.Record record, Schema s, String expected) throws SerDeException, IOException {
assertTrue(GENERIC_DATA.validate(s, record));
AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
AvroDeserializer de = new AvroDeserializer();
ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
assertEquals(1, row.size());
Object rowElement = row.get(0);
StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
assertEquals(1, fieldsDataAsList.size());
StructField fieldRef = oi.getStructFieldRef("nullablestring");
ObjectInspector fieldObjectInspector = fieldRef.getFieldObjectInspector();
StringObjectInspector soi = (StringObjectInspector) fieldObjectInspector;
if (expected == null)
assertNull(soi.getPrimitiveJavaObject(rowElement));
else
assertEquals("this is a string", soi.getPrimitiveJavaObject(rowElement));
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project haivvreo by jghoman.
the class TestAvroDeserializer method canDeserializeEnums.
// Enums are one of two types we fudge for Hive. Enums go in, Strings come out.
@Test
public void canDeserializeEnums() throws SerDeException, IOException {
Schema s = Schema.parse(TestAvroObjectInspectorGenerator.ENUM_SCHEMA);
GenericData.Record record = new GenericData.Record(s);
record.put("baddies", "DALEKS");
assertTrue(GENERIC_DATA.validate(s, record));
AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
AvroDeserializer de = new AvroDeserializer();
ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
assertEquals(1, row.size());
StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
assertEquals(1, fieldRefs.size());
StructField fieldRef = fieldRefs.get(0);
assertEquals("baddies", fieldRef.getFieldName());
Object theStringObject = oi.getStructFieldData(row, fieldRef);
assertTrue(fieldRef.getFieldObjectInspector() instanceof StringObjectInspector);
StringObjectInspector soi = (StringObjectInspector) fieldRef.getFieldObjectInspector();
String finalValue = soi.getPrimitiveJavaObject(theStringObject);
assertEquals("DALEKS", finalValue);
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
the class TestOrcSerDeStats method testStringAndBinaryStatistics.
@Test
public void testStringAndBinaryStatistics() throws Exception {
ObjectInspector inspector;
synchronized (TestOrcSerDeStats.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector(SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
writer.addRow(new SimpleStruct(null, "hi"));
writer.close();
assertEquals(4, writer.getNumberOfRows());
assertEquals(273, writer.getRawDataSize());
Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(4, reader.getNumberOfRows());
assertEquals(273, reader.getRawDataSize());
assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
assertEquals("count: 4 hasNull: false", stats[0].toString());
assertEquals(3, stats[1].getNumberOfValues());
assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
assertEquals(3, stats[2].getNumberOfValues());
assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8", stats[2].toString());
// check the inspectors
StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
assertEquals("struct<bytes1:binary,string1:string>", readerInspector.getTypeName());
List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
StringObjectInspector st = (StringObjectInspector) readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);
// check the contents of the first row
assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
assertEquals("foo", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
// check the contents of second row
assertEquals(true, rows.hasNext());
row = rows.next(row);
assertEquals(bytes(0, 1, 2, 3), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
assertEquals("bar", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
// check the contents of second row
assertEquals(true, rows.hasNext());
row = rows.next(row);
assertEquals(bytes(0, 1, 2, 3, 4, 5), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
assertNull(st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
// check the contents of second row
assertEquals(true, rows.hasNext());
row = rows.next(row);
assertNull(bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
// handle the close up
assertEquals(false, rows.hasNext());
rows.close();
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
the class RegexSerDe method serialize.
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
if (outputFormatString == null) {
throw new SerDeException("Cannot write data into table because \"output.format.string\"" + " is not specified in serde properties of the table.");
}
// Get all the fields out.
// NOTE: The correct way to get fields out of the row is to use
// objInspector.
// The obj can be a Java ArrayList, or a Java class, or a byte[] or
// whatever.
// The only way to access the data inside the obj is through
// ObjectInspector.
StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs();
if (outputFieldRefs.size() != numColumns) {
throw new SerDeException("Cannot serialize the object because there are " + outputFieldRefs.size() + " fields but the table has " + numColumns + " columns.");
}
// Get all data out.
for (int c = 0; c < numColumns; c++) {
Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c));
ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector();
// The data must be of type String
StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
// Convert the field to Java class String, because objects of String type
// can be
// stored in String, Text, or some other classes.
outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
}
// Format the String
String outputRowString = null;
try {
outputRowString = String.format(outputFormatString, outputFields);
} catch (MissingFormatArgumentException e) {
throw new SerDeException("The table contains " + numColumns + " columns, but the outputFormatString is asking for more.", e);
}
outputRowText.set(outputRowString);
return outputRowText;
}
use of org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector in project hive by apache.
the class TypedBytesSerDe method serializeField.
private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
switch(oi.getCategory()) {
case PRIMITIVE:
{
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
switch(poi.getPrimitiveCategory()) {
case VOID:
{
return;
}
case BOOLEAN:
{
BooleanObjectInspector boi = (BooleanObjectInspector) poi;
BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
r.set(boi.get(o));
tbOut.write(r);
return;
}
case BYTE:
{
ByteObjectInspector boi = (ByteObjectInspector) poi;
ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
r.set(boi.get(o));
tbOut.write(r);
return;
}
case SHORT:
{
ShortObjectInspector spoi = (ShortObjectInspector) poi;
ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
r.set(spoi.get(o));
tbOut.write(r);
return;
}
case INT:
{
IntObjectInspector ioi = (IntObjectInspector) poi;
IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
r.set(ioi.get(o));
tbOut.write(r);
return;
}
case LONG:
{
LongObjectInspector loi = (LongObjectInspector) poi;
LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
r.set(loi.get(o));
tbOut.write(r);
return;
}
case FLOAT:
{
FloatObjectInspector foi = (FloatObjectInspector) poi;
FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
r.set(foi.get(o));
tbOut.write(r);
return;
}
case DOUBLE:
{
DoubleObjectInspector doi = (DoubleObjectInspector) poi;
DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
r.set(doi.get(o));
tbOut.write(r);
return;
}
case STRING:
{
StringObjectInspector soi = (StringObjectInspector) poi;
Text t = soi.getPrimitiveWritableObject(o);
tbOut.write(t);
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
}
}
}
case LIST:
case MAP:
case STRUCT:
{
// For complex object, serialize to JSON format
String s = SerDeUtils.getJSONString(o, oi);
Text t = reuse == null ? new Text() : (Text) reuse;
// convert to Text and write it
t.set(s);
tbOut.write(t);
}
default:
{
throw new RuntimeException("Unrecognized type: " + oi.getCategory());
}
}
}
Aggregations