Search in sources :

Example 1 with StandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

the class TestAvroDeserializer method canDeserializeEnums.

/**
 * Enums are one of two types we fudge for Hive: an Avro enum symbol goes in
 * and a plain String comes out.
 */
@Test
public void canDeserializeEnums() throws SerDeException, IOException {
    final Schema enumSchema = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.ENUM_SCHEMA);

    // Build a record whose "baddies" field holds the enum symbol DALEKS.
    final GenericData.Record input = new GenericData.Record(enumSchema);
    final Schema baddiesSchema = enumSchema.getField("baddies").schema();
    input.put("baddies", new GenericData.EnumSymbol(baddiesSchema, "DALEKS"));
    assertTrue(GENERIC_DATA.validate(enumSchema, input));

    // Pass the record through the test helper, then deserialize it into a Hive row.
    final AvroGenericRecordWritable writable = Utils.serializeAndDeserializeRecord(input);
    final AvroObjectInspectorGenerator generator = new AvroObjectInspectorGenerator(enumSchema);
    final AvroDeserializer deserializer = new AvroDeserializer();
    final ArrayList<Object> hiveRow = (ArrayList<Object>) deserializer.deserialize(
            generator.getColumnNames(), generator.getColumnTypes(), writable, enumSchema);
    assertEquals(1, hiveRow.size());

    // The generated struct inspector must expose exactly one field, named "baddies".
    final StandardStructObjectInspector structInspector =
            (StandardStructObjectInspector) generator.getObjectInspector();
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    assertEquals(1, fields.size());
    final StructField baddiesField = fields.get(0);
    assertEquals("baddies", baddiesField.getFieldName());

    // Read the value back through the inspectors; it must surface as a String.
    final Object rawValue = structInspector.getStructFieldData(hiveRow, baddiesField);
    assertTrue(baddiesField.getFieldObjectInspector() instanceof StringObjectInspector);
    final String symbolAsString =
            ((StringObjectInspector) baddiesField.getFieldObjectInspector()).getPrimitiveJavaObject(rawValue);
    assertEquals("DALEKS", symbolAsString);
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) Test(org.junit.Test)

Example 2 with StandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

the class TestAvroDeserializer method canDeserializeMapsWithJavaLangStringKeys.

/**
 * Ensures maps can be deserialized when avro.java.string=String.
 * See http://stackoverflow.com/a/19868919/312944 for why that might be used.
 */
@Test
public void canDeserializeMapsWithJavaLangStringKeys() throws IOException, SerDeException {
    // Record schema with a single map field "aMap" (string keys flagged with
    // avro.java.string=String, long values).
    String schemaString = "{\n"
            + "  \"namespace\": \"testing\",\n"
            + "  \"name\": \"oneMap\",\n"
            + "  \"type\": \"record\",\n"
            + "  \"fields\": [\n"
            + "    {\n"
            + "      \"name\":\"aMap\",\n"
            + "      \"type\":{\"type\":\"map\",\n"
            + "      \"avro.java.string\":\"String\",\n"
            + "      \"values\":\"long\"}\n"
            + "\t}\n"
            + "  ]\n"
            + "}";
    Schema s = AvroSerdeUtils.getSchemaFor(schemaString);
    GenericData.Record record = new GenericData.Record(s);
    Map<String, Long> m = new Hashtable<String, Long>();
    // Uppercase 'L' suffix: a lowercase 'l' is easily misread as the digit 1.
    m.put("one", 1L);
    m.put("two", 2L);
    m.put("three", 3L);
    record.put("aMap", m);
    assertTrue(GENERIC_DATA.validate(s, record));
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    AvroDeserializer de = new AvroDeserializer();
    ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    Object theMapObject = row.get(0);
    assertTrue(theMapObject instanceof Map);
    // Wildcard-typed rather than raw: only get(Object) is needed here.
    Map<?, ?> theMap = (Map<?, ?>) theMapObject;
    // Verify the raw object that's been created
    assertEquals(1L, theMap.get("one"));
    assertEquals(2L, theMap.get("two"));
    assertEquals(3L, theMap.get("three"));
    // Verify that the provided object inspector can pull out these same values
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> z = oi.getStructFieldsDataAsList(row);
    assertEquals(1, z.size());
    // The field is looked up as "amap" although the record field is "aMap";
    // presumably the inspector exposes lower-cased field names - confirm against Hive.
    StructField fieldRef = oi.getStructFieldRef("amap");
    Map<?, ?> theMap2 = (Map<?, ?>) oi.getStructFieldData(row, fieldRef);
    assertEquals(1L, theMap2.get("one"));
    assertEquals(2L, theMap2.get("two"));
    assertEquals(3L, theMap2.get("three"));
}
Also used : Hashtable(java.util.Hashtable) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 3 with StandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

the class TestAvroDeserializer method canDeserializeFixed.

// Fixed doesn't exist in Hive. Fixeds go in, byte arrays come out.
@Test
public void canDeserializeFixed() throws SerDeException, IOException {
    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.FIXED_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);
    byte[] bytes = "ANANCIENTBLUEBOX".getBytes();
    // Give the Fixed value the schema of the "hash" field itself rather than the
    // enclosing record's schema, mirroring how the enum test builds its symbol.
    record.put("hash", new GenericData.Fixed(s.getField("hash").schema(), bytes));
    assertTrue(GENERIC_DATA.validate(s, record));
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    AvroDeserializer de = new AvroDeserializer();
    ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    Object byteObject = row.get(0);
    assertTrue(byteObject instanceof byte[]);
    byte[] outBytes = (byte[]) byteObject;
    // Verify the raw object that's been created.
    // Check the length first: without it a longer result would go unnoticed and a
    // shorter one would surface as ArrayIndexOutOfBoundsException, not a test failure.
    assertEquals(bytes.length, outBytes.length);
    for (int i = 0; i < bytes.length; i++) {
        assertEquals(bytes[i], outBytes[i]);
    }
    // Now go the correct way, through objectinspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
    assertEquals(1, fieldsDataAsList.size());
    StructField fieldRef = oi.getStructFieldRef("hash");
    outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
    // Same guard here: a truncated array would otherwise pass the element loop.
    assertEquals(bytes.length, outBytes.length);
    for (int i = 0; i < outBytes.length; i++) {
        assertEquals(bytes[i], outBytes[i]);
    }
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) Test(org.junit.Test)

Example 4 with StandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

the class TestAvroObjectInspectorGenerator method primitiveTypesWorkCorrectly.

/**
 * Builds an inspector generator from a record schema containing one field of
 * each primitive Avro type and checks the column names, the column types and
 * the fields of the resulting struct object inspector.
 */
@Test
public void primitiveTypesWorkCorrectly() throws SerDeException {
    final String primitivesSchemaJson = "{\n"
            + "  \"namespace\": \"testing\",\n"
            + "  \"name\": \"PrimitiveTypes\",\n"
            + "  \"type\": \"record\",\n"
            + "  \"fields\": [\n"
            + "    {\n"
            + "      \"name\":\"aString\",\n"
            + "      \"type\":\"string\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\":\"anInt\",\n"
            + "      \"type\":\"int\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\":\"aBoolean\",\n"
            + "      \"type\":\"boolean\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\":\"aLong\",\n"
            + "      \"type\":\"long\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\":\"aFloat\",\n"
            + "      \"type\":\"float\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\":\"aDouble\",\n"
            + "      \"type\":\"double\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\":\"aNull\",\n"
            + "      \"type\":\"null\"\n"
            + "    }\n"
            + "  ]\n"
            + "}";
    final AvroObjectInspectorGenerator generator =
            new AvroObjectInspectorGenerator(AvroSerdeUtils.getSchemaFor(primitivesSchemaJson));

    // Column names are expected in schema order, with their original casing.
    final String[] wantedNames = new String[] {
            "aString", "anInt", "aBoolean", "aLong", "aFloat", "aDouble", "aNull" };
    verifyColumnNames(wantedNames, generator.getColumnNames());

    // One expected type per column, in the same order as the names above.
    final TypeInfo[] wantedTypes = new TypeInfo[] { STRING, INT, BOOLEAN, LONG, FLOAT, DOUBLE, VOID };
    verifyColumnTypes(wantedTypes, generator.getColumnTypes());

    // Rip apart the object inspector, making sure we got what we expect.
    final ObjectInspector inspector = generator.getObjectInspector();
    assertTrue(inspector instanceof StandardStructObjectInspector);
    final List<? extends StructField> fields =
            ((StandardStructObjectInspector) inspector).getAllStructFieldRefs();
    assertEquals(wantedNames.length, fields.size());

    // Struct field names are compared against the lower-cased column names.
    int position = 0;
    for (StructField field : fields) {
        assertEquals("Column names don't match",
                wantedNames[position].toLowerCase(), field.getFieldName());
        assertEquals("Column types don't match",
                wantedTypes[position].getTypeName(), field.getFieldObjectInspector().getTypeName());
        position++;
    }
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Test(org.junit.Test)

Example 5 with StandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

the class TestAvroDeserializer method unionTester.

/**
 * Shared driver for the union tests: validates {@code record} against
 * {@code ws}, deserializes it using {@code rs}, and extracts the single
 * "aunion" field through the generated object inspectors.
 * (The names suggest writer/reader schemas - confirm with the callers.)
 *
 * @param ws     schema the record is validated against
 * @param rs     schema used to build the inspectors and to deserialize
 * @param record record holding the union value under test
 * @return the union's object inspector, the value it resolves to, and the raw union object
 */
private ResultPair unionTester(Schema ws, Schema rs, GenericData.Record record) throws SerDeException, IOException {
    assertTrue(GENERIC_DATA.validate(ws, record));

    // Pass the record through the test helper, then deserialize it into a Hive row.
    final AvroGenericRecordWritable writable = Utils.serializeAndDeserializeRecord(record);
    final AvroObjectInspectorGenerator generator = new AvroObjectInspectorGenerator(rs);
    final AvroDeserializer deserializer = new AvroDeserializer();
    final ArrayList<Object> hiveRow = (ArrayList<Object>) deserializer.deserialize(
            generator.getColumnNames(), generator.getColumnTypes(), writable, rs);
    assertEquals(1, hiveRow.size());

    // Exactly one struct field is expected, named "aunion".
    final StandardStructObjectInspector structInspector =
            (StandardStructObjectInspector) generator.getObjectInspector();
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    assertEquals(1, fields.size());
    final StructField unionField = fields.get(0);
    assertEquals("aunion", unionField.getFieldName());

    // Pull the raw union out of the row and resolve its current value.
    final Object rawUnion = structInspector.getStructFieldData(hiveRow, unionField);
    assertTrue(unionField.getFieldObjectInspector() instanceof UnionObjectInspector);
    final UnionObjectInspector unionInspector = (UnionObjectInspector) unionField.getFieldObjectInspector();
    return new ResultPair(unionInspector, unionInspector.getField(rawUnion), rawUnion);
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector)

Aggregations

ArrayList (java.util.ArrayList)21 StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector)20 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)16 Test (org.junit.Test)13 GenericData (org.apache.avro.generic.GenericData)12 Schema (org.apache.avro.Schema)11 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)11 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)6 JavaStringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector)6 Map (java.util.Map)5 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)5 HashMap (java.util.HashMap)4 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)4 VoidObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector)4 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)4 UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector)3 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)3 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)3 Hashtable (java.util.Hashtable)2 List (java.util.List)2