Example 6 with StandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

From the class TestAvroDeserializer, method canDeserializeBytes.

@Test
public void canDeserializeBytes() throws SerDeException, IOException {
    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.BYTES_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);
    byte[] bytes = "ANANCIENTBLUEBOX".getBytes();
    ByteBuffer bb = ByteBuffer.wrap(bytes);
    bb.rewind();
    record.put("bytesField", bb);
    assertTrue(GENERIC_DATA.validate(s, record));
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    AvroDeserializer de = new AvroDeserializer();
    ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    Object byteObject = row.get(0);
    assertTrue(byteObject instanceof byte[]);
    byte[] outBytes = (byte[]) byteObject;
    // Verify the raw object that's been created
    for (int i = 0; i < bytes.length; i++) {
        assertEquals(bytes[i], outBytes[i]);
    }
    // Now go the correct way, through object inspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
    assertEquals(1, fieldsDataAsList.size());
    StructField fieldRef = oi.getStructFieldRef("bytesField");
    outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
    for (int i = 0; i < outBytes.length; i++) {
        assertEquals(bytes[i], outBytes[i]);
    }
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) ByteBuffer(java.nio.ByteBuffer) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) Test(org.junit.Test)
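
The two-step access at the end of this test (getStructFieldRef, then getStructFieldData) is not Avro-specific. Below is a minimal, self-contained sketch of the same pattern against a hand-built standard struct object inspector; the class name StructFieldAccessSketch, the field names, and the row values are hypothetical, not part of the test above.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructFieldAccessSketch {
    public static void main(String[] args) {
        // Parallel lists of field names and field object inspectors define the struct.
        List<String> names = Arrays.asList("id", "name");
        List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StandardStructObjectInspector oi =
                ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs);
        // For a standard struct OI, a row is simply a List of field values in order.
        List<Object> row = Arrays.<Object>asList(42, "doctor");
        // Look the field ref up once, then use it to read the field from any row.
        StructField nameRef = oi.getStructFieldRef("name");
        System.out.println(oi.getStructFieldData(row, nameRef)); // prints: doctor
    }
}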

Example 7 with StandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

From the class TestAvroDeserializer, method canDeserializeArrays.

@Test
public void canDeserializeArrays() throws SerDeException, IOException {
    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
    GenericData.Record record = new GenericData.Record(s);
    List<String> list = new ArrayList<String>();
    list.add("Eccleston");
    list.add("Tennant");
    list.add("Smith");
    record.put("anArray", list);
    assertTrue(GENERIC_DATA.validate(s, record));
    System.out.println("Array-backed record = " + record);
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    AvroDeserializer de = new AvroDeserializer();
    ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    Object theArrayObject = row.get(0);
    assertTrue(theArrayObject instanceof List);
    List theList = (List) theArrayObject;
    // Verify the raw object that's been created
    assertEquals("Eccleston", theList.get(0));
    assertEquals("Tennant", theList.get(1));
    assertEquals("Smith", theList.get(2));
    // Now go the correct way, through object inspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    StructField fieldRefToArray = oi.getStructFieldRef("anArray");
    Object anArrayData = oi.getStructFieldData(row, fieldRefToArray);
    StandardListObjectInspector anArrayOI = (StandardListObjectInspector) fieldRefToArray.getFieldObjectInspector();
    assertEquals(3, anArrayOI.getListLength(anArrayData));
    JavaStringObjectInspector elementOI = (JavaStringObjectInspector) anArrayOI.getListElementObjectInspector();
    Object firstElement = anArrayOI.getListElement(anArrayData, 0);
    assertEquals("Eccleston", elementOI.getPrimitiveJavaObject(firstElement));
    assertTrue(firstElement instanceof String);
    Object secondElement = anArrayOI.getListElement(anArrayData, 1);
    assertEquals("Tennant", elementOI.getPrimitiveJavaObject(secondElement));
    assertTrue(secondElement instanceof String);
    Object thirdElement = anArrayOI.getListElement(anArrayData, 2);
    assertEquals("Smith", elementOI.getPrimitiveJavaObject(thirdElement));
    assertTrue(thirdElement instanceof String);
}
Also used : JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) List(java.util.List) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) Test(org.junit.Test)
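
A minimal sketch of the list traversal used above, decoupled from Avro: a standard list object inspector exposes length and indexed element access for any backing List. The class name ListInspectorSketch and the data are hypothetical.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ListInspectorSketch {
    public static void main(String[] args) {
        // A list-of-string inspector: the factory wraps the element inspector.
        ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        List<String> data = Arrays.asList("Eccleston", "Tennant", "Smith");
        // The OI treats the list as an opaque object: length plus indexed access.
        for (int i = 0; i < listOI.getListLength(data); i++) {
            System.out.println(listOI.getListElement(data, i));
        }
        System.out.println(listOI.getListElementObjectInspector().getTypeName()); // string
    }
}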

Example 8 with StandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

From the class VectorMapOperator, method internalSetChildren.

/*
   * Create information for vector map operator.
   * The member oneRootOperator has been set.
   */
private void internalSetChildren(Configuration hconf) throws Exception {
    // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
    // so set it here to none.
    currentReadType = VectorMapOperatorReadType.NONE;
    batchContext = conf.getVectorizedRowBatchCtx();
    /*
     * Use a different batch for vectorized input file format readers so their work can be
     * overlapped with the row collection that vector/row deserialization does. This allows
     * partitions to mix modes (e.g. we can flush the previously batched rows on a file change).
     */
    vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
    conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
    /*
     * This batch is used by vector/row deserializer readers.
     */
    deserializerBatch = batchContext.createVectorizedRowBatch();
    batchCounter = 0;
    dataColumnCount = batchContext.getDataColumnCount();
    partitionColumnCount = batchContext.getPartitionColumnCount();
    partitionValues = new Object[partitionColumnCount];
    dataColumnNums = batchContext.getDataColumnNums();
    Preconditions.checkState(dataColumnNums != null);
    // Form a truncated boolean include array for our vector/row deserializers.
    determineDataColumnsToIncludeTruncated();
    /*
     * Create table related objects
     */
    final String[] rowColumnNames = batchContext.getRowColumnNames();
    final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
    tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    tableStandardStructObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
    tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
    /*
     * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
     * the included columns (+ partition columns).
     *
     * For now, we need to model the object inspector rows because there are still several
     * vectorized operators that use them.
     *
     * We need to continue to model the Object[] as having null objects for not included columns
     * until the following has been fixed:
     *    o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
     *    o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
     *      use the row super class to process rows.
     */
    /*
     * The Vectorizer class enforces that there is only one TableScanOperator, so
     * we don't need the more complicated multiple root operator mapping that MapOperator has.
     */
    fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
    // Temporary map so we only create one partition context entry.
    HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap = new HashMap<PartitionDesc, VectorPartitionContext>();
    for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
        VectorPartitionContext vectorPartitionContext;
        if (!partitionContextMap.containsKey(partDesc)) {
            vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
            partitionContextMap.put(partDesc, vectorPartitionContext);
        } else {
            vectorPartitionContext = partitionContextMap.get(partDesc);
        }
        fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
    }
    // Create list of one.
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    children.add(oneRootOperator);
    setChildOperators(children);
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
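
The table-OI step in the middle of internalSetChildren can be exercised on its own. A minimal sketch, with hypothetical column names and types: build a struct TypeInfo from parallel name/type lists, then derive the standard writable struct object inspector from it, as the method does for the table row.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TableOiSketch {
    public static void main(String[] args) {
        // Hypothetical two-column table: a string column and a bigint column.
        StructTypeInfo structType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("name", "count"),
                Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.longTypeInfo));
        // Same derivation as internalSetChildren: struct TypeInfo -> standard writable OI.
        StandardStructObjectInspector tableOI = (StandardStructObjectInspector)
                TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structType);
        System.out.println(tableOI.getTypeName()); // struct<name:string,count:bigint>
    }
}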

Example 9 with StandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.

From the class Utilities, method constructVectorizedReduceRowOI.

/**
   * Create row key and value object inspectors for reduce vectorization.
   * The row object inspector used by ReduceWork needs to be a **standard**
   * struct object inspector, not just any struct object inspector.
   * @param keyInspector the struct object inspector for the reduce-side key
   * @param valueInspector the struct object inspector for the reduce-side value
   * @return the flattened standard struct row object inspector
   * @throws HiveException
   */
public static StandardStructObjectInspector constructVectorizedReduceRowOI(StructObjectInspector keyInspector, StructObjectInspector valueInspector) throws HiveException {
    ArrayList<String> colNames = new ArrayList<String>();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    List<? extends StructField> fields = keyInspector.getAllStructFieldRefs();
    for (StructField field : fields) {
        colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
        ois.add(field.getFieldObjectInspector());
    }
    fields = valueInspector.getAllStructFieldRefs();
    for (StructField field : fields) {
        colNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
        ois.add(field.getFieldObjectInspector());
    }
    StandardStructObjectInspector rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
    return rowObjectInspector;
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList)
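
A minimal usage sketch for the helper above. The key/value shapes (a one-field long key, a one-field int value) and the class name ReduceRowOiSketch are hypothetical; the point is that the returned row OI flattens both structs into one standard struct with reduce-side-prefixed column names.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowOiSketch {
    public static void main(String[] args) throws HiveException {
        // Hypothetical reduce shapes: a one-field key and a one-field value.
        StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("userid"),
                Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaLongObjectInspector));
        StructObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("clicks"),
                Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
        // One flat row OI with a column per key field and per value field,
        // names prefixed with the reduce-side KEY/VALUE markers.
        StandardStructObjectInspector rowOI =
                Utilities.constructVectorizedReduceRowOI(keyOI, valueOI);
        System.out.println(rowOI.getTypeName());
    }
}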

Example 10 with StandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project haivvreo by jghoman.

From the class TestAvroObjectInspectorGenerator, method primitiveTypesWorkCorrectly.

@Test
public void primitiveTypesWorkCorrectly() throws SerDeException {
    final String bunchOfPrimitives = "{\n" +
        "  \"namespace\": \"testing\",\n" +
        "  \"name\": \"PrimitiveTypes\",\n" +
        "  \"type\": \"record\",\n" +
        "  \"fields\": [\n" +
        "    { \"name\":\"aString\",  \"type\":\"string\" },\n" +
        "    { \"name\":\"anInt\",    \"type\":\"int\" },\n" +
        "    { \"name\":\"aBoolean\", \"type\":\"boolean\" },\n" +
        "    { \"name\":\"aLong\",    \"type\":\"long\" },\n" +
        "    { \"name\":\"aFloat\",   \"type\":\"float\" },\n" +
        "    { \"name\":\"aDouble\",  \"type\":\"double\" },\n" +
        "    { \"name\":\"aNull\",    \"type\":\"null\" }\n" +
        "  ]\n" +
        "}";
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(Schema.parse(bunchOfPrimitives));
    String[] expectedColumnNames = { "aString", "anInt", "aBoolean", "aLong", "aFloat", "aDouble", "aNull" };
    verifyColumnNames(expectedColumnNames, aoig.getColumnNames());
    TypeInfo[] expectedColumnTypes = { STRING, INT, BOOLEAN, LONG, FLOAT, DOUBLE, VOID };
    verifyColumnTypes(expectedColumnTypes, aoig.getColumnTypes());
    // Rip apart the object inspector, making sure we got what we expect.
    final ObjectInspector oi = aoig.getObjectInspector();
    assertTrue(oi instanceof StandardStructObjectInspector);
    final StandardStructObjectInspector ssoi = (StandardStructObjectInspector) oi;
    List<? extends StructField> structFields = ssoi.getAllStructFieldRefs();
    assertEquals(expectedColumnNames.length, structFields.size());
    for (int i = 0; i < expectedColumnNames.length; i++) {
        assertEquals("Column names don't match", expectedColumnNames[i].toLowerCase(), structFields.get(i).getFieldName());
        assertEquals("Column types don't match", expectedColumnTypes[i].getTypeName(), structFields.get(i).getFieldObjectInspector().getTypeName());
    }
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) AvroObjectInspectorGenerator(com.linkedin.haivvreo.AvroObjectInspectorGenerator) Test(org.junit.Test)
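
The enumeration idiom at the end of that test, as a standalone sketch (hand-built OI, hypothetical fields and class name): getAllStructFieldRefs yields each field's name and the type name of its field object inspector. The standard struct OI lower-cases field names, which is why the test compares against expectedColumnNames[i].toLowerCase().

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class FieldEnumerationSketch {
    public static void main(String[] args) {
        StandardStructObjectInspector ssoi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("aString", "anInt"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaIntObjectInspector));
        for (StructField f : ssoi.getAllStructFieldRefs()) {
            // Prints "astring : string" and "anint : int" -- the standard struct OI
            // lower-cases field names, hence the toLowerCase() in the test above.
            System.out.println(f.getFieldName() + " : " + f.getFieldObjectInspector().getTypeName());
        }
    }
}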

Aggregations

ArrayList (java.util.ArrayList): 21
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 20
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 16
Test (org.junit.Test): 13
GenericData (org.apache.avro.generic.GenericData): 12
Schema (org.apache.avro.Schema): 11
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 11
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 6
JavaStringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector): 6
Map (java.util.Map): 5
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 5
HashMap (java.util.HashMap): 4
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 4
VoidObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector): 4
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 4
UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector): 3
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 3
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 3
Hashtable (java.util.Hashtable): 2
List (java.util.List): 2