Example 46 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

The class TestHBaseSerDe, method deserializeAndSerialize.

private void deserializeAndSerialize(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData) throws SerDeException {
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals(9, fieldRefs.size());
    // Deserialize
    Object row = serDe.deserialize(new ResultWritable(r));
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        if (fieldData != null) {
            fieldData = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        }
        assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    // Serialize
    assertEquals(PutWritable.class, serDe.getSerializedClass());
    PutWritable serializedPut = (PutWritable) serDe.serialize(row, oi);
    assertEquals("Serialized data", p.toString(), String.valueOf(serializedPut.getPut()));
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
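
The same pattern generalizes beyond this test: obtain the StructObjectInspector from the SerDe, enumerate the field refs, and read every field through the inspector rather than casting the row directly. A minimal sketch, assuming an already-initialized SerDe serDe and a Writable record writable that it understands (only APIs used in the examples above appear):

private void dumpRow(SerDe serDe, Writable writable) throws SerDeException {
    // Ask the SerDe how its rows are structured, then walk the fields generically.
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    Object row = serDe.deserialize(writable);
    for (StructField ref : oi.getAllStructFieldRefs()) {
        Object fieldData = oi.getStructFieldData(row, ref);
        // Lazy SerDes hand back Lazy* wrappers; unwrap primitives to their Writable form.
        if (fieldData instanceof LazyPrimitive<?, ?>) {
            fieldData = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        }
        System.out.println(ref.getFieldName() + " = " + fieldData);
    }
}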

Example 47 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

The class TestHBaseSerDe, method deserializeAndSerializeHiveStructColumnFamily.

private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey) throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    Object row = serDe.deserialize(new ResultWritable(r));
    int k = 0;
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
        assertNotNull(fieldData);
        if (fieldData instanceof LazyPrimitive<?, ?>) {
            assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
        } else if (fieldData instanceof LazyHBaseCellMap) {
            for (int j = 0; j < ((LazyHBaseCellMap) fieldData).getMapSize(); j++) {
                assertEquals(expectedFieldsData[k + 1], ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k)).toString().trim());
                k++;
            }
            assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize());
            // Make sure that the unwanted key is not present in the map
            assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));
        } else {
            fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
        }
    }
    SerDeUtils.getJSONString(row, soi);
    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
    assertNotNull(put);
}
Also used : LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Put(org.apache.hadoop.hbase.client.Put) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
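
When the concrete LazyHBaseCellMap cast is undesirable, the same map-typed column family can be probed through the generic MapObjectInspector API. A hedged sketch, reusing soi, row, and a map-typed fieldRef as in the test above; someQualifier is a placeholder for a real qualifier key:

// Probe a map-typed field via the inspector API rather than the concrete lazy class.
MapObjectInspector moi = (MapObjectInspector) fieldRef.getFieldObjectInspector();
Object mapData = soi.getStructFieldData(row, fieldRef);
int mapSize = moi.getMapSize(mapData); // number of qualifiers present in the family
Object value = moi.getMapValueElement(mapData, someQualifier); // null when the key is absent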

Example 48 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

The class TestStreaming, method deserializeDeltaFileRow.

// Assumes stored data schema = [acid fields],string,int,string
// Returns an array of 6 fields, where the last field holds the actual row data
private static Object[] deserializeDeltaFileRow(Object row, StructObjectInspector inspector) {
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    WritableIntObjectInspector f0ins = (WritableIntObjectInspector) fields.get(0).getFieldObjectInspector();
    WritableLongObjectInspector f1ins = (WritableLongObjectInspector) fields.get(1).getFieldObjectInspector();
    WritableIntObjectInspector f2ins = (WritableIntObjectInspector) fields.get(2).getFieldObjectInspector();
    WritableLongObjectInspector f3ins = (WritableLongObjectInspector) fields.get(3).getFieldObjectInspector();
    WritableLongObjectInspector f4ins = (WritableLongObjectInspector) fields.get(4).getFieldObjectInspector();
    StructObjectInspector f5ins = (StructObjectInspector) fields.get(5).getFieldObjectInspector();
    int f0 = f0ins.get(inspector.getStructFieldData(row, fields.get(0)));
    long f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1)));
    int f2 = f2ins.get(inspector.getStructFieldData(row, fields.get(2)));
    long f3 = f3ins.get(inspector.getStructFieldData(row, fields.get(3)));
    long f4 = f4ins.get(inspector.getStructFieldData(row, fields.get(4)));
    SampleRec f5 = deserializeInner(inspector.getStructFieldData(row, fields.get(5)), f5ins);
    return new Object[] { f0, f1, f2, f3, f4, f5 };
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
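
The deserializeInner helper is not reproduced in this snippet. A plausible reconstruction under the schema noted in the comment (string,int,string); SampleRec and its constructor signature are assumptions, not the project's actual code:

private static SampleRec deserializeInner(Object data, StructObjectInspector innerOI) {
    // Unpack the nested struct field by field, mirroring the outer row logic.
    List<? extends StructField> fields = innerOI.getAllStructFieldRefs();
    String s1 = ((WritableStringObjectInspector) fields.get(0).getFieldObjectInspector())
            .getPrimitiveJavaObject(innerOI.getStructFieldData(data, fields.get(0)));
    int n = ((WritableIntObjectInspector) fields.get(1).getFieldObjectInspector())
            .get(innerOI.getStructFieldData(data, fields.get(1)));
    String s2 = ((WritableStringObjectInspector) fields.get(2).getFieldObjectInspector())
            .getPrimitiveJavaObject(innerOI.getStructFieldData(data, fields.get(2)));
    return new SampleRec(s1, n, s2); // hypothetical holder for the three inner values
}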

Example 49 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.

The class FetchOperator, method createPartValue.

private Object[] createPartValue(PartitionDesc partDesc, StructObjectInspector partOI) {
    Map<String, String> partSpec = partDesc.getPartSpec();
    List<? extends StructField> fields = partOI.getAllStructFieldRefs();
    Object[] partValues = new Object[fields.size()];
    for (int i = 0; i < partValues.length; i++) {
        StructField field = fields.get(i);
        String value = partSpec.get(field.getFieldName());
        ObjectInspector oi = field.getFieldObjectInspector();
        partValues[i] = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(value);
    }
    return partValues;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject)
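
The ObjectInspectorConverters.getConverter call is what coerces the metastore's string-typed partition values into each column's declared type. A minimal standalone sketch, with a writable int target chosen purely for illustration:

// Convert the partition-spec string "42" into the field's writable int representation.
ObjectInspectorConverters.Converter conv = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.writableIntObjectInspector);
Object partValue = conv.convert("42"); // yields an IntWritable holding 42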

Example 50 with StructField

Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project drill by apache.

The class HiveAbstractReader, method init.

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);
    // Get the configured default partition value
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);
        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);
        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);
            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }
        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
        }
        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }
        // We should always get the column names from the ObjectInspector. For some tables (e.g. Avro) the
        // metastore may not contain the schema; instead it is derived from other sources such as table properties
        // or an external schema file. The SerDe object knows how to derive the schema from the config and table
        // properties passed at initialization, and the ObjectInspector created from it carries that schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }
        for (int i = 0; i < selectedColumnNames.size(); ++i) {
            logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
        }
        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);
                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
    }
    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        internalInit(tableProperties, reader);
    }
}
Also used : SerDe(org.apache.hadoop.hive.serde2.SerDe) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExecutionException(java.util.concurrent.ExecutionException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SchemaPath(org.apache.drill.common.expression.SchemaPath) JobConf(org.apache.hadoop.mapred.JobConf)
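
The projection pushdown above hinges on ColumnProjectionUtils.appendReadColumns writing the selected column ids and names into the JobConf before the RecordReader is opened. A minimal sketch of that handshake, with column names chosen for illustration:

// Ask projection-aware Hive readers (e.g. ORC, RCFile) to materialize only
// columns 0 and 2; the remaining columns are skipped during deserialization.
JobConf job = new JobConf();
ColumnProjectionUtils.appendReadColumns(job,
        Lists.newArrayList(0, 2),
        Lists.newArrayList("key", "value"));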

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 147
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 107
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 93
ArrayList (java.util.ArrayList): 75
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 56
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 46
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 42
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 33
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 29
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 28
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 26
List (java.util.List): 25
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 24
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 24
Test (org.junit.Test): 24
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 22
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 22
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 22
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 20
UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector): 19