Example 11 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project presto by prestodb.

the class HiveUtil method getTableObjectInspector.

public static StructObjectInspector getTableObjectInspector(@SuppressWarnings("deprecation") Deserializer deserializer) {
    try {
        ObjectInspector inspector = deserializer.getObjectInspector();
        checkArgument(inspector.getCategory() == Category.STRUCT, "expected STRUCT: %s", inspector.getCategory());
        return (StructObjectInspector) inspector;
    } catch (SerDeException e) {
        throw Throwables.propagate(e);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
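
For context, a minimal usage sketch: enumerate a table's columns through this helper, assuming HiveUtil is on the classpath. The LazySimpleSerDe setup and the two-column schema are illustrative assumptions, not part of the Presto source.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class TableObjectInspectorSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical two-column table schema
        Properties schema = new Properties();
        schema.setProperty("columns", "id,name");
        schema.setProperty("columns.types", "bigint,string");
        LazySimpleSerDe serde = new LazySimpleSerDe();
        serde.initialize(new Configuration(false), schema);
        // getTableObjectInspector fails fast unless the SerDe exposes a STRUCT
        StructObjectInspector inspector = HiveUtil.getTableObjectInspector(serde);
        for (StructField field : inspector.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName());
        }
    }
}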

Example 12 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project presto by prestodb.

the class HiveUtil method getDeserializer.

@SuppressWarnings("deprecation")
public static Deserializer getDeserializer(Properties schema) {
    String name = getDeserializerClassName(schema);
    Deserializer deserializer = createDeserializer(getDeserializerClass(name));
    initializeDeserializer(deserializer, schema);
    return deserializer;
}
Also used : Deserializer(org.apache.hadoop.hive.serde2.Deserializer)
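
A hedged sketch of calling it: the Properties object is the table schema, and serialization.lib (serdeConstants.SERIALIZATION_LIB) names the SerDe class that getDeserializerClassName reads. The concrete property values below are assumptions for illustration.

import java.util.Properties;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class GetDeserializerSketch {
    static Deserializer exampleDeserializer() {
        // hypothetical schema naming LazySimpleSerDe as the table's SerDe
        Properties schema = new Properties();
        schema.setProperty("serialization.lib", LazySimpleSerDe.class.getName());
        schema.setProperty("columns", "id,name");
        schema.setProperty("columns.types", "bigint,string");
        // resolves, instantiates, and initializes the SerDe in one call
        return HiveUtil.getDeserializer(schema);
    }
}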

Example 13 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project presto by prestodb.

the class RcFileTester method assertFileContentsOld.

private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues) throws Exception {
    // read only column 0 from the file
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    // single-column table schema: one column named "test" of the type under test
    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
    @SuppressWarnings("deprecation") Deserializer deserializer;
    if (format == Format.BINARY) {
        deserializer = new LazyBinaryColumnarSerDe();
    } else {
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());
    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null), configuration, NULL);
    K key = recordReader.createKey();
    V value = recordReader.createValue();
    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");
    Iterator<?> iterator = expectedValues.iterator();
    // compare each deserialized row against the next expected value
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();
        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) FileSplit(org.apache.hadoop.mapred.FileSplit) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) StructObject(org.apache.hadoop.hive.serde2.StructObject) JobConf(org.apache.hadoop.mapred.JobConf) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
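
The read side above is the standard mapred RecordReader loop; NULL is presumably the statically imported Reporter.NULL. Stripped of the assertions and with a caller-supplied file, the same pattern looks like this sketch:

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class RcFileReadSketch {
    static void readRcFile(File file) throws Exception {
        JobConf configuration = new JobConf(new Configuration(false));
        InputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileInputFormat<>();
        // one split covering the whole file; no preferred host locations
        FileSplit split = new FileSplit(new Path(file.getAbsolutePath()), 0, file.length(), (String[]) null);
        RecordReader<LongWritable, BytesRefArrayWritable> recordReader =
                inputFormat.getRecordReader(split, configuration, Reporter.NULL);
        LongWritable key = recordReader.createKey();
        BytesRefArrayWritable value = recordReader.createValue();
        while (recordReader.next(key, value)) {
            // value holds one row's serialized columns; a ColumnarSerDe can decode it
        }
        recordReader.close();
    }
}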

Example 14 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project metacat by Netflix.

the class HiveTableUtil method getTableStructFields.

/**
     * getTableStructFields.
     *
     * @param table table
     * @return all struct field refs
     */
public static List<? extends StructField> getTableStructFields(final Table table) {
    final Properties schema = MetaStoreUtils.getTableMetadata(table);
    final String name = schema.getProperty(serdeConstants.SERIALIZATION_LIB);
    if (name == null) {
        return Collections.emptyList();
    }
    final Deserializer deserializer = createDeserializer(getDeserializerClass(name));
    try {
        deserializer.initialize(new Configuration(false), schema);
    } catch (SerDeException e) {
        // preserve the cause so the original stack trace is not lost
        throw new RuntimeException("error initializing deserializer: " + deserializer.getClass().getName(), e);
    }
    try {
        final ObjectInspector inspector = deserializer.getObjectInspector();
        Preconditions.checkArgument(inspector.getCategory() == ObjectInspector.Category.STRUCT, "expected STRUCT: %s", inspector.getCategory());
        return ((StructObjectInspector) inspector).getAllStructFieldRefs();
    } catch (SerDeException e) {
        throw Throwables.propagate(e);
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) Properties(java.util.Properties) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
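
A short hedged consumer of the result; printSerdeColumns is hypothetical, not part of metacat:

import java.util.List;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;

public class StructFieldsSketch {
    // hypothetical: print each column name and type the table's SerDe exposes
    static void printSerdeColumns(Table table) {
        List<? extends StructField> fields = HiveTableUtil.getTableStructFields(table);
        for (StructField field : fields) {
            System.out.println(field.getFieldName() + " : " + field.getFieldObjectInspector().getTypeName());
        }
    }
}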

Example 15 with Deserializer

use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

the class AvroLazyObjectInspector method deserializeStruct.

/**
   * Deserialize the given struct object
   *
   * @param struct the object to deserialize
   * @param fieldName name of the field we are currently operating on
   * @return a deserialized object that Hive can further operate on
   * @throws AvroObjectInspectorException if something goes wrong during deserialization
   */
private Object deserializeStruct(Object struct, String fieldName) {
    byte[] data = ((LazyStruct) struct).getBytes();
    AvroDeserializer deserializer = new AvroDeserializer();
    if (data == null || data.length == 0) {
        return null;
    }
    if (readerSchema == null && schemaRetriever == null) {
        throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field [" + fieldName + "]");
    }
    Schema ws = null;
    Schema rs = null;
    int offset = 0;
    AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
    if (readerSchema == null) {
        offset = schemaRetriever.getOffset();
        if (data.length < offset) {
            throw new IllegalArgumentException("Data size cannot be less than [" + offset + "]. Found [" + data.length + "]");
        }
        rs = schemaRetriever.retrieveReaderSchema(data);
        if (rs == null) {
            // the retriever also returned nothing; raise an exception
            throw new IllegalStateException("A valid reader schema could not be retrieved either directly or from the schema retriever for field [" + fieldName + "]");
        }
        ws = schemaRetriever.retrieveWriterSchema(data);
        if (ws == null) {
            throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Retrieved writer Schema: " + ws.toString());
            LOG.debug("Retrieved reader Schema: " + rs.toString());
        }
        // decode the payload, skipping past the offset that was provided
        try {
            avroWritable.readFields(data, offset, data.length, ws, rs);
        } catch (IOException ioe) {
            throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
        }
    } else {
        // a reader schema was provided
        if (schemaRetriever != null) {
            // a schema retriever has been provided as well. Attempt to read the write schema from the
            // retriever
            ws = schemaRetriever.retrieveWriterSchema(data);
            if (ws == null) {
                throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
            }
        } else {
            // attempt retrieving the schema from the data
            ws = retrieveSchemaFromBytes(data);
        }
        rs = readerSchema;
        try {
            avroWritable.readFields(data, ws, rs);
        } catch (IOException ioe) {
            throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
        }
    }
    AvroObjectInspectorGenerator oiGenerator = null;
    Object deserializedObject = null;
    try {
        oiGenerator = new AvroObjectInspectorGenerator(rs);
        deserializedObject = deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs);
    } catch (SerDeException se) {
        throw new AvroObjectInspectorException("Error deserializing avro payload", se);
    }
    return deserializedObject;
}
Also used : Schema(org.apache.avro.Schema) IOException(java.io.IOException) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
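
The core of the method is Avro schema resolution: the payload is decoded with the writer schema and projected onto the reader schema. A standalone sketch of that step with plain Avro APIs, roughly what AvroGenericRecordWritable.readFields does internally; the schemas are assumed to be supplied by the caller:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DecoderFactory;

public class SchemaResolutionSketch {
    static GenericRecord decode(byte[] data, Schema writerSchema, Schema readerSchema) throws IOException {
        // GenericDatumReader(writer, reader) resolves the two schemas: fields are
        // read in the writer's layout and surfaced in the reader's shape
        GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(writerSchema, readerSchema);
        return datumReader.read(null, DecoderFactory.get().binaryDecoder(data, null));
    }
}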

Aggregations

Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 19
ArrayList (java.util.ArrayList): 14
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 14
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 13
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 10
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 10
IOException (java.io.IOException): 7
Properties (java.util.Properties): 7
Path (org.apache.hadoop.fs.Path): 6
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
HashMap (java.util.HashMap): 4
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 4
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 4
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 4
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 4
List (java.util.List): 3
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 3