Example 41 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class OpProcFactory, method pushFilterToStorageHandler:

/**
 * Attempts to push a predicate down into a storage handler. For
 * native tables, this is a no-op.
 *
 * @param tableScanOp table scan against which the predicate applies
 * @param originalPredicate predicate to be pushed down
 * @param owi object walk info
 * @param hiveConf Hive configuration
 * @return portion of the predicate which needs to be evaluated
 *         by Hive as a post-filter, or null if it was possible
 *         to push down the entire predicate
 */
private static ExprNodeGenericFuncDesc pushFilterToStorageHandler(TableScanOperator tableScanOp, ExprNodeGenericFuncDesc originalPredicate, OpWalkerInfo owi, HiveConf hiveConf) throws SemanticException {
    TableScanDesc tableScanDesc = tableScanOp.getConf();
    Table tbl = tableScanDesc.getTableMetadata();
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
        // attach the original predicate to the table scan operator for index
        // optimizations that require the pushed predicate before pcr & later
        // optimizations are applied
        tableScanDesc.setFilterExpr(originalPredicate);
    }
    if (!tbl.isNonNative()) {
        return originalPredicate;
    }
    HiveStorageHandler storageHandler = tbl.getStorageHandler();
    if (!(storageHandler instanceof HiveStoragePredicateHandler)) {
        // The storage handler does not provide predicate decomposition
        // support, so we'll implement the entire filter in Hive.  However,
        // we still provide the full predicate to the storage handler in
        // case it wants to do any of its own prefiltering.
        tableScanDesc.setFilterExpr(originalPredicate);
        return originalPredicate;
    }
    HiveStoragePredicateHandler predicateHandler = (HiveStoragePredicateHandler) storageHandler;
    // expose the scanned column names/types to the storage handler via a JobConf
    JobConf jobConf = new JobConf(owi.getParseContext().getConf());
    Utilities.setColumnNameList(jobConf, tableScanOp);
    Utilities.setColumnTypeList(jobConf, tableScanOp);
    try {
        Utilities.copyTableJobPropertiesToConf(Utilities.getTableDesc(tbl), jobConf);
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    // let the storage handler split the predicate into a part it can evaluate
    // natively and a residual part that Hive must still evaluate
    Deserializer deserializer = tbl.getDeserializer();
    HiveStoragePredicateHandler.DecomposedPredicate decomposed = predicateHandler.decomposePredicate(jobConf, deserializer, originalPredicate);
    if (decomposed == null) {
        // not able to push anything down
        if (LOG.isDebugEnabled()) {
            LOG.debug("No pushdown possible for predicate:  " + originalPredicate.getExprString());
        }
        return originalPredicate;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Original predicate:  " + originalPredicate.getExprString());
        if (decomposed.pushedPredicate != null) {
            LOG.debug("Pushed predicate:  " + decomposed.pushedPredicate.getExprString());
        }
        if (decomposed.residualPredicate != null) {
            LOG.debug("Residual predicate:  " + decomposed.residualPredicate.getExprString());
        }
    }
    tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
    tableScanDesc.setFilterObject(decomposed.pushedPredicateObject);
    // the residual (possibly null) is evaluated by Hive as a post-filter
    return decomposed.residualPredicate;
}
Also used : HiveStoragePredicateHandler(org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) JobConf(org.apache.hadoop.mapred.JobConf) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
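
The example above is the consumer side of the contract: pushFilterToStorageHandler hands the full predicate to the storage handler and keeps whatever comes back as the residual. On the producer side, a non-native storage handler implements HiveStoragePredicateHandler.decomposePredicate, commonly with the IndexPredicateAnalyzer helper (as Hive's HBase handler does). The sketch below is illustrative only: the class name KeyValueStorePredicateHandler and the pushable column "key" are hypothetical, and a real handler would also implement the rest of HiveStorageHandler.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical handler for a store that can only evaluate equality on "key".
public class KeyValueStorePredicateHandler implements HiveStoragePredicateHandler {

    @Override
    public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer, ExprNodeDesc predicate) {
        IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
        // declare what the underlying store can evaluate natively
        analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
        analyzer.allowColumnName("key");
        // split the predicate: matched conditions go to the store,
        // everything else remains for Hive to evaluate
        List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
        ExprNodeDesc residual = analyzer.analyzePredicate(predicate, conditions);
        if (conditions.isEmpty()) {
            // nothing pushable; pushFilterToStorageHandler will see null
            return null;
        }
        DecomposedPredicate decomposed = new DecomposedPredicate();
        decomposed.pushedPredicate = analyzer.translateSearchConditions(conditions);
        decomposed.residualPredicate = (ExprNodeGenericFuncDesc) residual;
        return decomposed;
    }
}

Whatever ends up in pushedPredicate is recorded on the TableScanDesc and shipped with the job, while the residual is what pushFilterToStorageHandler returns for Hive to apply as a post-filter.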

Example 42 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class AvroLazyObjectInspector, method deserializeStruct:

/**
 * Deserialize the given struct object.
 *
 * @param struct the object to deserialize
 * @param fieldName name of the field on which we are currently operating
 * @return a deserialized object that Hive can further operate on
 * @throws AvroObjectInspectorException if something goes wrong during deserialization
 */
private Object deserializeStruct(Object struct, String fieldName) {
    byte[] data = ((LazyStruct) struct).getBytes();
    if (data == null || data.length == 0) {
        return null;
    }
    AvroDeserializer deserializer = new AvroDeserializer();
    if (readerSchema == null && schemaRetriever == null) {
        throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field [" + fieldName + "]");
    }
    Schema ws = null;
    Schema rs = null;
    int offset = 0;
    AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
    if (readerSchema == null) {
        offset = schemaRetriever.getOffset();
        if (data.length < offset) {
            throw new IllegalArgumentException("Data size cannot be less than [" + offset + "]. Found [" + data.length + "]");
        }
        rs = schemaRetriever.retrieveReaderSchema(data);
        if (rs == null) {
            // still nothing; raise an exception
            throw new IllegalStateException("A valid reader schema could not be retrieved either directly or from the schema retriever for field [" + fieldName + "]");
        }
        ws = schemaRetriever.retrieveWriterSchema(data);
        if (ws == null) {
            throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Retrieved writer Schema: " + ws.toString());
            LOG.debug("Retrieved reader Schema: " + rs.toString());
        }
        // deserialize, adjusting the data bytes for the offset reported by the retriever
        try {
            avroWritable.readFields(data, offset, data.length, ws, rs);
        } catch (IOException ioe) {
            throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
        }
    } else {
        // a reader schema was provided
        if (schemaRetriever != null) {
            // a schema retriever has been provided as well; attempt to read the
            // writer schema from the retriever
            ws = schemaRetriever.retrieveWriterSchema(data);
            if (ws == null) {
                throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
            }
        } else {
            // attempt retrieving the schema from the data
            ws = retrieveSchemaFromBytes(data);
        }
        rs = readerSchema;
        try {
            avroWritable.readFields(data, ws, rs);
        } catch (IOException ioe) {
            throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
        }
    }
    AvroObjectInspectorGenerator oiGenerator = null;
    Object deserializedObject = null;
    try {
        oiGenerator = new AvroObjectInspectorGenerator(rs);
        deserializedObject = deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs);
    } catch (SerDeException se) {
        throw new AvroObjectInspectorException("Error deserializing avro payload", se);
    }
    return deserializedObject;
}
Also used : Schema(org.apache.avro.Schema) IOException(java.io.IOException) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
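
When no reader schema is configured on the inspector, everything hinges on the AvroSchemaRetriever: it reports how many leading bytes to skip and recovers the writer (and possibly reader) schema from the raw bytes. As a rough sketch of that contract (the class name FixedSchemaRetriever, the record schema, and the 4-byte header are all hypothetical; check the AvroSchemaRetriever base class in your Hive version for the exact constructor requirements), a trivial retriever for one fixed, well-known schema might look like:

import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever;

// Hypothetical retriever for data always written with one known schema.
public class FixedSchemaRetriever extends AvroSchemaRetriever {

    private static final Schema SCHEMA = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
        + "{\"name\":\"id\",\"type\":\"long\"},"
        + "{\"name\":\"name\",\"type\":\"string\"}]}");

    @Override
    public Schema retrieveWriterSchema(Object source) {
        // the writer schema is fixed, so the payload bytes are ignored
        return SCHEMA;
    }

    @Override
    public Schema retrieveReaderSchema(Object source) {
        // read with the same schema the data was written with
        return SCHEMA;
    }

    @Override
    public int getOffset() {
        // hypothetical: each cell carries a 4-byte header before the Avro
        // payload, which deserializeStruct skips via readFields(data, offset, ...)
        return 4;
    }
}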

Aggregations

Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 21 usages
ArrayList (java.util.ArrayList): 17 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 15 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 15 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 11 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 10 usages
IOException (java.io.IOException): 9 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 usages
Path (org.apache.hadoop.fs.Path): 7 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 6 usages
HashMap (java.util.HashMap): 5 usages
Properties (java.util.Properties): 5 usages
Configuration (org.apache.hadoop.conf.Configuration): 5 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 5 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 5 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 5 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 5 usages
CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint): 5 usages
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 5 usages