Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class OpProcFactory, method pushFilterToStorageHandler.
/**
 * Attempts to push a predicate down into a storage handler. For
 * native tables, this is a no-op.
 *
 * @param tableScanOp table scan against which predicate applies
 *
 * @param originalPredicate predicate to be pushed down
 *
 * @param owi object walk info
 *
 * @param hiveConf Hive configuration
 *
 * @return portion of predicate which needs to be evaluated
 * by Hive as a post-filter, or null if it was possible
 * to push down the entire predicate
 */
private static ExprNodeGenericFuncDesc pushFilterToStorageHandler(
    TableScanOperator tableScanOp, ExprNodeGenericFuncDesc originalPredicate,
    OpWalkerInfo owi, HiveConf hiveConf) throws SemanticException {
  TableScanDesc tableScanDesc = tableScanOp.getConf();
  Table tbl = tableScanDesc.getTableMetadata();
  if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
    // attach the original predicate to the table scan operator for index
    // optimizations that require the pushed predicate before pcr & later
    // optimizations are applied
    tableScanDesc.setFilterExpr(originalPredicate);
  }
  if (!tbl.isNonNative()) {
    return originalPredicate;
  }
  HiveStorageHandler storageHandler = tbl.getStorageHandler();
  if (!(storageHandler instanceof HiveStoragePredicateHandler)) {
    // The storage handler does not provide predicate decomposition
    // support, so we'll implement the entire filter in Hive. However,
    // we still provide the full predicate to the storage handler in
    // case it wants to do any of its own prefiltering.
    tableScanDesc.setFilterExpr(originalPredicate);
    return originalPredicate;
  }
  HiveStoragePredicateHandler predicateHandler = (HiveStoragePredicateHandler) storageHandler;
  JobConf jobConf = new JobConf(owi.getParseContext().getConf());
  Utilities.setColumnNameList(jobConf, tableScanOp);
  Utilities.setColumnTypeList(jobConf, tableScanOp);
  try {
    Utilities.copyTableJobPropertiesToConf(Utilities.getTableDesc(tbl), jobConf);
  } catch (Exception e) {
    throw new SemanticException(e);
  }
  Deserializer deserializer = tbl.getDeserializer();
  HiveStoragePredicateHandler.DecomposedPredicate decomposed =
      predicateHandler.decomposePredicate(jobConf, deserializer, originalPredicate);
  if (decomposed == null) {
    // not able to push anything down
    if (LOG.isDebugEnabled()) {
      LOG.debug("No pushdown possible for predicate: " + originalPredicate.getExprString());
    }
    return originalPredicate;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Original predicate: " + originalPredicate.getExprString());
    if (decomposed.pushedPredicate != null) {
      LOG.debug("Pushed predicate: " + decomposed.pushedPredicate.getExprString());
    }
    if (decomposed.residualPredicate != null) {
      LOG.debug("Residual predicate: " + decomposed.residualPredicate.getExprString());
    }
  }
  tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
  tableScanDesc.setFilterObject(decomposed.pushedPredicateObject);
  return decomposed.residualPredicate;
}
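The method above only consumes a storage handler's DecomposedPredicate; the handler itself decides how to split the expression. Below is a minimal sketch (not code from the Hive repository) of such a handler. ExampleStorageHandler and its package are hypothetical names; DefaultStorageHandler, HiveStoragePredicateHandler, and the DecomposedPredicate fields are the Hive types referenced above, and the trivial strategy here simply claims the whole predicate and leaves no residual.

// A minimal sketch, assuming a hypothetical ExampleStorageHandler that opts in to
// predicate decomposition. The caller above stores pushedPredicate/pushedPredicateObject
// on the TableScanDesc and evaluates the returned residualPredicate as a post-filter.
package org.example.hive;

import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.mapred.JobConf;

public class ExampleStorageHandler extends DefaultStorageHandler
    implements HiveStoragePredicateHandler {

  @Override
  public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer,
      ExprNodeDesc predicate) {
    // Trivial strategy: claim the entire predicate for the storage layer. A real handler
    // would split the expression and return the non-pushable part as residualPredicate.
    DecomposedPredicate decomposed = new DecomposedPredicate();
    // Safe cast here: pushFilterToStorageHandler above always passes an ExprNodeGenericFuncDesc.
    decomposed.pushedPredicate = (ExprNodeGenericFuncDesc) predicate;
    decomposed.pushedPredicateObject = null; // optional serialized form for the record reader
    decomposed.residualPredicate = null;     // nothing left for Hive to post-filter
    return decomposed;
  }
}

Returning null from decomposePredicate, rather than a DecomposedPredicate with null fields, signals that nothing can be pushed down; that is the case handled by the "No pushdown possible" branch above.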
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class AvroLazyObjectInspector, method deserializeStruct.
/**
 * Deserializes the given struct object.
 *
 * @param struct the object to deserialize
 * @param fieldName name of the field we are currently operating on
 * @return a deserialized object that Hive can further operate on
 * @throws AvroObjectInspectorException if something goes wrong during deserialization
 */
private Object deserializeStruct(Object struct, String fieldName) {
  byte[] data = ((LazyStruct) struct).getBytes();
  AvroDeserializer deserializer = new AvroDeserializer();
  if (data == null || data.length == 0) {
    return null;
  }
  if (readerSchema == null && schemaRetriever == null) {
    throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field [" + fieldName + "]");
  }
  Schema ws = null;
  Schema rs = null;
  int offset = 0;
  AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
  if (readerSchema == null) {
    offset = schemaRetriever.getOffset();
    if (data.length < offset) {
      throw new IllegalArgumentException("Data size cannot be less than [" + offset + "]. Found [" + data.length + "]");
    }
    rs = schemaRetriever.retrieveReaderSchema(data);
    if (rs == null) {
      // still nothing, raise an exception
      throw new IllegalStateException("A valid reader schema could not be retrieved either directly or from the schema retriever for field [" + fieldName + "]");
    }
    ws = schemaRetriever.retrieveWriterSchema(data);
    if (ws == null) {
      throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
    }
    // adjust the data bytes according to any possible offset that was provided
    if (LOG.isDebugEnabled()) {
      LOG.debug("Retrieved writer Schema: " + ws.toString());
      LOG.debug("Retrieved reader Schema: " + rs.toString());
    }
    try {
      avroWritable.readFields(data, offset, data.length, ws, rs);
    } catch (IOException ioe) {
      throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
    }
  } else {
    // a reader schema was provided
    if (schemaRetriever != null) {
      // a schema retriever has been provided as well; attempt to read the writer schema
      // from the retriever
      ws = schemaRetriever.retrieveWriterSchema(data);
      if (ws == null) {
        throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
      }
    } else {
      // attempt retrieving the schema from the data
      ws = retrieveSchemaFromBytes(data);
    }
    rs = readerSchema;
    try {
      avroWritable.readFields(data, ws, rs);
    } catch (IOException ioe) {
      throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
    }
  }
  AvroObjectInspectorGenerator oiGenerator = null;
  Object deserializedObject = null;
  try {
    oiGenerator = new AvroObjectInspectorGenerator(rs);
    deserializedObject = deserializer.deserialize(
        oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs);
  } catch (SerDeException se) {
    throw new AvroObjectInspectorException("Error deserializing avro payload", se);
  }
  return deserializedObject;
}
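To see the core deserialization path in isolation, here is a small self-contained sketch (not Hive code) that mirrors the calls above once the writer and reader schemas are known: wrap the raw bytes in an AvroGenericRecordWritable, derive column names and types from the reader schema with AvroObjectInspectorGenerator, and hand both to AvroDeserializer. The class name AvroBytesToHiveRow, the inline schema, and the loadPayloadSomehow helper are made up for illustration.

// A minimal, self-contained sketch of the deserialization path used above, assuming
// the writer and reader schemas are already known.
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroDeserializer;
import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;

public class AvroBytesToHiveRow {

  /** Turns raw Avro bytes into a row object that Hive object inspectors can walk. */
  public static Object deserialize(byte[] data, Schema writerSchema, Schema readerSchema)
      throws IOException, SerDeException {
    // Wrap the raw bytes; readFields resolves the writer schema against the reader schema.
    AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
    avroWritable.readFields(data, writerSchema, readerSchema);

    // Derive Hive column names and types from the reader schema, then deserialize.
    AvroObjectInspectorGenerator oiGenerator = new AvroObjectInspectorGenerator(readerSchema);
    AvroDeserializer deserializer = new AvroDeserializer();
    return deserializer.deserialize(
        oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, readerSchema);
  }

  public static void main(String[] args) throws Exception {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":\"int\"}]}");
    byte[] payload = loadPayloadSomehow(); // hypothetical helper; supply real Avro-encoded bytes
    // With identical writer and reader schemas, no schema resolution is needed.
    Object row = deserialize(payload, schema, schema);
    System.out.println(row);
  }

  // Placeholder: in a real test this would read Avro bytes produced by a GenericDatumWriter.
  private static byte[] loadPayloadSomehow() {
    throw new UnsupportedOperationException("supply real Avro-encoded bytes here");
  }
}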