use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
the class MapRecordProcessor method getKeyValueReader.
private KeyValueReader getKeyValueReader(Collection<KeyValueReader> keyValueReaders, AbstractMapOperator mapOp)
    throws Exception {
  List<KeyValueReader> kvReaderList = new ArrayList<KeyValueReader>(keyValueReaders);
  // this sets up the map operator contexts correctly
  mapOp.initializeContexts();
  Deserializer deserializer = mapOp.getCurrentDeserializer();
  // deserializer is null in case of VectorMapOperator
  KeyValueReader reader = new KeyValueInputMerger(kvReaderList, deserializer,
      new ObjectInspector[] { deserializer == null ? null : deserializer.getObjectInspector() },
      mapOp.getConf().getSortCols());
  return reader;
}
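For context, a minimal sketch (not part of MapRecordProcessor) of the serde2 life cycle behind the Deserializer used above: initialize against table properties, expose an ObjectInspector describing the row layout, then turn each Writable record into a row object. LazySimpleSerDe and the readOneRow helper are illustrative choices, not taken from the snippet.

static List<Object> readOneRow(Configuration conf, Properties tableProps, Writable writableValue) throws Exception {
  // Illustrative only: any concrete SerDe would do; LazySimpleSerDe is just a common default.
  AbstractSerDe serDe = ReflectionUtils.newInstance(LazySimpleSerDe.class, null);
  serDe.initialize(conf, tableProps, null);                                           // bind to table/partition properties
  StructObjectInspector rowOI = (StructObjectInspector) serDe.getObjectInspector();   // row layout
  Object row = serDe.deserialize(writableValue);                                      // Writable record -> row object
  return rowOI.getStructFieldsDataAsList(row);                                        // field values via the ObjectInspector
}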
use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
the class CreateTableLikeOperation method setTableParameters.
private void setTableParameters(Table tbl) throws HiveException {
  Set<String> retainer = new HashSet<String>();
  Class<? extends Deserializer> serdeClass;
  try {
    serdeClass = tbl.getDeserializerClass();
  } catch (Exception e) {
    throw new HiveException(e);
  }
  // We should copy only those table parameters that are specified in the config.
  SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
  // for non-native table, property storage_handler should be retained
  retainer.add(META_TABLE_STORAGE);
  if (spec != null && spec.schemaProps() != null) {
    retainer.addAll(Arrays.asList(spec.schemaProps()));
  }
  String paramsStr = HiveConf.getVar(context.getConf(), HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
  if (paramsStr != null) {
    retainer.addAll(Arrays.asList(paramsStr.split(",")));
  }
  Map<String, String> params = tbl.getParameters();
  if (!retainer.isEmpty()) {
    params.keySet().retainAll(retainer);
  } else {
    params.clear();
  }
  if (desc.getTblProps() != null) {
    params.putAll(desc.getTblProps());
  }
}
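The retainer set above is driven by the @SerDeSpec annotation on the table's SerDe class: only the properties it lists in schemaProps(), plus the storage handler property and the configured whitelist, survive into the new table. A hedged illustration of such an annotation on a hypothetical SerDe (ExampleSerDe and the custom property name are made up for this sketch):

// Hypothetical SerDe, shown only to illustrate where schemaProps() comes from.
@SerDeSpec(schemaProps = { serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, "example.custom.prop" })
public class ExampleSerDe extends LazySimpleSerDe {
  public ExampleSerDe() throws SerDeException {
  }
}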
use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
the class DynamicValueRegistryTez method init.
@Override
public void init(RegistryConf conf) throws Exception {
  RegistryConfTez rct = (RegistryConfTez) conf;
  for (String inputSourceName : rct.baseWork.getInputSourceToRuntimeValuesInfo().keySet()) {
    LOG.info("Runtime value source: " + inputSourceName);
    LogicalInput runtimeValueInput = rct.inputs.get(inputSourceName);
    RuntimeValuesInfo runtimeValuesInfo =
        rct.baseWork.getInputSourceToRuntimeValuesInfo().get(inputSourceName);
    // Setup deserializer/obj inspectors for the incoming data source
    AbstractSerDe serDe = ReflectionUtils.newInstance(runtimeValuesInfo.getTableDesc().getSerDeClass(), null);
    serDe.initialize(rct.conf, runtimeValuesInfo.getTableDesc().getProperties(), null);
    ObjectInspector inspector = serDe.getObjectInspector();
    // Set up col expressions for the dynamic values using this input
    List<ExprNodeEvaluator> colExprEvaluators = new ArrayList<ExprNodeEvaluator>();
    for (ExprNodeDesc expr : runtimeValuesInfo.getColExprs()) {
      ExprNodeEvaluator exprEval = ExprNodeEvaluatorFactory.get(expr, null);
      exprEval.initialize(inspector);
      colExprEvaluators.add(exprEval);
    }
    runtimeValueInput.start();
    List<Input> inputList = new ArrayList<Input>();
    inputList.add(runtimeValueInput);
    rct.processorContext.waitForAllInputsReady(inputList);
    KeyValueReader kvReader = (KeyValueReader) runtimeValueInput.getReader();
    long rowCount = 0;
    while (kvReader.next()) {
      Object row = serDe.deserialize((Writable) kvReader.getCurrentValue());
      rowCount++;
      for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
        // Read each expression and save it to the value registry
        ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
        Object val = eval.evaluate(row);
        setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val);
      }
    }
    // For now, expecting a single row (min/max, aggregated bloom filter), or no rows
    if (rowCount == 0) {
      LOG.debug("No input rows from " + inputSourceName + ", filling dynamic values with nulls");
      for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
        ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
        setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), null);
      }
    } else if (rowCount > 1) {
      throw new IllegalStateException("Expected 0 or 1 rows from " + inputSourceName + ", got " + rowCount);
    }
  }
}
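The rows read here typically carry semijoin-reduction runtime values (a min/max pair or an aggregated bloom filter) produced on the other side of a join, which is why the method expects at most one row per source; the IDs passed to setValue() are the dynamic value IDs that downstream filter operators use to look the values up at runtime.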
use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
the class SemanticAnalyzer method genConvertCol.
private List<ExprNodeDesc> genConvertCol(String dest, QB qb, TableDesc tableDesc, Operator input,
    List<Integer> posns, boolean convert) throws SemanticException {
  StructObjectInspector oi = null;
  try {
    AbstractSerDe deserializer = tableDesc.getSerDeClass().newInstance();
    deserializer.initialize(conf, tableDesc.getProperties(), null);
    oi = (StructObjectInspector) deserializer.getObjectInspector();
  } catch (Exception e) {
    throw new SemanticException(e);
  }
  List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
  List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
  // Check column type
  int columnNumber = posns.size();
  List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
  for (Integer posn : posns) {
    ObjectInspector tableFieldOI = tableFields.get(posn).getFieldObjectInspector();
    TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
    TypeInfo rowFieldTypeInfo = rowFields.get(posn).getType();
    ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(posn).getInternalName(),
        rowFields.get(posn).getTabAlias(), rowFields.get(posn).getIsVirtualCol());
    if (convert && !tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
      // need to do some conversions here
      if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
        // cannot convert to complex types
        column = null;
      } else {
        column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
            .createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
      }
      if (column == null) {
        String reason = "Cannot convert column " + posn + " from " + rowFieldTypeInfo + " to "
            + tableFieldTypeInfo + ".";
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(),
            qb.getParseInfo().getDestForClause(dest), reason));
      }
    }
    expressions.add(column);
  }
  return expressions;
}
use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
the class SemanticAnalyzer method genConversionSelectOperator.
/**
 * Generate the conversion SelectOperator that converts the columns into the
 * types that are expected by the table_desc.
 */
private Operator genConversionSelectOperator(String dest, QB qb, Operator input, Deserializer deserializer,
    DynamicPartitionCtx dpCtx, List<FieldSchema> parts) throws SemanticException {
  StructObjectInspector oi = null;
  try {
    oi = (StructObjectInspector) deserializer.getObjectInspector();
  } catch (Exception e) {
    throw new SemanticException(e);
  }
  // Check column number
  List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
  boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
  List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
  int inColumnCnt = rowFields.size();
  int outColumnCnt = tableFields.size();
  if (dynPart && dpCtx != null) {
    outColumnCnt += dpCtx.getNumDPCols();
  }
  // The numbers of input columns and output columns should match for regular query
  if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
    String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt
        + " columns.";
    throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(),
        qb.getParseInfo().getDestForClause(dest), reason));
  }
  // Check column types
  boolean converted = false;
  int columnNumber = tableFields.size();
  List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
  // MetadataTypedColumnsetSerDe does not need type conversions because it
  // does the conversion to String by itself.
  if (!(deserializer instanceof MetadataTypedColumnsetSerDe) && !deleting(dest)) {
    // offset by 1 so that we don't try to convert the ROW__ID
    if (updating(dest)) {
      expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(),
          "", true));
    }
    // here only deals with non-partition columns. We deal with partition columns next
    for (int i = 0; i < columnNumber; i++) {
      int rowFieldsOffset = updating(dest) ? i + 1 : i;
      ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
      TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
      TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
      ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo,
          rowFields.get(rowFieldsOffset).getInternalName(), "", false,
          rowFields.get(rowFieldsOffset).isSkewedCol());
      // Thus, we still keep the conversion.
      if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
        // need to do some conversions here
        converted = true;
        if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
          // cannot convert to complex types
          column = null;
        } else {
          column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
              .createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
        }
        if (column == null) {
          String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to "
              + tableFieldTypeInfo + ".";
          throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(),
              qb.getParseInfo().getDestForClause(dest), reason));
        }
      }
      expressions.add(column);
    }
    // deal with dynamic partition columns
    if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // rowFields contains non-partitioned columns (tableFields) followed by DP columns
      int rowFieldsOffset = tableFields.size() + (updating(dest) ? 1 : 0);
      for (int dpColIdx = 0; dpColIdx < rowFields.size() - rowFieldsOffset; ++dpColIdx) {
        // create ExprNodeDesc
        ColumnInfo inputColumn = rowFields.get(dpColIdx + rowFieldsOffset);
        TypeInfo inputTypeInfo = inputColumn.getType();
        ExprNodeDesc column = new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true);
        // Cast input column to destination column type if necessary.
        if (conf.getBoolVar(DYNAMICPARTITIONCONVERT)) {
          if (parts != null && !parts.isEmpty()) {
            String destPartitionName = dpCtx.getDPColNames().get(dpColIdx);
            FieldSchema destPartitionFieldSchema = parts.stream()
                .filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName))
                .findFirst().orElse(null);
            if (destPartitionFieldSchema == null) {
              throw new IllegalStateException("Partition schema for dynamic partition " + destPartitionName
                  + " not found in DynamicPartitionCtx.");
            }
            String partitionType = destPartitionFieldSchema.getType();
            if (partitionType == null) {
throw new IllegalStateException("Couldn't get FieldSchema for partition" + destPartitionFieldSchema.getName());
            }
            PrimitiveTypeInfo partitionTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partitionType);
            if (!partitionTypeInfo.equals(inputTypeInfo)) {
              column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
                  .createConversionCast(column, partitionTypeInfo);
              converted = true;
            }
          } else {
            LOG.warn("Partition schema for dynamic partition " + inputColumn.getAlias() + " ("
                + inputColumn.getInternalName() + ") not found in DynamicPartitionCtx. "
                + "This is expected with a CTAS.");
          }
        }
        expressions.add(column);
      }
    }
  }
  if (converted) {
    // add the select operator
    RowResolver rowResolver = new RowResolver();
    List<String> colNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < expressions.size(); i++) {
      String name = getColumnInternalName(i);
      rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
      colNames.add(name);
      colExprMap.put(name, expressions.get(i));
    }
    input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames),
        new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
    input.setColumnExprMap(colExprMap);
  }
  return input;
}
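Both genConvertCol and genConversionSelectOperator hinge on the same per-column check-and-cast step. A minimal sketch of that step in isolation, assuming a string column produced by the query and an int column expected by the table (the column name _col0 and both types are made up for the example):

static ExprNodeDesc castIfNeeded() throws SemanticException {
  TypeInfo tableFieldTypeInfo = TypeInfoFactory.intTypeInfo;     // type the target table expects
  TypeInfo rowFieldTypeInfo = TypeInfoFactory.stringTypeInfo;    // type the query actually produces
  ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, "_col0", "", false);
  if (!tableFieldTypeInfo.equals(rowFieldTypeInfo) && tableFieldTypeInfo.getCategory() == Category.PRIMITIVE) {
    // wrap the column reference in a cast to the table's primitive type
    column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
        .createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
  }
  return column;  // non-primitive targets cannot be converted and are reported as errors in the methods above
}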