Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
Class SemanticAnalyzer, method genTablePlan.
@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
String alias_id = getAliasId(alias, qb);
Table tab = qb.getMetaData().getSrcForAlias(alias);
RowResolver rwsch;
// is the table already present
TableScanOperator top = topOps.get(alias_id);
// Obtain table props in query
Map<String, String> properties = qb.getTabPropsForAlias(alias);
if (top == null) {
// Determine row schema for TSOP.
// Include column names from the SerDe, plus the partition and virtual columns.
rwsch = new RowResolver();
try {
// Including parameters passed in the query
if (properties != null) {
for (Entry<String, String> prop : properties.entrySet()) {
if (tab.getSerdeParam(prop.getKey()) != null) {
LOG.warn("SerDe property in input query overrides stored SerDe property");
}
tab.setSerdeParam(prop.getKey(), prop.getValue());
}
}
// Obtain inspector for schema
StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer().getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
/**
* if the column is a skewed column, use ColumnInfo accordingly
*/
ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), alias, false);
colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName()));
rwsch.put(alias, fields.get(i).getFieldName(), colInfo);
}
} catch (SerDeException e) {
throw new RuntimeException(e);
}
// Finally add the partitioning columns
for (FieldSchema part_col : tab.getPartCols()) {
LOG.trace("Adding partition col: " + part_col);
rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
}
// put all virtual columns in RowResolver.
Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
// use a list for easy customization
List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
while (vcs.hasNext()) {
VirtualColumn vc = vcs.next();
rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
vcList.add(vc);
}
// Create the root of the operator tree
TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab);
setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
SplitSample sample = nameToSplitSample.get(alias_id);
if (sample != null && sample.getRowCount() != null) {
tsDesc.setRowLimit(sample.getRowCount());
nameToSplitSample.remove(alias_id);
}
top = (TableScanOperator) putOpInsertMap(OperatorFactory.get(getOpContext(), tsDesc, new RowSchema(rwsch.getColumnInfos())), rwsch);
// Set insideView so that we can skip column authorization for this.
top.setInsideView(qb.isInsideView() || qb.getAliasInsideView().contains(alias.toLowerCase()));
// Add this to the list of top operators - we always start from a table
// scan
topOps.put(alias_id, top);
// Add a mapping from the table scan operator to Table
topToTable.put(top, tab);
if (properties != null) {
topToTableProps.put(top, properties);
tsDesc.setOpProps(properties);
}
} else {
rwsch = opParseCtx.get(top).getRowResolver();
top.setChildOperators(null);
}
// check if this table is sampled and needs more than input pruning
Operator<? extends OperatorDesc> op = top;
TableSample ts = qb.getParseInfo().getTabSample(alias);
if (ts != null) {
TableScanOperator tableScanOp = top;
tableScanOp.getConf().setTableSample(ts);
int num = ts.getNumerator();
int den = ts.getDenominator();
ArrayList<ASTNode> sampleExprs = ts.getExprs();
// TODO: Do the type checking of the expressions
List<String> tabBucketCols = tab.getBucketCols();
int numBuckets = tab.getNumBuckets();
// If there are no sample cols and no bucket cols then throw an error
if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getTableName());
}
if (num > den) {
throw new SemanticException(ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " + tab.getTableName());
}
// check if a predicate is needed
// predicate is needed if either input pruning is not enough
// or if input pruning is not possible
// check if the sample columns are the same as the table bucket columns
boolean colsEqual = true;
if ((sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0)) {
colsEqual = false;
}
for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
boolean colFound = false;
for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
break;
}
if (((ASTNode) sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) {
colFound = true;
}
}
colsEqual = (colsEqual && colFound);
}
// Check if input can be pruned
ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));
// check if input pruning is enough
if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {
// input pruning is enough; add the filter for the optimizer to use it
// later
LOG.info("No need for sample filter");
ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
FilterDesc filterDesc = new FilterDesc(samplePredicate, true, new SampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true));
filterDesc.setGenerated(true);
op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
} else {
// need to add filter
// create tableOp to be filterDesc and set as child to 'top'
LOG.info("Need sample filter");
ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
FilterDesc filterDesc = new FilterDesc(samplePredicate, true);
filterDesc.setGenerated(true);
op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
}
} else {
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
if (testMode) {
String tabName = tab.getTableName();
// has the user explicitly asked not to sample this table
String unSampleTblList = conf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
String[] unSampleTbls = unSampleTblList.split(",");
boolean unsample = false;
for (String unSampleTbl : unSampleTbls) {
if (tabName.equalsIgnoreCase(unSampleTbl)) {
unsample = true;
}
}
if (!unsample) {
int numBuckets = tab.getNumBuckets();
// If the input table is bucketed, choose the first bucket
if (numBuckets > 0) {
TableSample tsSample = new TableSample(1, numBuckets);
tsSample.setInputPruning(true);
qb.getParseInfo().setTabSample(alias, tsSample);
ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
FilterDesc filterDesc = new FilterDesc(samplePred, true, new SampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true));
filterDesc.setGenerated(true);
op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
LOG.info("No need for sample filter");
} else {
// The table is not bucketed, add a dummy filter :: rand()
int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
TableSample tsSample = new TableSample(1, freq);
tsSample.setInputPruning(false);
qb.getParseInfo().setTabSample(alias, tsSample);
LOG.info("Need sample filter");
ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer.valueOf(460476415)));
ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc);
FilterDesc filterDesc = new FilterDesc(samplePred, true);
filterDesc.setGenerated(true);
op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
}
}
}
}
Operator output = putOpInsertMap(op, rwsch);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Table Plan for " + alias + " " + op.toString());
}
return output;
}
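The pattern that recurs throughout these snippets is: build a RowResolver by registering ColumnInfo entries under a table alias, then hand rwsch.getColumnInfos() to a RowSchema when the operator is created. Below is a minimal, self-contained sketch of that pattern, not part of SemanticAnalyzer; the class name RowSchemaSketch, the alias "t", and the columns "id" and "ds" are invented for illustration, and the import paths are assumed from the Hive codebase.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaSketch {
    public static RowSchema buildSchema() {
        // Register two hypothetical columns of alias "t", mirroring
        // rwsch.put(alias, name, colInfo) in genTablePlan.
        RowResolver rwsch = new RowResolver();
        rwsch.put("t", "id",
            new ColumnInfo("id", TypeInfoFactory.getPrimitiveTypeInfo("int"), "t", false));
        // Partition-style columns pass true for the last argument, as the analyzer does.
        rwsch.put("t", "ds",
            new ColumnInfo("ds", TypeInfoFactory.getPrimitiveTypeInfo("string"), "t", true));
        // The operator's output schema is simply the resolver's ColumnInfos.
        return new RowSchema(rwsch.getColumnInfos());
    }
}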
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
Class SemanticAnalyzer, method genPTFPlanForComponentQuery.
private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException {
/*
* 1. Create the PTFDesc from the Qspec attached to this QB.
*/
RowResolver rr = opParseCtx.get(input).getRowResolver();
PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
/*
* 2. build Map-side Op Graph. Graph template is either:
* Input -> PTF_map -> ReduceSink
* or
* Input -> ReduceSink
*
* Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
*/
{
PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
/*
* a. add Map-side PTF Operator if needed
*/
if (tabDef.isTransformsRawInput()) {
RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
ptfDesc.setMapSide(true);
input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
rr = opParseCtx.get(input).getRowResolver();
}
/*
* b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
*/
ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
StringBuilder orderString = new StringBuilder();
StringBuilder nullOrderString = new StringBuilder();
/*
* Use the input RR of TableScanOperator in case there is no map-side
* reshape of input.
* If the parent of ReduceSinkOperator is PTFOperator, use its
* output RR.
*/
buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullOrderString);
input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), nullOrderString.toString(), -1, Operation.NOT_ACID);
}
/*
* 3. build Reduce-side Op Graph
*/
{
/*
* c. Rebuild the QueryDef.
* Why?
* - so that the ExprNodeDescriptors in the QueryDef are based on the
* Select Operator's RowResolver
*/
rr = opParseCtx.get(input).getRowResolver();
ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
/*
* d. Construct PTF Operator.
*/
RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
}
return input;
}
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
Class SemanticAnalyzer, method genUDTFPlan.
private Operator genUDTFPlan(GenericUDTF genericUDTF, String outputTableAlias, ArrayList<String> colAliases, QB qb, Operator input, boolean outerLV) throws SemanticException {
// No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
QBParseInfo qbp = qb.getParseInfo();
if (!qbp.getDestToGroupBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
}
if (!qbp.getDestToDistributeBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
}
if (!qbp.getDestToSortBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
}
if (!qbp.getDestToClusterBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
}
if (!qbp.getAliasToLateralViews().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
}
if (LOG.isDebugEnabled()) {
LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
}
// Use the RowResolver from the input operator to generate an input
// ObjectInspector that can be used to initialize the UDTF. Then, the
// resulting output object inspector can be used to make the RowResolver
// for the UDTF operator
RowResolver selectRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> inputCols = selectRR.getColumnInfos();
// Create the object inspector for the input columns and initialize the UDTF
ArrayList<String> colNames = new ArrayList<String>();
ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()];
for (int i = 0; i < inputCols.size(); i++) {
colNames.add(inputCols.get(i).getInternalName());
colOIs[i] = inputCols.get(i).getObjectInspector();
}
StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs));
StructObjectInspector outputOI = genericUDTF.initialize(rowOI);
int numUdtfCols = outputOI.getAllStructFieldRefs().size();
if (colAliases.isEmpty()) {
// user did not specify alias names; infer names from outputOI
for (StructField field : outputOI.getAllStructFieldRefs()) {
colAliases.add(field.getFieldName());
}
}
// Make sure that the number of column aliases in the AS clause matches
// the number of columns output by the UDTF
int numSuppliedAliases = colAliases.size();
if (numUdtfCols != numSuppliedAliases) {
throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numUdtfCols + " aliases " + "but got " + numSuppliedAliases));
}
// Generate the output column infos / row resolver using internal names.
ArrayList<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>();
Iterator<String> colAliasesIter = colAliases.iterator();
for (StructField sf : outputOI.getAllStructFieldRefs()) {
String colAlias = colAliasesIter.next();
assert (colAlias != null);
// Since the UDTF operator feeds into a LVJ operator that will rename
// all the internal names, we can just use field name from the UDTF's OI
// as the internal name
ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), outputTableAlias, false);
udtfCols.add(col);
}
// Create the row resolver for this operator from the output columns
RowResolver out_rwsch = new RowResolver();
for (int i = 0; i < udtfCols.size(); i++) {
out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i));
}
// Add the UDTFOperator to the operator DAG
Operator<?> udtf = putOpInsertMap(OperatorFactory.getAndMakeChild(new UDTFDesc(genericUDTF, outerLV), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
return udtf;
}
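The UDTF snippet above shows the general recipe for deriving resolver entries from an ObjectInspector: walk the StructObjectInspector's fields, convert each field OI to a TypeInfo, and create one ColumnInfo per field. Below is a condensed, hypothetical helper; the class and method names are invented, and for simplicity it reuses the UDTF field name as the column alias, which matches the no-AS-clause case in genUDTFPlan.

import java.util.List;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class UdtfSchemaSketch {
    // Build a RowResolver for a UDTF's output ObjectInspector under the given alias.
    static RowResolver resolverFor(StructObjectInspector outputOI, String outputTableAlias) {
        RowResolver rr = new RowResolver();
        List<? extends StructField> fields = outputOI.getAllStructFieldRefs();
        for (StructField sf : fields) {
            // As in genUDTFPlan, the UDTF field name doubles as the internal column name;
            // the downstream lateral-view join renames columns anyway.
            ColumnInfo col = new ColumnInfo(
                sf.getFieldName(),
                TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                outputTableAlias,
                false);
            rr.put(outputTableAlias, sf.getFieldName(), col);
        }
        return rr;
    }
}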
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
Class SemanticAnalyzer, method genReduceSinkPlan.
@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
Operator dummy = Operator.createDummy();
dummy.setParentOperators(Arrays.asList(input));
ArrayList<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
StringBuilder newSortOrder = new StringBuilder();
StringBuilder newNullOrder = new StringBuilder();
ArrayList<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < sortCols.size(); i++) {
ExprNodeDesc sortCol = sortCols.get(i);
// we are pulling constants but this is not a constant
if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
newSortCols.add(sortCol);
newSortOrder.append(sortOrder.charAt(i));
newNullOrder.append(nullOrder.charAt(i));
sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
}
}
// For the generation of the values expression just get the inputs
// signature and generate field expressions for those
RowResolver rsRR = new RowResolver();
ArrayList<String> outputColumns = new ArrayList<String>();
ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
ArrayList<ColumnInfo> columnInfos = inputRR.getColumnInfos();
int[] index = new int[columnInfos.size()];
for (int i = 0; i < index.length; i++) {
ColumnInfo colInfo = columnInfos.get(i);
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
// backtrack can be null when input is script operator
ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
// ignore, it will be generated by SEL op
index[i] = Integer.MAX_VALUE;
constantCols.add(valueBack);
continue;
}
int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
if (kindex >= 0) {
index[i] = kindex;
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
newColInfo.setTabAlias(nm[0]);
rsRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
continue;
}
int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
if (vindex >= 0) {
index[i] = -vindex - 1;
continue;
}
index[i] = -valueCols.size() - 1;
String outputColName = getColumnInternalName(valueCols.size());
valueCols.add(value);
valueColsBack.add(valueBack);
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
newColInfo.setTabAlias(nm[0]);
rsRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
outputColumns.add(outputColName);
}
dummy.setParentOperators(null);
ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), numReducers, acidOp);
Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
for (int i = 0; i < keyColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), sortCols.get(i));
}
List<String> valueColNames = rsdesc.getOutputValueColumnNames();
for (int i = 0; i < valueColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
}
interim.setColumnExprMap(colExprMap);
RowResolver selectRR = new RowResolver();
ArrayList<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
ArrayList<String> selOutputCols = new ArrayList<String>();
Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
Iterator<ExprNodeDesc> constants = constantCols.iterator();
for (int i = 0; i < index.length; i++) {
ColumnInfo prev = columnInfos.get(i);
String[] nm = inputRR.reverseLookup(prev.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
ColumnInfo info = new ColumnInfo(prev);
ExprNodeDesc desc;
if (index[i] == Integer.MAX_VALUE) {
desc = constants.next();
} else {
String field;
if (index[i] >= 0) {
field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
} else {
field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
}
desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
}
selCols.add(desc);
String internalName = getColumnInternalName(i);
info.setInternalName(internalName);
selectRR.put(nm[0], nm[1], info);
if (nm2 != null) {
selectRR.addMappingOnly(nm2[0], nm2[1], info);
}
selOutputCols.add(internalName);
selColExprMap.put(internalName, desc);
}
SelectDesc select = new SelectDesc(selCols, selOutputCols);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
output.setColumnExprMap(selColExprMap);
return output;
}
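One detail worth calling out from genReduceSinkPlan is the internal naming convention: columns that pass through a ReduceSinkOperator are renamed to KEY.reducesinkkeyN or VALUE.<colName>, and the operator's column-expression map ties those names back to the parent-side expressions. The sketch below wires up a single key column; the class name, the "_col0" column, and its type are invented for illustration.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceSinkNamingSketch {
    // Map the first ReduceSink key column to its parent-side expression.
    static Map<String, ExprNodeDesc> keyMapFor(ExprNodeDesc firstSortCol) {
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        // Key columns are exposed downstream as KEY.reducesinkkey0, KEY.reducesinkkey1, ...
        colExprMap.put(Utilities.ReduceField.KEY + ".reducesinkkey0", firstSortCol);
        return colExprMap;
    }

    public static void main(String[] args) {
        // A hypothetical sort column "_col0" of type string on alias "t".
        ExprNodeDesc sortCol = new ExprNodeColumnDesc(
            TypeInfoFactory.getPrimitiveTypeInfo("string"), "_col0", "t", false);
        System.out.println(keyMapFor(sortCol));
    }
}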
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
Class SemanticAnalyzer, method genConversionSelectOperator.
/**
* Generate the conversion SelectOperator that converts the columns into the
* types that are expected by the table_desc.
*/
Operator genConversionSelectOperator(String dest, QB qb, Operator input, TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException {
StructObjectInspector oi = null;
try {
Deserializer deserializer = table_desc.getDeserializerClass().newInstance();
SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null);
oi = (StructObjectInspector) deserializer.getObjectInspector();
} catch (Exception e) {
throw new SemanticException(e);
}
// Check column number
List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
int inColumnCnt = rowFields.size();
int outColumnCnt = tableFields.size();
if (dynPart && dpCtx != null) {
outColumnCnt += dpCtx.getNumDPCols();
}
// The number of input columns and output columns should match for a regular query
if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
}
// Check column types
boolean converted = false;
int columnNumber = tableFields.size();
ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
// MetadataTypedColumnsetSerDe does not need type conversions because it
// does the conversion to String by itself.
boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class);
boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class);
if (!isMetaDataSerDe && !deleting(dest)) {
// offset by 1 so that we don't try to convert the ROW__ID
if (updating(dest)) {
expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
}
// This only deals with non-partition columns; partition columns are handled next.
for (int i = 0; i < columnNumber; i++) {
int rowFieldsOffset = updating(dest) ? i + 1 : i;
ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
// Thus, we still keep the conversion.
if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
// need to do some conversions here
converted = true;
if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
// cannot convert to complex types
column = null;
} else {
column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
}
if (column == null) {
String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
}
}
expressions.add(column);
}
}
// deal with dynamic partition columns: convert ExprNodeDesc type to String??
if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
// DP columns starts with tableFields.size()
for (int i = tableFields.size() + (updating(dest) ? 1 : 0); i < rowFields.size(); ++i) {
TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true);
expressions.add(column);
}
// converted = true; // [TODO]: should we check & convert type to String and set it to true?
}
if (converted) {
// add the select operator
RowResolver rowResolver = new RowResolver();
ArrayList<String> colNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < expressions.size(); i++) {
String name = getColumnInternalName(i);
rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
colNames.add(name);
colExprMap.put(name, expressions.get(i));
}
input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
input.setColumnExprMap(colExprMap);
}
return input;
}
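The heart of genConversionSelectOperator is a per-column check: if the row's TypeInfo differs from the table's, a cast is inserted, but only when the target type is primitive. Below is a simplified sketch of that check in isolation; the class and method names are invented, and the analyzer itself reports ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH rather than the plain message used here.

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public class ConversionSketch {
    // Return the column expression, wrapping it in a cast when the table type differs.
    static ExprNodeDesc convertIfNeeded(ExprNodeDesc column, TypeInfo rowType, TypeInfo tableType)
            throws SemanticException {
        if (tableType.equals(rowType)) {
            return column; // types already match, nothing to do
        }
        if (tableType.getCategory() != Category.PRIMITIVE) {
            // Simplified: the analyzer raises TARGET_TABLE_COLUMN_MISMATCH here.
            throw new SemanticException("Cannot convert from " + rowType + " to " + tableType);
        }
        return ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableType);
    }
}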