use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genSelectAllDesc.
private Operator genSelectAllDesc(Operator input) {
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  List<ColumnInfo> columns = inputRR.getColumnInfos();
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  List<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  for (ColumnInfo col : columns) {
    colList.add(new ExprNodeColumnDesc(col, true));
    columnNames.add(col.getInternalName());
    columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
  }
  RowResolver outputRR = inputRR.duplicate();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
  output.setColumnExprMap(columnExprMap);
  return output;
}
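For orientation, here is a minimal stand-alone sketch of the per-column pattern above with the operator plumbing stripped away: every ColumnInfo in the input row resolver yields one identity ExprNodeColumnDesc, and the three parallel structures (expression list, internal column names, column-to-expression map) are exactly what the SELECT * operator is configured with. The helper class name is invented for illustration; only the Hive types already used above are assumed to be on the classpath.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

// Hypothetical helper, not part of Hive: gathers the per-column structures
// that genSelectAllDesc feeds into SelectDesc and setColumnExprMap.
final class SelectAllColumns {
  final List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  final List<String> columnNames = new ArrayList<String>();
  final Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();

  SelectAllColumns(List<ColumnInfo> columns) {
    for (ColumnInfo col : columns) {
      // One identity column expression per input column, keyed by internal name.
      colList.add(new ExprNodeColumnDesc(col, true));
      columnNames.add(col.getInternalName());
      columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
    }
  }
}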
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genLateralViewPlan.
private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree)
    throws SemanticException {
  RowResolver lvForwardRR = new RowResolver();
  RowResolver source = opParseCtx.get(op).getRowResolver();
  Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>();
  Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  List<String> colNames = new ArrayList<String>();
  for (ColumnInfo col : source.getColumnInfos()) {
    String[] tabCol = source.reverseLookup(col.getInternalName());
    lvForwardRR.put(tabCol[0], tabCol[1], col);
    ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(col);
    colList.add(colExpr);
    colNames.add(colExpr.getColumn());
    lvfColExprMap.put(col.getInternalName(), colExpr);
    selColExprMap.put(col.getInternalName(), colExpr.clone());
  }
  Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), op), lvForwardRR);
  lvForward.setColumnExprMap(lvfColExprMap);
  // The order in which the two paths are added is important. The
  // lateral view join operator depends on having the select operator
  // give it the row first.
  // Get the all path by making a select(*).
  RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
  // Operator allPath = op;
  SelectDesc sDesc = new SelectDesc(colList, colNames, false);
  sDesc.setSelStarNoCompute(true);
  Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
      sDesc, new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR);
  allPath.setColumnExprMap(selColExprMap);
  int allColumns = allPathRR.getColumnInfos().size();
  // Get the UDTF Path
  QB blankQb = new QB(null, null, false);
  Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb, lvForward,
      null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
  // add udtf aliases to QB
  for (String udtfAlias : blankQb.getAliases()) {
    qb.addAlias(udtfAlias);
  }
  RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
  // Merge the two into the lateral view join
  // The cols of the merged result will be the combination of both the
  // cols of the UDTF path and the cols of the all path. The internal
  // names have to be changed to avoid conflicts
  RowResolver lateralViewRR = new RowResolver();
  List<String> outputInternalColNames = new ArrayList<String>();
  // For PPD, we need a column to expression map so that during the walk,
  // the processor knows how to transform the internal col names.
  // The following steps depend on LVmergeRowResolvers being called in this
  // order: the all path first, then the UDTF path.
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames);
  LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames);
  Operator lateralViewJoin = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new LateralViewJoinDesc(allColumns, outputInternalColNames),
      new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR);
  lateralViewJoin.setColumnExprMap(colExprMap);
  return lateralViewJoin;
}
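The sub-plan built by this method is diamond-shaped. The comment sketch below is not generated code; it only summarizes the shape for a hypothetical query of the form SELECT ... FROM src LATERAL VIEW explode(arr) t AS x, and shows where the maps built above end up.

// Sketch of the operator sub-plan produced above (names in parentheses are the
// local variables in genLateralViewPlan):
//
//                <parent operator over src>
//                            |
//              LateralViewForward (lvForward, lvfColExprMap)
//                 /                          \
//   Select * (selStarNoCompute)        Select over explode(arr)
//   "all path" (allPath,               "UDTF path" (udtfPath,
//    selColExprMap)                     built by genSelectPlan)
//                 \                          /
//               LateralViewJoin (lateralViewJoin)
//   -- merges the allColumns input columns with the UDTF output columns;
//      LVmergeRowResolvers renames internal names to avoid conflicts.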
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genColListRegex.
@SuppressWarnings("nls")
// TODO: make aliases unique, otherwise needless rewriting takes place
Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
    List<Pair<ColumnInfo, RowResolver>> colList, Set<ColumnInfo> excludeCols, RowResolver input,
    RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
    boolean ensureUniqueCols) throws SemanticException {
  if (colSrcRR == null) {
    colSrcRR = input;
  }
  // The table alias should exist
  if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
    throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(), sel));
  }
  // TODO: Have to put in the support for AS clause
  Pattern regex = null;
  try {
    regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
  } catch (PatternSyntaxException e) {
    throw new SemanticException(
        ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), sel, e.getMessage()));
  }
  StringBuilder replacementText = new StringBuilder();
  int matched = 0;
  // Add the empty table alias to the aliases to search: some operators
  // (e.g. GroupBy) register their ColumnInfos under the table alias "".
  if (!aliases.contains("")) {
    aliases.add("");
  }
  /*
   * track the input ColumnInfos that are added to the output.
   * if a columnInfo has multiple mappings; then add the column only once,
   * but carry the mappings forward.
   */
  Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
  if (colSrcRR.getNamedJoinInfo() != null) {
    // We got using() clause in previous join. Need to generate select list as
    // per standard. For * we will have joining columns first non-repeated
    // followed by other columns.
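    // For example (hypothetical tables): for "a JOIN b USING (id)", "*" expands
    // to the join column id exactly once, taken from the left side unless this
    // is a RIGHT OUTER JOIN, followed (in the loop further below) by the
    // remaining columns of a and then of b.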
    Map<String, ColumnInfo> leftMap =
        colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(0));
    Map<String, ColumnInfo> rightMap =
        colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(1));
    Map<String, ColumnInfo> chosenMap = null;
    if (colSrcRR.getNamedJoinInfo().getHiveJoinType() != JoinType.RIGHTOUTER) {
      chosenMap = leftMap;
    } else {
      chosenMap = rightMap;
    }
    // first get the columns in named columns
    for (String columnName : colSrcRR.getNamedJoinInfo().getNamedColumns()) {
      for (Map.Entry<String, ColumnInfo> entry : chosenMap.entrySet()) {
        ColumnInfo colInfo = entry.getValue();
        if (!columnName.equals(colInfo.getAlias())) {
          continue;
        }
        String name = colInfo.getInternalName();
        String[] tmp = colSrcRR.reverseLookup(name);
        // Skip the colinfos which are not for this particular alias
        if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
          continue;
        }
        if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
          continue;
        }
        ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
        if (oColInfo == null) {
          colList.add(Pair.of(colInfo, colSrcRR));
          oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
              colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
          inputColsProcessed.put(colInfo, oColInfo);
        }
        if (ensureUniqueCols) {
          if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
            throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
                + " => " + oColInfo + " due to duplication, see previous warnings",
                UnsupportedFeature.Duplicates_in_RR);
          }
        } else {
          output.put(tmp[0], tmp[1], oColInfo);
        }
        pos++;
        matched++;
        if (unparseTranslator.isEnabled() || (tableMask.isEnabled() && analyzeRewrite == null)) {
          if (replacementText.length() > 0) {
            replacementText.append(", ");
          }
          replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
          replacementText.append(".");
          replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
        }
      }
    }
  }
  for (String alias : aliases) {
    Map<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
    if (fMap == null) {
      continue;
    }
    // from the input schema
    for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
      ColumnInfo colInfo = entry.getValue();
      if (colSrcRR.getNamedJoinInfo() != null
          && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) {
        // we already added this column in select list.
        continue;
      }
      if (excludeCols != null && excludeCols.contains(colInfo)) {
        // This was added during plan generation.
        continue;
      }
      // First, look up the column from the source against which * is to be
      // resolved; later we translate it into the column from the proper input,
      // if it is valid.
      // TODO: excludeCols may be possible to remove using the same technique.
      String name = colInfo.getInternalName();
      String[] tmp = colSrcRR.reverseLookup(name);
      // Skip the colinfos which are not for this particular alias
      if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
        continue;
      }
      if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
        continue;
      }
      // Not matching the regex?
      if (!regex.matcher(tmp[1]).matches()) {
        continue;
      }
      // TODO: This is fraught with peril.
      if (input != colSrcRR) {
        colInfo = input.get(tabAlias, tmp[1]);
        if (colInfo == null) {
          LOG.error("Cannot find colInfo for {}.{}, derived from [{}], in [{}]",
              tabAlias, tmp[1], colSrcRR, input);
          throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
        }
        // Capture the old mapping before switching to the proper input, so the
        // debug message below compares the old and new columns instead of
        // printing the same mapping twice.
        String oldCol = null;
        if (LOG.isDebugEnabled()) {
          oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
        }
        name = colInfo.getInternalName();
        tmp = input.reverseLookup(name);
        if (LOG.isDebugEnabled()) {
          String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
          LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
        }
      }
      ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
      if (oColInfo == null) {
        colList.add(Pair.of(colInfo, input));
        oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
            colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
        inputColsProcessed.put(colInfo, oColInfo);
      }
      assert nonNull(tmp);
      if (ensureUniqueCols) {
        if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
          throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
              + " => " + oColInfo + " due to duplication, see previous warnings",
              UnsupportedFeature.Duplicates_in_RR);
        }
      } else {
        output.put(tmp[0], tmp[1], oColInfo);
      }
      pos++;
      matched++;
      if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
        if (replacementText.length() > 0) {
          replacementText.append(", ");
        }
        replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
        replacementText.append(".");
        replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
      }
    }
  }
  if (matched == 0) {
    throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), sel));
  }
  unparseTranslator.addTranslation(sel, replacementText.toString());
  if (tableMask.isEnabled()) {
    tableMask.addTranslation(sel, replacementText.toString());
  }
  return pos;
}
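The matching step itself is plain java.util.regex with case-insensitive compilation, so whatever column regex is passed in matches column aliases regardless of case. A self-contained illustration of just that step, with made-up column names:

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

// Stand-alone illustration of the matching step only; the column names are invented.
public class ColumnRegexDemo {
  public static void main(String[] args) {
    // Same compilation flags as genColListRegex uses.
    Pattern regex = Pattern.compile("key.*", Pattern.CASE_INSENSITIVE);
    List<String> columns = Arrays.asList("key", "KEY_ID", "value");
    for (String col : columns) {
      // Matches "key" and "KEY_ID", but not "value".
      System.out.println(col + " -> " + regex.matcher(col).matches());
    }
  }
}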
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method setupStats.
private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch) throws SemanticException {
  // if it is not an analyze command and not column stats, then do not gather stats
  if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
    tsDesc.setGatherStats(false);
    return;
  }
  if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
    String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
    LOG.debug("Set stats collection dir : " + statsTmpLoc);
    tsDesc.setTmpStatsDir(statsTmpLoc);
  }
  tsDesc.setGatherStats(true);
  tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
  // append additional virtual columns for storing statistics
  Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
  List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
  while (vcs.hasNext()) {
    VirtualColumn vc = vcs.next();
    rwsch.put(alias, vc.getName(),
        new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
    vcList.add(vc);
  }
  tsDesc.addVirtualCols(vcList);
  String tblName = tab.getTableName();
  // Theoretically the key prefix could be any unique string shared
  // between TableScanOperator (when publishing) and StatsTask (when aggregating).
  // Here we use
  //   db_name.table_name + partitionSpec
  // as the prefix for ease of reading during explain and debugging.
  // Currently, the partition spec can only be a static partition.
  String k = FileUtils.escapePathName(tblName).toLowerCase() + Path.SEPARATOR;
  tsDesc.setStatsAggPrefix(FileUtils.escapePathName(tab.getDbName()).toLowerCase() + "." + k);
  // set up WriteEntity for replication and txn stats
  WriteEntity we = new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED);
  we.setTxnAnalyze(true);
  outputs.add(we);
  if (AcidUtils.isTransactionalTable(tab)) {
    if (acidAnalyzeTable != null) {
      throw new IllegalStateException("Multiple ACID tables in analyze: " + we + ", " + acidAnalyzeTable);
    }
    acidAnalyzeTable = we;
  }
  // add WriteEntity for each matching partition
  if (tab.isPartitioned()) {
    List<String> cols = new ArrayList<String>();
    if (qbp.getAnalyzeRewrite() != null) {
      List<FieldSchema> partitionCols = tab.getPartCols();
      for (FieldSchema fs : partitionCols) {
        cols.add(fs.getName());
      }
      tsDesc.setPartColumns(cols);
      return;
    }
    TableSpec tblSpec = qbp.getTableSpec(alias);
    Map<String, String> partSpec = tblSpec.getPartSpec();
    if (partSpec != null) {
      cols.addAll(partSpec.keySet());
      tsDesc.setPartColumns(cols);
    } else {
      throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
    }
    List<Partition> partitions = qbp.getTableSpec().partitions;
    if (partitions != null) {
      for (Partition partn : partitions) {
        WriteEntity pwe = new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK);
        pwe.setTxnAnalyze(true);
        outputs.add(pwe);
      }
    }
  }
}
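The stats aggregation prefix set above is simply the escaped, lower-cased database and table names joined with a dot and terminated by a path separator. A small stand-alone sketch with invented names (it assumes hive-common and hadoop-common on the classpath, nothing else from this method):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

public class StatsPrefixDemo {
  public static void main(String[] args) {
    // Hypothetical db/table names; mirrors the prefix logic in setupStats.
    String dbName = "default";
    String tblName = "MyTable";
    String k = FileUtils.escapePathName(tblName).toLowerCase() + Path.SEPARATOR;
    String prefix = FileUtils.escapePathName(dbName).toLowerCase() + "." + k;
    // Prints: default.mytable/
    System.out.println(prefix);
  }
}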
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genMaterializedViewDataOrgPlan.
private Operator genMaterializedViewDataOrgPlan(List<ColumnInfo> sortColInfos,
    List<ColumnInfo> distributeColInfos, RowResolver inputRR, Operator input) {
  // In this case, we will introduce a RS and immediately after a SEL that restores
  // the row schema to what follow-up operations are expecting
  Set<String> keys = sortColInfos.stream()
      .map(ColumnInfo::getInternalName)
      .collect(Collectors.toSet());
  Set<String> distributeKeys = distributeColInfos.stream()
      .map(ColumnInfo::getInternalName)
      .collect(Collectors.toSet());
  List<ExprNodeDesc> keyCols = new ArrayList<>();
  List<String> keyColNames = new ArrayList<>();
  StringBuilder order = new StringBuilder();
  StringBuilder nullOrder = new StringBuilder();
  List<ExprNodeDesc> valCols = new ArrayList<>();
  List<String> valColNames = new ArrayList<>();
  List<ExprNodeDesc> partCols = new ArrayList<>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
  Map<String, String> nameMapping = new HashMap<>();
  // map _col0 to KEY._col0, etc
  for (ColumnInfo ci : inputRR.getRowSchema().getSignature()) {
    ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci);
    String columnName = ci.getInternalName();
    if (keys.contains(columnName)) {
      // key (sort column)
      keyColNames.add(columnName);
      keyCols.add(e);
      colExprMap.put(Utilities.ReduceField.KEY + "." + columnName, e);
      nameMapping.put(columnName, Utilities.ReduceField.KEY + "." + columnName);
      order.append("+");
      nullOrder.append("a");
    } else {
      // value
      valColNames.add(columnName);
      valCols.add(e);
      colExprMap.put(Utilities.ReduceField.VALUE + "." + columnName, e);
      nameMapping.put(columnName, Utilities.ReduceField.VALUE + "." + columnName);
    }
    if (distributeKeys.contains(columnName)) {
      // distribute column
      partCols.add(e.clone());
    }
  }
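  // Example (hypothetical column names): with sort columns {_col0} and
  // distribute columns {_col0, _col1} over a row (_col0, _col1, _col2), the
  // loop above yields keyColNames = [_col0], valColNames = [_col1, _col2],
  // partCols = [_col0, _col1], order = "+", nullOrder = "a", and nameMapping
  // sends _col0 -> KEY._col0 and the other columns to VALUE.<name>.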
  // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
  // the reduce operator will initialize Extract operator with information
  // from Key and Value TableDesc
  List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
  TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, order.toString(), nullOrder.toString());
  List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
  TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
  List<List<Integer>> distinctColumnIndices = new ArrayList<>();
  // Number of reducers is set to default (-1)
  ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames,
      distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, valueTable, Operation.NOT_ACID);
  RowResolver rsRR = new RowResolver();
  List<ColumnInfo> rsSignature = new ArrayList<>();
  for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
    ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
    String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
    colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
    rsSignature.add(colInfo);
    rsRR.put(nm[0], nm[1], colInfo);
    if (nm2 != null) {
      rsRR.addMappingOnly(nm2[0], nm2[1], colInfo);
    }
  }
  Operator<?> result = putOpInsertMap(
      OperatorFactory.getAndMakeChild(rsConf, new RowSchema(rsSignature), input), rsRR);
  result.setColumnExprMap(colExprMap);
  // Create SEL operator
  RowResolver selRR = new RowResolver();
  List<ColumnInfo> selSignature = new ArrayList<>();
  List<ExprNodeDesc> columnExprs = new ArrayList<>();
  List<String> colNames = new ArrayList<>();
  Map<String, ExprNodeDesc> selColExprMap = new HashMap<>();
  for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
    ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
    String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
    selSignature.add(colInfo);
    selRR.put(nm[0], nm[1], colInfo);
    if (nm2 != null) {
      selRR.addMappingOnly(nm2[0], nm2[1], colInfo);
    }
    String colName = colInfo.getInternalName();
    ExprNodeDesc exprNodeDesc;
    if (keys.contains(colName)) {
      exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(),
          ReduceField.KEY.toString() + "." + colName, null, false);
      columnExprs.add(exprNodeDesc);
    } else {
      exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(),
          ReduceField.VALUE.toString() + "." + colName, null, false);
      columnExprs.add(exprNodeDesc);
    }
    colNames.add(colName);
    selColExprMap.put(colName, exprNodeDesc);
  }
  SelectDesc selConf = new SelectDesc(columnExprs, colNames);
  result = putOpInsertMap(
      OperatorFactory.getAndMakeChild(selConf, new RowSchema(selSignature), result), selRR);
  result.setColumnExprMap(selColExprMap);
  return result;
}
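The trailing SEL operator exists only to re-expose each reduce-side column (KEY.<name> or VALUE.<name>, depending on whether it is a sort key) under its original internal name. A minimal stand-alone sketch of that renaming decision, with the key set and column names invented for illustration:

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;

// Illustrative only: computes the "output name -> reduce-side source name"
// mapping that the SELECT after the ReduceSink effectively applies.
public class ReduceSideRenameDemo {
  public static void main(String[] args) {
    Set<String> keys = new HashSet<>(Arrays.asList("_col0"));
    List<String> columns = Arrays.asList("_col0", "_col1", "_col2");
    Map<String, String> rename = new LinkedHashMap<>();
    for (String col : columns) {
      String prefix = keys.contains(col) ? ReduceField.KEY.toString() : ReduceField.VALUE.toString();
      rename.put(col, prefix + "." + col);
    }
    // Prints: {_col0=KEY._col0, _col1=VALUE._col1, _col2=VALUE._col2}
    System.out.println(rename);
  }
}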