use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.
the class HiveCalciteUtil method getVColsMap.
public static ImmutableMap<Integer, VirtualColumn> getVColsMap(List<VirtualColumn> hiveVCols, int startIndx) {
  Builder<Integer, VirtualColumn> bldr = ImmutableMap.<Integer, VirtualColumn>builder();
  int indx = startIndx;
  for (VirtualColumn vc : hiveVCols) {
    bldr.put(indx, vc);
    indx++;
  }
  return bldr.build();
}
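A rough illustration of how this helper behaves (not taken from the source; the VirtualColumn constants FILENAME and BLOCKOFFSET and the start index are assumptions for the example): with a start index of 3, two virtual columns land at consecutive positions 3 and 4.

// Hedged sketch: assumes VirtualColumn.FILENAME and VirtualColumn.BLOCKOFFSET are available
// (java.util.Arrays and com.google.common.collect.ImmutableMap imported as usual)
List<VirtualColumn> vcs = Arrays.asList(VirtualColumn.FILENAME, VirtualColumn.BLOCKOFFSET);
ImmutableMap<Integer, VirtualColumn> m = HiveCalciteUtil.getVColsMap(vcs, 3);
// under these assumptions, m is {3=FILENAME, 4=BLOCKOFFSET}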
use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.
the class SemanticAnalyzer method setupStats.
private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch) throws SemanticException {
  // if it is not an analyze command and not column stats, then do not gather stats
  if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
    tsDesc.setGatherStats(false);
  } else {
    if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
      String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
      LOG.debug("Set stats collection dir : " + statsTmpLoc);
      tsDesc.setTmpStatsDir(statsTmpLoc);
    }
    tsDesc.setGatherStats(true);
    tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // append additional virtual columns for storing statistics
    Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
    List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
    while (vcs.hasNext()) {
      VirtualColumn vc = vcs.next();
      rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
      vcList.add(vc);
    }
    tsDesc.addVirtualCols(vcList);
    String tblName = tab.getTableName();
    // Theoretically the key prefix could be any unique string shared
    // between TableScanOperator (when publishing) and StatsTask (when aggregating).
    // Here we use
    //   db_name.table_name + partitionSpec
    // as the prefix for ease of reading during explain and debugging.
    // Currently, the partition spec can only be a static partition.
    String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
    tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
    // set up WriteEntity for replication
    outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
    // add WriteEntity for each matching partition
    if (tab.isPartitioned()) {
      List<String> cols = new ArrayList<String>();
      if (qbp.getAnalyzeRewrite() != null) {
        List<FieldSchema> partitionCols = tab.getPartCols();
        for (FieldSchema fs : partitionCols) {
          cols.add(fs.getName());
        }
        tsDesc.setPartColumns(cols);
        return;
      }
      TableSpec tblSpec = qbp.getTableSpec(alias);
      Map<String, String> partSpec = tblSpec.getPartSpec();
      if (partSpec != null) {
        cols.addAll(partSpec.keySet());
        tsDesc.setPartColumns(cols);
      } else {
        throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
      }
      List<Partition> partitions = qbp.getTableSpec().partitions;
      if (partitions != null) {
        for (Partition partn : partitions) {
          // inputs.add(new ReadEntity(partn)); // is this needed at all?
          LOG.info("XXX: adding part: " + partn);
          outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
        }
      }
    }
  }
}
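For reference, the stats aggregation prefix assembled above is just the database name, a dot, the encoded table name, and a path separator. A minimal sketch under the assumption of a database named default and a table named src (and that encodeTableName leaves a plain name unchanged):

// Hedged sketch, assuming db "default" and table "src"
String tblName = "src";
String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
String prefix = "default" + "." + k;
// prefix is "default.src/" under these assumptions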
use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.
the class Vectorizer method validateExprNodeDescRecursive.
private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
  if (desc instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
    String columnName = c.getColumn();
    if (availableVectorizedVirtualColumnSet != null) {
      // For Map, check for virtual columns.
      VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
      if (virtualColumn != null) {
        if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
          setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
          return false;
        }
        // Remember we used this one in the query.
        neededVirtualColumnSet.add(virtualColumn);
      }
    }
  }
  String typeName = desc.getTypeInfo().getTypeName();
  boolean ret = validateDataType(typeName, mode, allowComplex && isVectorizationComplexTypesEnabled);
  if (!ret) {
    setExpressionIssue(expressionTitle, getValidateDataTypeErrorMsg(typeName, mode, allowComplex, isVectorizationComplexTypesEnabled));
    return false;
  }
  boolean isInExpression = false;
  if (desc instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
    boolean r = validateGenericUdf(d);
    if (!r) {
      setExpressionIssue(expressionTitle, "UDF " + d + " not supported");
      return false;
    }
    GenericUDF genericUDF = d.getGenericUDF();
    isInExpression = (genericUDF instanceof GenericUDFIn);
  }
  if (desc.getChildren() != null) {
    if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
      // Always use loose FILTER mode.
      if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) {
        return false;
      }
    } else {
      for (ExprNodeDesc d : desc.getChildren()) {
        // Always use loose FILTER mode.
        if (!validateExprNodeDescRecursive(d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) {
          return false;
        }
      }
    }
  }
  return true;
}
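The virtual-column check at the top of this method goes through VirtualColumn.VIRTUAL_COLUMN_NAME_MAP, which maps a column name string back to its VirtualColumn instance. A hedged sketch of that lookup, using ROW__ID as an assumed example name:

// Hedged sketch: a null result means the column is an ordinary (non-virtual) column
VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get("ROW__ID");
if (virtualColumn != null && !availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
  // the expression references a virtual column the vectorizer cannot handle, so validation fails
}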
use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.
the class HiveOpConverter method visit.
/**
 * TODO: 1. PPD needs to get pushed in to TS
 *
 * @param scanRel
 * @return
 */
OpAttr visit(HiveTableScan scanRel) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + " with row type: [" + scanRel.getRowType() + "]");
  }
  RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
  // 1. Setup TableScan Desc
  // 1.1 Build col details used by scan
  ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
  List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
  List<Integer> neededColumnIDs = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  Set<Integer> vcolsInCalcite = new HashSet<Integer>();
  List<String> partColNames = new ArrayList<String>();
  Map<Integer, VirtualColumn> VColsMap = HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
  Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
  Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
  List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
  List<String> scanColNames = scanRel.getRowType().getFieldNames();
  String tableAlias = scanRel.getConcatQbIDAlias();
  String colName;
  ColumnInfo colInfo;
  VirtualColumn vc;
  for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
    colName = scanColNames.get(index);
    if (VColsMap.containsKey(index)) {
      vc = VColsMap.get(index);
      virtualCols.add(vc);
      colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
      vcolsInCalcite.add(index);
    } else if (posToPartColInfo.containsKey(index)) {
      partColNames.add(colName);
      colInfo = posToPartColInfo.get(index);
      vcolsInCalcite.add(index);
    } else {
      colInfo = posToNonPartColInfo.get(index);
    }
    colInfos.add(colInfo);
    if (neededColIndxsFrmReloptHT.contains(index)) {
      neededColumnIDs.add(index);
      neededColumnNames.add(colName);
    }
  }
  // 1.2 Create TableScanDesc
  TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
  // 1.3. Set Partition cols in TSDesc
  tsd.setPartColumns(partColNames);
  // 1.4. Set needed cols in TSDesc
  tsd.setNeededColumnIDs(neededColumnIDs);
  tsd.setNeededColumns(neededColumnNames);
  // 2. Setup TableScan
  TableScanOperator ts = (TableScanOperator) OperatorFactory.get(semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
  // If a tablescan with the same alias is already registered, make this alias unique.
  if (topOps.get(tableAlias) != null) {
    tableAlias = tableAlias + this.uniqueCounter;
  }
  topOps.put(tableAlias, ts);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
  }
  return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
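To make the index bookkeeping concrete, consider a hypothetical table with non-partition columns key and value, a partition column ds, and the ROW__ID virtual column (all names and the column ordering are assumptions for illustration). getNoOfNonVirtualCols() would then be 3, so the virtual column starts at index 3:

// Hypothetical Calcite row type layout and the structures each index feeds:
//   index 0 -> key     (non-partition)  -> posToNonPartColInfo
//   index 1 -> value   (non-partition)  -> posToNonPartColInfo
//   index 2 -> ds      (partition)      -> partColNames, vcolsInCalcite
//   index 3 -> ROW__ID (virtual)        -> virtualCols, vcolsInCalcite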
use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.
the class RelOptHiveTable method copy.
public RelOptHiveTable copy(RelDataType newRowType) {
  // 1. Build map of column name to col index of original schema
  // Assumption: Hive Table can not contain duplicate column names
  Map<String, Integer> nameToColIndxMap = new HashMap<String, Integer>();
  for (RelDataTypeField f : this.rowType.getFieldList()) {
    nameToColIndxMap.put(f.getName(), f.getIndex());
  }
  // 2. Build nonPart/Part/Virtual column info for new RowSchema
  List<ColumnInfo> newHiveNonPartitionCols = new ArrayList<ColumnInfo>();
  List<ColumnInfo> newHivePartitionCols = new ArrayList<ColumnInfo>();
  List<VirtualColumn> newHiveVirtualCols = new ArrayList<VirtualColumn>();
  Map<Integer, VirtualColumn> virtualColInfoMap = HiveCalciteUtil.getVColsMap(this.hiveVirtualCols, this.noOfNonVirtualCols);
  Integer originalColIndx;
  ColumnInfo cInfo;
  VirtualColumn vc;
  for (RelDataTypeField f : newRowType.getFieldList()) {
    originalColIndx = nameToColIndxMap.get(f.getName());
    if ((cInfo = hiveNonPartitionColsMap.get(originalColIndx)) != null) {
      newHiveNonPartitionCols.add(new ColumnInfo(cInfo));
    } else if ((cInfo = hivePartitionColsMap.get(originalColIndx)) != null) {
      newHivePartitionCols.add(new ColumnInfo(cInfo));
    } else if ((vc = virtualColInfoMap.get(originalColIndx)) != null) {
      newHiveVirtualCols.add(vc);
    } else {
      throw new RuntimeException("Copy encountered a column not seen in original TS");
    }
  }
  // 3. Build new Table
  return new RelOptHiveTable(this.schema, this.name, newRowType, this.hiveTblMetadata, newHiveNonPartitionCols, newHivePartitionCols, newHiveVirtualCols, this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats);
}
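copy is typically invoked when a projection narrows the row type; each surviving field name is resolved back to its original position and then classified as non-partition, partition, or virtual. A hedged sketch with assumed columns key, value, and ROW__ID (so noOfNonVirtualCols is 2 and the virtual column sits at index 2):

// Hedged sketch: nameToColIndxMap built from the original row type would be
//   {"key"=0, "value"=1, "ROW__ID"=2}
// A newRowType containing only (value, ROW__ID) would therefore copy
//   hiveNonPartitionColsMap.get(1) into newHiveNonPartitionCols and
//   virtualColInfoMap.get(2)       into newHiveVirtualCols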