Example 1 with VirtualColumn

use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

From class HiveCalciteUtil, method getVColsMap:

public static ImmutableMap<Integer, VirtualColumn> getVColsMap(List<VirtualColumn> hiveVCols, int startIndx) {
    Builder<Integer, VirtualColumn> bldr = ImmutableMap.<Integer, VirtualColumn>builder();
    int indx = startIndx;
    for (VirtualColumn vc : hiveVCols) {
        bldr.put(indx, vc);
        indx++;
    }
    return bldr.build();
}
Also used: VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
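
A minimal usage sketch (not from the Hive codebase) of getVColsMap: it assumes Guava's ImmutableMap on the classpath, uses the built-in virtual columns FILENAME and BLOCKOFFSET, and picks a hypothetical start index of 5, as if five physical columns precede them.

import java.util.Arrays;
import java.util.List;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;

public class GetVColsMapSketch {
    public static void main(String[] args) {
        // Hypothetical: two built-in virtual columns placed after five physical columns.
        List<VirtualColumn> vcs = Arrays.asList(VirtualColumn.FILENAME, VirtualColumn.BLOCKOFFSET);
        ImmutableMap<Integer, VirtualColumn> m = HiveCalciteUtil.getVColsMap(vcs, 5);
        // Index 5 maps to FILENAME, index 6 to BLOCKOFFSET.
        System.out.println(m);
    }
}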

Example 2 with VirtualColumn

use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

From class SemanticAnalyzer, method setupStats:

private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch) throws SemanticException {
    // If this is not an ANALYZE command and there is no column-stats rewrite, do not gather stats.
    if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
        tsDesc.setGatherStats(false);
    } else {
        if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
            String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
            LOG.debug("Set stats collection dir : " + statsTmpLoc);
            tsDesc.setTmpStatsDir(statsTmpLoc);
        }
        tsDesc.setGatherStats(true);
        tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        // append additional virtual columns for storing statistics
        Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
            vcList.add(vc);
        }
        tsDesc.addVirtualCols(vcList);
        String tblName = tab.getTableName();
        // Theoretically the key prefix could be any unique string shared
        // between TableScanOperator (when publishing) and StatsTask (when aggregating).
        // Here we use
        //   db_name.table_name + partitionSpec
        // as the prefix for ease of reading during explain and debugging.
        // Currently, the partition spec can only be a static partition.
        String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
        tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
        // set up WriteEntity for replication
        outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
        // add WriteEntity for each matching partition
        if (tab.isPartitioned()) {
            List<String> cols = new ArrayList<String>();
            if (qbp.getAnalyzeRewrite() != null) {
                List<FieldSchema> partitionCols = tab.getPartCols();
                for (FieldSchema fs : partitionCols) {
                    cols.add(fs.getName());
                }
                tsDesc.setPartColumns(cols);
                return;
            }
            TableSpec tblSpec = qbp.getTableSpec(alias);
            Map<String, String> partSpec = tblSpec.getPartSpec();
            if (partSpec != null) {
                cols.addAll(partSpec.keySet());
                tsDesc.setPartColumns(cols);
            } else {
                throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
            }
            List<Partition> partitions = qbp.getTableSpec().partitions;
            if (partitions != null) {
                for (Partition partn : partitions) {
                    // inputs.add(new ReadEntity(partn)); // is this needed at all?
                    LOG.info("XXX: adding part: " + partn);
                    outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
                }
            }
        }
    }
}
Also used: Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
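
The prefix format wired into setStatsAggPrefix above can be seen in isolation with a short sketch. The database and table names below are hypothetical; MetaStoreUtils.encodeTableName is the same helper the method calls.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;

public class StatsPrefixSketch {
    public static void main(String[] args) {
        String dbName = "default";   // hypothetical database name
        String tblName = "sales";    // hypothetical table name
        // Mirrors the method above: db_name.encoded_table_name + Path.SEPARATOR.
        String prefix = dbName + "." + MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
        System.out.println(prefix);  // default.sales/
    }
}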

Example 3 with VirtualColumn

use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

From class Vectorizer, method validateExprNodeDescRecursive:

private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
    if (desc instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
        String columnName = c.getColumn();
        if (availableVectorizedVirtualColumnSet != null) {
            // For Map, check for virtual columns.
            VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
            if (virtualColumn != null) {
                if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
                    setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
                    return false;
                }
                // Remember we used this one in the query.
                neededVirtualColumnSet.add(virtualColumn);
            }
        }
    }
    String typeName = desc.getTypeInfo().getTypeName();
    boolean ret = validateDataType(typeName, mode, allowComplex && isVectorizationComplexTypesEnabled);
    if (!ret) {
        setExpressionIssue(expressionTitle, getValidateDataTypeErrorMsg(typeName, mode, allowComplex, isVectorizationComplexTypesEnabled));
        return false;
    }
    boolean isInExpression = false;
    if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
        boolean r = validateGenericUdf(d);
        if (!r) {
            setExpressionIssue(expressionTitle, "UDF " + d + " not supported");
            return false;
        }
        GenericUDF genericUDF = d.getGenericUDF();
        isInExpression = (genericUDF instanceof GenericUDFIn);
    }
    if (desc.getChildren() != null) {
        if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
            // Always use loose FILTER mode.
            if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) {
                return false;
            }
        } else {
            for (ExprNodeDesc d : desc.getChildren()) {
                // Always use loose FILTER mode.
                if (!validateExprNodeDescRecursive(d, expressionTitle,
                        VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Also used: ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
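
The gate on vectorizing a virtual column is the name lookup in VirtualColumn.VIRTUAL_COLUMN_NAME_MAP. A small sketch of that probe, assuming the map is keyed by the column's name as it appears in queries (INPUT__FILE__NAME is Hive's standard file-name virtual column):

import org.apache.hadoop.hive.ql.metadata.VirtualColumn;

public class VirtualColumnProbeSketch {
    public static void main(String[] args) {
        // Same lookup the validator performs: an ordinary column name resolves
        // to null, while a virtual column name resolves to its VirtualColumn.
        VirtualColumn vc = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get("INPUT__FILE__NAME");
        if (vc != null) {
            System.out.println(vc.getName() + " is a virtual column");
        }
    }
}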

Example 4 with VirtualColumn

use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

From class HiveOpConverter, method visit:

/**
 * TODO: 1. PPD needs to get pushed into TS
 *
 * @param scanRel the Calcite table scan to translate
 * @return the OpAttr wrapping the generated TableScanOperator
 */
OpAttr visit(HiveTableScan scanRel) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + " with row type: [" + scanRel.getRowType() + "]");
    }
    RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
    // 1. Setup TableScan Desc
    // 1.1 Build col details used by scan
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
    List<Integer> neededColumnIDs = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    Set<Integer> vcolsInCalcite = new HashSet<Integer>();
    List<String> partColNames = new ArrayList<String>();
    Map<Integer, VirtualColumn> VColsMap = HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
    Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
    Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
    List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
    List<String> scanColNames = scanRel.getRowType().getFieldNames();
    String tableAlias = scanRel.getConcatQbIDAlias();
    String colName;
    ColumnInfo colInfo;
    VirtualColumn vc;
    for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
        colName = scanColNames.get(index);
        if (VColsMap.containsKey(index)) {
            vc = VColsMap.get(index);
            virtualCols.add(vc);
            colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
            vcolsInCalcite.add(index);
        } else if (posToPartColInfo.containsKey(index)) {
            partColNames.add(colName);
            colInfo = posToPartColInfo.get(index);
            vcolsInCalcite.add(index);
        } else {
            colInfo = posToNonPartColInfo.get(index);
        }
        colInfos.add(colInfo);
        if (neededColIndxsFrmReloptHT.contains(index)) {
            neededColumnIDs.add(index);
            neededColumnNames.add(colName);
        }
    }
    // 1.2 Create TableScanDesc
    TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
    // 1.3. Set Partition cols in TSDesc
    tsd.setPartColumns(partColNames);
    // 1.4. Set needed cols in TSDesc
    tsd.setNeededColumnIDs(neededColumnIDs);
    tsd.setNeededColumns(neededColumnNames);
    // 2. Setup TableScan
    TableScanOperator ts = (TableScanOperator) OperatorFactory.get(semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
    // Disambiguate the alias if a table scan with the same alias was already registered.
    if (topOps.get(tableAlias) != null) {
        tableAlias = tableAlias + this.uniqueCounter;
    }
    topOps.put(tableAlias, ts);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
    }
    return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) HashSet(java.util.HashSet)
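
The loop above classifies every field of the scan's row type three ways: virtual, partition, or regular. A compact sketch of just that decision, using the same map shapes (classify is our name for illustration, not Hive's):

import java.util.Map;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;

public class ColumnKindSketch {
    // Mirrors the three-way test in visit(): virtual columns first, then
    // partition columns, falling through to regular (non-partition) columns.
    static String classify(int index, Map<Integer, VirtualColumn> vColsMap,
            Map<Integer, ColumnInfo> posToPartColInfo) {
        if (vColsMap.containsKey(index)) {
            return "virtual";
        } else if (posToPartColInfo.containsKey(index)) {
            return "partition";
        } else {
            return "non-partition";
        }
    }
}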

Example 5 with VirtualColumn

use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

From class RelOptHiveTable, method copy:

public RelOptHiveTable copy(RelDataType newRowType) {
    // 1. Build map of column name to col index of original schema
    // Assumption: Hive Table can not contain duplicate column names
    Map<String, Integer> nameToColIndxMap = new HashMap<String, Integer>();
    for (RelDataTypeField f : this.rowType.getFieldList()) {
        nameToColIndxMap.put(f.getName(), f.getIndex());
    }
    // 2. Build nonPart/Part/Virtual column info for new RowSchema
    List<ColumnInfo> newHiveNonPartitionCols = new ArrayList<ColumnInfo>();
    List<ColumnInfo> newHivePartitionCols = new ArrayList<ColumnInfo>();
    List<VirtualColumn> newHiveVirtualCols = new ArrayList<VirtualColumn>();
    Map<Integer, VirtualColumn> virtualColInfoMap = HiveCalciteUtil.getVColsMap(this.hiveVirtualCols, this.noOfNonVirtualCols);
    Integer originalColIndx;
    ColumnInfo cInfo;
    VirtualColumn vc;
    for (RelDataTypeField f : newRowType.getFieldList()) {
        originalColIndx = nameToColIndxMap.get(f.getName());
        if ((cInfo = hiveNonPartitionColsMap.get(originalColIndx)) != null) {
            newHiveNonPartitionCols.add(new ColumnInfo(cInfo));
        } else if ((cInfo = hivePartitionColsMap.get(originalColIndx)) != null) {
            newHivePartitionCols.add(new ColumnInfo(cInfo));
        } else if ((vc = virtualColInfoMap.get(originalColIndx)) != null) {
            newHiveVirtualCols.add(vc);
        } else {
            throw new RuntimeException("Copy encountered a column not seen in original TS");
        }
    }
    // 3. Build new Table
    return new RelOptHiveTable(this.schema, this.name, newRowType, this.hiveTblMetadata, newHiveNonPartitionCols, newHivePartitionCols, newHiveVirtualCols, this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats);
}
Also used: HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
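
Step 1 of copy() depends on column names being unique within a Hive table. A self-contained sketch of that name-to-original-index remapping over a Calcite row type (nameToColIndex is our name for illustration, not Hive's):

import java.util.HashMap;
import java.util.Map;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;

public class NameToIndexSketch {
    // Builds the same name -> original-index map as step 1 of copy(), which is
    // well-defined because a Hive table cannot contain duplicate column names.
    static Map<String, Integer> nameToColIndex(RelDataType rowType) {
        Map<String, Integer> m = new HashMap<>();
        for (RelDataTypeField f : rowType.getFieldList()) {
            m.put(f.getName(), f.getIndex());
        }
        return m;
    }
}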

Aggregations

VirtualColumn (org.apache.hadoop.hive.ql.metadata.VirtualColumn): 11 uses
ArrayList (java.util.ArrayList): 8 uses
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 7 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3 uses
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 3 uses
TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc): 3 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 3 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 2 uses
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 2 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 2 uses
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 2 uses
WindowingException (com.sap.hadoop.windowing.WindowingException): 1 use
HashMap (java.util.HashMap): 1 use
HashSet (java.util.HashSet): 1 use
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1 use
RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField): 1 use
Path (org.apache.hadoop.fs.Path): 1 use
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 1 use
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 1 use
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 1 use