
Example 11 with PrunedPartitionList

Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

The class GenMRTableScan1, method process.

/**
   * Table scan encountered.
   * @param nd
   *          the table scan operator encountered
   * @param opProcCtx
   *          context
   */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    Class<? extends InputFormat> inputFormat = op.getConf().getTableMetadata().getInputFormatClass();
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    // create a dummy MapReduce task
    MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
    MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork, parseCtx.getConf());
    ctx.setCurrTask(currTask);
    ctx.setCurrTopOp(op);
    for (String alias : parseCtx.getTopOps().keySet()) {
        Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
        if (currOp == op) {
            String currAliasId = alias;
            ctx.setCurrAliasId(currAliasId);
            mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));
            if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
                boolean partialScan = parseCtx.getQueryProperties().isPartialScanAnalyzeCommand();
                boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();
                if (OrcInputFormat.class.isAssignableFrom(inputFormat) || MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
                    // For ORC and Parquet, all the following statements are the same
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
                    // There will not be any MR or Tez job above this task
                    StatsNoJobWork snjWork = new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec());
                    snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
                    // If partition is specified, get pruned partition list
                    Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
                    if (confirmedParts.size() > 0) {
                        Table source = op.getConf().getTableMetadata();
                        List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
                        PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols, false);
                        snjWork.setPrunedPartitionList(partList);
                    }
                    Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
                    ctx.setCurrTask(snjTask);
                    ctx.setCurrTopOp(null);
                    ctx.getRootTasks().clear();
                    ctx.getRootTasks().add(snjTask);
                } else {
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
                    // The plan consists of a simple MapRedTask followed by a StatsTask.
                    // The MR task is just a simple TableScanOperator
                    StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec());
                    statsWork.setAggKey(op.getConf().getStatsAggPrefix());
                    statsWork.setStatsTmpDir(op.getConf().getTmpStatsDir());
                    statsWork.setSourceTask(currTask);
                    statsWork.setStatsReliable(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
                    Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
                    currTask.addDependentTask(statsTask);
                    if (!ctx.getRootTasks().contains(currTask)) {
                        ctx.getRootTasks().add(currTask);
                    }
                    // For ANALYZE ... COMPUTE STATISTICS noscan, the plan consists of a StatsTask only.
                    if (noScan) {
                        statsTask.setParentTasks(null);
                        statsWork.setNoScanAnalyzeCommand(true);
                        ctx.getRootTasks().remove(currTask);
                        ctx.getRootTasks().add(statsTask);
                    }
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
                    if (partialScan) {
                        handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, statsTask);
                    }
                    currWork.getMapWork().setGatheringStats(true);
                    if (currWork.getReduceWork() != null) {
                        currWork.getReduceWork().setGatheringStats(true);
                    }
                    // NOTE: here we should use the new partition predicate pushdown API to get
                    // the pruned partition list, and pass it to setTaskPlan as the last parameter
                    Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(op);
                    if (confirmedPartns.size() > 0) {
                        Table source = op.getConf().getTableMetadata();
                        List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
                        PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, partCols, false);
                        GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx, partList);
                    } else {
                        // non-partitioned table
                        GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx);
                    }
                }
            }
            return true;
        }
    }
    assert false;
    return null;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) StatsNoJobWork(org.apache.hadoop.hive.ql.plan.StatsNoJobWork) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
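
The ORC/Parquet branch above reduces to a small pattern: build a StatsNoJobWork for the table spec and, if the ANALYZE command named specific partitions, wrap them in a PrunedPartitionList with hasUnknownPartitions set to false. The sketch below is a hypothetical helper (buildStatsNoJobWork is not a Hive method); every call it makes appears in the example above.

import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;

// Hypothetical helper extracted from the branch above; not part of Hive itself.
static StatsNoJobWork buildStatsNoJobWork(TableScanOperator op, ParseContext parseCtx) throws SemanticException {
    StatsNoJobWork snjWork = new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec());
    snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // Partitions named in the ANALYZE command are "confirmed": no pruning was needed,
    // so the pruned list is built directly from them with hasUnknownPartitions = false.
    Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
    if (!confirmedParts.isEmpty()) {
        Table source = op.getConf().getTableMetadata();
        List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
        snjWork.setPrunedPartitionList(new PrunedPartitionList(source, confirmedParts, partCols, false));
    }
    return snjWork;
}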

Example 12 with PrunedPartitionList

Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

The class SimpleFetchOptimizer, method checkTree.

// all we can handle is LimitOperator, FilterOperator, SelectOperator and a final FS
//
// for non-aggressive mode (minimal):
// 1. sampling is not allowed
// 2. for a partitioned table, all filters should target partition columns only
// 3. SelectOperator should use only simple casts/column accesses
private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, TableScanOperator ts) throws HiveException {
    SplitSample splitSample = pctx.getNameToSplitSample().get(alias);
    if (!aggressive && splitSample != null) {
        return null;
    }
    if (!aggressive && ts.getConf().getTableSample() != null) {
        return null;
    }
    Table table = ts.getConf().getTableMetadata();
    if (table == null) {
        return null;
    }
    ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput());
    if (!table.isPartitioned()) {
        FetchData fetch = new FetchData(ts, parent, table, splitSample);
        return checkOperators(fetch, aggressive, false);
    }
    boolean bypassFilter = false;
    if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
        ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
        if (PartitionPruner.onlyContainsPartnCols(table, pruner)) {
            bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions();
        }
    }
    if (!aggressive && !bypassFilter) {
        return null;
    }
    PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts);
    FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter);
    return checkOperators(fetch, aggressive, bypassFilter);
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Table(org.apache.hadoop.hive.ql.metadata.Table) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) SplitSample(org.apache.hadoop.hive.ql.parse.SplitSample) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
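
For partitioned tables, the restrictions listed above boil down to one question: may the residual filter be bypassed? That is only allowed when the partition pruner expression touches partition columns exclusively and pruning left no "unknown" partitions. A minimal hypothetical sketch of that check (canBypassFilter is not a Hive method; the individual calls mirror the example):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

// Hypothetical condensation of the bypassFilter logic above; not part of Hive itself.
static boolean canBypassFilter(ParseContext pctx, String alias, TableScanOperator ts, Table table) throws HiveException {
    if (!HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
        // Without predicate pushdown there is no pruner expression to reason about.
        return false;
    }
    ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
    // Bypass only if the filter references partition columns exclusively and the
    // pruned partition list is exact (no unknown partitions remain).
    return PartitionPruner.onlyContainsPartnCols(table, pruner)
            && !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions();
}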

Example 13 with PrunedPartitionList

Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

The class PartitionPruner, method getPartitionsFromServer.

private static PrunedPartitionList getPartitionsFromServer(Table tab, final ExprNodeGenericFuncDesc compactExpr, HiveConf conf, String alias, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter) throws SemanticException {
    try {
        // Finally, check the filter for non-built-in UDFs. If these are present, we cannot
        // do filtering on the server, and have to fall back to client path.
        boolean doEvalClientSide = hasUserFunctions(compactExpr);
        // Now filter.
        List<Partition> partitions = new ArrayList<Partition>();
        boolean hasUnknownPartitions = false;
        PerfLogger perfLogger = SessionState.getPerfLogger();
        if (!doEvalClientSide) {
            perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
            try {
                hasUnknownPartitions = Hive.get().getPartitionsByExpr(tab, compactExpr, conf, partitions);
            } catch (IMetaStoreClient.IncompatibleMetastoreException ime) {
                // TODO: backward compat for Hive <= 0.12. Can be removed later.
                LOG.warn("Metastore doesn't support getPartitionsByExpr", ime);
                doEvalClientSide = true;
            } finally {
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
            }
        }
        if (doEvalClientSide) {
            // Either we have user functions, or metastore is old version - filter names locally.
            hasUnknownPartitions = pruneBySequentialScan(tab, partitions, compactExpr, conf);
        }
        // The pruning may have used only part of the filter (or an inexact one) against the
        // metastore, and so some partitions may have no data based on other filters.
        return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(partitions), new ArrayList<String>(partColsUsedInFilter), hasUnknownPartitions || !isPruningByExactFilter);
    } catch (SemanticException e) {
        throw e;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
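
The last constructor argument above records whether the returned list is exact. Callers later inspect that flag to decide whether the original filter must still run, and iterate the surviving partitions. A small hypothetical sketch using only methods that appear elsewhere on this page (hasUnknownPartitions in Example 12, getNotDeniedPartns and getName in Example 15); describePruningResult itself is not a Hive method:

import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;

// Hypothetical consumer-side sketch of a pruning result.
static void describePruningResult(PrunedPartitionList partList) {
    if (partList.hasUnknownPartitions()) {
        // Only part of the filter (or an inexact one) was evaluated during pruning,
        // so some partitions may still contain no matching rows.
        System.out.println("Residual filtering is still required at runtime.");
    }
    for (Partition p : partList.getNotDeniedPartns()) {
        System.out.println("Partition to scan: " + p.getName());
    }
}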

Example 14 with PrunedPartitionList

Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

The class HiveMaterializedViewsRegistry, method createTableScan.

private static RelNode createTableScan(Table viewTable) {
    // 0. Recreate cluster
    final RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(null);
    final RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
    final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
    // 1. Create column schema
    final RowResolver rr = new RowResolver();
    // 1.1 Add column info for non-partition columns (ObjectInspector fields)
    StructObjectInspector rowObjectInspector;
    try {
        rowObjectInspector = (StructObjectInspector) viewTable.getDeserializer().getObjectInspector();
    } catch (SerDeException e) {
        // Bail out
        return null;
    }
    List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
    ColumnInfo colInfo;
    String colName;
    ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
    for (int i = 0; i < fields.size(); i++) {
        colName = fields.get(i).getFieldName();
        colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), null, false);
        rr.put(null, colName, colInfo);
        cInfoLst.add(colInfo);
    }
    ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
    // 1.2 Add column info corresponding to partition columns
    ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
    for (FieldSchema part_col : viewTable.getPartCols()) {
        colName = part_col.getName();
        colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true);
        rr.put(null, colName, colInfo);
        cInfoLst.add(colInfo);
        partitionColumns.add(colInfo);
    }
    // 1.3 Build row type from field <type, name>
    RelDataType rowType;
    try {
        rowType = TypeConverter.getType(cluster, rr, null);
    } catch (CalciteSemanticException e) {
        // Bail out
        return null;
    }
    // 2. Build RelOptAbstractTable
    String fullyQualifiedTabName = viewTable.getDbName();
    if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
        fullyQualifiedTabName = fullyQualifiedTabName + "." + viewTable.getTableName();
    } else {
        fullyQualifiedTabName = viewTable.getTableName();
    }
    RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<VirtualColumn>(), SessionState.get().getConf(), new HashMap<String, PrunedPartitionList>(), new AtomicInteger());
    RelNode tableRel;
    // 3. Build operator
    if (obtainTableType(viewTable) == TableType.DRUID) {
        // Build Druid query
        String address = HiveConf.getVar(SessionState.get().getConf(), HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
        String dataSource = viewTable.getParameters().get(Constants.DRUID_DATA_SOURCE);
        Set<String> metrics = new HashSet<>();
        List<RelDataType> druidColTypes = new ArrayList<>();
        List<String> druidColNames = new ArrayList<>();
        for (RelDataTypeField field : rowType.getFieldList()) {
            druidColTypes.add(field.getType());
            druidColNames.add(field.getName());
            if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
                // timestamp
                continue;
            }
            if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
                // dimension
                continue;
            }
            metrics.add(field.getName());
        }
        List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
        DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
        final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
        tableRel = DruidQuery.create(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, druidTable, ImmutableList.<RelNode>of(scan));
    } else {
        // Build Hive Table Scan Rel
        tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
    }
    return tableRel;
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) DruidTable(org.apache.calcite.adapter.druid.DruidTable) RelDataType(org.apache.calcite.rel.type.RelDataType) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) RelOptPlanner(org.apache.calcite.plan.RelOptPlanner) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) JavaTypeFactoryImpl(org.apache.calcite.jdbc.JavaTypeFactoryImpl) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HashSet(java.util.HashSet) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) TableScan(org.apache.calcite.rel.core.TableScan) DruidSchema(org.apache.calcite.adapter.druid.DruidSchema) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Interval(org.joda.time.Interval)

Example 15 with PrunedPartitionList

Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

The class AbstractBucketJoinProc, method checkConvertBucketMapJoin.

/*
   * Can this mapjoin be converted to a bucketed mapjoin?
   * The following checks are performed:
   * a. The join columns contain all the bucket columns.
   * b. The join keys are not transformed in the sub-query.
   * c. All partitions contain the expected number of files (one per bucket).
   * d. The number of buckets in the big table is divisible by the number of buckets in the small tables.
   */
protected boolean checkConvertBucketMapJoin(BucketJoinProcCtx context, Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo, Map<Byte, List<ExprNodeDesc>> keysMap, String baseBigAlias, List<String> joinAliases) throws SemanticException {
    LinkedHashMap<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition = new LinkedHashMap<String, List<Integer>>();
    LinkedHashMap<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition = new LinkedHashMap<String, List<List<String>>>();
    HashMap<String, TableScanOperator> topOps = pGraphContext.getTopOps();
    HashMap<String, String> aliasToNewAliasMap = new HashMap<String, String>();
    // (partition to bucket file names) and (partition to bucket number) for
    // the big table;
    LinkedHashMap<Partition, List<String>> bigTblPartsToBucketFileNames = new LinkedHashMap<Partition, List<String>>();
    LinkedHashMap<Partition, Integer> bigTblPartsToBucketNumber = new LinkedHashMap<Partition, Integer>();
    // the access order of join columns relative to bucket columns should be the same
    Integer[] joinKeyOrder = null;
    boolean bigTablePartitioned = true;
    for (int index = 0; index < joinAliases.size(); index++) {
        String alias = joinAliases.get(index);
        Operator<? extends OperatorDesc> topOp = aliasToOpInfo.get(alias);
        // The alias may not be present in case of a sub-query
        if (topOp == null) {
            return false;
        }
        List<String> keys = toColumns(keysMap.get((byte) index));
        if (keys == null || keys.isEmpty()) {
            return false;
        }
        int oldKeySize = keys.size();
        TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, keys);
        if (tso == null) {
            // The keys could not be traced back to a root TableScan (e.g. they are transformed
            // between topOp and the root TableScan operator). We don't handle that case, and simply return
            return false;
        }
        // For nested sub-queries, the alias mapping is not maintained in QB currently.
        if (topOps.containsValue(tso)) {
            for (Map.Entry<String, TableScanOperator> topOpEntry : topOps.entrySet()) {
                if (topOpEntry.getValue() == tso) {
                    String newAlias = topOpEntry.getKey();
                    if (!newAlias.equals(alias)) {
                        joinAliases.set(index, newAlias);
                        if (baseBigAlias.equals(alias)) {
                            baseBigAlias = newAlias;
                        }
                        aliasToNewAliasMap.put(alias, newAlias);
                        alias = newAlias;
                    }
                    break;
                }
            }
        } else {
            // Ideally, this should never happen, and this should be an assert.
            return false;
        }
        // If the join keys are transformed in the sub-query, the transformed key columns will
        // be removed by genRootTableScan, and the key size before and after the call will differ.
        if (keys.size() != oldKeySize) {
            return false;
        }
        if (joinKeyOrder == null) {
            joinKeyOrder = new Integer[keys.size()];
        }
        Table tbl = tso.getConf().getTableMetadata();
        if (tbl.isPartitioned()) {
            PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
            List<Partition> partitions = prunedParts.getNotDeniedPartns();
            // construct a mapping of (Partition->bucket file names) and (Partition -> bucket number)
            if (partitions.isEmpty()) {
                if (!alias.equals(baseBigAlias)) {
                    tblAliasToNumberOfBucketsInEachPartition.put(alias, Arrays.<Integer>asList());
                    tblAliasToBucketedFilePathsInEachPartition.put(alias, new ArrayList<List<String>>());
                }
            } else {
                List<Integer> buckets = new ArrayList<Integer>();
                List<List<String>> files = new ArrayList<List<String>>();
                for (Partition p : partitions) {
                    if (!checkBucketColumns(p.getBucketCols(), keys, joinKeyOrder)) {
                        return false;
                    }
                    List<String> fileNames = getBucketFilePathsOfPartition(p.getDataLocation(), pGraphContext);
                    // The number of files in the partition should be the same as the number of buckets.
                    int bucketCount = p.getBucketCount();
                    if (fileNames.size() != 0 && fileNames.size() != bucketCount) {
                        String msg = "The number of buckets for table " + tbl.getTableName() + " partition " + p.getName() + " is " + p.getBucketCount() + ", whereas the number of files is " + fileNames.size();
                        throw new SemanticException(ErrorMsg.BUCKETED_TABLE_METADATA_INCORRECT.getMsg(msg));
                    }
                    if (alias.equals(baseBigAlias)) {
                        bigTblPartsToBucketFileNames.put(p, fileNames);
                        bigTblPartsToBucketNumber.put(p, bucketCount);
                    } else {
                        files.add(fileNames);
                        buckets.add(bucketCount);
                    }
                }
                if (!alias.equals(baseBigAlias)) {
                    tblAliasToNumberOfBucketsInEachPartition.put(alias, buckets);
                    tblAliasToBucketedFilePathsInEachPartition.put(alias, files);
                }
            }
        } else {
            if (!checkBucketColumns(tbl.getBucketCols(), keys, joinKeyOrder)) {
                return false;
            }
            List<String> fileNames = getBucketFilePathsOfPartition(tbl.getDataLocation(), pGraphContext);
            Integer num = new Integer(tbl.getNumBuckets());
            // The number of files for the table should be the same as the number of buckets.
            if (fileNames.size() != 0 && fileNames.size() != num) {
                String msg = "The number of buckets for table " + tbl.getTableName() + " is " + tbl.getNumBuckets() + ", whereas the number of files is " + fileNames.size();
                throw new SemanticException(ErrorMsg.BUCKETED_TABLE_METADATA_INCORRECT.getMsg(msg));
            }
            if (alias.equals(baseBigAlias)) {
                bigTblPartsToBucketFileNames.put(null, fileNames);
                bigTblPartsToBucketNumber.put(null, tbl.getNumBuckets());
                bigTablePartitioned = false;
            } else {
                tblAliasToNumberOfBucketsInEachPartition.put(alias, Arrays.asList(num));
                tblAliasToBucketedFilePathsInEachPartition.put(alias, Arrays.asList(fileNames));
            }
        }
    }
    // Check that the number of buckets in each partition of the big table can be divided by the
    // number of buckets in the small tables.
    for (Integer numBucketsInPartitionOfBigTable : bigTblPartsToBucketNumber.values()) {
        if (!checkNumberOfBucketsAgainstBigTable(tblAliasToNumberOfBucketsInEachPartition, numBucketsInPartitionOfBigTable)) {
            return false;
        }
    }
    context.setTblAliasToNumberOfBucketsInEachPartition(tblAliasToNumberOfBucketsInEachPartition);
    context.setTblAliasToBucketedFilePathsInEachPartition(tblAliasToBucketedFilePathsInEachPartition);
    context.setBigTblPartsToBucketFileNames(bigTblPartsToBucketFileNames);
    context.setBigTblPartsToBucketNumber(bigTblPartsToBucketNumber);
    context.setJoinAliases(joinAliases);
    context.setBaseBigAlias(baseBigAlias);
    context.setBigTablePartitioned(bigTablePartitioned);
    if (!aliasToNewAliasMap.isEmpty()) {
        context.setAliasToNewAliasMap(aliasToNewAliasMap);
    }
    return true;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
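
Check (c) from the comment block above is the part that touches PrunedPartitionList directly: every not-denied partition must contain either no files or exactly one file per declared bucket. A minimal hypothetical sketch (validateBucketFileCounts is not a Hive method, and listBucketFiles stands in for the lookup done via getBucketFilePathsOfPartition in AbstractBucketJoinProc):

import java.util.List;
import java.util.function.Function;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical sketch of the bucket-file-count check above; not part of Hive itself.
static void validateBucketFileCounts(PrunedPartitionList prunedParts,
        Function<Partition, List<String>> listBucketFiles) throws SemanticException {
    for (Partition p : prunedParts.getNotDeniedPartns()) {
        List<String> fileNames = listBucketFiles.apply(p);
        // Empty partitions are tolerated; otherwise the file count must equal the bucket count.
        if (!fileNames.isEmpty() && fileNames.size() != p.getBucketCount()) {
            throw new SemanticException("Partition " + p.getName() + " declares " + p.getBucketCount()
                    + " buckets but contains " + fileNames.size() + " files");
        }
    }
}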

Aggregations

PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 18
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 14
Table (org.apache.hadoop.hive.ql.metadata.Table): 10
ArrayList (java.util.ArrayList): 9
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 8
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 6
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 5
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4
HashMap (java.util.HashMap): 3
Map (java.util.Map): 3
LinkedHashMap (java.util.LinkedHashMap): 2
List (java.util.List): 2
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2
OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 1
HashSet (java.util.HashSet): 1
LinkedHashSet (java.util.LinkedHashSet): 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1
DruidSchema (org.apache.calcite.adapter.druid.DruidSchema): 1