
Example 11 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class BitmapIndexHandler, method getIndexPredicateAnalyzer.

/**
   * Instantiate a new predicate analyzer suitable for determining
   * whether we can use an index, based on rules for indexes in
   * WHERE clauses that we support
   *
   * @return preconfigured predicate analyzer for WHERE queries
   */
private IndexPredicateAnalyzer getIndexPredicateAnalyzer(List<Index> indexes, Set<Partition> queryPartitions) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
    analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
    // only return results for columns in the list of indexes
    for (Index index : indexes) {
        List<FieldSchema> columnSchemas = index.getSd().getCols();
        for (FieldSchema column : columnSchemas) {
            analyzer.allowColumnName(column.getName());
        }
    }
    // partitioned columns are treated as if they have indexes, so that the partitions
    // are used during the index query generation
    for (Partition part : queryPartitions) {
        if (part.getSpec().isEmpty()) {
            // empty partitions are from whole tables, so we don't want to add them in
            continue;
        }
        for (String column : part.getSpec().keySet()) {
            analyzer.allowColumnName(column);
        }
    }
    return analyzer;
}
Also used: GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), Partition(org.apache.hadoop.hive.ql.metadata.Partition), GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual), Index(org.apache.hadoop.hive.metastore.api.Index), GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan), IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer), GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)
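
The analyzer built above is typically handed the WHERE-clause expression so it can split it into index-searchable conditions and a residual predicate. A minimal caller sketch, assuming the variables indexes, queryPartitions and predicate (the WHERE-clause ExprNodeDesc) already exist in the surrounding handler code:

// Hedged sketch: split the WHERE clause into index-servable conditions and a residual.
IndexPredicateAnalyzer analyzer = getIndexPredicateAnalyzer(indexes, queryPartitions);
List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
// analyzePredicate collects the conditions the allowed columns/operators can serve and
// returns the residual predicate that still has to be evaluated after the index lookup.
ExprNodeDesc residualPredicate = analyzer.analyzePredicate(predicate, searchConditions);
if (searchConditions.isEmpty()) {
    // nothing the bitmap index can answer; the query falls back to a normal scan
}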

Example 12 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class HiveLockObject, method createFrom.

/**
   * Creates a locking object for a table (when partition spec is not provided)
   * or a table partition
   * @param hiveDB    an object to communicate with the metastore
   * @param tableName the table to create the locking object on
   * @param partSpec  the spec of a partition to create the locking object on
   * @return  the locking object
   * @throws HiveException
   */
public static HiveLockObject createFrom(Hive hiveDB, String tableName, Map<String, String> partSpec) throws HiveException {
    Table tbl = hiveDB.getTable(tableName);
    if (tbl == null) {
        throw new HiveException("Table " + tableName + " does not exist ");
    }
    HiveLockObject obj = null;
    if (partSpec == null) {
        obj = new HiveLockObject(tbl, null);
    } else {
        Partition par = hiveDB.getPartition(tbl, partSpec, false);
        if (par == null) {
            throw new HiveException("Partition " + partSpec + " for table " + tableName + " does not exist");
        }
        obj = new HiveLockObject(par, null);
    }
    return obj;
}
Also used: Partition(org.apache.hadoop.hive.ql.metadata.Partition), DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition), Table(org.apache.hadoop.hive.ql.metadata.Table), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
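
A hedged calling sketch, assuming an existing Hive metastore handle db; the table default.sales and its ds partition column are made-up names used only for illustration:

// Hedged sketch: build lock objects for a single partition and for the whole table.
Map<String, String> partSpec = new HashMap<String, String>();
partSpec.put("ds", "2017-01-01");
// With a partition spec, the lock object wraps the resolved Partition ...
HiveLockObject partitionLock = HiveLockObject.createFrom(db, "default.sales", partSpec);
// ... and with a null spec it wraps the Table itself.
HiveLockObject tableLock = HiveLockObject.createFrom(db, "default.sales", null);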

Example 13 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class HiveTxnManagerImpl, method lockTable.

@Override
public int lockTable(Hive db, LockTableDesc lockTbl) throws HiveException {
    HiveLockManager lockMgr = getAndCheckLockManager();
    HiveLockMode mode = HiveLockMode.valueOf(lockTbl.getMode());
    String tabName = lockTbl.getTableName();
    Table tbl = db.getTable(tabName);
    if (tbl == null) {
        throw new HiveException("Table " + tabName + " does not exist ");
    }
    Map<String, String> partSpec = lockTbl.getPartSpec();
    HiveLockObjectData lockData = new HiveLockObjectData(lockTbl.getQueryId(), String.valueOf(System.currentTimeMillis()), "EXPLICIT", lockTbl.getQueryStr());
    if (partSpec == null) {
        HiveLock lck = lockMgr.lock(new HiveLockObject(tbl, lockData), mode, true);
        if (lck == null) {
            return 1;
        }
        return 0;
    }
    Partition par = db.getPartition(tbl, partSpec, false);
    if (par == null) {
        throw new HiveException("Partition " + partSpec + " for table " + tabName + " does not exist");
    }
    HiveLock lck = lockMgr.lock(new HiveLockObject(par, lockData), mode, true);
    if (lck == null) {
        return 1;
    }
    return 0;
}
Also used: Partition(org.apache.hadoop.hive.ql.metadata.Partition), Table(org.apache.hadoop.hive.ql.metadata.Table), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), HiveLockObjectData(org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData)
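
Once the lock manager is involved, failure is reported through the return code (0 for acquired, 1 for refused) rather than an exception. A hedged caller sketch, assuming txnMgr, db and lockTblDesc have been prepared elsewhere:

// Hedged sketch: translate the numeric return code into an error for the caller.
int rc = txnMgr.lockTable(db, lockTblDesc);
if (rc != 0) {
    throw new HiveException("Could not acquire " + lockTblDesc.getMode()
        + " lock on table " + lockTblDesc.getTableName());
}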

Example 14 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class GenMapRedUtils, method setMapWork.

/**
   * initialize MapWork
   *
   * @param alias_id
   *          current alias
   * @param topOp
   *          the top operator of the stack
   * @param plan
   *          map work to initialize
   * @param local
   *          whether to add the alias to the map-reduce work or to the local work
   * @param pList
   *          pruned partition list. If it is null it will be computed on-the-fly.
   * @param inputs
   *          read entities for the map work
   * @param conf
   *          current instance of hive conf
   */
public static void setMapWork(MapWork plan, ParseContext parseCtx, Set<ReadEntity> inputs, PrunedPartitionList partsList, TableScanOperator tsOp, String alias_id, HiveConf conf, boolean local) throws SemanticException {
    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    boolean isAcidTable = false;
    Path tblDir = null;
    plan.setNameToSplitSample(parseCtx.getNameToSplitSample());
    if (partsList == null) {
        try {
            partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
            isAcidTable = tsOp.getConf().isAcidTable();
        } catch (SemanticException e) {
            throw e;
        }
    }
    // Generate the map work for this alias_id
    // pass both confirmed and unknown partitions through the map-reduce
    // framework
    Set<Partition> parts = partsList.getPartitions();
    PartitionDesc aliasPartnDesc = null;
    try {
        if (!parts.isEmpty()) {
            aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
        }
    } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    // The table does not have any partitions
    if (aliasPartnDesc == null) {
        aliasPartnDesc = new PartitionDesc(Utilities.getTableDesc(tsOp.getConf().getTableMetadata()), null);
    }
    Map<String, String> props = tsOp.getConf().getOpProps();
    if (props != null) {
        Properties target = aliasPartnDesc.getProperties();
        target.putAll(props);
    }
    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);
    long sizeNeeded = Integer.MAX_VALUE;
    int fileLimit = -1;
    if (parseCtx.getGlobalLimitCtx().isEnable()) {
        if (isAcidTable) {
            LOG.info("Skip Global Limit optimization for ACID table");
            parseCtx.getGlobalLimitCtx().disableOpt();
        } else {
            long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
            sizeNeeded = (parseCtx.getGlobalLimitCtx().getGlobalOffset() + parseCtx.getGlobalLimitCtx().getGlobalLimit()) * sizePerRow;
            // For the optimization that reduces the number of input files, we limit the
            // number of files allowed. If more than that many files would have to be
            // selected, we skip the optimization, since having too many files as input
            // can cause unpredictable latency and is not necessarily cheaper.
            fileLimit = HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
            if (sizePerRow <= 0 || fileLimit <= 0) {
                LOG.info("Skip optimization to reduce input size of 'limit'");
                parseCtx.getGlobalLimitCtx().disableOpt();
            } else if (parts.isEmpty()) {
                LOG.info("Empty input: skip limit optimization");
            } else {
                LOG.info("Try to reduce input size for 'limit' " + "sizeNeeded: " + sizeNeeded + "  file limit : " + fileLimit);
            }
        }
    }
    boolean isFirstPart = true;
    boolean emptyInput = true;
    boolean singlePartition = (parts.size() == 1);
    // Track the dependencies for the view. Consider a query like: select * from V;
    // where V is a view of the form: select * from T
    // The dependencies should include V at depth 0, and T at depth 1 (inferred).
    Map<String, ReadEntity> viewToInput = parseCtx.getViewAliasToInput();
    ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(alias_id, viewToInput);
    // The table should also be considered a part of the inputs, even if it is a
    // partitioned table and regardless of whether any partition is selected.
    // This read entity is a direct read entity and not an indirect read (an indirect
    // read happens when the table is read only because it is a dependency of a view).
    boolean isDirectRead = (parentViewInfo == null);
    TableDesc tblDesc = null;
    boolean initTableDesc = false;
    PlanUtils.addPartitionInputs(parts, inputs, parentViewInfo, isDirectRead);
    for (Partition part : parts) {
        // Later the properties have to come from the partition rather than from
        // the table, in order to support versioning.
        Path[] paths = null;
        SampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(tsOp);
        // Lookup list bucketing pruner
        Map<String, ExprNodeDesc> partToPruner = parseCtx.getOpToPartToSkewedPruner().get(tsOp);
        ExprNodeDesc listBucketingPruner = (partToPruner != null) ? partToPruner.get(part.getName()) : null;
        if (sampleDescr != null) {
            assert (listBucketingPruner == null) : "Sampling and list bucketing can't coexist.";
            paths = SamplePruner.prune(part, sampleDescr);
            parseCtx.getGlobalLimitCtx().disableOpt();
        } else if (listBucketingPruner != null) {
            assert (sampleDescr == null) : "Sampling and list bucketing can't coexist.";
            /* Use the list bucketing pruner's path. */
            paths = ListBucketingPruner.prune(parseCtx, part, listBucketingPruner);
        } else {
            // We only try the first partition; if it doesn't contain enough data to
            // satisfy the limit, we change back to normal mode.
            if (parseCtx.getGlobalLimitCtx().isEnable()) {
                if (isFirstPart) {
                    long sizeLeft = sizeNeeded;
                    ArrayList<Path> retPathList = new ArrayList<Path>();
                    SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLeft, fileLimit, retPathList);
                    if (status.equals(SamplePruner.LimitPruneRetStatus.NoFile)) {
                        continue;
                    } else if (status.equals(SamplePruner.LimitPruneRetStatus.NotQualify)) {
                        LOG.info("Use full input -- first " + fileLimit + " files are more than " + sizeNeeded + " bytes");
                        parseCtx.getGlobalLimitCtx().disableOpt();
                    } else {
                        emptyInput = false;
                        paths = new Path[retPathList.size()];
                        int index = 0;
                        for (Path path : retPathList) {
                            paths[index++] = path;
                        }
                        if (status.equals(SamplePruner.LimitPruneRetStatus.NeedAllFiles) && singlePartition) {
                            // If all files are needed to meet the size limit, we disable the
                            // optimization. This usually happens for an empty table/partition
                            // or a table/partition with only one file. By disabling the
                            // optimization we avoid retrying the query when there are not
                            // enough rows.
                            parseCtx.getGlobalLimitCtx().disableOpt();
                        }
                    }
                    isFirstPart = false;
                } else {
                    paths = new Path[0];
                }
            }
            if (!parseCtx.getGlobalLimitCtx().isEnable()) {
                paths = part.getPath();
            }
        }
        // is it a partitioned table ?
        if (!part.getTable().isPartitioned()) {
            assert (tblDir == null);
            tblDir = paths[0];
            if (!initTableDesc) {
                tblDesc = Utilities.getTableDesc(part.getTable());
                initTableDesc = true;
            }
        } else if (tblDesc == null) {
            if (!initTableDesc) {
                tblDesc = Utilities.getTableDesc(part.getTable());
                initTableDesc = true;
            }
        }
        if (props != null) {
            Properties target = tblDesc.getProperties();
            target.putAll(props);
        }
        for (Path p : paths) {
            if (p == null) {
                continue;
            }
            String path = p.toString();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding " + path + " of table" + alias_id);
            }
            partDir.add(p);
            try {
                if (part.getTable().isPartitioned()) {
                    partDesc.add(Utilities.getPartitionDesc(part));
                } else {
                    partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part, false));
                }
            } catch (HiveException e) {
                LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
                throw new SemanticException(e.getMessage(), e);
            }
        }
    }
    if (emptyInput) {
        parseCtx.getGlobalLimitCtx().disableOpt();
    }
    Utilities.addSchemaEvolutionToTableScanOperator(partsList.getSourceTable(), tsOp);
    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();
    if (!local) {
        while (iterPath.hasNext()) {
            assert iterPartnDesc.hasNext();
            Path path = iterPath.next();
            PartitionDesc prtDesc = iterPartnDesc.next();
            // Add the path to alias mapping
            plan.addPathToAlias(path, alias_id);
            plan.addPathToPartitionInfo(path, prtDesc);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Information added for path " + path);
            }
        }
        assert plan.getAliasToWork().get(alias_id) == null;
        plan.getAliasToWork().put(alias_id, tsOp);
    } else {
        // populate local work if needed
        MapredLocalWork localPlan = plan.getMapRedLocalWork();
        if (localPlan == null) {
            localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
        }
        assert localPlan.getAliasToWork().get(alias_id) == null;
        assert localPlan.getAliasToFetchWork().get(alias_id) == null;
        localPlan.getAliasToWork().put(alias_id, tsOp);
        if (tblDir == null) {
            tblDesc = Utilities.getTableDesc(partsList.getSourceTable());
            localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(partDir, partDesc, tblDesc));
        } else {
            localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir, tblDesc));
        }
        plan.setMapRedLocalWork(localPlan);
    }
}
Also used: HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), ArrayList(java.util.ArrayList), Properties(java.util.Properties), LinkedHashMap(java.util.LinkedHashMap), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException), Path(org.apache.hadoop.fs.Path), Partition(org.apache.hadoop.hive.ql.metadata.Partition), SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc), ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity), MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork), FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork), PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc), LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
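
A hedged invocation sketch of how this initializer is typically driven from plan generation. The surrounding variables (mrTask, parseCtx, inputs, tsOp, aliasId, conf) are assumed to exist in the caller; passing null for the pruned partition list makes the method prune on the fly via PartitionPruner:

// Hedged sketch: initialize the MapWork of an existing map-reduce task for one alias.
MapWork mapWork = mrTask.getWork().getMapWork();  // assumes an existing MapRedTask 'mrTask'
GenMapRedUtils.setMapWork(
    mapWork, parseCtx, inputs,
    null,     // pruned partition list; null means it is computed on the fly
    tsOp,     // the TableScanOperator at the top of this alias's operator chain
    aliasId,  // the alias being initialized
    conf,     // current HiveConf
    false);   // false: add to map-reduce work; true: add to local (fetch) work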

Example 15 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

The class GenMapRedUtils, method getInputPathsForPartialScan.

public static List<Path> getInputPathsForPartialScan(TableScanOperator tableScanOp, Appendable aggregationKey) throws SemanticException {
    List<Path> inputPaths = new ArrayList<Path>();
    switch(tableScanOp.getConf().getTableMetadata().getTableSpec().specType) {
        case TABLE_ONLY:
            inputPaths.add(tableScanOp.getConf().getTableMetadata().getTableSpec().tableHandle.getPath());
            break;
        case STATIC_PARTITION:
            Partition part = tableScanOp.getConf().getTableMetadata().getTableSpec().partHandle;
            try {
                aggregationKey.append(Warehouse.makePartPath(part.getSpec()));
            } catch (MetaException e) {
                throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(part.getDataLocation().toString() + e.getMessage()));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            inputPaths.add(part.getDataLocation());
            break;
        default:
            assert false;
    }
    return inputPaths;
}
Also used: Path(org.apache.hadoop.fs.Path), Partition(org.apache.hadoop.hive.ql.metadata.Partition), ArrayList(java.util.ArrayList), IOException(java.io.IOException), MetaException(org.apache.hadoop.hive.metastore.api.MetaException), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
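
A hedged usage sketch: a plain StringBuilder serves as the Appendable aggregation key, and tableScanOp is assumed to be the TableScanOperator of an ANALYZE TABLE ... PARTIALSCAN plan:

// Hedged sketch: collect the input paths and build the aggregation key for a partial scan.
StringBuilder aggregationKey = new StringBuilder();
List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScanOp, aggregationKey);
// For the STATIC_PARTITION case, aggregationKey now contains the partition's path fragment.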

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 83
Table (org.apache.hadoop.hive.ql.metadata.Table): 48
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 42
ArrayList (java.util.ArrayList): 35
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 23
Path (org.apache.hadoop.fs.Path): 21
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 21
HashMap (java.util.HashMap): 17
LinkedHashMap (java.util.LinkedHashMap): 17
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 16
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 16
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 15
IOException (java.io.IOException): 13
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 13
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 12
FileNotFoundException (java.io.FileNotFoundException): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 10
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 10
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 10