Example 56 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class IndexWhereProcessor method process.

/**
 * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    TableScanOperator operator = (TableScanOperator) nd;
    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    if (operatorDesc == null || !tsToIndices.containsKey(operator)) {
        return null;
    }
    List<Index> indexes = tsToIndices.get(operator);
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();
    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");
    if (predicate == null) {
        LOG.info("null predicate pushed down");
        return null;
    }
    LOG.info(predicate.getExprString());
    // check if we have indexes on all partitions in this table scan
    Set<Partition> queryPartitions;
    try {
        queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
        if (queryPartitions == null) {
            // partitions not covered
            return null;
        }
    } catch (HiveException e) {
        LOG.error("Fatal Error: problem accessing metastore", e);
        throw new SemanticException(e);
    }
    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
        return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    // make sure we have an index on the table being scanned
    TableDesc tblDesc = operator.getTableDesc();
    Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
    for (Index indexOnTable : indexes) {
        if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
            List<Index> newType = new ArrayList<Index>();
            newType.add(indexOnTable);
            indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
        } else {
            indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
        }
    }
    // choose the index type with the most indexes of that type on the table
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    List<Index> bestIndexes = indexesByType.values().iterator().next();
    for (List<Index> indexTypes : indexesByType.values()) {
        if (bestIndexes.size() < indexTypes.size()) {
            bestIndexes = indexTypes;
        }
    }
    // rewrite index queries for the chosen index type
    HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
    tmpQueryContext.setQueryPartitions(queryPartitions);
    rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
    List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
    if (indexTasks != null && indexTasks.size() > 0) {
        queryContexts.put(bestIndexes.get(0), tmpQueryContext);
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
        // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
        Index chosenIndex = queryContexts.keySet().iterator().next();
        // modify the parse context to use indexing
        // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
        HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
        // prepare the map reduce job to use indexing
        MapWork work = currentTask.getWork().getMapWork();
        work.setInputformat(queryContext.getIndexInputFormat());
        work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
        // modify inputs based on index query
        Set<ReadEntity> inputs = pctx.getSemanticInputs();
        inputs.addAll(queryContext.getAdditionalSemanticInputs());
        List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
        // add dependencies so index query runs first
        insertIndexQuery(pctx, context, chosenRewrite);
    }
    return null;
}
Also used : HiveIndexQueryContext(org.apache.hadoop.hive.ql.index.HiveIndexQueryContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Index(org.apache.hadoop.hive.metastore.api.Index) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
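The bucket-then-pick-largest step above is independent of Hive. A minimal standalone sketch of the same selection logic, using hypothetical handler-class strings in place of Index.getIndexHandlerClass() (everything here is illustrative, not Hive API):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class LargestGroupDemo {
    public static void main(String[] args) {
        // Hypothetical handler class names standing in for Index.getIndexHandlerClass().
        List<String> handlers = List.of(
                "CompactIndexHandler", "BitmapIndexHandler", "CompactIndexHandler");

        // Bucket entries by key, as the example buckets indexes by handler class;
        // computeIfAbsent collapses the null-check-then-put branch into one call.
        Map<String, List<String>> byType = new HashMap<>();
        for (String h : handlers) {
            byType.computeIfAbsent(h, k -> new ArrayList<>()).add(h);
        }

        // Pick the largest bucket; ties go to whichever group the iterator
        // yields first, exactly like the loop in the example above.
        List<String> best = byType.values().iterator().next();
        for (List<String> group : byType.values()) {
            if (best.size() < group.size()) {
                best = group;
            }
        }
        System.out.println("chosen type: " + best.get(0) + " (" + best.size() + " indexes)");
    }
}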

Example 57 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class PartitionPruner method getAllPartitions.

private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    return result;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger)
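getAllPartitions is a PerfLogBegin/PerfLogEnd bracket around a single metastore call. The bracketing pattern can be sketched without Hive at all; note that the original calls PerfLogEnd outside any finally block, so a HiveException from getAllPartitionsOf would leave the bracket open, whereas the sketch below closes it on all paths (the timer and method name are stand-ins, not Hive API):

public class TimedCallDemo {
    // Hypothetical stand-in for Hive.get().getAllPartitionsOf(tab).
    static int fetchPartitionCount() throws InterruptedException {
        Thread.sleep(50);
        return 42;
    }

    public static void main(String[] args) throws InterruptedException {
        long start = System.nanoTime(); // plays the role of PerfLogBegin
        try {
            System.out.println("partitions: " + fetchPartitionCount());
        } finally {
            // plays the role of PerfLogEnd; finally guarantees the bracket
            // closes even when the wrapped call throws
            long elapsedMs = (System.nanoTime() - start) / 1_000_000;
            System.out.println("PARTITION_RETRIEVING took " + elapsedMs + " ms");
        }
    }
}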

Example 58 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class IndexMetadataChangeTask method execute.

@Override
protected int execute(DriverContext driverContext) {
    try {
        Hive db = Hive.get(conf);
        IndexMetadataChangeWork work = this.getWork();
        String tblName = work.getIndexTbl();
        Table tbl = db.getTable(work.getDbName(), tblName);
        if (tbl == null) {
            console.printError("Index table can not be null.");
            return 1;
        }
        if (!tbl.getTableType().equals(TableType.INDEX_TABLE)) {
            console.printError("Table " + tbl.getTableName() + " not specified.");
            return 1;
        }
        if (tbl.isPartitioned() && work.getPartSpec() == null) {
            console.printError("Index table is partitioned, but no partition specified.");
            return 1;
        }
        if (work.getPartSpec() != null) {
            Partition part = db.getPartition(tbl, work.getPartSpec(), false);
            if (part == null) {
                console.printError("Partition " + Warehouse.makePartName(work.getPartSpec(), false).toString() + " does not exist.");
                return 1;
            }
            Path path = part.getDataLocation();
            FileSystem fs = path.getFileSystem(conf);
            FileStatus fstat = fs.getFileStatus(path);
            part.getParameters().put(HiveIndex.INDEX_TABLE_CREATETIME, Long.toString(fstat.getModificationTime()));
            db.alterPartition(tbl.getTableName(), part, null);
        } else {
            Path url = tbl.getPath();
            FileSystem fs = url.getFileSystem(conf);
            FileStatus fstat = fs.getFileStatus(url);
            tbl.getParameters().put(HiveIndex.INDEX_TABLE_CREATETIME, Long.toString(fstat.getModificationTime()));
            db.alterTable(tbl.getDbName() + "." + tbl.getTableName(), tbl, null);
        }
    } catch (Exception e) {
        e.printStackTrace();
        console.printError("Error changing index table/partition metadata " + e.getMessage());
        return 1;
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Hive(org.apache.hadoop.hive.ql.metadata.Hive) Table(org.apache.hadoop.hive.ql.metadata.Table) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem)
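Both branches of the task do the same thing: read the data location's modification time through the Hadoop FileSystem API and store it in a parameters map. A minimal sketch of that step against the local filesystem; the path and the parameter key below are placeholders (the real key is the HiveIndex.INDEX_TABLE_CREATETIME constant, whose value is not reproduced here):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MtimeParameterDemo {
    public static void main(String[] args) throws Exception {
        Path dataLocation = new Path("/tmp"); // stand-in for part.getDataLocation()
        FileSystem fs = FileSystem.getLocal(new Configuration());
        FileStatus fstat = fs.getFileStatus(dataLocation);

        // Record the directory's modification time, as the task does under
        // the HiveIndex.INDEX_TABLE_CREATETIME key (placeholder key below).
        Map<String, String> parameters = new HashMap<>();
        parameters.put("index.table.createtime", Long.toString(fstat.getModificationTime()));
        System.out.println(parameters);
    }
}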

Example 59 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class TableBasedIndexHandler method generateIndexBuildTaskList.

@Override
public List<Task<?>> generateIndexBuildTaskList(org.apache.hadoop.hive.ql.metadata.Table baseTbl, org.apache.hadoop.hive.metastore.api.Index index, List<Partition> indexTblPartitions, List<Partition> baseTblPartitions, org.apache.hadoop.hive.ql.metadata.Table indexTbl, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
    try {
        TableDesc desc = Utilities.getTableDesc(indexTbl);
        List<Partition> newBaseTblPartitions = new ArrayList<Partition>();
        List<Task<?>> indexBuilderTasks = new ArrayList<Task<?>>();
        if (!baseTbl.isPartitioned()) {
            // the table has no partitions, so build the index over the whole table
            Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, false, new PartitionDesc(desc, null), indexTbl.getTableName(), new PartitionDesc(Utilities.getTableDesc(baseTbl), null), baseTbl.getTableName(), indexTbl.getDbName());
            indexBuilderTasks.add(indexBuilder);
        } else {
            // check whether each index table partition still exists in the base table
            for (int i = 0; i < indexTblPartitions.size(); i++) {
                Partition indexPart = indexTblPartitions.get(i);
                Partition basePart = null;
                for (int j = 0; j < baseTblPartitions.size(); j++) {
                    if (baseTblPartitions.get(j).getName().equals(indexPart.getName())) {
                        basePart = baseTblPartitions.get(j);
                        newBaseTblPartitions.add(baseTblPartitions.get(j));
                        break;
                    }
                }
                if (basePart == null) {
                    throw new RuntimeException("Partitions of base table and index table are inconsistent.");
                }
                // for each partition, spawn a map reduce task.
                Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, true, new PartitionDesc(indexPart), indexTbl.getTableName(), new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
                indexBuilderTasks.add(indexBuilder);
            }
        }
        return indexBuilderTasks;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) Task(org.apache.hadoop.hive.ql.exec.Task) ArrayList(java.util.ArrayList) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
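The partition-matching loop above is O(n*m) in the sizes of the two partition lists. A sketch of the same name-based matching done with a one-pass lookup map, assuming only that partitions match by their getName() string as in the original (the Part record is a hypothetical stand-in for Hive's Partition):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionMatchDemo {
    // Hypothetical stand-in for org.apache.hadoop.hive.ql.metadata.Partition,
    // keeping only the name used for matching.
    record Part(String name) {}

    public static void main(String[] args) {
        List<Part> indexParts = List.of(new Part("ds=2024-01-01"), new Part("ds=2024-01-02"));
        List<Part> baseParts = List.of(new Part("ds=2024-01-02"), new Part("ds=2024-01-01"));

        // Index the base partitions by name once, instead of rescanning the
        // base list for every index partition as the nested loop does.
        Map<String, Part> baseByName = new HashMap<>();
        for (Part p : baseParts) {
            baseByName.put(p.name(), p);
        }

        List<Part> matchedBaseParts = new ArrayList<>();
        for (Part indexPart : indexParts) {
            Part basePart = baseByName.get(indexPart.name());
            if (basePart == null) {
                throw new RuntimeException("Partitions of base table and index table are inconsistent.");
            }
            matchedBaseParts.add(basePart);
        }
        System.out.println("matched " + matchedBaseParts.size() + " partitions");
    }
}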

Example 60 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class CompactIndexHandler method getIndexPredicateAnalyzer.

/**
   * Instantiate a new predicate analyzer suitable for determining
   * whether we can use an index, based on rules for indexes in
   * WHERE clauses that we support
   *
   * @return preconfigured predicate analyzer for WHERE queries
   */
private IndexPredicateAnalyzer getIndexPredicateAnalyzer(Index index, Set<Partition> queryPartitions) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
    analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
    // only return results for columns in this index
    List<FieldSchema> columnSchemas = index.getSd().getCols();
    for (FieldSchema column : columnSchemas) {
        analyzer.allowColumnName(column.getName());
    }
    // partitioned columns are treated as if they have indexes so that the partitions
    // are used during the index query generation
    partitionCols = new HashSet<String>();
    for (Partition part : queryPartitions) {
        if (part.getSpec().isEmpty()) {
            // an empty spec comes from an unpartitioned table, so there are no partition columns to add
            continue;
        }
        for (String column : part.getSpec().keySet()) {
            analyzer.allowColumnName(column);
            partitionCols.add(column);
        }
    }
    return analyzer;
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) Partition(org.apache.hadoop.hive.ql.metadata.Partition) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)
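The analyzer built above amounts to two whitelists: comparison operators it understands and column names it may push down (index columns plus partition columns). How such an analyzer splits a predicate into pushable search conditions and a residual filter can be sketched with plain sets; the Cond record and the split loop are illustrative, not the IndexPredicateAnalyzer API:

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class PredicateSplitDemo {
    // A toy "column OP constant" comparison standing in for an ExprNodeDesc leaf.
    record Cond(String column, String op, Object constant) {}

    public static void main(String[] args) {
        // The two whitelists the analyzer above is configured with:
        // five comparison operators, plus index and partition columns.
        Set<String> allowedOps = Set.of("=", "<", "<=", ">", ">=");
        Set<String> allowedCols = Set.of("key", "ds");

        List<Cond> predicate = List.of(
                new Cond("key", "=", 42),
                new Cond("ds", ">=", "2024-01-01"),
                new Cond("value", "like", "%x%")); // column and operator both unlisted

        List<Cond> pushed = new ArrayList<>();
        List<Cond> residual = new ArrayList<>();
        for (Cond c : predicate) {
            if (allowedOps.contains(c.op()) && allowedCols.contains(c.column())) {
                pushed.add(c); // would become an index search condition
            } else {
                residual.add(c); // stays in the query's filter
            }
        }
        System.out.println("pushed: " + pushed);
        System.out.println("residual: " + residual);
    }
}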

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 102
Table (org.apache.hadoop.hive.ql.metadata.Table): 56
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 48
ArrayList (java.util.ArrayList): 43
Path (org.apache.hadoop.fs.Path): 25
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 25
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 24
IOException (java.io.IOException): 18
HashMap (java.util.HashMap): 18
LinkedHashMap (java.util.LinkedHashMap): 18
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 18
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 18
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 17
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14
FileNotFoundException (java.io.FileNotFoundException): 12
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 11
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 11
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 11