Example 76 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class TestDbTxnManager, method addTableInput.

private void addTableInput() {
    ReadEntity re = new ReadEntity(newTable(false));
    readEntities.add(re);
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity)
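For context, a minimal sketch of the pattern this test fixture builds up: a ReadEntity simply wraps a Table (or Partition) and is collected into a set of read inputs, the same structure the semantic analyzers in the later examples populate via inputs.add(...). The buildTable helper below is hypothetical and stands in for the test's newTable(false) factory.

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

public class ReadEntitySketch {

    // Hypothetical stand-in for TestDbTxnManager.newTable(boolean).
    private static Table buildTable(String dbName, String tableName) {
        return new Table(dbName, tableName);
    }

    public static void main(String[] args) {
        Set<ReadEntity> readEntities = new HashSet<>();
        // Same pattern as addTableInput() above: wrap the table and record it as a read input.
        readEntities.add(new ReadEntity(buildTable("default", "src")));
        System.out.println("read inputs: " + readEntities);
    }
}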

Example 77 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class IndexWhereProcessor, method process.

/**
 * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    TableScanOperator operator = (TableScanOperator) nd;
    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    if (operatorDesc == null || !tsToIndices.containsKey(operator)) {
        return null;
    }
    List<Index> indexes = tsToIndices.get(operator);
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();
    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");
    if (predicate == null) {
        LOG.info("null predicate pushed down");
        return null;
    }
    LOG.info(predicate.getExprString());
    // check if we have indexes on all partitions in this table scan
    Set<Partition> queryPartitions;
    try {
        queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
        if (queryPartitions == null) {
            // partitions not covered
            return null;
        }
    } catch (HiveException e) {
        LOG.error("Fatal Error: problem accessing metastore", e);
        throw new SemanticException(e);
    }
    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
        return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    // make sure we have an index on the table being scanned
    TableDesc tblDesc = operator.getTableDesc();
    Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
    for (Index indexOnTable : indexes) {
        if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
            List<Index> newType = new ArrayList<Index>();
            newType.add(indexOnTable);
            indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
        } else {
            indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
        }
    }
    // choose the index type with the most indexes of the same type on the table
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    List<Index> bestIndexes = indexesByType.values().iterator().next();
    for (List<Index> indexTypes : indexesByType.values()) {
        if (bestIndexes.size() < indexTypes.size()) {
            bestIndexes = indexTypes;
        }
    }
    // rewrite index queries for the chosen index type
    HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
    tmpQueryContext.setQueryPartitions(queryPartitions);
    rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
    List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
    if (indexTasks != null && indexTasks.size() > 0) {
        queryContexts.put(bestIndexes.get(0), tmpQueryContext);
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
        // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
        Index chosenIndex = queryContexts.keySet().iterator().next();
        // modify the parse context to use indexing
        // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
        HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
        // prepare the map reduce job to use indexing
        MapWork work = currentTask.getWork().getMapWork();
        work.setInputformat(queryContext.getIndexInputFormat());
        work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
        // modify inputs based on index query
        Set<ReadEntity> inputs = pctx.getSemanticInputs();
        inputs.addAll(queryContext.getAdditionalSemanticInputs());
        List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
        // add dependencies so index query runs first
        insertIndexQuery(pctx, context, chosenRewrite);
    }
    return null;
}
Also used : HiveIndexQueryContext(org.apache.hadoop.hive.ql.index.HiveIndexQueryContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Index(org.apache.hadoop.hive.metastore.api.Index) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
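The middle of process() contains a self-contained step: bucket the candidate indexes by handler class, then keep the largest bucket. A hedged sketch of just that selection, using only the metastore Index API referenced above; the method name pickBestIndexes is mine, and unlike the original it tolerates an empty input list.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.Index;

public class IndexChoiceSketch {

    // Mirrors the "choose the index type with the most indexes of the same type" block above.
    static List<Index> pickBestIndexes(List<Index> indexes) {
        Map<String, List<Index>> indexesByType = new HashMap<>();
        for (Index indexOnTable : indexes) {
            indexesByType
                    .computeIfAbsent(indexOnTable.getIndexHandlerClass(), k -> new ArrayList<>())
                    .add(indexOnTable);
        }
        List<Index> best = new ArrayList<>();
        for (List<Index> sameType : indexesByType.values()) {
            if (sameType.size() > best.size()) {
                best = sameType;
            }
        }
        // Empty only if the input list was empty; the original code assumes at least one index.
        return best;
    }
}

As the TODO in the original notes (HIVE-2130), picking the largest group is a placeholder for a cost-based choice.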

Example 78 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class DDLSemanticAnalyzer, method analyzeDropIndex.

private void analyzeDropIndex(ASTNode ast) throws SemanticException {
    String indexName = unescapeIdentifier(ast.getChild(0).getText());
    String tableName = getUnescapedName((ASTNode) ast.getChild(1));
    boolean ifExists = (ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null);
    // we want to signal an error if the index doesn't exist and we're
    // configured not to ignore this
    boolean throwException = !ifExists && !HiveConf.getBoolVar(conf, ConfVars.DROPIGNORESNONEXISTENT);
    Table tbl = getTable(tableName, false);
    if (throwException && tbl == null) {
        throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
    }
    try {
        Index idx = db.getIndex(tableName, indexName);
    } catch (HiveException e) {
        if (!(e.getCause() instanceof NoSuchObjectException)) {
            throw new SemanticException(ErrorMsg.CANNOT_DROP_INDEX.getMsg("dropping index"), e);
        }
        if (throwException) {
            throw new SemanticException(ErrorMsg.INVALID_INDEX.getMsg(indexName));
        }
    }
    if (tbl != null) {
        inputs.add(new ReadEntity(tbl));
    }
    DropIndexDesc dropIdxDesc = new DropIndexDesc(indexName, tableName, throwException);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropIdxDesc), conf));
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) DropIndexDesc(org.apache.hadoop.hive.ql.plan.DropIndexDesc) HiveIndex(org.apache.hadoop.hive.ql.index.HiveIndex) Index(org.apache.hadoop.hive.metastore.api.Index) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
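The error-signalling rule at the top of analyzeDropIndex() is worth isolating: a missing index or table only raises an error when the statement has no IF EXISTS clause and the hive.exec.drop.ignorenonexistent safety valve (ConfVars.DROPIGNORESNONEXISTENT) is off. A minimal sketch of that decision, with the two inputs passed in as plain booleans rather than read from the AST and HiveConf:

public class DropIndexErrorRule {

    /**
     * Mirrors "boolean throwException = !ifExists && !HiveConf.getBoolVar(conf,
     * ConfVars.DROPIGNORESNONEXISTENT)" above: only complain about a missing
     * index/table when the user did not say IF EXISTS and the ignore-nonexistent
     * setting is disabled.
     */
    static boolean shouldThrowOnMissing(boolean ifExists, boolean ignoreNonExistent) {
        return !ifExists && !ignoreNonExistent;
    }

    public static void main(String[] args) {
        System.out.println(shouldThrowOnMissing(false, false)); // plain DROP INDEX: true
        System.out.println(shouldThrowOnMissing(true, false));  // DROP INDEX IF EXISTS: false
        System.out.println(shouldThrowOnMissing(false, true));  // ignore-nonexistent enabled: false
    }
}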

Example 79 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class ExportSemanticAnalyzer, method prepareExport.

// FIXME : Move to EximUtil - it's okay for this to stay here for a little while more till we finalize the statics
public static void prepareExport(ASTNode ast, URI toURI, TableSpec ts, ReplicationSpec replicationSpec, Hive db, HiveConf conf, Context ctx, List<Task<? extends Serializable>> rootTasks, HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs, Logger LOG) throws SemanticException {
    if (ts != null) {
        try {
            EximUtil.validateTable(ts.tableHandle);
            if (replicationSpec.isInReplicationScope() && ts.tableHandle.isTemporary()) {
                // No replication for temporary tables either
                ts = null;
            } else if (ts.tableHandle.isView()) {
                replicationSpec.setIsMetadataOnly(true);
            }
        } catch (SemanticException e) {
            // Under replication, ignore the failure; otherwise re-throw it.
            if (replicationSpec.isInReplicationScope()) {
                // null out ts so we can't use it.
                ts = null;
            } else {
                throw e;
            }
        }
    }
    try {
        FileSystem fs = FileSystem.get(toURI, conf);
        Path toPath = new Path(toURI.getScheme(), toURI.getAuthority(), toURI.getPath());
        try {
            FileStatus tgt = fs.getFileStatus(toPath);
            // target exists
            if (!tgt.isDir()) {
                throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not a directory : " + toURI));
            } else {
                FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
                if (files != null && files.length != 0) {
                    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not an empty directory : " + toURI));
                }
            }
        } catch (FileNotFoundException e) {
            // Target does not exist yet; nothing to validate, the export will create it.
        }
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
    }
    PartitionIterable partitions = null;
    try {
        replicationSpec.setCurrentReplicationState(String.valueOf(db.getMSC().getCurrentNotificationEventId().getEventId()));
        if ((ts != null) && (ts.tableHandle.isPartitioned())) {
            if (ts.specType == TableSpec.SpecType.TABLE_ONLY) {
                // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only
                if (replicationSpec.isMetadataOnly()) {
                    partitions = null;
                } else {
                    partitions = new PartitionIterable(db, ts.tableHandle, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                }
            } else {
                // PARTITIONS specified - partitions inside tableSpec
                partitions = new PartitionIterable(ts.partitions);
            }
        } else {
            // Either tableHandle isn't partitioned => null, or repl-export after ts becomes null => null.
            // or this is a noop-replication export, so we can skip looking at ptns.
            partitions = null;
        }
        Path path = new Path(ctx.getLocalTmpPath(), EximUtil.METADATA_NAME);
        EximUtil.createExportDump(FileSystem.getLocal(conf), path, (ts != null ? ts.tableHandle : null), partitions, replicationSpec);
        Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, path, new Path(toURI), conf);
        rootTasks.add(rTask);
        LOG.debug("_metadata file written into " + path.toString() + " and then copied to " + toURI.toString());
    } catch (Exception e) {
        throw new SemanticException(ErrorMsg.IO_ERROR.getMsg("Exception while writing out the local file"), e);
    }
    if (!(replicationSpec.isMetadataOnly() || (ts == null))) {
        Path parentPath = new Path(toURI);
        if (ts.tableHandle.isPartitioned()) {
            for (Partition partition : partitions) {
                Path fromPath = partition.getDataLocation();
                Path toPartPath = new Path(parentPath, partition.getName());
                Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf);
                rootTasks.add(rTask);
                inputs.add(new ReadEntity(partition));
            }
        } else {
            Path fromPath = ts.tableHandle.getDataLocation();
            Path toDataPath = new Path(parentPath, EximUtil.DATA_PATH_NAME);
            Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf);
            rootTasks.add(rTask);
            inputs.add(new ReadEntity(ts.tableHandle));
        }
        outputs.add(toWriteEntity(parentPath, conf));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException)
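The export-target validation near the top of prepareExport() is a reusable pattern: the destination either does not exist yet, or it must be an empty directory. A hedged sketch of just that check against the plain Hadoop FileSystem API; Hive's FileUtils.HIDDEN_FILES_PATH_FILTER is dropped here for brevity, so hidden files also count as non-empty.

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExportTargetCheck {

    /** Throws if toURI exists and is not an empty directory, mirroring prepareExport() above. */
    static void validateExportTarget(URI toURI, Configuration conf) throws IOException {
        FileSystem fs = FileSystem.get(toURI, conf);
        Path toPath = new Path(toURI.getScheme(), toURI.getAuthority(), toURI.getPath());
        try {
            FileStatus tgt = fs.getFileStatus(toPath);
            if (!tgt.isDirectory()) {
                throw new IOException("Target is not a directory: " + toURI);
            }
            FileStatus[] files = fs.listStatus(toPath);
            if (files != null && files.length != 0) {
                throw new IOException("Target is not an empty directory: " + toURI);
            }
        } catch (FileNotFoundException e) {
            // Target does not exist yet; the export will create it.
        }
    }
}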

Example 80 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class DDLTask, method renamePartition.

/**
 * Rename a partition in a table
 *
 * @param db
 *          Database to rename the partition.
 * @param renamePartitionDesc
 *          rename old Partition to new one.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
    String tableName = renamePartitionDesc.getTableName();
    LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
    if (!allowOperationInReplicationScope(db, tableName, oldPartSpec, renamePartitionDesc.getReplicationSpec())) {
        // In replication scope the rename is skipped when the partition is missing
        // or the existing table/partition is newer than this update.
        if (LOG.isDebugEnabled()) {
            LOG.debug("DDLTask: Rename Partition is skipped as table {} / partition {} is newer than update", tableName, FileUtils.makePartName(new ArrayList<>(oldPartSpec.keySet()), new ArrayList<>(oldPartSpec.values())));
        }
        return 0;
    }
    String[] names = Utilities.getDbTableName(tableName);
    if (Utils.isBootstrapDumpInProgress(db, names[0])) {
        LOG.error("DDLTask: Rename Partition not allowed as bootstrap dump in progress");
        throw new HiveException("Rename Partition: Not allowed as bootstrap dump in progress");
    }
    Table tbl = db.getTable(tableName);
    Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
    if (oldPart == null) {
        String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()), new ArrayList<String>(oldPartSpec.values()));
        throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
    }
    Partition part = db.getPartition(tbl, oldPartSpec, false);
    part.setValues(renamePartitionDesc.getNewPartSpec());
    db.renamePartition(tbl, oldPartSpec, part);
    Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
    work.getInputs().add(new ReadEntity(oldPart));
    // We've already obtained a lock on the table, don't lock the partition too
    addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
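The last two lines of renamePartition() show the general hook bookkeeping for DDL: the pre-rename partition is recorded as a ReadEntity and the post-rename partition as a WriteEntity with WriteType.DDL_NO_LOCK, because the table-level lock already covers it. A minimal sketch of that pattern on its own; the two sets stand in for work.getInputs() and the outputs maintained by addIfAbsentByName():

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;

public class RenameLineageSketch {

    static void recordRename(Partition oldPart, Partition newPart,
                             Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
        // The source partition is only read ...
        inputs.add(new ReadEntity(oldPart));
        // ... and the renamed partition is written; DDL_NO_LOCK skips a second lock
        // because the enclosing DDL already holds one on the table.
        outputs.add(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
    }
}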

Aggregations

ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 139
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 70
Table (org.apache.hadoop.hive.ql.metadata.Table): 69
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 31
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 29
ArrayList (java.util.ArrayList): 27
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 24
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 22
HashMap (java.util.HashMap): 16
Test (org.testng.annotations.Test): 16
Map (java.util.Map): 13
LinkedHashMap (java.util.LinkedHashMap): 12
Path (org.apache.hadoop.fs.Path): 12
List (java.util.List): 11
Database (org.apache.hadoop.hive.metastore.api.Database): 11
AtlasEntity (org.apache.atlas.model.instance.AtlasEntity): 10
Referenceable (org.apache.atlas.typesystem.Referenceable): 10
HashSet (java.util.HashSet): 9
FileNotFoundException (java.io.FileNotFoundException): 7