
Example 91 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class DDLSemanticAnalyzer, method analyzeAlterTablePartColType.

private void analyzeAlterTablePartColType(String[] qualified, ASTNode ast) throws SemanticException {
    // check if table exists.
    Table tab = getTable(qualified);
    inputs.add(new ReadEntity(tab));
    // validate the DDL is a valid operation on the table.
    validateAlterTableType(tab, AlterTableTypes.ALTERPARTITION, false);
    // Alter table ... partition column (column newtype) only takes one column at a time.
    // It must have a column name followed by a type.
    ASTNode colAst = (ASTNode) ast.getChild(0);
    FieldSchema newCol = new FieldSchema();
    // get column name
    String name = colAst.getChild(0).getText().toLowerCase();
    newCol.setName(unescapeIdentifier(name));
    // get column type
    ASTNode typeChild = (ASTNode) (colAst.getChild(1));
    newCol.setType(getTypeStringFromAST(typeChild));
    if (colAst.getChildCount() == 3) {
        newCol.setComment(unescapeSQLString(colAst.getChild(2).getText()));
    }
    // check if column is defined or not
    boolean fFoundColumn = false;
    for (FieldSchema col : tab.getTTable().getPartitionKeys()) {
        if (col.getName().compareTo(newCol.getName()) == 0) {
            fFoundColumn = true;
        }
    }
    // raise error if we could not find the column
    if (!fFoundColumn) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(newCol.getName()));
    }
    AlterTableAlterPartDesc alterTblAlterPartDesc = new AlterTableAlterPartDesc(getDotName(qualified), newCol);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblAlterPartDesc)));
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Table (org.apache.hadoop.hive.ql.metadata.Table), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), AlterTableAlterPartDesc (org.apache.hadoop.hive.ql.plan.AlterTableAlterPartDesc)
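The analyzer above registers the table as a ReadEntity and then checks that the column being retyped really is one of the table's partition keys; this path is exercised by a statement roughly of the form ALTER TABLE t PARTITION COLUMN (dt date). Below is a minimal, hypothetical sketch of just that membership check, pulled out of DDLSemanticAnalyzer for illustration; the class and method names are invented, and String.equals stands in for the analyzer's compareTo(...) == 0, which is equivalent for non-null names.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class PartColCheckSketch {

    // Returns true when newCol names an existing partition key (exact match,
    // mirroring the loop over getPartitionKeys() in the analyzer).
    static boolean isPartitionKey(List<FieldSchema> partitionKeys, FieldSchema newCol) {
        for (FieldSchema col : partitionKeys) {
            if (col.getName().equals(newCol.getName())) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        List<FieldSchema> keys = Arrays.asList(
                new FieldSchema("dt", "string", null),
                new FieldSchema("country", "string", null));
        // Retyping dt is allowed; a name that is not a partition key would be rejected.
        System.out.println(isPartitionKey(keys, new FieldSchema("dt", "date", null)));    // true
        System.out.println(isPartitionKey(keys, new FieldSchema("amount", "int", null))); // false
    }
}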

Example 92 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class DDLSemanticAnalyzer, method analyzeAlterTableTouch.

/**
 * Rewrite the metadata for one or more partitions in a table. Useful when
 * an external process modifies files on HDFS and you want the pre/post
 * hooks to be fired for the specified partition.
 *
 * @param ast
 *          The parsed command tree.
 * @throws SemanticException
 *           Parsing failed
 */
private void analyzeAlterTableTouch(String[] qualified, CommonTree ast) throws SemanticException {
    Table tab = getTable(qualified);
    validateAlterTableType(tab, AlterTableTypes.TOUCH);
    inputs.add(new ReadEntity(tab));
    // partition name to value
    List<Map<String, String>> partSpecs = getPartitionSpecs(tab, ast);
    if (partSpecs.size() == 0) {
        AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc(getDotName(qualified), null, AlterTableDesc.AlterTableTypes.TOUCH);
        outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_NO_LOCK));
        rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), touchDesc)));
    } else {
        addTablePartsOutputs(tab, partSpecs, WriteEntity.WriteType.DDL_NO_LOCK);
        for (Map<String, String> partSpec : partSpecs) {
            AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc(getDotName(qualified), partSpec, AlterTableDesc.AlterTableTypes.TOUCH);
            rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), touchDesc)));
        }
    }
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Table (org.apache.hadoop.hive.ql.metadata.Table), AlterTableSimpleDesc (org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), Map (java.util.Map), LinkedHashMap (java.util.LinkedHashMap), HashMap (java.util.HashMap), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
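The touch analyzer branches on whether the statement names specific partitions: with no partition spec it touches the table itself, otherwise it emits one TOUCH task per spec, where each spec is an ordered map from partition column name to value. The standalone sketch below, with invented names and values, illustrates the shape of the maps returned by getPartitionSpecs for a statement like ALTER TABLE t TOUCH PARTITION (dt='2024-01-01', country='us').

import java.util.LinkedHashMap;
import java.util.Map;

public class TouchPartSpecSketch {
    public static void main(String[] args) {
        // One "partition name to value" map per PARTITION (...) clause,
        // keeping the order in which the columns appear in the statement.
        Map<String, String> spec = new LinkedHashMap<>();
        spec.put("dt", "2024-01-01");
        spec.put("country", "us");
        System.out.println(spec); // {dt=2024-01-01, country=us}
    }
}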

Example 93 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

The class ReplicationSemanticAnalyzer, method analyzeReplStatus.

private void analyzeReplStatus(ASTNode ast) throws SemanticException {
    LOG.debug("ReplicationSemanticAnalyzer.analyzeReplStatus: " + String.valueOf(dbNameOrPattern) + "." + String.valueOf(tblNameOrPattern));
    String replLastId = null;
    try {
        Hive newDb;
        if (needNewdb) {
            newDb = Hive.get(conf, false);
        } else {
            newDb = db;
        }
        if (tblNameOrPattern != null) {
            // Checking for status of table
            Table tbl = newDb.getTable(dbNameOrPattern, tblNameOrPattern);
            if (tbl != null) {
                inputs.add(new ReadEntity(tbl));
                Map<String, String> params = tbl.getParameters();
                if (params != null && (params.containsKey(ReplicationSpec.KEY.CURR_STATE_ID.toString()))) {
                    replLastId = params.get(ReplicationSpec.KEY.CURR_STATE_ID.toString());
                }
            }
        } else {
            // Checking for status of a db
            Database database = newDb.getDatabase(dbNameOrPattern);
            if (database != null) {
                inputs.add(new ReadEntity(database));
                Map<String, String> params = database.getParameters();
                if (params != null && (params.containsKey(ReplicationSpec.KEY.CURR_STATE_ID.toString()))) {
                    replLastId = params.get(ReplicationSpec.KEY.CURR_STATE_ID.toString());
                }
            }
        }
    } catch (HiveException e) {
        // TODO : simple wrap & rethrow for now, clean up with error codes
        throw new SemanticException(e);
    }
    prepareReturnValues(Collections.singletonList(replLastId), "last_repl_id#string");
    setFetchTask(createFetchTask("last_repl_id#string"));
    LOG.debug("ReplicationSemanticAnalyzer.analyzeReplStatus: writing repl.last.id={} out to {}", String.valueOf(replLastId), ctx.getResFile(), conf);
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Hive (org.apache.hadoop.hive.ql.metadata.Hive), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), Database (org.apache.hadoop.hive.metastore.api.Database)
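REPL STATUS resolves either a table or a whole database, registers it as a ReadEntity, and then reads the last replicated state id out of the object's parameters. The sketch below isolates just that parameter lookup; it assumes, based on the debug message above, that ReplicationSpec.KEY.CURR_STATE_ID resolves to the property name repl.last.id, which is hard-coded here purely for illustration.

import java.util.HashMap;
import java.util.Map;

public class ReplStatusLookupSketch {
    public static void main(String[] args) {
        // Parameters as they might appear on a replicated table or database.
        Map<String, String> params = new HashMap<>();
        params.put("repl.last.id", "42");

        // Mirrors the null-safe containsKey/get pattern in analyzeReplStatus.
        String replLastId = null;
        if (params != null && params.containsKey("repl.last.id")) {
            replLastId = params.get("repl.last.id");
        }
        System.out.println(replLastId); // 42; stays null if the object was never replicated
    }
}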

Example 94 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project atlas by apache.

The class CreateHiveProcess, method getEntities.

public AtlasEntitiesWithExtInfo getEntities() throws Exception {
    AtlasEntitiesWithExtInfo ret = null;
    if (!skipProcess()) {
        List<AtlasEntity> inputs = new ArrayList<>();
        List<AtlasEntity> outputs = new ArrayList<>();
        HookContext hiveContext = getHiveContext();
        Set<String> processedNames = new HashSet<>();
        ret = new AtlasEntitiesWithExtInfo();
        if (hiveContext.getInputs() != null) {
            for (ReadEntity input : hiveContext.getInputs()) {
                String qualifiedName = getQualifiedName(input);
                if (qualifiedName == null || !processedNames.add(qualifiedName)) {
                    continue;
                }
                AtlasEntity entity = getInputOutputEntity(input, ret);
                if (entity != null) {
                    inputs.add(entity);
                }
            }
        }
        if (hiveContext.getOutputs() != null) {
            for (WriteEntity output : hiveContext.getOutputs()) {
                String qualifiedName = getQualifiedName(output);
                if (qualifiedName == null || !processedNames.add(qualifiedName)) {
                    continue;
                }
                AtlasEntity entity = getInputOutputEntity(output, ret);
                if (entity != null) {
                    outputs.add(entity);
                }
            }
        }
        if (!inputs.isEmpty() || !outputs.isEmpty()) {
            AtlasEntity process = getHiveProcessEntity(inputs, outputs);
            ret.addEntity(process);
            processColumnLineage(process, ret);
            addProcessedEntities(ret);
        } else {
            ret = null;
        }
    }
    return ret;
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), AtlasEntity (org.apache.atlas.model.instance.AtlasEntity), AtlasEntitiesWithExtInfo (org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo), ArrayList (java.util.ArrayList), HookContext (org.apache.hadoop.hive.ql.hooks.HookContext), AtlasHiveHookContext (org.apache.atlas.hive.hook.AtlasHiveHookContext), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), HashSet (java.util.HashSet)
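The hook walks the query's ReadEntity and WriteEntity sets and keeps at most one Atlas entity per qualified name, relying on Set.add returning false for names it has already seen. The fragment below isolates that de-duplication idiom with invented names; it is a sketch of the pattern, not Atlas code.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class QualifiedNameDedupSketch {
    public static void main(String[] args) {
        List<String> qualifiedNames = Arrays.asList(
                "db.tbl1@cluster", "db.tbl2@cluster", "db.tbl1@cluster", null);

        Set<String> processedNames = new HashSet<>();
        List<String> kept = new ArrayList<>();
        for (String name : qualifiedNames) {
            // Skip unnamed entities and anything already processed;
            // add() returns false when the name was seen before.
            if (name == null || !processedNames.add(name)) {
                continue;
            }
            kept.add(name);
        }
        System.out.println(kept); // [db.tbl1@cluster, db.tbl2@cluster]
    }
}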

Example 95 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project atlas by apache.

The class HiveHookIT, method testExportImportUnPartitionedTable.

@Test
public void testExportImportUnPartitionedTable() throws Exception {
    String tableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    String filename = "pfile://" + mkdir("exportUnPartitioned");
    String query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    validateHDFSPaths(processEntity, OUTPUTS, filename);
    validateInputTables(processEntity, inputs);
    // Import
    String importTableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
    // Should create another process
    filename = "pfile://" + mkdir("export2UnPartitioned");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    inputs = getInputs(tableName, Entity.Type.TABLE);
    outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    // import again should create another process
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), AtlasEntity (org.apache.atlas.model.instance.AtlasEntity), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)

Aggregations

ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 139
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 70
Table (org.apache.hadoop.hive.ql.metadata.Table): 69
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 31
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 29
ArrayList (java.util.ArrayList): 27
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 24
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 22
HashMap (java.util.HashMap): 16
Test (org.testng.annotations.Test): 16
Map (java.util.Map): 13
LinkedHashMap (java.util.LinkedHashMap): 12
Path (org.apache.hadoop.fs.Path): 12
List (java.util.List): 11
Database (org.apache.hadoop.hive.metastore.api.Database): 11
AtlasEntity (org.apache.atlas.model.instance.AtlasEntity): 10
Referenceable (org.apache.atlas.typesystem.Referenceable): 10
HashSet (java.util.HashSet): 9
FileNotFoundException (java.io.FileNotFoundException): 7